source: CIVL/examples/mpi-omp/AMG2013/parcsr_ls/par_relax_more.c

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 75.6 KB
Line 
1/*BHEADER**********************************************************************
2 * Copyright (c) 2008, Lawrence Livermore National Security, LLC.
3 * Produced at the Lawrence Livermore National Laboratory.
4 * This file is part of HYPRE. See file COPYRIGHT for details.
5 *
6 * HYPRE is free software; you can redistribute it and/or modify it under the
7 * terms of the GNU Lesser General Public License (as published by the Free
8 * Software Foundation) version 2.1 dated February 1999.
9 *
10 * $Revision: 2.4 $
11 ***********************************************************************EHEADER*/
12
13
14
15
16/******************************************************************************
17 *
18 * a few more relaxation schemes: Chebychev, FCF-Jacobi, CG and Steepest
19 * Descent
20 *
21 *****************************************************************************/
22
23#include "headers.h"
24#include "float.h"
25
26#define DBL_EPSILON 2.2204460492503131e-16
27
28int hypre_LINPACKcgtql1(int*,double *,double *,int *);
29
30/******************************************************************************
31 *
32 *use max norm to estimate largest eigenvalue
33 *
34 *****************************************************************************/
35
36
37int hypre_ParCSRMaxEigEstimate(hypre_ParCSRMatrix *A, /* matrix to relax with */
38 int scale, /* scale by diagonal?*/
39 double *max_eig)
40{
41
42 double e_max;
43 double row_sum, max_norm;
44 double *col_val;
45 double temp;
46 double diag_value;
47
48 int pos_diag, neg_diag;
49 HYPRE_BigInt start_row, end_row;
50 int row_length;
51 HYPRE_BigInt *col_ind;
52 int j;
53 HYPRE_BigInt i;
54
55
56 /* estimate with the inf-norm of A - should be ok for SPD matrices */
57
58 start_row = hypre_ParCSRMatrixFirstRowIndex(A);
59 end_row = hypre_ParCSRMatrixLastRowIndex(A);
60
61 max_norm = 0.0;
62
63 pos_diag = neg_diag = 0;
64
65 for ( i = start_row; i <= end_row; i++ )
66 {
67 HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) A, i, &row_length, &col_ind, &col_val);
68
69 row_sum = 0.0;
70
71 for (j = 0; j < row_length; j++)
72 {
73 if (j==0) diag_value = fabs(col_val[j]);
74
75 row_sum += fabs(col_val[j]);
76
77 if ( col_ind[j] == i && col_val[j] > 0.0 ) pos_diag++;
78 if ( col_ind[j] == i && col_val[j] < 0.0 ) neg_diag++;
79 }
80 if (scale)
81 {
82 if (diag_value != 0.0)
83 row_sum = row_sum/diag_value;
84 }
85
86
87 if ( row_sum > max_norm ) max_norm = row_sum;
88
89 HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) A, i, &row_length, &col_ind, &col_val);
90 }
91
92 /* get max across procs */
93 MPI_Allreduce(&max_norm, &temp, 1, MPI_DOUBLE, MPI_MAX, hypre_ParCSRMatrixComm(A));
94 max_norm = temp;
95
96 /* from Charles */
97 if ( pos_diag == 0 && neg_diag > 0 ) max_norm = - max_norm;
98
99 /* eig estimates */
100 e_max = max_norm;
101
102 /* return */
103 *max_eig = e_max;
104
105 return hypre_error_flag;
106
107}
108
109/******************************************************************************
110 use CG to get the eigenvalue estimate
111 scale means get eig est of (D^{-1/2} A D^{-1/2}
112******************************************************************************/
113
114int hypre_ParCSRMaxEigEstimateCG(hypre_ParCSRMatrix *A, /* matrix to relax with */
115 int scale, /* scale by diagonal?*/
116 int max_iter,
117 double *max_eig,
118 double *min_eig)
119{
120
121 int i, j, err;
122
123 hypre_ParVector *p;
124 hypre_ParVector *s;
125 hypre_ParVector *r;
126 hypre_ParVector *ds;
127 hypre_ParVector *u;
128
129
130 double *tridiag;
131 double *trioffd;
132
133 double lambda_max, max_row_sum;
134
135 double beta, gamma = 0.0, alpha, sdotp, gamma_old, alphainv;
136
137 double diag;
138
139 double lambda_min;
140
141 double *s_data, *p_data, *ds_data, *u_data;
142
143 int local_size = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A));
144
145 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
146 double *A_diag_data = hypre_CSRMatrixData(A_diag);
147 int *A_diag_i = hypre_CSRMatrixI(A_diag);
148
149
150 /* check the size of A - don't iterate more than the size */
151 HYPRE_BigInt size = hypre_ParCSRMatrixGlobalNumRows(A);
152
153 if (size < (HYPRE_BigInt) max_iter)
154 max_iter = (int) size;
155
156 /* create some temp vectors: p, s, r , ds, u*/
157
158 r = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
159 hypre_ParCSRMatrixGlobalNumRows(A),
160 hypre_ParCSRMatrixRowStarts(A));
161 hypre_ParVectorInitialize(r);
162 hypre_ParVectorSetPartitioningOwner(r,0);
163
164 p = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
165 hypre_ParCSRMatrixGlobalNumRows(A),
166 hypre_ParCSRMatrixRowStarts(A));
167 hypre_ParVectorInitialize(p);
168 hypre_ParVectorSetPartitioningOwner(p,0);
169
170
171 s = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
172 hypre_ParCSRMatrixGlobalNumRows(A),
173 hypre_ParCSRMatrixRowStarts(A));
174 hypre_ParVectorInitialize(s);
175 hypre_ParVectorSetPartitioningOwner(s,0);
176
177
178 ds = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
179 hypre_ParCSRMatrixGlobalNumRows(A),
180 hypre_ParCSRMatrixRowStarts(A));
181 hypre_ParVectorInitialize(ds);
182 hypre_ParVectorSetPartitioningOwner(ds,0);
183
184 u = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
185 hypre_ParCSRMatrixGlobalNumRows(A),
186 hypre_ParCSRMatrixRowStarts(A));
187 hypre_ParVectorInitialize(u);
188 hypre_ParVectorSetPartitioningOwner(u,0);
189
190
191 /* point to local data */
192 s_data = hypre_VectorData(hypre_ParVectorLocalVector(s));
193 p_data = hypre_VectorData(hypre_ParVectorLocalVector(p));
194 ds_data = hypre_VectorData(hypre_ParVectorLocalVector(ds));
195 u_data = hypre_VectorData(hypre_ParVectorLocalVector(u));
196
197 /* make room for tri-diag matrix */
198 tridiag = hypre_CTAlloc(double, max_iter+1);
199 trioffd = hypre_CTAlloc(double, max_iter+1);
200 for (i=0; i < max_iter + 1; i++)
201 {
202 tridiag[i] = 0;
203 trioffd[i] = 0;
204 }
205
206
207 /* set residual to random */
208 hypre_ParVectorSetRandomValues(r,1);
209
210 if (scale)
211 {
212 for (i = 0; i < local_size; i++)
213 {
214 diag = A_diag_data[A_diag_i[i]];
215 ds_data[i] = 1/sqrt(diag);
216 }
217
218 }
219 else
220 {
221 /* set ds to 1 */
222 hypre_ParVectorSetConstantValues(ds,1.0);
223 }
224
225
226 /* gamma = <r,Cr> */
227 gamma = hypre_ParVectorInnerProd(r,p);
228
229 /* for the initial filling of the tridiag matrix */
230 beta = 1.0;
231 max_row_sum = 0.0;
232
233 i = 0;
234 while (i < max_iter)
235 {
236
237 /* s = C*r */
238 /* TO DO: C = diag scale */
239 hypre_ParVectorCopy(r, s);
240
241 /*gamma = <r,Cr> */
242 gamma_old = gamma;
243 gamma = hypre_ParVectorInnerProd(r,s);
244
245 if (i==0)
246 {
247 beta = 1.0;
248 /* p_0 = C*r */
249 hypre_ParVectorCopy(s, p);
250 }
251 else
252 {
253 /* beta = gamma / gamma_old */
254 beta = gamma / gamma_old;
255
256 /* p = s + beta p */
257#ifdef HYPRE_USING_OPENMP
258#pragma omp parallel for private(j) schedule(static)
259#endif
260 for (j=0; j < local_size; j++)
261 {
262 p_data[j] = s_data[j] + beta*p_data[j];
263 }
264 }
265
266 if (scale)
267 {
268 /* s = D^{-1/2}A*D^{-1/2}*p */
269 for (j = 0; j < local_size; j++)
270 {
271 u_data[j] = ds_data[j] * p_data[j];
272 }
273 hypre_ParCSRMatrixMatvec(1.0, A, u, 0.0, s);
274 for (j = 0; j < local_size; j++)
275 {
276 s_data[j] = ds_data[j] * s_data[j];
277 }
278
279
280 }
281 else
282 {
283 /* s = A*p */
284 hypre_ParCSRMatrixMatvec(1.0, A, p, 0.0, s);
285 }
286
287 /* <s,p> */
288 sdotp = hypre_ParVectorInnerProd(s,p);
289
290 /* alpha = gamma / <s,p> */
291 alpha = gamma/sdotp;
292
293 /* get tridiagonal matrix */
294 alphainv = 1.0/alpha;
295
296 tridiag[i+1] = alphainv;
297 tridiag[i] *= beta;
298 tridiag[i] += alphainv;
299
300 trioffd[i+1] = alphainv;
301 trioffd[i] *= sqrt(beta);
302
303 /* x = x + alpha*p */
304 /* don't need */
305
306 /* r = r - alpha*s */
307 hypre_ParVectorAxpy( -alpha, s, r);
308
309 i++;
310
311 }
312
313 /* eispack routine - eigenvalues return in tridiag and ordered*/
314 hypre_LINPACKcgtql1(&i,tridiag,trioffd,&err);
315
316 lambda_max = tridiag[i-1];
317 lambda_min = tridiag[0];
318 /* printf("linpack max eig est = %g\n", lambda_max);*/
319 /* printf("linpack min eig est = %g\n", lambda_min);*/
320
321
322 hypre_ParVectorDestroy(r);
323 hypre_ParVectorDestroy(s);
324 hypre_ParVectorDestroy(p);
325 hypre_ParVectorDestroy(ds);
326 hypre_ParVectorDestroy(u);
327
328 /* return */
329 *max_eig = lambda_max;
330 *min_eig = lambda_min;
331
332 return hypre_error_flag;
333
334}
335
336/******************************************************************************
337
338
339Chebyshev relaxation - iterative implementation
340 (See Saad "Iterative Methods for Sparse Systems", Alg. 12.1
341 plus we can scale residual by inv(M) = 1/diag(A) so that we have Chebyshev
342 accelerated jacobi)
343
344
345NOT USED CURRENTLY
346
347******************************************************************************/
348
349int hypre_ParCSRRelax_Cheby3(hypre_ParCSRMatrix *A, /* matrix to relax with */
350 hypre_ParVector *f, /* right-hand side */
351 double max_eig, /* u.b = max. e-val est.*1.1 */
352 double eig_ratio, /* l.b = max_eig/eig ratio */
353 int order, /* polynomial order */
354 hypre_ParVector *u, /* initial/updated approximation */
355 hypre_ParVector *v /* temporary vector */,
356 hypre_ParVector *v2 /*another temp vector */ )
357{
358
359 /* See Saad "Iterative Methods for Sparse Systems", Alg. 12.1 */
360 /* plus we can scale residual by inv(M) = 1/diag(A) so that we have Chebyshev
361 accelerated jacobi */
362
363 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
364 double *A_diag_data = hypre_CSRMatrixData(A_diag);
365 int *A_diag_i = hypre_CSRMatrixI(A_diag);
366
367 double *u_data = hypre_VectorData(hypre_ParVectorLocalVector(u));
368 double *v_data = hypre_VectorData(hypre_ParVectorLocalVector(v));
369
370 double *dk = hypre_VectorData(hypre_ParVectorLocalVector(v2));
371
372 double theta, delta, sigma;
373 double p_k, p_kp1, temp1, temp2, diag, scale;
374 double upper_bound, lower_bound;
375
376 int i, j;
377 int num_rows = hypre_CSRMatrixNumRows(A_diag);
378
379
380 /* make sure we are large enough - Adams et al. 2003 */
381 upper_bound = max_eig * 1.1;
382 lower_bound = max_eig/eig_ratio;
383
384
385 /* parameters */
386 theta = (upper_bound + lower_bound)/2;
387 delta = (upper_bound - lower_bound)/2;
388 sigma = theta/delta;
389
390 /* set v = f */
391 hypre_ParVectorCopy(f, v);
392
393 /* get residual: v = f-A*u */
394 hypre_ParCSRMatrixMatvec(-1.0, A, u, 1.0, v);
395
396 /* p_0*/
397 p_k = 1/sigma;
398
399 /*first order */
400 temp1 = 1/theta;
401
402 /*d_0* = 1/theta * inv(M)r_0 - M is Jacobi*/
403 /* x_1 = x_0 + d_0 */
404#ifdef HYPRE_USING_OPENMP
405#pragma omp parallel for private(i,diag,scale) schedule(static)
406#endif
407 for (i = 0; i < num_rows; i++)
408 {
409 diag = A_diag_data[A_diag_i[i]];
410
411 scale = temp1/diag;
412 dk[i] = scale*v_data[i];
413 u_data[i] += dk[i];
414
415 }
416
417 /* higher order */
418 for (j = 1; j < order; j++)
419 {
420 /* get residual: v = f-A*u */
421 hypre_ParVectorCopy(f, v);
422 hypre_ParCSRMatrixMatvec(-1.0, A, u, 1.0, v);
423
424 p_kp1 = 1.0/(2.0*sigma - p_k);
425 temp1 = p_kp1*p_k;
426 temp2 = 2.0*p_kp1/delta;
427#ifdef HYPRE_USING_OPENMP
428#pragma omp parallel for private(i,diag,scale) schedule(static)
429#endif
430 for (i = 0; i < num_rows; i++)
431 {
432 diag = A_diag_data[A_diag_i[i]];
433
434 scale = temp2/diag;
435 dk[i] = temp1*dk[i] + scale*v_data[i];
436 u_data[i] += dk[i];
437 }
438 p_k = p_kp1;
439 }
440
441
442 return hypre_error_flag;
443
444
445}
446
447
448
449/******************************************************************************
450
451Chebyshev relaxation
452
453
454Can specify order 1-4 (this is the order of the resid polynomial)- here we
455explicitly code the coefficients (instead of
456iteratively determining)
457
458
459variant 0: standard chebyshev
460this is rlx 11 if scale = 0, and 16 if scale == 1
461
462variant 1: modified cheby: T(t)* f(t) where f(t) = (1-b/t)
463this is rlx 15 if scale = 0, and 17 if scale == 1
464
465ratio indicates the percentage of the whole spectrum to use (so .5
466means half, and .1 means 10percent)
467
468
469*******************************************************************************/
470
471int hypre_ParCSRRelax_Cheby(hypre_ParCSRMatrix *A, /* matrix to relax with */
472 hypre_ParVector *f, /* right-hand side */
473 double max_eig,
474 double min_eig,
475 double eig_ratio,
476 int order, /* polynomial order */
477 int scale, /* scale by diagonal?*/
478 int variant,
479 hypre_ParVector *u, /* initial/updated approximation */
480 hypre_ParVector *v /* temporary vector */,
481 hypre_ParVector *r /*another temp vector */ )
482{
483
484
485 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
486 double *A_diag_data = hypre_CSRMatrixData(A_diag);
487 int *A_diag_i = hypre_CSRMatrixI(A_diag);
488
489 double *u_data = hypre_VectorData(hypre_ParVectorLocalVector(u));
490 double *f_data = hypre_VectorData(hypre_ParVectorLocalVector(f));
491 double *v_data = hypre_VectorData(hypre_ParVectorLocalVector(v));
492
493 double *r_data = hypre_VectorData(hypre_ParVectorLocalVector(r));
494
495 double theta, delta;
496
497 double den;
498 double upper_bound, lower_bound;
499
500 int i, j;
501 int num_rows = hypre_CSRMatrixNumRows(A_diag);
502
503 double coefs[5];
504 double mult;
505 double *orig_u;
506
507 double tmp_d;
508
509 int cheby_order;
510
511 double *ds_data, *tmp_data;
512 double diag;
513
514 hypre_ParVector *ds;
515 hypre_ParVector *tmp_vec;
516
517 /* u = u + p(A)r */
518
519 if (order > 4)
520 order = 4;
521 if (order < 1)
522 order = 1;
523
524 /* we are using the order of p(A) */
525 cheby_order = order -1;
526
527 /* make sure we are large enough - Adams et al. 2003 */
528 upper_bound = max_eig * 1.1;
529 /* lower_bound = max_eig/eig_ratio; */
530 lower_bound = (upper_bound - min_eig)* eig_ratio + min_eig;
531
532
533 /* theta and delta */
534 theta = (upper_bound + lower_bound)/2;
535 delta = (upper_bound - lower_bound)/2;
536
537 if (variant == 1 )
538 {
539 switch ( cheby_order ) /* these are the corresponding cheby polynomials: u = u_o + s(A)r_0 - so order is
540 one less that resid poly: r(t) = 1 - t*s(t) */
541 {
542 case 0:
543 coefs[0] = 1.0/theta;
544
545 break;
546
547 case 1: /* (del - t + 2*th)/(th^2 + del*th) */
548 den = (theta*theta + delta*theta);
549
550 coefs[0] = (delta + 2*theta)/den;
551 coefs[1] = -1.0/den;
552
553 break;
554
555 case 2: /* (4*del*th - del^2 - t*(2*del + 6*th) + 2*t^2 + 6*th^2)/(2*del*th^2 - del^2*th - del^3 + 2*th^3)*/
556 den = 2*delta*theta*theta - delta*delta*theta - pow(delta,3) + 2*pow(theta,3);
557
558 coefs[0] = (4*delta*theta - pow(delta,2) + 6*pow(theta,2))/den;
559 coefs[1] = -(2*delta + 6*theta)/den;
560 coefs[2] = 2/den;
561
562 break;
563
564 case 3: /* -(6*del^2*th - 12*del*th^2 - t^2*(4*del + 16*th) + t*(12*del*th - 3*del^2 + 24*th^2) + 3*del^3 + 4*t^3 - 16*th^3)/(4*del*th^3 - 3*del^2*th^2 - 3*del^3*th + 4*th^4)*/
565 den = - (4*delta*pow(theta,3) - 3*pow(delta,2)*pow(theta,2) - 3*pow(delta,3)*theta + 4*pow(theta,4) );
566
567 coefs[0] = (6*pow(delta,2)*theta - 12*delta*pow(theta,2) + 3*pow(delta,3) - 16*pow(theta,3) )/den;
568 coefs[1] = (12*delta*theta - 3*pow(delta,2) + 24*pow(theta,2))/den;
569 coefs[2] = -( 4*delta + 16*theta)/den;
570 coefs[3] = 4/den;
571
572 break;
573 }
574 }
575
576 else /* standard chebyshev */
577 {
578
579 switch ( cheby_order ) /* these are the corresponding cheby polynomials: u = u_o + s(A)r_0 - so order is
580 one less thatn resid poly: r(t) = 1 - t*s(t) */
581 {
582 case 0:
583 coefs[0] = 1.0/theta;
584 break;
585
586 case 1: /* ( 2*t - 4*th)/(del^2 - 2*th^2) */
587 den = delta*delta - 2*theta*theta;
588
589 coefs[0] = -4*theta/den;
590 coefs[1] = 2/den;
591
592 break;
593
594 case 2: /* (3*del^2 - 4*t^2 + 12*t*th - 12*th^2)/(3*del^2*th - 4*th^3)*/
595 den = 3*(delta*delta)*theta - 4*(theta*theta*theta);
596
597 coefs[0] = (3*delta*delta - 12 *theta*theta)/den;
598 coefs[1] = 12*theta/den;
599 coefs[2] = -4/den;
600
601 break;
602
603 case 3: /*(t*(8*del^2 - 48*th^2) - 16*del^2*th + 32*t^2*th - 8*t^3 + 32*th^3)/(del^4 - 8*del^2*th^2 + 8*th^4)*/
604 den = pow(delta,4) - 8*delta*delta*theta*theta + 8*pow(theta,4);
605
606 coefs[0] = (32*pow(theta,3)- 16*delta*delta*theta)/den;
607 coefs[1] = (8*delta*delta - 48*theta*theta)/den;
608 coefs[2] = 32*theta/den;
609 coefs[3] = -8/den;
610
611 break;
612 }
613 }
614
615 orig_u = hypre_CTAlloc(double, num_rows);
616
617 if (!scale)
618 {
619 /* get residual: r = f - A*u */
620 hypre_ParVectorCopy(f, r);
621 hypre_ParCSRMatrixMatvec(-1.0, A, u, 1.0, r);
622
623
624
625 for ( i = 0; i < num_rows; i++ )
626 {
627 orig_u[i] = u_data[i];
628 u_data[i] = r_data[i] * coefs[cheby_order];
629 }
630 for (i = cheby_order - 1; i >= 0; i-- )
631 {
632 hypre_ParCSRMatrixMatvec(1.0, A, u, 0.0, v);
633 mult = coefs[i];
634
635#ifdef HYPRE_USING_OPENMP
636#pragma omp parallel for private(j) schedule(static)
637#endif
638
639 for ( j = 0; j < num_rows; j++ )
640 {
641 u_data[j] = mult * r_data[j] + v_data[j];
642 }
643
644 }
645
646#ifdef HYPRE_USING_OPENMP
647#pragma omp parallel for private(i) schedule(static)
648#endif
649 for ( i = 0; i < num_rows; i++ )
650 {
651 u_data[i] = orig_u[i] + u_data[i];
652 }
653
654
655 }
656 else /* scaling! */
657 {
658
659 /*grab 1/sqrt(diagonal) */
660
661 ds = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
662 hypre_ParCSRMatrixGlobalNumRows(A),
663 hypre_ParCSRMatrixRowStarts(A));
664 hypre_ParVectorInitialize(ds);
665 hypre_ParVectorSetPartitioningOwner(ds,0);
666 ds_data = hypre_VectorData(hypre_ParVectorLocalVector(ds));
667
668 tmp_vec = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
669 hypre_ParCSRMatrixGlobalNumRows(A),
670 hypre_ParCSRMatrixRowStarts(A));
671 hypre_ParVectorInitialize(tmp_vec);
672 hypre_ParVectorSetPartitioningOwner(tmp_vec,0);
673 tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(tmp_vec));
674
675 /* get ds_data and get scaled residual: r = D^(-1/2)f -
676 * D^(-1/2)A*u */
677
678
679#ifdef HYPRE_USING_OPENMP
680#pragma omp parallel for private(j,diag) schedule(static)
681#endif
682 for (j = 0; j < num_rows; j++)
683 {
684 diag = A_diag_data[A_diag_i[j]];
685 ds_data[j] = 1/sqrt(diag);
686
687 r_data[j] = ds_data[j] * f_data[j];
688 }
689
690 hypre_ParCSRMatrixMatvec(-1.0, A, u, 0.0, tmp_vec);
691#ifdef HYPRE_USING_OPENMP
692#pragma omp parallel for private(j) schedule(static)
693#endif
694 for ( j = 0; j < num_rows; j++ )
695 {
696 r_data[j] += ds_data[j] * tmp_data[j];
697 }
698
699 /* save original u, then start
700 the iteration by multiplying r by the cheby coef.*/
701
702#ifdef HYPRE_USING_OPENMP
703#pragma omp parallel for private(j) schedule(static)
704#endif
705 for ( j = 0; j < num_rows; j++ )
706 {
707 orig_u[j] = u_data[j]; /* orig, unscaled u */
708
709 u_data[j] = r_data[j] * coefs[cheby_order];
710 }
711
712 /* now do the other coefficients */
713 for (i = cheby_order - 1; i >= 0; i-- )
714 {
715 /* v = D^(-1/2)AD^(-1/2)u */
716#ifdef HYPRE_USING_OPENMP
717#pragma omp parallel for private(j) schedule(static)
718#endif
719 for ( j = 0; j < num_rows; j++ )
720 {
721 tmp_data[j] = ds_data[j] * u_data[j];
722 }
723 hypre_ParCSRMatrixMatvec(1.0, A, tmp_vec, 0.0, v);
724
725 /* u_new = coef*r + v*/
726 mult = coefs[i];
727
728#ifdef HYPRE_USING_OPENMP
729#pragma omp parallel for private(j,tmp_d) schedule(static)
730#endif
731 for ( j = 0; j < num_rows; j++ )
732 {
733 tmp_d = ds_data[j]* v_data[j];
734 u_data[j] = mult * r_data[j] + tmp_d;
735 }
736
737 } /* end of cheby_order loop */
738
739
740 /* now we have to scale u_data before adding it to u_orig*/
741
742
743#ifdef HYPRE_USING_OPENMP
744#pragma omp parallel for private(j) schedule(static)
745#endif
746 for ( j = 0; j < num_rows; j++ )
747 {
748 u_data[j] = orig_u[j] + ds_data[j]*u_data[j];
749 }
750
751 hypre_ParVectorDestroy(ds);
752 hypre_ParVectorDestroy(tmp_vec);
753
754
755 }/* end of scaling code */
756
757
758
759 hypre_TFree(orig_u);
760
761
762
763
764 return hypre_error_flag;
765
766
767}
768
769/*--------------------------------------------------------------------------
770 * hypre_BoomerAMGRelax_FCFJacobi
771 *--------------------------------------------------------------------------*/
772
773int hypre_BoomerAMGRelax_FCFJacobi( hypre_ParCSRMatrix *A,
774 hypre_ParVector *f,
775 int *cf_marker,
776 double relax_weight,
777 hypre_ParVector *u,
778 hypre_ParVector *Vtemp)
779{
780
781 int i;
782 int relax_points[3];
783 int relax_type = 0;
784
785 hypre_ParVector *Ztemp = NULL;
786
787
788 relax_points[0] = -1; /*F */
789 relax_points[1] = 1; /*C */
790 relax_points[2] = -1; /*F */
791
792 /* if we are on the coarsest level ,the cf_marker will be null
793 and we just do one sweep regular jacobi */
794 if (cf_marker == NULL)
795 {
796 hypre_BoomerAMGRelax(A,
797 f,
798 cf_marker,
799 relax_type,
800 0,
801 relax_weight,
802 0.0,
803 NULL,
804 u,
805 Vtemp, Ztemp);
806 }
807 else
808 {
809 for (i=0; i < 3; i++)
810 hypre_BoomerAMGRelax(A,
811 f,
812 cf_marker,
813 relax_type,
814 relax_points[i],
815 relax_weight,
816 0.0,
817 NULL,
818 u,
819 Vtemp, Ztemp);
820 }
821
822
823 return hypre_error_flag;
824
825}
826
827/*--------------------------------------------------------------------------
828 * CG Smoother - if the CG setup is cheap, we can just do it here - for
829 * now we are doing it in the setup, so this function is a
830 * bit unnecessary ...
831 *
832 *--------------------------------------------------------------------------*/
833
834int hypre_ParCSRRelax_CG( HYPRE_Solver solver,
835 hypre_ParCSRMatrix *A,
836 hypre_ParVector *f,
837 hypre_ParVector *u,
838 int num_its)
839{
840 int num_iterations;
841 double final_res_norm;
842
843 HYPRE_PCGSetMaxIter(solver, num_its); /* max iterations */
844
845 HYPRE_ParCSRPCGSolve(solver, (HYPRE_ParCSRMatrix)A, (HYPRE_ParVector)f, (HYPRE_ParVector)u);
846
847 HYPRE_PCGGetNumIterations(solver, &num_iterations);
848 HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
849
850#if 0
851 {
852 int myid;
853 MPI_Comm_rank(MPI_COMM_WORLD, &myid);
854 if (myid ==0)
855 {
856 printf(" -----CG PCG Iterations = %d\n", num_iterations);
857 printf(" -----CG PCG Final Relative Residual Norm = %e\n", final_res_norm);
858 }
859
860 }
861#endif
862
863
864
865 return hypre_error_flag;
866
867}
868
869
870
871
872/*--------------------------------------------------------------------------
873 * Steepest Descent (Smoother) (Not used)
874 *
875 * We don't check for convergence - just do a fixed number of iterations
876 *--------------------------------------------------------------------------*/
877
878int hypre_ParCSRRelax_SD( hypre_ParCSRMatrix *A,/* matrix to relax with */
879 hypre_ParVector *f, /* right-hand side */
880 hypre_ParVector *u,/* initial/updated approximation */
881 hypre_ParVector *r, /* temporary vector */
882 hypre_ParVector *p, /*another temp vector */
883 int num_its)
884{
885
886 int i;
887 double alpha, tmp1, tmp2;
888
889
890 /* get residual: r = f - A*u */
891 hypre_ParVectorCopy(f, r); /* copy f into r */
892 hypre_ParCSRMatrixMatvec(-1.0, A, u, 1.0, r);
893
894 for (i = 0; i < num_its; i++)
895 {
896
897 /*p = A*r */
898 hypre_ParCSRMatrixMatvec(1.0, A, r, 0.0, p);
899
900 /* alpha = (r,r)/(p,r) */
901 tmp1 = hypre_ParVectorInnerProd( r, r);
902
903 tmp2 = hypre_ParVectorInnerProd( p, r);
904
905 if (tmp2 == 0.0)
906 break;
907
908 alpha = tmp1/tmp2;
909
910 /* u = u + alpha*r */
911 hypre_ParVectorAxpy( alpha, r, u);
912
913 /* r = r - alpha * p */
914 hypre_ParVectorAxpy( -alpha, p, r);
915
916
917 }
918
919 return hypre_error_flag;
920
921}
922
923
924
925/* tql1.f --
926
927 this is the eispack translation - from Barry Smith in Petsc
928
929 Note that this routine always uses real numbers (not complex) even
930 if the underlying matrix is Hermitian. This is because the Lanczos
931 process applied to Hermitian matrices always produces a real,
932 symmetric tridiagonal matrix.
933*/
934
935double hypre_LINPACKcgpthy(double*,double*);
936
937
938int hypre_LINPACKcgtql1(int *n,double *d,double *e,int *ierr)
939{
940 /* System generated locals */
941 int i__1,i__2;
942 double d__1,d__2,c_b10 = 1.0;
943
944 /* Local variables */
945 double c,f,g,h;
946 int i,j,l,m;
947 double p,r,s,c2,c3 = 0.0;
948 int l1,l2;
949 double s2 = 0.0;
950 int ii;
951 double dl1,el1;
952 int mml;
953 double tst1,tst2;
954
955/* THIS SUBROUTINE IS A TRANSLATION OF THE ALGOL PROCEDURE TQL1, */
956/* NUM. MATH. 11, 293-306(1968) BY BOWDLER, MARTIN, REINSCH, AND */
957/* WILKINSON. */
958/* HANDBOOK FOR AUTO. COMP., VOL.II-LINEAR ALGEBRA, 227-240(1971). */
959
960/* THIS SUBROUTINE FINDS THE EIGENVALUES OF A SYMMETRIC */
961/* TRIDIAGONAL MATRIX BY THE QL METHOD. */
962
963/* ON INPUT */
964
965/* N IS THE ORDER OF THE MATRIX. */
966
967/* D CONTAINS THE DIAGONAL ELEMENTS OF THE INPUT MATRIX. */
968
969/* E CONTAINS THE SUBDIAGONAL ELEMENTS OF THE INPUT MATRIX */
970/* IN ITS LAST N-1 POSITIONS. E(1) IS ARBITRARY. */
971
972/* ON OUTPUT */
973
974/* D CONTAINS THE EIGENVALUES IN ASCENDING ORDER. IF AN */
975/* ERROR EXIT IS MADE, THE EIGENVALUES ARE CORRECT AND */
976/* ORDERED FOR INDICES 1,2,...IERR-1, BUT MAY NOT BE */
977/* THE SMALLEST EIGENVALUES. */
978
979/* E HAS BEEN DESTROYED. */
980
981/* IERR IS SET TO */
982/* ZERO FOR NORMAL RETURN, */
983/* J IF THE J-TH EIGENVALUE HAS NOT BEEN */
984/* DETERMINED AFTER 30 ITERATIONS. */
985
986/* CALLS CGPTHY FOR DSQRT(A*A + B*B) . */
987
988/* QUESTIONS AND COMMENTS SHOULD BE DIRECTED TO BURTON S. GARBOW, */
989/* MATHEMATICS AND COMPUTER SCIENCE DIV, ARGONNE NATIONAL LABORATORY
990*/
991
992/* THIS VERSION DATED AUGUST 1983. */
993
994/* ------------------------------------------------------------------
995*/
996 double ds;
997
998 --e;
999 --d;
1000
1001 *ierr = 0;
1002 if (*n == 1) {
1003 goto L1001;
1004 }
1005
1006 i__1 = *n;
1007 for (i = 2; i <= i__1; ++i) {
1008 e[i - 1] = e[i];
1009 }
1010
1011 f = 0.;
1012 tst1 = 0.;
1013 e[*n] = 0.;
1014
1015 i__1 = *n;
1016 for (l = 1; l <= i__1; ++l) {
1017 j = 0;
1018 h = (d__1 = d[l],fabs(d__1)) + (d__2 = e[l],fabs(d__2));
1019 if (tst1 < h) {
1020 tst1 = h;
1021 }
1022/* .......... LOOK FOR SMALL SUB-DIAGONAL ELEMENT .......... */
1023 i__2 = *n;
1024 for (m = l; m <= i__2; ++m) {
1025 tst2 = tst1 + (d__1 = e[m],fabs(d__1));
1026 if (tst2 == tst1) {
1027 goto L120;
1028 }
1029/* .......... E(N) IS ALWAYS ZERO,SO THERE IS NO EXIT */
1030/* THROUGH THE BOTTOM OF THE LOOP .......... */
1031 }
1032L120:
1033 if (m == l) {
1034 goto L210;
1035 }
1036L130:
1037 if (j == 30) {
1038 goto L1000;
1039 }
1040 ++j;
1041/* .......... FORM SHIFT .......... */
1042 l1 = l + 1;
1043 l2 = l1 + 1;
1044 g = d[l];
1045 p = (d[l1] - g) / (e[l] * 2.);
1046 r = hypre_LINPACKcgpthy(&p,&c_b10);
1047 ds = 1.0; if (p < 0.0) ds = -1.0;
1048 d[l] = e[l] / (p + ds*r);
1049 d[l1] = e[l] * (p + ds*r);
1050 dl1 = d[l1];
1051 h = g - d[l];
1052 if (l2 > *n) {
1053 goto L145;
1054 }
1055
1056 i__2 = *n;
1057 for (i = l2; i <= i__2; ++i) {
1058 d[i] -= h;
1059 }
1060
1061L145:
1062 f += h;
1063/* .......... QL TRANSFORMATION .......... */
1064 p = d[m];
1065 c = 1.;
1066 c2 = c;
1067 el1 = e[l1];
1068 s = 0.;
1069 mml = m - l;
1070/* .......... FOR I=M-1 STEP -1 UNTIL L DO -- .......... */
1071 i__2 = mml;
1072 for (ii = 1; ii <= i__2; ++ii) {
1073 c3 = c2;
1074 c2 = c;
1075 s2 = s;
1076 i = m - ii;
1077 g = c * e[i];
1078 h = c * p;
1079 r = hypre_LINPACKcgpthy(&p,&e[i]);
1080 e[i + 1] = s * r;
1081 s = e[i] / r;
1082 c = p / r;
1083 p = c * d[i] - s * g;
1084 d[i + 1] = h + s * (c * g + s * d[i]);
1085 }
1086
1087 p = -s * s2 * c3 * el1 * e[l] / dl1;
1088 e[l] = s * p;
1089 d[l] = c * p;
1090 tst2 = tst1 + (d__1 = e[l],fabs(d__1));
1091 if (tst2 > tst1) {
1092 goto L130;
1093 }
1094L210:
1095 p = d[l] + f;
1096/* .......... ORDER EIGENVALUES .......... */
1097 if (l == 1) {
1098 goto L250;
1099 }
1100/* .......... FOR I=L STEP -1 UNTIL 2 DO -- .......... */
1101 i__2 = l;
1102 for (ii = 2; ii <= i__2; ++ii) {
1103 i = l + 2 - ii;
1104 if (p >= d[i - 1]) {
1105 goto L270;
1106 }
1107 d[i] = d[i - 1];
1108 }
1109
1110L250:
1111 i = 1;
1112L270:
1113 d[i] = p;
1114 }
1115
1116 goto L1001;
1117/* .......... SET ERROR -- NO CONVERGENCE TO AN */
1118/* EIGENVALUE AFTER 30 ITERATIONS .......... */
1119L1000:
1120 *ierr = l;
1121L1001:
1122 return 0;
1123
1124} /* cgtql1_ */
1125
1126
1127double hypre_LINPACKcgpthy(double *a,double *b)
1128{
1129 /* System generated locals */
1130 double ret_val,d__1,d__2,d__3;
1131
1132 /* Local variables */
1133 double p,r,s,t,u;
1134
1135
1136/* FINDS DSQRT(A**2+B**2) WITHOUT OVERFLOW OR DESTRUCTIVE UNDERFLOW */
1137
1138
1139/* Computing MAX */
1140 d__1 = fabs(*a),d__2 = fabs(*b);
1141 p = hypre_max(d__1,d__2);
1142 if (!p) {
1143 goto L20;
1144 }
1145/* Computing MIN */
1146 d__2 = fabs(*a),d__3 = fabs(*b);
1147/* Computing 2nd power */
1148 d__1 = hypre_min(d__2,d__3) / p;
1149 r = d__1 * d__1;
1150L10:
1151 t = r + 4.;
1152 if (t == 4.) {
1153 goto L20;
1154 }
1155 s = r / t;
1156 u = s * 2. + 1.;
1157 p = u * p;
1158/* Computing 2nd power */
1159 d__1 = s / u;
1160 r = d__1 * d__1 * r;
1161 goto L10;
1162L20:
1163 ret_val = p;
1164
1165 return ret_val;
1166} /* cgpthy_ */
1167
1168
1169#if 0
1170
1171int hypre_ParCSRRelax_Cheby2(hypre_ParCSRMatrix *A, /* matrix to relax with */
1172 hypre_ParVector *f, /* right-hand side */
1173 double max_eig, /* u.b = max. e-val est.*1.1 */
1174 double eig_ratio, /* l.b = max_eig/eig ratio */
1175 int order, /* polynomial order */
1176 hypre_ParVector *u, /* initial/updated approximation */
1177 hypre_ParVector *v /* temporary vector */,
1178 hypre_ParVector *v2 /*another temp vector */ )
1179{
1180
1181 /* See Saad "Iterative Methods for Sparse Systems", Alg. 12.1 */
1182 /* r_m = Tm(r_0) - plus we scale residual by SCALE = (1-A/u.b.) */
1183
1184 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1185 double *A_diag_data = hypre_CSRMatrixData(A_diag);
1186 int *A_diag_i = hypre_CSRMatrixI(A_diag);
1187
1188 double *u_data = hypre_VectorData(hypre_ParVectorLocalVector(u));
1189 double *f_data = hypre_VectorData(hypre_ParVectorLocalVector(f));
1190 double *v_data = hypre_VectorData(hypre_ParVectorLocalVector(v));
1191
1192 double *dk = hypre_VectorData(hypre_ParVectorLocalVector(v2));
1193
1194 double theta, delta, sigma;
1195 double p_k, p_kp1, temp1, temp2, diag, scale;
1196 double zero = 0.0;
1197 double upper_bound, lower_bound;
1198
1199 int i, j;
1200 int num_rows = hypre_CSRMatrixNumRows(A_diag);
1201
1202 hypre_ParVector *Ztemp;
1203
1204
1205 Ztemp = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
1206 hypre_ParCSRMatrixGlobalNumRows(A),
1207 hypre_ParCSRMatrixRowStarts(A));
1208 hypre_ParVectorInitialize(Ztemp);
1209 hypre_ParVectorSetPartitioningOwner(Ztemp,0);
1210
1211 /* make sure we are large enough - Adams et al. 2003 */
1212 upper_bound = max_eig * 1.1;
1213 lower_bound = max_eig/eig_ratio;
1214
1215
1216 /* parameters */
1217 theta = (upper_bound + lower_bound)/2;
1218 delta = (upper_bound - lower_bound)/2;
1219 sigma = theta/delta;
1220
1221 /* set v = f */
1222 hypre_ParVectorCopy(f, v);
1223
1224 /* get residual: v = f-A*u */
1225 hypre_ParCSRMatrixMatvec(-1.0, A, u, 1.0, v);
1226
1227 /* p_0*/
1228 p_k = 1/sigma;
1229
1230 /*first order */
1231 temp1 = 1/theta;
1232
1233 /*d_0* = 1/theta * SCALE*r_0 */
1234 /* x_1 = x_0 + d_0 */
1235
1236 /* NEW PART*/
1237 /* z = A*v */
1238 hypre_ParCSRMatrixMatvec(1.0, A, v, 0.0, Ztemp);
1239 /* v = v - Ztemp/u.b. */
1240 scale = -1.0/upper_bound;
1241 hypre_ParVectorAxpy(scale, Ztemp, v);
1242 /* END NEW */
1243
1244#ifdef HYPRE_USING_OPENMP
1245#pragma omp parallel for private(i,diag,scale) schedule(static)
1246#endif
1247 for (i = 0; i < num_rows; i++)
1248 {
1249 diag = 1;
1250 scale = temp1/diag;
1251 dk[i] = scale*v_data[i];
1252 u_data[i] += dk[i];
1253
1254 }
1255
1256 /* higher order */
1257 for (j = 1; j < order; j++)
1258 {
1259 /* get residual: v = f-A*u */
1260 hypre_ParVectorCopy(f, v);
1261 hypre_ParCSRMatrixMatvec(-1.0, A, u, 1.0, v);
1262
1263 p_kp1 = 1.0/(2.0*sigma - p_k);
1264 temp1 = p_kp1*p_k;
1265 temp2 = 2.0*p_kp1/delta;
1266
1267 /* NEW PART*/
1268 /* still do jacobi */
1269
1270 /* z = A*v */
1271 hypre_ParCSRMatrixMatvec(1.0, A, v, 0.0, Ztemp);
1272 /* v = v - Ztemp/u.b. */
1273 scale = -1.0/upper_bound;
1274 hypre_ParVectorAxpy(scale, Ztemp, v);
1275 /* END NEW */
1276
1277
1278#ifdef HYPRE_USING_OPENMP
1279#pragma omp parallel for private(i,diag,scale) schedule(static)
1280#endif
1281 for (i = 0; i < num_rows; i++)
1282 {
1283 diag = 1;
1284 scale = temp2/diag;
1285 dk[i] = temp1*dk[i] + scale*v_data[i];
1286 u_data[i] += dk[i];
1287 }
1288 p_k = p_kp1;
1289 }
1290
1291
1292 hypre_ParVectorDestroy(Ztemp);
1293
1294 return hypre_error_flag;
1295
1296
1297}
1298#endif
1299
1300/*------------------------------------------------------------------------
1301
1302 theta = a_ii /sum off_d((a_ij))
1303 we want the min.
1304
1305 *--------------------------------------------------------------------------*/
1306int hypre_ParCSRComputeTheta(hypre_ParCSRMatrix *A,
1307 double *theta_est)
1308
1309{
1310 int i, j;
1311 int num_rows = hypre_ParCSRMatrixNumRows(A);
1312
1313 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1314 int *A_diag_I = hypre_CSRMatrixI(A_diag);
1315 int *A_diag_J = hypre_CSRMatrixJ(A_diag);
1316 double *A_diag_data = hypre_CSRMatrixData(A_diag);
1317
1318 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
1319 int *A_offd_I = hypre_CSRMatrixI(A_offd);
1320 int *A_offd_J = hypre_CSRMatrixJ(A_offd);
1321 double *A_offd_data = hypre_CSRMatrixData(A_offd);
1322 int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
1323
1324 double diag, offd_sum;
1325 double theta, ratio;
1326 int min_row = 0;
1327
1328 int my_id;
1329 MPI_Comm_rank(MPI_COMM_WORLD,&my_id);
1330
1331 theta = 1e9;
1332
1333
1334 for (i = 0; i < num_rows; i++)
1335 {
1336
1337 /* get the diag element of the ith row */
1338 for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1339 {
1340 if (A_diag_J[j] == i)
1341 {
1342 diag = A_diag_data[j];
1343 /* break; */
1344 }
1345 else
1346 {
1347 if (A_diag_data[j] > 0.0)
1348 {
1349 printf("MYID = %d, row = %d, DIAG_col = %d, val = %g \n", my_id, i, A_diag_J[j], A_diag_data[j]);
1350 }
1351 }
1352
1353 }
1354
1355 /* get the offd part of the ith row */
1356 offd_sum = 0.0;
1357 if (num_cols_offd )
1358 {
1359 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1360 {
1361 offd_sum += fabs(A_offd_data[j]);
1362 if (A_offd_data[j] > 0.0)
1363 {
1364 printf("MYID = %d, row = %d, OFFD_col = %d, val = %g \n", my_id, i, A_offd_J[j], A_offd_data[j]);
1365 }
1366
1367
1368 }
1369
1370 }
1371 if (offd_sum > 0.0)
1372 {
1373 ratio = diag/offd_sum;
1374 theta = hypre_min(theta, ratio);
1375
1376 if (theta == ratio)
1377 min_row = i;
1378 }
1379
1380 }
1381
1382 printf("MYID = %d, Min Row = %d\n",my_id, min_row);
1383
1384
1385
1386 *theta_est = theta;
1387
1388 return hypre_error_flag;
1389
1390
1391}
1392
1393
1394/*--------------------------------------------------------------------------
1395 * hypre_ParCSRComputeL1Norms Threads
1396 *
1397 * Compute the l1 norms of the rows of a given matrix, depending on
1398 * the option parameter:
1399 *
1400 * option 1 = Compute the l1 norm of the rows
1401 * option 2 = Compute the l1 norm of the (processor) off-diagonal
1402 * part of the rows plus the diagonal of A
1403 * option 3 = Compute the l2 norm^2 of the rows
1404 * option 4 = Truncated version of option 2 based on Remark 6.2 in "Multigrid
1405 * Smoothers for Ultra-Parallel Computing" (with or without CF)
1406 *--------------------------------------------------------------------------*/
1407
1408int hypre_ParCSRComputeL1Norms(hypre_ParCSRMatrix *A,
1409 int option,
1410 int *cf_marker,
1411 double **l1_norm_ptr)
1412{
1413 int i, j;
1414 int num_rows = hypre_ParCSRMatrixNumRows(A);
1415
1416 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1417 int *A_diag_I = hypre_CSRMatrixI(A_diag);
1418 int *A_diag_J = hypre_CSRMatrixJ(A_diag);
1419 double *A_diag_data = hypre_CSRMatrixData(A_diag);
1420
1421 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
1422 int *A_offd_I = hypre_CSRMatrixI(A_offd);
1423 int *A_offd_J = hypre_CSRMatrixJ(A_offd);
1424 double *A_offd_data = hypre_CSRMatrixData(A_offd);
1425 int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
1426
1427 double *l1_norm = hypre_CTAlloc(double, num_rows);
1428
1429 int *cf_marker_offd = NULL;
1430
1431 int cf_diag;
1432 double diag;
1433
1434
1435 if (cf_marker != NULL)
1436 {
1437 /*-------------------------------------------------------------------
1438 * Get the CF_marker data for the off-processor columns of A
1439 *-------------------------------------------------------------------*/
1440 int index;
1441 int num_sends;
1442 int start;
1443 int *int_buf_data = NULL;
1444
1445 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1446 hypre_ParCSRCommHandle *comm_handle;
1447
1448 if (num_cols_offd)
1449 cf_marker_offd = hypre_CTAlloc(int, num_cols_offd);
1450
1451 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1452
1453 if (hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends))
1454 int_buf_data = hypre_CTAlloc(int,
1455 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
1456 index = 0;
1457
1458 for (i = 0; i < num_sends; i++)
1459 {
1460 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1461 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
1462 {
1463 int_buf_data[index++] = cf_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1464 }
1465
1466 }
1467 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
1468 cf_marker_offd);
1469
1470 hypre_ParCSRCommHandleDestroy(comm_handle);
1471
1472 hypre_TFree(int_buf_data);
1473 }
1474
1475 if (option == 1)
1476 {
1477 for (i = 0; i < num_rows; i++)
1478 {
1479 l1_norm[i] = 0.0;
1480 if (cf_marker == NULL)
1481 {
1482 /* Add the l1 norm of the diag part of the ith row */
1483 for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1484 l1_norm[i] += fabs(A_diag_data[j]);
1485 /* Add the l1 norm of the offd part of the ith row */
1486 if (num_cols_offd)
1487 {
1488 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1489 l1_norm[i] += fabs(A_offd_data[j]);
1490 }
1491 }
1492 else
1493 {
1494 cf_diag = cf_marker[i];
1495 /* Add the CF l1 norm of the diag part of the ith row */
1496 for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1497 if (cf_diag == cf_marker[A_diag_J[j]])
1498 l1_norm[i] += fabs(A_diag_data[j]);
1499 /* Add the CF l1 norm of the offd part of the ith row */
1500 if (num_cols_offd)
1501 {
1502 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1503 if (cf_diag == cf_marker_offd[A_offd_J[j]])
1504 l1_norm[i] += fabs(A_offd_data[j]);
1505 }
1506 }
1507 }
1508 }
1509 else if (option == 2)
1510 {
1511 for (i = 0; i < num_rows; i++)
1512 {
1513 /* Add the diag element of the ith row */
1514 l1_norm[i] = fabs(A_diag_data[A_diag_I[i]]);
1515 if (cf_marker == NULL)
1516 {
1517 /* Add the l1 norm of the offd part of the ith row */
1518 if (num_cols_offd)
1519 {
1520 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1521 l1_norm[i] += fabs(A_offd_data[j]);
1522 }
1523 }
1524 else
1525 {
1526 cf_diag = cf_marker[i];
1527 /* Add the CF l1 norm of the offd part of the ith row */
1528 if (num_cols_offd)
1529 {
1530 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1531 if (cf_diag == cf_marker_offd[A_offd_J[j]])
1532 l1_norm[i] += fabs(A_offd_data[j]);
1533 }
1534 }
1535 }
1536 }
1537 else if (option == 3)
1538 {
1539 for (i = 0; i < num_rows; i++)
1540 {
1541 l1_norm[i] = 0.0;
1542 for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1543 l1_norm[i] += A_diag_data[j] * A_diag_data[j];
1544 if (num_cols_offd)
1545 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1546 l1_norm[i] += A_offd_data[j] * A_offd_data[j];
1547 }
1548 }
1549 else if (option == 4)
1550 {
1551 for (i = 0; i < num_rows; i++)
1552 {
1553 /* Add the diag element of the ith row */
1554 diag = l1_norm[i] = fabs(A_diag_data[A_diag_I[i]]);
1555 if (cf_marker == NULL)
1556 {
1557 /* Add the scaled l1 norm of the offd part of the ith row */
1558 if (num_cols_offd)
1559 {
1560 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1561 l1_norm[i] += 0.5*fabs(A_offd_data[j]);
1562 }
1563 }
1564 else
1565 {
1566 cf_diag = cf_marker[i];
1567 /* Add the scaled CF l1 norm of the offd part of the ith row */
1568 if (num_cols_offd)
1569 {
1570 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1571 if (cf_diag == cf_marker_offd[A_offd_J[j]])
1572 l1_norm[i] += 0.5*fabs(A_offd_data[j]);
1573 }
1574 }
1575
1576 /* Truncate according to Remark 6.2 */
1577 if (l1_norm[i] <= 4.0/3.0*diag)
1578 l1_norm[i] = diag;
1579 }
1580 }
1581
1582 /* Handle negative definite matrices */
1583 for (i = 0; i < num_rows; i++)
1584 if (A_diag_data[A_diag_I[i]] < 0)
1585 l1_norm[i] = -l1_norm[i];
1586
1587 for (i = 0; i < num_rows; i++)
1588 if (fabs(l1_norm[i]) < DBL_EPSILON)
1589 {
1590 hypre_error_in_arg(1);
1591 break;
1592 }
1593
1594 hypre_TFree(cf_marker_offd);
1595
1596 *l1_norm_ptr = l1_norm;
1597
1598 return hypre_error_flag;
1599}
1600
1601/*--------------------------------------------------------------------------
1602 * hypre_ParCSRComputeL1Norms Threads
1603 *
1604 * Compute the l1 norms of the rows of a given matrix, depending on
1605 * the option parameter:
1606 *
1607 * option 1 = Compute the l1 norm of the rows
1608 * option 2 = Compute the l1 norm of the (processor) off-diagonal
1609 * part of the rows plus the diagonal of A
1610 * option 3 = Compute the l2 norm^2 of the rows
1611 * option 4 = Truncated version of option 2 based on Remark 6.2 in "Multigrid
1612 * Smoothers for Ultra-Parallel Computing" (with or without CF)
1613 *--------------------------------------------------------------------------*/
1614
1615int hypre_ParCSRComputeL1NormsThreads(hypre_ParCSRMatrix *A,
1616 int option,
1617 int num_threads,
1618 int *cf_marker,
1619 double **l1_norm_ptr)
1620{
1621 int i, j, k;
1622 int num_rows = hypre_ParCSRMatrixNumRows(A);
1623
1624 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1625 int *A_diag_I = hypre_CSRMatrixI(A_diag);
1626 int *A_diag_J = hypre_CSRMatrixJ(A_diag);
1627 double *A_diag_data = hypre_CSRMatrixData(A_diag);
1628
1629 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
1630 int *A_offd_I = hypre_CSRMatrixI(A_offd);
1631 int *A_offd_J = hypre_CSRMatrixJ(A_offd);
1632 double *A_offd_data = hypre_CSRMatrixData(A_offd);
1633 int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
1634
1635 double *l1_norm = hypre_CTAlloc(double, num_rows);
1636 int ii, ns, ne, rest, size;
1637 double res;
1638
1639 int *cf_marker_offd = NULL;
1640
1641 int cf_diag;
1642 double diag;
1643
1644
1645 if (cf_marker != NULL)
1646 {
1647 /*-------------------------------------------------------------------
1648 * Get the CF_marker data for the off-processor columns of A
1649 *-------------------------------------------------------------------*/
1650 int index;
1651 int num_sends;
1652 int start;
1653 int *int_buf_data = NULL;
1654
1655 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1656 hypre_ParCSRCommHandle *comm_handle;
1657
1658 if (num_cols_offd)
1659 cf_marker_offd = hypre_CTAlloc(int, num_cols_offd);
1660
1661 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1662
1663 if (hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends))
1664 int_buf_data = hypre_CTAlloc(int,
1665 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
1666 index = 0;
1667
1668 for (i = 0; i < num_sends; i++)
1669 {
1670 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1671 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
1672 {
1673 int_buf_data[index++] = cf_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1674 }
1675
1676 }
1677 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
1678 cf_marker_offd);
1679
1680 hypre_ParCSRCommHandleDestroy(comm_handle);
1681
1682 hypre_TFree(int_buf_data);
1683 }
1684
1685
1686#define HYPRE_SMP_PRIVATE i,ii,j,k,ns,ne,res,rest,size,cf_diag,diag
1687#include "../utilities/hypre_smp_forloop.h"
1688 for (k = 0; k < num_threads; k++)
1689 {
1690 size = num_rows/num_threads;
1691 rest = num_rows - size*num_threads;
1692 if (k < rest)
1693 {
1694 ns = k*size+k;
1695 ne = (k+1)*size+k+1;
1696 }
1697 else
1698 {
1699 ns = k*size+rest;
1700 ne = (k+1)*size+rest;
1701 }
1702 if (option == 1)
1703 {
1704 for (i = ns; i < ne; i++)
1705 {
1706 l1_norm[i] = 0.0;
1707 if (cf_marker == NULL)
1708 {
1709 /* Add the l1 norm of the diag part of the ith row */
1710 for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1711 l1_norm[i] += fabs(A_diag_data[j]);
1712 /* Add the l1 norm of the offd part of the ith row */
1713 if (num_cols_offd)
1714 {
1715 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1716 l1_norm[i] += fabs(A_offd_data[j]);
1717 }
1718 }
1719 else
1720 {
1721 cf_diag = cf_marker[i];
1722 /* Add the CF l1 norm of the diag part of the ith row */
1723 for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1724 if (cf_diag == cf_marker[A_diag_J[j]])
1725 l1_norm[i] += fabs(A_diag_data[j]);
1726 /* Add the CF l1 norm of the offd part of the ith row */
1727 if (num_cols_offd)
1728 {
1729 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1730 if (cf_diag == cf_marker_offd[A_offd_J[j]])
1731 l1_norm[i] += fabs(A_offd_data[j]);
1732 }
1733 }
1734 }
1735 }
1736 else if (option == 2)
1737 {
1738 for (i = ns; i < ne; i++)
1739 {
1740 l1_norm[i] = 0.0;
1741 if (cf_marker == NULL)
1742 {
1743 /* Add the diagonal and the local off-thread part of the ith row */
1744 for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1745 {
1746 ii = A_diag_J[j];
1747 if (ii == i || ii < ns || ii >= ne)
1748 l1_norm[i] += fabs(A_diag_data[j]);
1749 }
1750 /* Add the l1 norm of the offd part of the ith row */
1751 if (num_cols_offd)
1752 {
1753 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1754 l1_norm[i] += fabs(A_offd_data[j]);
1755 }
1756 }
1757 else
1758 {
1759 cf_diag = cf_marker[i];
1760 /* Add the diagonal and the local off-thread part of the ith row */
1761 for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1762 {
1763 ii = A_diag_J[j];
1764 if ((ii == i || ii < ns || ii >= ne) &&
1765 (cf_diag == cf_marker[A_diag_J[j]]))
1766 l1_norm[i] += fabs(A_diag_data[j]);
1767 }
1768 /* Add the CF l1 norm of the offd part of the ith row */
1769 if (num_cols_offd)
1770 {
1771 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1772 if (cf_diag == cf_marker_offd[A_offd_J[j]])
1773 l1_norm[i] += fabs(A_offd_data[j]);
1774 }
1775 }
1776 }
1777 }
1778 else if (option == 3)
1779 {
1780 for (i = ns; i < ne; i++)
1781 {
1782 l1_norm[i] = 0.0;
1783 for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1784 l1_norm[i] += A_diag_data[j] * A_diag_data[j];
1785 if (num_cols_offd)
1786 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1787 l1_norm[i] += A_offd_data[j] * A_offd_data[j];
1788 }
1789 }
1790 else if (option == 4)
1791 {
1792 for (i = ns; i < ne; i++)
1793 {
1794 l1_norm[i] = 0.0;
1795 if (cf_marker == NULL)
1796 {
1797 /* Add the diagonal and the local off-thread part of the ith row */
1798 for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1799 {
1800 ii = A_diag_J[j];
1801 if (ii == i || ii < ns || ii >= ne)
1802 {
1803 if (ii == i)
1804 {
1805 diag = fabs(A_diag_data[j]);
1806 l1_norm[i] += fabs(A_diag_data[j]);
1807 }
1808 else
1809 l1_norm[i] += 0.5*fabs(A_diag_data[j]);
1810 }
1811 }
1812 /* Add the l1 norm of the offd part of the ith row */
1813 if (num_cols_offd)
1814 {
1815 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1816 l1_norm[i] += 0.5*fabs(A_offd_data[j]);
1817 }
1818 }
1819 else
1820 {
1821 cf_diag = cf_marker[i];
1822 /* Add the diagonal and the local off-thread part of the ith row */
1823 for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1824 {
1825 ii = A_diag_J[j];
1826 if ((ii == i || ii < ns || ii >= ne) &&
1827 (cf_diag == cf_marker[A_diag_J[j]]))
1828 {
1829 if (ii == i)
1830 {
1831 diag = fabs(A_diag_data[j]);
1832 l1_norm[i] += fabs(A_diag_data[j]);
1833 }
1834 else
1835 l1_norm[i] += 0.5*fabs(A_diag_data[j]);
1836 }
1837 }
1838 /* Add the CF l1 norm of the offd part of the ith row */
1839 if (num_cols_offd)
1840 {
1841 for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1842 if (cf_diag == cf_marker_offd[A_offd_J[j]])
1843 l1_norm[i] += 0.5*fabs(A_offd_data[j]);
1844 }
1845 }
1846
1847 /* Truncate according to Remark 6.2 */
1848 if (l1_norm[i] <= 4.0/3.0*diag)
1849 l1_norm[i] = diag;
1850 }
1851 }
1852
1853 /* Handle negative definite matrices */
1854 for (i = ns; i < ne; i++)
1855 if (A_diag_data[A_diag_I[i]] < 0)
1856 l1_norm[i] = -l1_norm[i];
1857
1858 for (i = ns; i < ne; i++)
1859 if (fabs(l1_norm[i]) < DBL_EPSILON)
1860 {
1861 hypre_error_in_arg(1);
1862 break;
1863 }
1864 }
1865
1866 hypre_TFree(cf_marker_offd);
1867
1868 *l1_norm_ptr = l1_norm;
1869
1870 return hypre_error_flag;
1871}
1872
1873
1874/*--------------------------------------------------------------------------
1875 * hypre_ParCSRRelax_L1 (Symm GS / SSOR)
1876 *--------------------------------------------------------------------------*/
1877
1878int hypre_ParCSRRelax_L1( hypre_ParCSRMatrix *A,
1879 hypre_ParVector *f,
1880 double relax_weight,
1881 double omega,
1882 double *l1_norms,
1883 hypre_ParVector *u,
1884 hypre_ParVector *Vtemp,
1885 hypre_ParVector *Ztemp)
1886{
1887 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
1888 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1889 double *A_diag_data = hypre_CSRMatrixData(A_diag);
1890 int *A_diag_i = hypre_CSRMatrixI(A_diag);
1891 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
1892 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
1893 int *A_offd_i = hypre_CSRMatrixI(A_offd);
1894 double *A_offd_data = hypre_CSRMatrixData(A_offd);
1895 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
1896 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1897 hypre_ParCSRCommHandle *comm_handle;
1898
1899 int n = hypre_CSRMatrixNumRows(A_diag);
1900 int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
1901
1902 hypre_Vector *u_local = hypre_ParVectorLocalVector(u);
1903 double *u_data = hypre_VectorData(u_local);
1904
1905 hypre_Vector *f_local = hypre_ParVectorLocalVector(f);
1906 double *f_data = hypre_VectorData(f_local);
1907
1908 hypre_Vector *Vtemp_local = hypre_ParVectorLocalVector(Vtemp);
1909 double *Vtemp_data = hypre_VectorData(Vtemp_local);
1910 double *Vext_data;
1911 double *v_buf_data;
1912 double *tmp_data;
1913
1914 int i, j;
1915 int ii, jj;
1916 int ns, ne, size, rest;
1917 int relax_error = 0;
1918 int num_sends;
1919 int index, start;
1920 int num_procs, num_threads, my_id ;
1921
1922 double zero = 0.0;
1923 double res, res2;
1924
1925 hypre_Vector *Ztemp_local;
1926 double *Ztemp_data;
1927
1928
1929 MPI_Comm_size(comm,&num_procs);
1930 MPI_Comm_rank(comm,&my_id);
1931 num_threads = hypre_NumThreads();
1932 /*-----------------------------------------------------------------
1933 * Copy current approximation into temporary vector.
1934 *-----------------------------------------------------------------*/
1935 if (num_procs > 1)
1936 {
1937 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1938
1939 v_buf_data = hypre_CTAlloc(double,
1940 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
1941
1942 Vext_data = hypre_CTAlloc(double,num_cols_offd);
1943
1944 if (num_cols_offd)
1945 {
1946 A_offd_j = hypre_CSRMatrixJ(A_offd);
1947 A_offd_data = hypre_CSRMatrixData(A_offd);
1948 }
1949
1950 index = 0;
1951 for (i = 0; i < num_sends; i++)
1952 {
1953 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1954 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++)
1955 v_buf_data[index++]
1956 = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1957 }
1958
1959 comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data,
1960 Vext_data);
1961
1962 /*-----------------------------------------------------------------
1963 * Copy current approximation into temporary vector.
1964 *-----------------------------------------------------------------*/
1965 hypre_ParCSRCommHandleDestroy(comm_handle);
1966 comm_handle = NULL;
1967 }
1968
1969 /*-----------------------------------------------------------------
1970 * Relax all points.
1971 *-----------------------------------------------------------------*/
1972
1973 Ztemp_local = hypre_ParVectorLocalVector(Ztemp);
1974 Ztemp_data = hypre_VectorData(Ztemp_local);
1975
1976
1977 if (relax_weight == 1 && omega == 1)
1978 {
1979 /*tmp_data = hypre_CTAlloc(double,n);*/
1980 tmp_data = Ztemp_data;
1981#define HYPRE_SMP_PRIVATE i
1982#include "../utilities/hypre_smp_forloop.h"
1983 for (i = 0; i < n; i++)
1984 tmp_data[i] = u_data[i];
1985#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
1986#include "../utilities/hypre_smp_forloop.h"
1987 for (j = 0; j < num_threads; j++)
1988 {
1989 size = n/num_threads;
1990 rest = n - size*num_threads;
1991 if (j < rest)
1992 {
1993 ns = j*size+j;
1994 ne = (j+1)*size+j+1;
1995 }
1996 else
1997 {
1998 ns = j*size+rest;
1999 ne = (j+1)*size+rest;
2000 }
2001 for (i = ns; i < ne; i++) /* interior points first */
2002 {
2003
2004 /*-----------------------------------------------------------
2005 * If diagonal is nonzero, relax point i; otherwise, skip it.
2006 *-----------------------------------------------------------*/
2007
2008 if ( A_diag_data[A_diag_i[i]] != zero)
2009 {
2010 res = f_data[i];
2011 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2012 {
2013 ii = A_diag_j[jj];
2014 if (ii >= ns && ii < ne)
2015 {
2016 res -= A_diag_data[jj] * u_data[ii];
2017 }
2018 else
2019 res -= A_diag_data[jj] * tmp_data[ii];
2020 }
2021 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2022 {
2023 ii = A_offd_j[jj];
2024 res -= A_offd_data[jj] * Vext_data[ii];
2025 }
2026 u_data[i] += res / l1_norms[i];
2027 }
2028 }
2029 for (i = ne-1; i > ns-1; i--) /* interior points first */
2030 {
2031
2032 /*-----------------------------------------------------------
2033 * If diagonal is nonzero, relax point i; otherwise, skip it.
2034 *-----------------------------------------------------------*/
2035
2036 if ( A_diag_data[A_diag_i[i]] != zero)
2037 {
2038 res = f_data[i];
2039 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2040 {
2041 ii = A_diag_j[jj];
2042 if (ii >= ns && ii < ne)
2043 {
2044 res -= A_diag_data[jj] * u_data[ii];
2045 }
2046 else
2047 res -= A_diag_data[jj] * tmp_data[ii];
2048 }
2049 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2050 {
2051 ii = A_offd_j[jj];
2052 res -= A_offd_data[jj] * Vext_data[ii];
2053 }
2054 u_data[i] += res / l1_norms[i];
2055 }
2056 }
2057 }
2058 }
2059 else
2060 {
2061 double c1 = omega*relax_weight;
2062 double c2 = omega*(1.0-relax_weight);
2063 /* tmp_data = hypre_CTAlloc(double,n); */
2064 tmp_data = Ztemp_data;
2065#define HYPRE_SMP_PRIVATE i
2066#include "../utilities/hypre_smp_forloop.h"
2067 for (i = 0; i < n; i++)
2068 {
2069 tmp_data[i] = u_data[i];
2070 }
2071#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
2072#include "../utilities/hypre_smp_forloop.h"
2073 for (j = 0; j < num_threads; j++)
2074 {
2075 size = n/num_threads;
2076 rest = n - size*num_threads;
2077 if (j < rest)
2078 {
2079 ns = j*size+j;
2080 ne = (j+1)*size+j+1;
2081 }
2082 else
2083 {
2084 ns = j*size+rest;
2085 ne = (j+1)*size+rest;
2086 }
2087 for (i = ns; i < ne; i++) /* interior points first */
2088 {
2089
2090 /*-----------------------------------------------------------
2091 * If diagonal is nonzero, relax point i; otherwise, skip it.
2092 *-----------------------------------------------------------*/
2093
2094 if ( A_diag_data[A_diag_i[i]] != zero)
2095 {
2096 res2 = 0.0;
2097 res = f_data[i];
2098 Vtemp_data[i] = u_data[i];
2099 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2100 {
2101 ii = A_diag_j[jj];
2102 if (ii >= ns && ii < ne)
2103 {
2104 res -= A_diag_data[jj] * u_data[ii];
2105 if (ii < i)
2106 res2 += A_diag_data[jj] * (Vtemp_data[ii] - u_data[ii]);
2107 }
2108 else
2109 res -= A_diag_data[jj] * tmp_data[ii];
2110 }
2111 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2112 {
2113 ii = A_offd_j[jj];
2114 res -= A_offd_data[jj] * Vext_data[ii];
2115 }
2116 u_data[i] += (c1*res + c2*res2) / l1_norms[i];
2117 }
2118 }
2119 for (i = ne-1; i > ns-1; i--) /* interior points first */
2120 {
2121
2122 /*-----------------------------------------------------------
2123 * If diagonal is nonzero, relax point i; otherwise, skip it.
2124 *-----------------------------------------------------------*/
2125
2126 if ( A_diag_data[A_diag_i[i]] != zero)
2127 {
2128 res2 = 0.0;
2129 res = f_data[i];
2130 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2131 {
2132 ii = A_diag_j[jj];
2133 if (ii >= ns && ii < ne)
2134 {
2135 res -= A_diag_data[jj] * u_data[ii];
2136 if (ii > i)
2137 res2 += A_diag_data[jj] * (Vtemp_data[ii] - u_data[ii]);
2138 }
2139 else
2140 res -= A_diag_data[jj] * tmp_data[ii];
2141 }
2142 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2143 {
2144 ii = A_offd_j[jj];
2145 res -= A_offd_data[jj] * Vext_data[ii];
2146 }
2147 u_data[i] += (c1*res + c2*res2) / l1_norms[i];
2148 }
2149 }
2150 }
2151 }
2152 if (num_procs > 1)
2153 {
2154 hypre_TFree(Vext_data);
2155 hypre_TFree(v_buf_data);
2156 }
2157
2158 return(relax_error);
2159}
2160
2161/*--------------------------------------------------------------------------
2162 * hypre_ParCSRRelax_L1_GS (GS / SOR) (NOT SYM)
2163 *--------------------------------------------------------------------------*/
2164
2165int hypre_ParCSRRelax_L1_GS( hypre_ParCSRMatrix *A,
2166 hypre_ParVector *f,
2167 double relax_weight,
2168 double omega,
2169 double *l1_norms,
2170 hypre_ParVector *u,
2171 hypre_ParVector *Vtemp,
2172 hypre_ParVector *Ztemp)
2173{
2174 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
2175 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
2176 double *A_diag_data = hypre_CSRMatrixData(A_diag);
2177 int *A_diag_i = hypre_CSRMatrixI(A_diag);
2178 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
2179 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
2180 int *A_offd_i = hypre_CSRMatrixI(A_offd);
2181 double *A_offd_data = hypre_CSRMatrixData(A_offd);
2182 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
2183 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
2184 hypre_ParCSRCommHandle *comm_handle;
2185
2186 int n = hypre_CSRMatrixNumRows(A_diag);
2187 int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
2188
2189 hypre_Vector *u_local = hypre_ParVectorLocalVector(u);
2190 double *u_data = hypre_VectorData(u_local);
2191
2192 hypre_Vector *f_local = hypre_ParVectorLocalVector(f);
2193 double *f_data = hypre_VectorData(f_local);
2194
2195 hypre_Vector *Vtemp_local = hypre_ParVectorLocalVector(Vtemp);
2196 double *Vtemp_data = hypre_VectorData(Vtemp_local);
2197 double *Vext_data;
2198 double *v_buf_data;
2199 double *tmp_data;
2200
2201 int i, j;
2202 int ii, jj;
2203 int ns, ne, size, rest;
2204 int relax_error = 0;
2205 int num_sends;
2206 int index, start;
2207 int num_procs, num_threads, my_id ;
2208
2209 double zero = 0.0;
2210 double res, res2;
2211
2212 hypre_Vector *Ztemp_local;
2213 double *Ztemp_data;
2214
2215
2216 MPI_Comm_size(comm,&num_procs);
2217 MPI_Comm_rank(comm,&my_id);
2218 num_threads = hypre_NumThreads();
2219 /*-----------------------------------------------------------------
2220 * Copy current approximation into temporary vector.
2221 *-----------------------------------------------------------------*/
2222 if (num_procs > 1)
2223 {
2224 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
2225
2226 v_buf_data = hypre_CTAlloc(double,
2227 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
2228
2229 Vext_data = hypre_CTAlloc(double,num_cols_offd);
2230
2231 if (num_cols_offd)
2232 {
2233 A_offd_j = hypre_CSRMatrixJ(A_offd);
2234 A_offd_data = hypre_CSRMatrixData(A_offd);
2235 }
2236
2237 index = 0;
2238 for (i = 0; i < num_sends; i++)
2239 {
2240 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
2241 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++)
2242 v_buf_data[index++]
2243 = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
2244 }
2245
2246 comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data,
2247 Vext_data);
2248
2249 /*-----------------------------------------------------------------
2250 * Copy current approximation into temporary vector.
2251 *-----------------------------------------------------------------*/
2252 hypre_ParCSRCommHandleDestroy(comm_handle);
2253 comm_handle = NULL;
2254 }
2255
2256 /*-----------------------------------------------------------------
2257 * Relax all points.
2258 *-----------------------------------------------------------------*/
2259
2260 Ztemp_local = hypre_ParVectorLocalVector(Ztemp);
2261 Ztemp_data = hypre_VectorData(Ztemp_local);
2262
2263
2264 if (relax_weight == 1 && omega == 1)
2265 {
2266 /*tmp_data = hypre_CTAlloc(double,n);*/
2267 tmp_data = Ztemp_data;
2268#define HYPRE_SMP_PRIVATE i
2269#include "../utilities/hypre_smp_forloop.h"
2270 for (i = 0; i < n; i++)
2271 tmp_data[i] = u_data[i];
2272#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
2273#include "../utilities/hypre_smp_forloop.h"
2274 for (j = 0; j < num_threads; j++)
2275 {
2276 size = n/num_threads;
2277 rest = n - size*num_threads;
2278 if (j < rest)
2279 {
2280 ns = j*size+j;
2281 ne = (j+1)*size+j+1;
2282 }
2283 else
2284 {
2285 ns = j*size+rest;
2286 ne = (j+1)*size+rest;
2287 }
2288 for (i = ns; i < ne; i++) /* interior points first */
2289 {
2290
2291 /*-----------------------------------------------------------
2292 * If diagonal is nonzero, relax point i; otherwise, skip it.
2293 *-----------------------------------------------------------*/
2294
2295 if ( A_diag_data[A_diag_i[i]] != zero)
2296 {
2297 res = f_data[i];
2298 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2299 {
2300 ii = A_diag_j[jj];
2301 if (ii >= ns && ii < ne)
2302 {
2303 res -= A_diag_data[jj] * u_data[ii];
2304 }
2305 else
2306 res -= A_diag_data[jj] * tmp_data[ii];
2307 }
2308 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2309 {
2310 ii = A_offd_j[jj];
2311 res -= A_offd_data[jj] * Vext_data[ii];
2312 }
2313 u_data[i] += res / l1_norms[i];
2314 }
2315 }
2316 }
2317 }
2318 else
2319 {
2320 double c1 = omega*relax_weight;
2321 double c2 = omega*(1.0-relax_weight);
2322 /* tmp_data = hypre_CTAlloc(double,n); */
2323 tmp_data = Ztemp_data;
2324#define HYPRE_SMP_PRIVATE i
2325#include "../utilities/hypre_smp_forloop.h"
2326 for (i = 0; i < n; i++)
2327 {
2328 tmp_data[i] = u_data[i];
2329 }
2330#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
2331#include "../utilities/hypre_smp_forloop.h"
2332 for (j = 0; j < num_threads; j++)
2333 {
2334 size = n/num_threads;
2335 rest = n - size*num_threads;
2336 if (j < rest)
2337 {
2338 ns = j*size+j;
2339 ne = (j+1)*size+j+1;
2340 }
2341 else
2342 {
2343 ns = j*size+rest;
2344 ne = (j+1)*size+rest;
2345 }
2346 for (i = ns; i < ne; i++) /* interior points first */
2347 {
2348
2349 /*-----------------------------------------------------------
2350 * If diagonal is nonzero, relax point i; otherwise, skip it.
2351 *-----------------------------------------------------------*/
2352
2353 if ( A_diag_data[A_diag_i[i]] != zero)
2354 {
2355 res2 = 0.0;
2356 res = f_data[i];
2357 Vtemp_data[i] = u_data[i];
2358 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2359 {
2360 ii = A_diag_j[jj];
2361 if (ii >= ns && ii < ne)
2362 {
2363 res -= A_diag_data[jj] * u_data[ii];
2364 if (ii < i)
2365 res2 += A_diag_data[jj] * (Vtemp_data[ii] - u_data[ii]);
2366 }
2367 else
2368 res -= A_diag_data[jj] * tmp_data[ii];
2369 }
2370 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2371 {
2372 ii = A_offd_j[jj];
2373 res -= A_offd_data[jj] * Vext_data[ii];
2374 }
2375 u_data[i] += (c1*res + c2*res2) / l1_norms[i];
2376 }
2377 }
2378 }
2379 }
2380 if (num_procs > 1)
2381 {
2382 hypre_TFree(Vext_data);
2383 hypre_TFree(v_buf_data);
2384 }
2385
2386 return(relax_error);
2387}
2388
2389/*--------------------------------------------------------------------------
2390 * hypre_ParCSRRelax_L1_Jacobi (allows CF)
2391
2392
2393 u += w D^{-1}(f - A u), where D_ii = ||A(i,:)||_1
2394 *--------------------------------------------------------------------------*/
2395
2396int hypre_ParCSRRelax_L1_Jacobi( hypre_ParCSRMatrix *A,
2397 hypre_ParVector *f,
2398 int *cf_marker,
2399 int relax_points,
2400 double relax_weight,
2401 double *l1_norms,
2402 hypre_ParVector *u,
2403 hypre_ParVector *Vtemp )
2404
2405{
2406
2407
2408 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
2409 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
2410 double *A_diag_data = hypre_CSRMatrixData(A_diag);
2411 int *A_diag_i = hypre_CSRMatrixI(A_diag);
2412 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
2413 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
2414 int *A_offd_i = hypre_CSRMatrixI(A_offd);
2415 double *A_offd_data = hypre_CSRMatrixData(A_offd);
2416 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
2417 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
2418 hypre_ParCSRCommHandle *comm_handle;
2419
2420 int n = hypre_CSRMatrixNumRows(A_diag);
2421 int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
2422
2423 hypre_Vector *u_local = hypre_ParVectorLocalVector(u);
2424 double *u_data = hypre_VectorData(u_local);
2425
2426 hypre_Vector *f_local = hypre_ParVectorLocalVector(f);
2427 double *f_data = hypre_VectorData(f_local);
2428
2429 hypre_Vector *Vtemp_local = hypre_ParVectorLocalVector(Vtemp);
2430 double *Vtemp_data = hypre_VectorData(Vtemp_local);
2431 double *Vext_data;
2432 double *v_buf_data;
2433
2434 int i, j;
2435 int ii, jj;
2436 int num_sends;
2437 int index, start;
2438 int num_procs, my_id ;
2439
2440 double zero = 0.0;
2441 double res;
2442
2443
2444 MPI_Comm_size(comm,&num_procs);
2445 MPI_Comm_rank(comm,&my_id);
2446
2447 if (num_procs > 1)
2448 {
2449 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
2450
2451 v_buf_data = hypre_CTAlloc(double,
2452 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
2453
2454 Vext_data = hypre_CTAlloc(double,num_cols_offd);
2455
2456 if (num_cols_offd)
2457 {
2458 A_offd_j = hypre_CSRMatrixJ(A_offd);
2459 A_offd_data = hypre_CSRMatrixData(A_offd);
2460 }
2461
2462 index = 0;
2463 for (i = 0; i < num_sends; i++)
2464 {
2465 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
2466 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
2467 v_buf_data[index++]
2468 = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
2469 }
2470
2471 comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data,
2472 Vext_data);
2473 }
2474
2475 /*-----------------------------------------------------------------
2476 * Copy current approximation into temporary vector.
2477 *-----------------------------------------------------------------*/
2478
2479#define HYPRE_SMP_PRIVATE i
2480#include "../utilities/hypre_smp_forloop.h"
2481 for (i = 0; i < n; i++)
2482 {
2483 Vtemp_data[i] = u_data[i];
2484 }
2485
2486 if (num_procs > 1)
2487 {
2488 hypre_ParCSRCommHandleDestroy(comm_handle);
2489 comm_handle = NULL;
2490 }
2491
2492 /*-----------------------------------------------------------------
2493 * Relax all points.
2494 *-----------------------------------------------------------------*/
2495
2496 if (relax_points == 0 || cf_marker == NULL)
2497 {
2498#define HYPRE_SMP_PRIVATE i,ii,jj,res
2499#include "../utilities/hypre_smp_forloop.h"
2500 for (i = 0; i < n; i++)
2501 {
2502
2503 /*-----------------------------------------------------------
2504 * If diagonal is nonzero, relax point i; otherwise, skip it.
2505 *-----------------------------------------------------------*/
2506 if (A_diag_data[A_diag_i[i]] != zero)
2507 {
2508 res = f_data[i];
2509 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2510 {
2511 ii = A_diag_j[jj];
2512 res -= A_diag_data[jj] * Vtemp_data[ii];
2513 }
2514 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2515 {
2516 ii = A_offd_j[jj];
2517 res -= A_offd_data[jj] * Vext_data[ii];
2518 }
2519 u_data[i] += (relax_weight*res)/l1_norms[i];
2520 }
2521 }
2522 }
2523
2524 /*-----------------------------------------------------------------
2525 * Relax only C or F points as determined by relax_points.
2526 *-----------------------------------------------------------------*/
2527 else
2528 {
2529#define HYPRE_SMP_PRIVATE i,ii,jj,res
2530#include "../utilities/hypre_smp_forloop.h"
2531 for (i = 0; i < n; i++)
2532 {
2533
2534 /*-----------------------------------------------------------
2535 * If i is of the right type ( C or F ) and diagonal is
2536 * nonzero, relax point i; otherwise, skip it.
2537 *-----------------------------------------------------------*/
2538
2539 if (cf_marker[i] == relax_points
2540 && A_diag_data[A_diag_i[i]] != zero)
2541 {
2542 res = f_data[i];
2543 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2544 {
2545 ii = A_diag_j[jj];
2546 res -= A_diag_data[jj] * Vtemp_data[ii];
2547 }
2548 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2549 {
2550 ii = A_offd_j[jj];
2551 res -= A_offd_data[jj] * Vext_data[ii];
2552 }
2553 u_data[i] += (relax_weight * res)/l1_norms[i];
2554 }
2555 }
2556 }
2557 if (num_procs > 1)
2558 {
2559 hypre_TFree(Vext_data);
2560 hypre_TFree(v_buf_data);
2561 }
2562
2563 return 0;
2564
2565}
2566
2567
Note: See TracBrowser for help on using the repository browser.