source: CIVL/examples/mpi-omp/AMG2013/parcsr_ls/par_relax.c

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 109.6 KB
Line 
1/*BHEADER**********************************************************************
2 * Copyright (c) 2008, Lawrence Livermore National Security, LLC.
3 * Produced at the Lawrence Livermore National Laboratory.
4 * This file is part of HYPRE. See file COPYRIGHT for details.
5 *
6 * HYPRE is free software; you can redistribute it and/or modify it under the
7 * terms of the GNU Lesser General Public License (as published by the Free
8 * Software Foundation) version 2.1 dated February 1999.
9 *
10 * $Revision: 2.4 $
11 ***********************************************************************EHEADER*/
12
13
14
15
16/******************************************************************************
17 *
18 * Relaxation scheme
19 *
20 *****************************************************************************/
21
22#include "headers.h"
23
24
25/*--------------------------------------------------------------------------
26 * hypre_BoomerAMGRelax
27 *--------------------------------------------------------------------------*/
28
29int hypre_BoomerAMGRelax( hypre_ParCSRMatrix *A,
30 hypre_ParVector *f,
31 int *cf_marker,
32 int relax_type,
33 int relax_points,
34 double relax_weight,
35 double omega,
36 double *l1_norms,
37 hypre_ParVector *u,
38 hypre_ParVector *Vtemp,
39 hypre_ParVector *Ztemp )
40{
41 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
42 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
43 double *A_diag_data = hypre_CSRMatrixData(A_diag);
44 int *A_diag_i = hypre_CSRMatrixI(A_diag);
45 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
46 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
47 int *A_offd_i = hypre_CSRMatrixI(A_offd);
48 double *A_offd_data = hypre_CSRMatrixData(A_offd);
49 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
50 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
51 hypre_ParCSRCommHandle *comm_handle;
52
53 HYPRE_BigInt global_num_rows = hypre_ParCSRMatrixGlobalNumRows(A);
54 int n = hypre_CSRMatrixNumRows(A_diag);
55 int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
56 HYPRE_BigInt first_index = hypre_ParVectorFirstIndex(u);
57
58 hypre_Vector *u_local = hypre_ParVectorLocalVector(u);
59 double *u_data = hypre_VectorData(u_local);
60
61
62 hypre_Vector *f_local = hypre_ParVectorLocalVector(f);
63 double *f_data = hypre_VectorData(f_local);
64
65 hypre_Vector *Vtemp_local = hypre_ParVectorLocalVector(Vtemp);
66 double *Vtemp_data = hypre_VectorData(Vtemp_local);
67 double *Vext_data;
68 double *v_buf_data;
69 double *tmp_data;
70
71 hypre_Vector *Ztemp_local;
72 double *Ztemp_data;
73
74
75
76 hypre_CSRMatrix *A_CSR;
77 int *A_CSR_i;
78 int *A_CSR_j;
79 double *A_CSR_data;
80
81 hypre_Vector *f_vector;
82 double *f_vector_data;
83
84 int i, j, jr;
85 int ii, jj, i1;
86 int ns, ne, size, rest;
87 int column;
88 int relax_error = 0;
89 int num_sends;
90 int num_recvs;
91 int index, start;
92 int num_procs, num_threads, my_id, ip, p;
93 int vec_start, vec_len;
94 int n_coarse;
95 int n_start, n_end;
96 MPI_Status *status;
97 MPI_Request *requests;
98
99 double *A_mat;
100 double *b_vec;
101
102 double zero = 0.0;
103 double res, res0, res2;
104 double one_minus_weight;
105 double one_minus_omega;
106 double prod;
107
108 one_minus_weight = 1.0 - relax_weight;
109 one_minus_omega = 1.0 - omega;
110 MPI_Comm_size(comm,&num_procs);
111 MPI_Comm_rank(comm,&my_id);
112 num_threads = hypre_NumThreads();
113 n_start = 0;
114 n_end = n;
115 if (relax_points < -1)
116 {
117 n_start = -relax_points;
118 relax_points = 0;
119 }
120 if (relax_points > 1)
121 {
122 n_end = relax_points;
123 relax_points = 0;
124 }
125 /*-----------------------------------------------------------------------
126 * Switch statement to direct control based on relax_type:
127 * relax_type = 0 -> Jacobi or CF-Jacobi
128
129 * relax_type = 1 -> Gauss-Seidel <--- very slow, sequential
130 * relax_type = 2 -> Gauss_Seidel: interior points in parallel ,
131 * boundary sequential
132 * relax_type = 3 -> hybrid: SOR-J mix off-processor, SOR on-processor
133 * with outer relaxation parameters (forward solve)
134 * relax_type = 4 -> hybrid: SOR-J mix off-processor, SOR on-processor
135 * with outer relaxation parameters (backward solve)
136 * relax_type = 5 -> hybrid: GS-J mix off-processor, chaotic GS on-node
137 * relax_type = 6 -> hybrid: SSOR-J mix off-processor, SSOR on-processor
138 * with outer relaxation parameters
139 * relax_type = 7 -> Jacobi (uses Matvec), only needed in CGNR
140 * relax_type = 9 -> Direct Solve
141
142 * relax_type = 20 -> L1 hybrid GS (new one)
143 *-----------------------------------------------------------------------*/
144 switch (relax_type)
145 {
146 case 0: /* Weighted Jacobi */
147 {
148 if (num_procs > 1)
149 {
150 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
151
152 v_buf_data = hypre_CTAlloc(double,
153 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
154
155 Vext_data = hypre_CTAlloc(double,num_cols_offd);
156
157 if (num_cols_offd)
158 {
159 A_offd_j = hypre_CSRMatrixJ(A_offd);
160 A_offd_data = hypre_CSRMatrixData(A_offd);
161 }
162
163 index = 0;
164 for (i = 0; i < num_sends; i++)
165 {
166 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
167 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
168 v_buf_data[index++]
169 = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
170 }
171
172 comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data,
173 Vext_data);
174 }
175 /*-----------------------------------------------------------------
176 * Copy current approximation into temporary vector.
177 *-----------------------------------------------------------------*/
178
179#define HYPRE_SMP_PRIVATE i
180#include "../utilities/hypre_smp_forloop.h"
181 for (i = 0; i < n; i++)
182 {
183 Vtemp_data[i] = u_data[i];
184 }
185 if (num_procs > 1)
186 {
187 hypre_ParCSRCommHandleDestroy(comm_handle);
188 comm_handle = NULL;
189 }
190
191 /*-----------------------------------------------------------------
192 * Relax all points.
193 *-----------------------------------------------------------------*/
194
195 if (relax_points == 0)
196 {
197#define HYPRE_SMP_PRIVATE i,ii,jj,res
198#include "../utilities/hypre_smp_forloop.h"
199 for (i = 0; i < n; i++)
200 {
201
202 /*-----------------------------------------------------------
203 * If diagonal is nonzero, relax point i; otherwise, skip it.
204 *-----------------------------------------------------------*/
205
206 if (A_diag_data[A_diag_i[i]] != zero)
207 {
208 res = f_data[i];
209 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
210 {
211 ii = A_diag_j[jj];
212 res -= A_diag_data[jj] * Vtemp_data[ii];
213 }
214 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
215 {
216 ii = A_offd_j[jj];
217 res -= A_offd_data[jj] * Vext_data[ii];
218 }
219 u_data[i] *= one_minus_weight;
220 u_data[i] += relax_weight * res / A_diag_data[A_diag_i[i]];
221 }
222 }
223 }
224
225 /*-----------------------------------------------------------------
226 * Relax only C or F points as determined by relax_points.
227 *-----------------------------------------------------------------*/
228
229 else
230 {
231#define HYPRE_SMP_PRIVATE i,ii,jj,res
232#include "../utilities/hypre_smp_forloop.h"
233 for (i = 0; i < n; i++)
234 {
235
236 /*-----------------------------------------------------------
237 * If i is of the right type ( C or F ) and diagonal is
238 * nonzero, relax point i; otherwise, skip it.
239 *-----------------------------------------------------------*/
240
241 if (cf_marker[i] == relax_points
242 && A_diag_data[A_diag_i[i]] != zero)
243 {
244 res = f_data[i];
245 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
246 {
247 ii = A_diag_j[jj];
248 res -= A_diag_data[jj] * Vtemp_data[ii];
249 }
250 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
251 {
252 ii = A_offd_j[jj];
253 res -= A_offd_data[jj] * Vext_data[ii];
254 }
255 u_data[i] *= one_minus_weight;
256 u_data[i] += relax_weight * res / A_diag_data[A_diag_i[i]];
257 }
258 }
259 }
260 if (num_procs > 1)
261 {
262 hypre_TFree(Vext_data);
263 hypre_TFree(v_buf_data);
264 }
265 }
266 break;
267
268 case 20: /* hybrid L1 Symm. Gauss-Seidel */
269 {
270
271 if (num_threads > 1)
272 {
273 Ztemp_local = hypre_ParVectorLocalVector(Ztemp);
274 Ztemp_data = hypre_VectorData(Ztemp_local);
275 }
276
277 /*-----------------------------------------------------------------
278 * Copy current approximation into temporary vector.
279 *-----------------------------------------------------------------*/
280 if (num_procs > 1)
281 {
282 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
283
284 v_buf_data = hypre_CTAlloc(double,
285 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
286
287 Vext_data = hypre_CTAlloc(double,num_cols_offd);
288
289 if (num_cols_offd)
290 {
291 A_offd_j = hypre_CSRMatrixJ(A_offd);
292 A_offd_data = hypre_CSRMatrixData(A_offd);
293 }
294
295 index = 0;
296 for (i = 0; i < num_sends; i++)
297 {
298 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
299 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++)
300 v_buf_data[index++]
301 = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
302 }
303
304 comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data,
305 Vext_data);
306
307 /*-----------------------------------------------------------------
308 * Copy current approximation into temporary vector.
309 *-----------------------------------------------------------------*/
310 hypre_ParCSRCommHandleDestroy(comm_handle);
311 comm_handle = NULL;
312 }
313
314 /*-----------------------------------------------------------------
315 * Relax all points.
316 *-----------------------------------------------------------------*/
317
318 if (relax_weight == 1 && omega == 1)
319 {
320 if (relax_points == 0)
321 {
322 if (num_threads > 1)
323 {
324 tmp_data = Ztemp_data;
325#define HYPRE_SMP_PRIVATE i
326#include "../utilities/hypre_smp_forloop.h"
327 for (i = 0; i < n; i++)
328 tmp_data[i] = u_data[i];
329#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
330#include "../utilities/hypre_smp_forloop.h"
331 for (j = 0; j < num_threads; j++)
332 {
333 size = n/num_threads;
334 rest = n - size*num_threads;
335 if (j < rest)
336 {
337 ns = j*size+j;
338 ne = (j+1)*size+j+1;
339 }
340 else
341 {
342 ns = j*size+rest;
343 ne = (j+1)*size+rest;
344 }
345 for (i = ns; i < ne; i++) /* interior points first */
346 {
347
348 /*-----------------------------------------------------------
349 * If diagonal is nonzero, relax point i; otherwise, skip it.
350 *-----------------------------------------------------------*/
351
352 if ( l1_norms[i] != zero)
353 {
354 res = f_data[i];
355 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
356 {
357 ii = A_diag_j[jj];
358 if (ii >= ns && ii < ne)
359 {
360 res -= A_diag_data[jj] * u_data[ii];
361 }
362 else
363 res -= A_diag_data[jj] * tmp_data[ii];
364 }
365 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
366 {
367 ii = A_offd_j[jj];
368 res -= A_offd_data[jj] * Vext_data[ii];
369 }
370 u_data[i] += res / l1_norms[i];
371 }
372 }
373 for (i = ne-1; i > ns-1; i--) /* interior points first */
374 {
375
376 /*-----------------------------------------------------------
377 * If diagonal is nonzero, relax point i; otherwise, skip it.
378 *-----------------------------------------------------------*/
379
380 if ( l1_norms[i] != zero)
381 {
382 res = f_data[i];
383 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
384 {
385 ii = A_diag_j[jj];
386 if (ii >= ns && ii < ne)
387 {
388 res -= A_diag_data[jj] * u_data[ii];
389 }
390 else
391 res -= A_diag_data[jj] * tmp_data[ii];
392 }
393 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
394 {
395 ii = A_offd_j[jj];
396 res -= A_offd_data[jj] * Vext_data[ii];
397 }
398 u_data[i] += res / l1_norms[i];
399 }
400 }
401 }
402
403 }
404 else
405 {
406 for (i = 0; i < n; i++) /* interior points first */
407 {
408
409 /*-----------------------------------------------------------
410 * If diagonal is nonzero, relax point i; otherwise, skip it.
411 *-----------------------------------------------------------*/
412
413 if ( l1_norms[i] != zero)
414 {
415 res = f_data[i];
416 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
417 {
418 ii = A_diag_j[jj];
419 res -= A_diag_data[jj] * u_data[ii];
420 }
421 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
422 {
423 ii = A_offd_j[jj];
424 res -= A_offd_data[jj] * Vext_data[ii];
425 }
426 u_data[i] += res / l1_norms[i];
427 }
428 }
429 for (i = n-1; i > -1; i--) /* interior points first */
430 {
431
432 /*-----------------------------------------------------------
433 * If diagonal is nonzero, relax point i; otherwise, skip it.
434 *-----------------------------------------------------------*/
435
436 if ( l1_norms[i] != zero)
437 {
438 res = f_data[i];
439 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
440 {
441 ii = A_diag_j[jj];
442 res -= A_diag_data[jj] * u_data[ii];
443 }
444 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
445 {
446 ii = A_offd_j[jj];
447 res -= A_offd_data[jj] * Vext_data[ii];
448 }
449 u_data[i] += res / l1_norms[i];
450 }
451 }
452 }
453 }
454
455 /*-----------------------------------------------------------------
456 * Relax only C or F points as determined by relax_points.
457 *-----------------------------------------------------------------*/
458
459 else
460 {
461 if (num_threads > 1)
462 {
463 tmp_data = Ztemp_data;
464#define HYPRE_SMP_PRIVATE i
465#include "../utilities/hypre_smp_forloop.h"
466 for (i = 0; i < n; i++)
467 tmp_data[i] = u_data[i];
468#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
469#include "../utilities/hypre_smp_forloop.h"
470 for (j = 0; j < num_threads; j++)
471 {
472 size = n/num_threads;
473 rest = n - size*num_threads;
474 if (j < rest)
475 {
476 ns = j*size+j;
477 ne = (j+1)*size+j+1;
478 }
479 else
480 {
481 ns = j*size+rest;
482 ne = (j+1)*size+rest;
483 }
484 for (i = ns; i < ne; i++) /* relax interior points */
485 {
486
487 /*-----------------------------------------------------------
488 * If i is of the right type ( C or F ) and diagonal is
489 * nonzero, relax point i; otherwise, skip it.
490 *-----------------------------------------------------------*/
491
492 if (cf_marker[i] == relax_points
493 && l1_norms[i] != zero)
494 {
495 res = f_data[i];
496 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
497 {
498 ii = A_diag_j[jj];
499 if (ii >= ns && ii < ne)
500 {
501 res -= A_diag_data[jj] * u_data[ii];
502 }
503 else
504 res -= A_diag_data[jj] * tmp_data[ii];
505 }
506 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
507 {
508 ii = A_offd_j[jj];
509 res -= A_offd_data[jj] * Vext_data[ii];
510 }
511 u_data[i] += res / l1_norms[i];
512 }
513 }
514 for (i = ne-1; i > ns-1; i--) /* relax interior points */
515 {
516
517 /*-----------------------------------------------------------
518 * If i is of the right type ( C or F ) and diagonal is
519 * nonzero, relax point i; otherwise, skip it.
520 *-----------------------------------------------------------*/
521
522 if (cf_marker[i] == relax_points
523 && l1_norms[i] != zero)
524 {
525 res = f_data[i];
526 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
527 {
528 ii = A_diag_j[jj];
529 if (ii >= ns && ii < ne)
530 {
531 res -= A_diag_data[jj] * u_data[ii];
532 }
533 else
534 res -= A_diag_data[jj] * tmp_data[ii];
535 }
536 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
537 {
538 ii = A_offd_j[jj];
539 res -= A_offd_data[jj] * Vext_data[ii];
540 }
541 u_data[i] += res / l1_norms[i];
542 }
543 }
544 }
545
546 }
547 else
548 {
549 for (i = 0; i < n; i++) /* relax interior points */
550 {
551
552 /*-----------------------------------------------------------
553 * If i is of the right type ( C or F ) and diagonal is
554
555 * nonzero, relax point i; otherwise, skip it.
556 *-----------------------------------------------------------*/
557
558 if (cf_marker[i] == relax_points
559 && l1_norms[i] != zero)
560 {
561 res = f_data[i];
562 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
563 {
564 ii = A_diag_j[jj];
565 res -= A_diag_data[jj] * u_data[ii];
566 }
567 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
568 {
569 ii = A_offd_j[jj];
570 res -= A_offd_data[jj] * Vext_data[ii];
571 }
572 u_data[i] += res / l1_norms[i];
573 }
574 }
575 for (i = n-1; i > -1; i--) /* relax interior points */
576 {
577
578 /*-----------------------------------------------------------
579 * If i is of the right type ( C or F ) and diagonal is
580
581 * nonzero, relax point i; otherwise, skip it.
582 *-----------------------------------------------------------*/
583
584 if (cf_marker[i] == relax_points
585 && l1_norms[i] != zero)
586 {
587 res = f_data[i];
588 for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
589 {
590 ii = A_diag_j[jj];
591 res -= A_diag_data[jj] * u_data[ii];
592 }
593 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
594 {
595 ii = A_offd_j[jj];
596 res -= A_offd_data[jj] * Vext_data[ii];
597 }
598 u_data[i] += res / l1_norms[i];
599 }
600 }
601 }
602 }
603 }
604 else
605 {
606#define HYPRE_SMP_PRIVATE i
607#include "../utilities/hypre_smp_forloop.h"
608 for (i = 0; i < n; i++)
609 {
610 Vtemp_data[i] = u_data[i];
611 }
612 prod = (1.0-relax_weight*omega);
613 if (relax_points == 0)
614 {
615 if (num_threads > 1)
616 {
617 tmp_data = Ztemp_data;
618#define HYPRE_SMP_PRIVATE i
619#include "../utilities/hypre_smp_forloop.h"
620 for (i = 0; i < n; i++)
621 tmp_data[i] = u_data[i];
622#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
623#include "../utilities/hypre_smp_forloop.h"
624 for (j = 0; j < num_threads; j++)
625 {
626 size = n/num_threads;
627 rest = n - size*num_threads;
628 if (j < rest)
629 {
630 ns = j*size+j;
631 ne = (j+1)*size+j+1;
632 }
633 else
634 {
635 ns = j*size+rest;
636 ne = (j+1)*size+rest;
637 }
638 for (i = ns; i < ne; i++) /* interior points first */
639 {
640
641 /*-----------------------------------------------------------
642 * If diagonal is nonzero, relax point i; otherwise, skip it.
643 *-----------------------------------------------------------*/
644
645 if ( l1_norms[i] != zero)
646 {
647 res0 = 0.0;
648 res2 = 0.0;
649 res = f_data[i];
650 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
651 {
652 ii = A_diag_j[jj];
653 if (ii >= ns && ii < ne)
654 {
655 res0 -= A_diag_data[jj] * u_data[ii];
656 res2 += A_diag_data[jj] * Vtemp_data[ii];
657 }
658 else
659 res -= A_diag_data[jj] * tmp_data[ii];
660 }
661 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
662 {
663 ii = A_offd_j[jj];
664 res -= A_offd_data[jj] * Vext_data[ii];
665 }
666 u_data[i] *= prod;
667 u_data[i] += relax_weight*(omega*res + res0 +
668 one_minus_omega*res2) / l1_norms[i];
669 /*u_data[i] += omega*(relax_weight*res + res0 +
670 one_minus_weight*res2) / l1_norms[i];*/
671 }
672 }
673 for (i = ne-1; i > ns-1; i--) /* interior points first */
674 {
675
676 /*-----------------------------------------------------------
677 * If diagonal is nonzero, relax point i; otherwise, skip it.
678 *-----------------------------------------------------------*/
679
680 if ( l1_norms[i] != zero)
681 {
682 res0 = 0.0;
683 res2 = 0.0;
684 res = f_data[i];
685 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
686 {
687 ii = A_diag_j[jj];
688 if (ii >= ns && ii < ne)
689 {
690 res0 -= A_diag_data[jj] * u_data[ii];
691 res2 += A_diag_data[jj] * Vtemp_data[ii];
692 }
693 else
694 res -= A_diag_data[jj] * tmp_data[ii];
695 }
696 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
697 {
698 ii = A_offd_j[jj];
699 res -= A_offd_data[jj] * Vext_data[ii];
700 }
701 u_data[i] *= prod;
702 u_data[i] += relax_weight*(omega*res + res0 +
703 one_minus_omega*res2) / l1_norms[i];
704 /*u_data[i] += omega*(relax_weight*res + res0 +
705 one_minus_weight*res2) / l1_norms[i];*/
706 }
707 }
708 }
709
710 }
711 else
712 {
713 for (i = 0; i < n; i++) /* interior points first */
714 {
715
716 /*-----------------------------------------------------------
717 * If diagonal is nonzero, relax point i; otherwise, skip it.
718 *-----------------------------------------------------------*/
719
720 if ( l1_norms[i] != zero)
721 {
722 res0 = 0.0;
723 res = f_data[i];
724 res2 = 0.0;
725 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
726 {
727 ii = A_diag_j[jj];
728 res0 -= A_diag_data[jj] * u_data[ii];
729 res2 += A_diag_data[jj] * Vtemp_data[ii];
730 }
731 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
732 {
733 ii = A_offd_j[jj];
734 res -= A_offd_data[jj] * Vext_data[ii];
735 }
736 u_data[i] *= prod;
737 u_data[i] += relax_weight*(omega*res + res0 +
738 one_minus_omega*res2) / l1_norms[i];
739 /*u_data[i] += omega*(relax_weight*res + res0 +
740 one_minus_weight*res2) / l1_norms[i];*/
741 }
742 }
743 for (i = n-1; i > -1; i--) /* interior points first */
744 {
745
746 /*-----------------------------------------------------------
747 * If diagonal is nonzero, relax point i; otherwise, skip it.
748 *-----------------------------------------------------------*/
749
750 if ( l1_norms[i] != zero)
751 {
752 res0 = 0.0;
753 res = f_data[i];
754 res2 = 0.0;
755 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
756 {
757 ii = A_diag_j[jj];
758 res0 -= A_diag_data[jj] * u_data[ii];
759 res2 += A_diag_data[jj] * Vtemp_data[ii];
760 }
761 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
762 {
763 ii = A_offd_j[jj];
764 res -= A_offd_data[jj] * Vext_data[ii];
765 }
766 u_data[i] *= prod;
767 u_data[i] += relax_weight*(omega*res + res0 +
768 one_minus_omega*res2) / l1_norms[i];
769 /*u_data[i] += omega*(relax_weight*res + res0 +
770 one_minus_weight*res2) / l1_norms[i];*/
771 }
772 }
773 }
774 }
775
776 /*-----------------------------------------------------------------
777 * Relax only C or F points as determined by relax_points.
778 *-----------------------------------------------------------------*/
779
780 else
781 {
782 if (num_threads > 1)
783 {
784 tmp_data = Ztemp_data;
785#define HYPRE_SMP_PRIVATE i
786#include "../utilities/hypre_smp_forloop.h"
787 for (i = 0; i < n; i++)
788 tmp_data[i] = u_data[i];
789#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
790#include "../utilities/hypre_smp_forloop.h"
791 for (j = 0; j < num_threads; j++)
792 {
793 size = n/num_threads;
794 rest = n - size*num_threads;
795 if (j < rest)
796 {
797 ns = j*size+j;
798 ne = (j+1)*size+j+1;
799 }
800 else
801 {
802 ns = j*size+rest;
803 ne = (j+1)*size+rest;
804 }
805 for (i = ns; i < ne; i++) /* relax interior points */
806 {
807
808 /*-----------------------------------------------------------
809 * If i is of the right type ( C or F ) and diagonal is
810 * nonzero, relax point i; otherwise, skip it.
811 *-----------------------------------------------------------*/
812
813 if (cf_marker[i] == relax_points
814 && l1_norms[i] != zero)
815 {
816 res0 = 0.0;
817 res2 = 0.0;
818 res = f_data[i];
819 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
820 {
821 ii = A_diag_j[jj];
822 if (ii >= ns && ii < ne)
823 {
824 res2 += A_diag_data[jj] * Vtemp_data[ii];
825 res0 -= A_diag_data[jj] * u_data[ii];
826 }
827 else
828 res -= A_diag_data[jj] * tmp_data[ii];
829 }
830 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
831 {
832 ii = A_offd_j[jj];
833 res -= A_offd_data[jj] * Vext_data[ii];
834 }
835 u_data[i] *= prod;
836 u_data[i] += relax_weight*(omega*res + res0 +
837 one_minus_omega*res2) / l1_norms[i];
838 /*u_data[i] += omega*(relax_weight*res + res0 +
839 one_minus_weight*res2) / l1_norms[i];*/
840 }
841 }
842 for (i = ne-1; i > ns-1; i--) /* relax interior points */
843 {
844
845 /*-----------------------------------------------------------
846 * If i is of the right type ( C or F ) and diagonal is
847 * nonzero, relax point i; otherwise, skip it.
848 *-----------------------------------------------------------*/
849
850 if (cf_marker[i] == relax_points
851 && l1_norms[i] != zero)
852 {
853 res0 = 0.0;
854 res2 = 0.0;
855 res = f_data[i];
856 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
857 {
858 ii = A_diag_j[jj];
859 if (ii >= ns && ii < ne)
860 {
861 res2 += A_diag_data[jj] * Vtemp_data[ii];
862 res0 -= A_diag_data[jj] * u_data[ii];
863 }
864 else
865 res -= A_diag_data[jj] * tmp_data[ii];
866 }
867 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
868 {
869 ii = A_offd_j[jj];
870 res -= A_offd_data[jj] * Vext_data[ii];
871 }
872 u_data[i] *= prod;
873 u_data[i] += relax_weight*(omega*res + res0 +
874 one_minus_omega*res2) / l1_norms[i];
875 /*u_data[i] += omega*(relax_weight*res + res0 +
876 one_minus_weight*res2) / l1_norms[i];*/
877 }
878 }
879 }
880
881 }
882 else
883 {
884 for (i = 0; i < n; i++) /* relax interior points */
885 {
886
887 /*-----------------------------------------------------------
888 * If i is of the right type ( C or F ) and diagonal is
889
890 * nonzero, relax point i; otherwise, skip it.
891 *-----------------------------------------------------------*/
892
893 if (cf_marker[i] == relax_points
894 && l1_norms[i] != zero)
895 {
896 res = f_data[i];
897 res0 = 0.0;
898 res2 = 0.0;
899 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
900 {
901 ii = A_diag_j[jj];
902 res0 -= A_diag_data[jj] * u_data[ii];
903 res2 += A_diag_data[jj] * Vtemp_data[ii];
904 }
905 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
906 {
907 ii = A_offd_j[jj];
908 res -= A_offd_data[jj] * Vext_data[ii];
909 }
910 u_data[i] *= prod;
911 u_data[i] += relax_weight*(omega*res + res0 +
912 one_minus_omega*res2) / l1_norms[i];
913 /*u_data[i] += omega*(relax_weight*res + res0 +
914 one_minus_weight*res2) / l1_norms[i];*/
915 }
916 }
917 for (i = n-1; i > -1; i--) /* relax interior points */
918 {
919
920 /*-----------------------------------------------------------
921 * If i is of the right type ( C or F ) and diagonal is
922
923 * nonzero, relax point i; otherwise, skip it.
924 *-----------------------------------------------------------*/
925
926 if (cf_marker[i] == relax_points
927 && l1_norms[i] != zero)
928 {
929 res = f_data[i];
930 res0 = 0.0;
931 res2 = 0.0;
932 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
933 {
934 ii = A_diag_j[jj];
935 res0 -= A_diag_data[jj] * u_data[ii];
936 res2 += A_diag_data[jj] * Vtemp_data[ii];
937 }
938 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
939 {
940 ii = A_offd_j[jj];
941 res -= A_offd_data[jj] * Vext_data[ii];
942 }
943 u_data[i] *= prod;
944 u_data[i] += relax_weight*(omega*res + res0 +
945 one_minus_omega*res2) / l1_norms[i];
946 /*u_data[i] += omega*(relax_weight*res + res0 +
947 one_minus_weight*res2) / l1_norms[i];*/
948 }
949 }
950 }
951 }
952 }
953 if (num_procs > 1)
954 {
955 hypre_TFree(Vext_data);
956 hypre_TFree(v_buf_data);
957 }
958 }
959 break;
960
961
962
963
964
965 case 5: /* Hybrid: Jacobi off-processor,
966 chaotic Gauss-Seidel on-processor */
967 {
968 if (num_procs > 1)
969 {
970 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
971
972 v_buf_data = hypre_CTAlloc(double,
973 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
974
975 Vext_data = hypre_CTAlloc(double,num_cols_offd);
976
977 if (num_cols_offd)
978 {
979 A_offd_j = hypre_CSRMatrixJ(A_offd);
980 A_offd_data = hypre_CSRMatrixData(A_offd);
981 }
982
983 index = 0;
984 for (i = 0; i < num_sends; i++)
985 {
986 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
987 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++)
988 v_buf_data[index++]
989 = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
990 }
991
992 comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data,
993 Vext_data);
994
995 /*-----------------------------------------------------------------
996 * Copy current approximation into temporary vector.
997 *-----------------------------------------------------------------*/
998 hypre_ParCSRCommHandleDestroy(comm_handle);
999 comm_handle = NULL;
1000 }
1001
1002 /*-----------------------------------------------------------------
1003 * Relax all points.
1004 *-----------------------------------------------------------------*/
1005
1006 if (relax_points == 0)
1007 {
1008#define HYPRE_SMP_PRIVATE i,ii,jj,res
1009#include "../utilities/hypre_smp_forloop.h"
1010 for (i = 0; i < n; i++) /* interior points first */
1011 {
1012
1013 /*-----------------------------------------------------------
1014 * If diagonal is nonzero, relax point i; otherwise, skip it.
1015 *-----------------------------------------------------------*/
1016
1017 if ( A_diag_data[A_diag_i[i]] != zero)
1018 {
1019 res = f_data[i];
1020 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1021 {
1022 ii = A_diag_j[jj];
1023 res -= A_diag_data[jj] * u_data[ii];
1024 }
1025 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1026 {
1027 ii = A_offd_j[jj];
1028 res -= A_offd_data[jj] * Vext_data[ii];
1029 }
1030 u_data[i] = res / A_diag_data[A_diag_i[i]];
1031 }
1032 }
1033 }
1034
1035 /*-----------------------------------------------------------------
1036 * Relax only C or F points as determined by relax_points.
1037 *-----------------------------------------------------------------*/
1038 else if (relax_points > 1)
1039 {
1040 n_coarse = relax_points;
1041#define HYPRE_SMP_PRIVATE i,ii,jj,i1,res
1042#include "../utilities/hypre_smp_forloop.h"
1043 for (i1 = 0; i1 < n_coarse; i1++)
1044 {
1045
1046 /*-----------------------------------------------------------
1047 * If i is of the right type ( C or F ) and diagonal is
1048 * nonzero, relax point i; otherwise, skip it.
1049 *-----------------------------------------------------------*/
1050
1051 i = cf_marker[i1];
1052 if (A_diag_data[A_diag_i[i]] != zero)
1053 {
1054 res = f_data[i];
1055 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1056 {
1057 ii = A_diag_j[jj];
1058 res -= A_diag_data[jj] * Vtemp_data[ii];
1059 }
1060 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1061 {
1062 ii = A_offd_j[jj];
1063 res -= A_offd_data[jj] * Vext_data[ii];
1064 }
1065 u_data[i] *= one_minus_weight;
1066 u_data[i] += relax_weight * res / A_diag_data[A_diag_i[i]];
1067 }
1068 }
1069 }
1070 else
1071 {
1072#define HYPRE_SMP_PRIVATE i,ii,jj,res
1073#include "../utilities/hypre_smp_forloop.h"
1074 for (i = 0; i < n; i++) /* relax interior points */
1075 {
1076
1077 /*-----------------------------------------------------------
1078 * If i is of the right type ( C or F ) and diagonal is
1079 * nonzero, relax point i; otherwise, skip it.
1080 *-----------------------------------------------------------*/
1081
1082 if (cf_marker[i] == relax_points
1083 && A_diag_data[A_diag_i[i]] != zero)
1084 {
1085 res = f_data[i];
1086 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1087 {
1088 ii = A_diag_j[jj];
1089 res -= A_diag_data[jj] * u_data[ii];
1090 }
1091 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1092 {
1093 ii = A_offd_j[jj];
1094 res -= A_offd_data[jj] * Vext_data[ii];
1095 }
1096 u_data[i] = res / A_diag_data[A_diag_i[i]];
1097 }
1098 }
1099 }
1100 if (num_procs > 1)
1101 {
1102 hypre_TFree(Vext_data);
1103 hypre_TFree(v_buf_data);
1104 }
1105 }
1106 break;
1107
1108 case 3: /* Hybrid: Jacobi off-processor,
1109 Gauss-Seidel on-processor
1110 (forward loop) */
1111 {
1112
1113 Ztemp_local = hypre_ParVectorLocalVector(Ztemp);
1114 Ztemp_data = hypre_VectorData(Ztemp_local);
1115
1116
1117 if (num_procs > 1)
1118 {
1119 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1120
1121 v_buf_data = hypre_CTAlloc(double,
1122 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
1123
1124 Vext_data = hypre_CTAlloc(double,num_cols_offd);
1125
1126 if (num_cols_offd)
1127 {
1128 A_offd_j = hypre_CSRMatrixJ(A_offd);
1129 A_offd_data = hypre_CSRMatrixData(A_offd);
1130 }
1131
1132 index = 0;
1133 for (i = 0; i < num_sends; i++)
1134 {
1135 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1136 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++)
1137 v_buf_data[index++]
1138 = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1139 }
1140
1141 comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data,
1142 Vext_data);
1143
1144 /*-----------------------------------------------------------------
1145 * Copy current approximation into temporary vector.
1146 *-----------------------------------------------------------------*/
1147 hypre_ParCSRCommHandleDestroy(comm_handle);
1148 comm_handle = NULL;
1149 }
1150
1151 /*-----------------------------------------------------------------
1152 * Relax all points.
1153 *-----------------------------------------------------------------*/
1154
1155 if (relax_weight == 1 && omega == 1)
1156 {
1157 if (relax_points == 0)
1158 {
1159 if (num_threads > 1)
1160 {
1161 /* tmp_data = hypre_CTAlloc(double,n); */
1162 tmp_data = Ztemp_data;
1163
1164#define HYPRE_SMP_PRIVATE i
1165#include "../utilities/hypre_smp_forloop.h"
1166 for (i = 0; i < n; i++)
1167 tmp_data[i] = u_data[i];
1168#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
1169#include "../utilities/hypre_smp_forloop.h"
1170 for (j = 0; j < num_threads; j++)
1171 {
1172 size = n/num_threads;
1173 rest = n - size*num_threads;
1174 if (j < rest)
1175 {
1176 ns = j*size+j;
1177 ne = (j+1)*size+j+1;
1178 }
1179 else
1180 {
1181 ns = j*size+rest;
1182 ne = (j+1)*size+rest;
1183 }
1184 for (i = ns; i < ne; i++) /* interior points first */
1185 {
1186
1187 /*-----------------------------------------------------------
1188 * If diagonal is nonzero, relax point i; otherwise, skip it.
1189 *-----------------------------------------------------------*/
1190
1191 if ( A_diag_data[A_diag_i[i]] != zero)
1192 {
1193 res = f_data[i];
1194 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1195 {
1196 ii = A_diag_j[jj];
1197 if (ii >= ns && ii < ne)
1198 res -= A_diag_data[jj] * u_data[ii];
1199 else
1200 res -= A_diag_data[jj] * tmp_data[ii];
1201 }
1202 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1203 {
1204 ii = A_offd_j[jj];
1205 res -= A_offd_data[jj] * Vext_data[ii];
1206 }
1207 u_data[i] = res / A_diag_data[A_diag_i[i]];
1208 }
1209 }
1210 }
1211 /* hypre_TFree(tmp_data);*/
1212 }
1213 else
1214 {
1215 for (i = 0; i < n; i++) /* interior points first */
1216 {
1217
1218 /*-----------------------------------------------------------
1219 * If diagonal is nonzero, relax point i; otherwise, skip it.
1220 *-----------------------------------------------------------*/
1221
1222 if ( A_diag_data[A_diag_i[i]] != zero)
1223 {
1224 res = f_data[i];
1225 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1226 {
1227 ii = A_diag_j[jj];
1228 res -= A_diag_data[jj] * u_data[ii];
1229 }
1230 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1231 {
1232 ii = A_offd_j[jj];
1233 res -= A_offd_data[jj] * Vext_data[ii];
1234 }
1235 u_data[i] = res / A_diag_data[A_diag_i[i]];
1236 }
1237 }
1238 }
1239 }
1240
1241 /*-----------------------------------------------------------------
1242 * Relax only C or F points as determined by relax_points.
1243 *-----------------------------------------------------------------*/
1244
1245 else
1246 {
1247 if (num_threads > 1)
1248 {
1249
1250 /* tmp_data = hypre_CTAlloc(double,n); */
1251 tmp_data = Ztemp_data;
1252
1253#define HYPRE_SMP_PRIVATE i
1254#include "../utilities/hypre_smp_forloop.h"
1255 for (i = 0; i < n; i++)
1256 tmp_data[i] = u_data[i];
1257#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
1258#include "../utilities/hypre_smp_forloop.h"
1259 for (j = 0; j < num_threads; j++)
1260 {
1261 size = n/num_threads;
1262 rest = n - size*num_threads;
1263 if (j < rest)
1264 {
1265 ns = j*size+j;
1266 ne = (j+1)*size+j+1;
1267 }
1268 else
1269 {
1270 ns = j*size+rest;
1271 ne = (j+1)*size+rest;
1272 }
1273 for (i = ns; i < ne; i++) /* relax interior points */
1274 {
1275
1276 /*-----------------------------------------------------------
1277 * If i is of the right type ( C or F ) and diagonal is
1278 * nonzero, relax point i; otherwise, skip it.
1279 *-----------------------------------------------------------*/
1280
1281 if (cf_marker[i] == relax_points
1282 && A_diag_data[A_diag_i[i]] != zero)
1283 {
1284 res = f_data[i];
1285 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1286 {
1287 ii = A_diag_j[jj];
1288 if (ii >= ns && ii < ne)
1289 res -= A_diag_data[jj] * u_data[ii];
1290 else
1291 res -= A_diag_data[jj] * tmp_data[ii];
1292 }
1293 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1294 {
1295 ii = A_offd_j[jj];
1296 res -= A_offd_data[jj] * Vext_data[ii];
1297 }
1298 u_data[i] = res / A_diag_data[A_diag_i[i]];
1299 }
1300 }
1301 }
1302 /* hypre_TFree(tmp_data); */
1303 }
1304 else
1305 {
1306 for (i = 0; i < n; i++) /* relax interior points */
1307 {
1308
1309 /*-----------------------------------------------------------
1310 * If i is of the right type ( C or F ) and diagonal is
1311
1312 * nonzero, relax point i; otherwise, skip it.
1313 *-----------------------------------------------------------*/
1314
1315 if (cf_marker[i] == relax_points
1316 && A_diag_data[A_diag_i[i]] != zero)
1317 {
1318 res = f_data[i];
1319 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1320 {
1321 ii = A_diag_j[jj];
1322 res -= A_diag_data[jj] * u_data[ii];
1323 }
1324 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1325 {
1326 ii = A_offd_j[jj];
1327 res -= A_offd_data[jj] * Vext_data[ii];
1328 }
1329 u_data[i] = res / A_diag_data[A_diag_i[i]];
1330 }
1331 }
1332 }
1333 }
1334 }
1335 else
1336 {
1337#define HYPRE_SMP_PRIVATE i
1338#include "../utilities/hypre_smp_forloop.h"
1339 for (i = 0; i < n; i++)
1340 {
1341 Vtemp_data[i] = u_data[i];
1342 }
1343 prod = (1.0-relax_weight*omega);
1344 if (relax_points == 0)
1345 {
1346 if (num_threads > 1)
1347 {
1348 /* tmp_data = hypre_CTAlloc(double,n); */
1349 tmp_data = Ztemp_data;
1350
1351
1352#define HYPRE_SMP_PRIVATE i
1353#include "../utilities/hypre_smp_forloop.h"
1354 for (i = 0; i < n; i++)
1355 tmp_data[i] = u_data[i];
1356#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
1357#include "../utilities/hypre_smp_forloop.h"
1358 for (j = 0; j < num_threads; j++)
1359 {
1360 size = n/num_threads;
1361 rest = n - size*num_threads;
1362 if (j < rest)
1363 {
1364 ns = j*size+j;
1365 ne = (j+1)*size+j+1;
1366 }
1367 else
1368 {
1369 ns = j*size+rest;
1370 ne = (j+1)*size+rest;
1371 }
1372 for (i = ns; i < ne; i++) /* interior points first */
1373 {
1374
1375 /*-----------------------------------------------------------
1376 * If diagonal is nonzero, relax point i; otherwise, skip it.
1377 *-----------------------------------------------------------*/
1378
1379 if ( A_diag_data[A_diag_i[i]] != zero)
1380 {
1381 res = f_data[i];
1382 res0 = 0.0;
1383 res2 = 0.0;
1384 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1385 {
1386 ii = A_diag_j[jj];
1387 if (ii >= ns && ii < ne)
1388 {
1389 res0 -= A_diag_data[jj] * u_data[ii];
1390 res2 += A_diag_data[jj] * Vtemp_data[ii];
1391 }
1392 else
1393 res -= A_diag_data[jj] * tmp_data[ii];
1394 }
1395 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1396 {
1397 ii = A_offd_j[jj];
1398 res -= A_offd_data[jj] * Vext_data[ii];
1399 }
1400 u_data[i] *= prod;
1401 u_data[i] += relax_weight*(omega*res + res0 +
1402 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
1403 /*u_data[i] += omega*(relax_weight*res + res0 +
1404 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
1405 }
1406 }
1407 }
1408 /* hypre_TFree(tmp_data); */
1409 }
1410 else
1411 {
1412 for (i = 0; i < n; i++) /* interior points first */
1413 {
1414
1415 /*-----------------------------------------------------------
1416 * If diagonal is nonzero, relax point i; otherwise, skip it.
1417 *-----------------------------------------------------------*/
1418
1419 if ( A_diag_data[A_diag_i[i]] != zero)
1420 {
1421 res0 = 0.0;
1422 res2 = 0.0;
1423 res = f_data[i];
1424 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1425 {
1426 ii = A_diag_j[jj];
1427 res0 -= A_diag_data[jj] * u_data[ii];
1428 res2 += A_diag_data[jj] * Vtemp_data[ii];
1429 }
1430 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1431 {
1432 ii = A_offd_j[jj];
1433 res -= A_offd_data[jj] * Vext_data[ii];
1434 }
1435 u_data[i] *= prod;
1436 u_data[i] += relax_weight*(omega*res + res0 +
1437 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
1438 /*u_data[i] += omega*(relax_weight*res + res0 +
1439 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
1440 }
1441 }
1442 }
1443 }
1444
1445 /*-----------------------------------------------------------------
1446 * Relax only C or F points as determined by relax_points.
1447 *-----------------------------------------------------------------*/
1448
1449 else
1450 {
1451 if (num_threads > 1)
1452 {
1453
1454 /*tmp_data = hypre_CTAlloc(double,n);*/
1455 tmp_data = Ztemp_data;
1456
1457#define HYPRE_SMP_PRIVATE i
1458#include "../utilities/hypre_smp_forloop.h"
1459 for (i = 0; i < n; i++)
1460 tmp_data[i] = u_data[i];
1461#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
1462#include "../utilities/hypre_smp_forloop.h"
1463 for (j = 0; j < num_threads; j++)
1464 {
1465 size = n/num_threads;
1466 rest = n - size*num_threads;
1467 if (j < rest)
1468 {
1469 ns = j*size+j;
1470 ne = (j+1)*size+j+1;
1471 }
1472 else
1473 {
1474 ns = j*size+rest;
1475 ne = (j+1)*size+rest;
1476 }
1477 for (i = ns; i < ne; i++) /* relax interior points */
1478 {
1479
1480 /*-----------------------------------------------------------
1481 * If i is of the right type ( C or F ) and diagonal is
1482 * nonzero, relax point i; otherwise, skip it.
1483 *-----------------------------------------------------------*/
1484
1485 if (cf_marker[i] == relax_points
1486 && A_diag_data[A_diag_i[i]] != zero)
1487 {
1488 res0 = 0.0;
1489 res2 = 0.0;
1490 res = f_data[i];
1491 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1492 {
1493 ii = A_diag_j[jj];
1494 if (ii >= ns && ii < ne)
1495 {
1496 res0 -= A_diag_data[jj] * u_data[ii];
1497 res2 += A_diag_data[jj] * Vtemp_data[ii];
1498 }
1499 else
1500 res -= A_diag_data[jj] * tmp_data[ii];
1501 }
1502 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1503 {
1504 ii = A_offd_j[jj];
1505 res -= A_offd_data[jj] * Vext_data[ii];
1506 }
1507 u_data[i] *= prod;
1508 u_data[i] += relax_weight*(omega*res + res0 +
1509 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
1510 /*u_data[i] += omega*(relax_weight*res + res0 +
1511 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
1512 }
1513 }
1514 }
1515 /* hypre_TFree(tmp_data); */
1516 }
1517 else
1518 {
1519 for (i = 0; i < n; i++) /* relax interior points */
1520 {
1521
1522 /*-----------------------------------------------------------
1523 * If i is of the right type ( C or F ) and diagonal is
1524
1525 * nonzero, relax point i; otherwise, skip it.
1526 *-----------------------------------------------------------*/
1527
1528 if (cf_marker[i] == relax_points
1529 && A_diag_data[A_diag_i[i]] != zero)
1530 {
1531 res = f_data[i];
1532 res0 = 0.0;
1533 res2 = 0.0;
1534 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1535 {
1536 ii = A_diag_j[jj];
1537 res0 -= A_diag_data[jj] * u_data[ii];
1538 res2 += A_diag_data[jj] * Vtemp_data[ii];
1539 }
1540 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1541 {
1542 ii = A_offd_j[jj];
1543 res -= A_offd_data[jj] * Vext_data[ii];
1544 }
1545 u_data[i] *= prod;
1546 u_data[i] += relax_weight*(omega*res + res0 +
1547 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
1548 /*u_data[i] += omega*(relax_weight*res + res0 +
1549 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
1550 }
1551 }
1552 }
1553 }
1554 }
1555 if (num_procs > 1)
1556 {
1557 hypre_TFree(Vext_data);
1558 hypre_TFree(v_buf_data);
1559 }
1560 }
1561 break;
1562
1563 case 1: /* Gauss-Seidel VERY SLOW */
1564 {
1565 if (num_procs > 1)
1566 {
1567 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1568 num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
1569
1570 v_buf_data = hypre_CTAlloc(double,
1571 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
1572
1573 Vext_data = hypre_CTAlloc(double,num_cols_offd);
1574
1575 status = hypre_CTAlloc(MPI_Status,num_recvs+num_sends);
1576 requests= hypre_CTAlloc(MPI_Request, num_recvs+num_sends);
1577
1578 if (num_cols_offd)
1579 {
1580 A_offd_j = hypre_CSRMatrixJ(A_offd);
1581 A_offd_data = hypre_CSRMatrixData(A_offd);
1582 }
1583
1584 /*-----------------------------------------------------------------
1585 * Copy current approximation into temporary vector.
1586 *-----------------------------------------------------------------*/
1587 /*
1588 for (i = 0; i < n; i++)
1589 {
1590 Vtemp_data[i] = u_data[i];
1591 } */
1592
1593 }
1594 /*-----------------------------------------------------------------
1595 * Relax all points.
1596 *-----------------------------------------------------------------*/
1597 for (p = 0; p < num_procs; p++)
1598 {
1599 jr = 0;
1600 if (p != my_id)
1601 {
1602 for (i = 0; i < num_sends; i++)
1603 {
1604 ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
1605 if (ip == p)
1606 {
1607 vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1608 vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1)-vec_start;
1609 for (j=vec_start; j < vec_start+vec_len; j++)
1610 v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1611 MPI_Isend(&v_buf_data[vec_start], vec_len, MPI_DOUBLE,
1612 ip, 0, comm, &requests[jr++]);
1613 }
1614 }
1615 MPI_Waitall(jr,requests,status);
1616 MPI_Barrier(comm);
1617 }
1618 else
1619 {
1620 if (num_procs > 1)
1621 {
1622 for (i = 0; i < num_recvs; i++)
1623 {
1624 ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
1625 vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
1626 vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
1627 MPI_Irecv(&Vext_data[vec_start], vec_len, MPI_DOUBLE,
1628 ip, 0, comm, &requests[jr++]);
1629 }
1630 MPI_Waitall(jr,requests,status);
1631 }
1632 if (relax_points == 0)
1633 {
1634 for (i = 0; i < n; i++)
1635 {
1636
1637 /*-----------------------------------------------------------
1638 * If diagonal is nonzero, relax point i; otherwise, skip it.
1639 *-----------------------------------------------------------*/
1640
1641 if ( A_diag_data[A_diag_i[i]] != zero)
1642 {
1643 res = f_data[i];
1644 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1645 {
1646 ii = A_diag_j[jj];
1647 res -= A_diag_data[jj] * u_data[ii];
1648 }
1649 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1650 {
1651 ii = A_offd_j[jj];
1652 res -= A_offd_data[jj] * Vext_data[ii];
1653 }
1654 u_data[i] = res / A_diag_data[A_diag_i[i]];
1655 }
1656 }
1657 }
1658
1659 /*-----------------------------------------------------------------
1660 * Relax only C or F points as determined by relax_points.
1661 *-----------------------------------------------------------------*/
1662
1663 else
1664 {
1665 for (i = 0; i < n; i++)
1666 {
1667
1668 /*-----------------------------------------------------------
1669 * If i is of the right type ( C or F ) and diagonal is
1670 * nonzero, relax point i; otherwise, skip it.
1671 *-----------------------------------------------------------*/
1672
1673 if (cf_marker[i] == relax_points
1674 && A_diag_data[A_diag_i[i]] != zero)
1675 {
1676 res = f_data[i];
1677 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1678 {
1679 ii = A_diag_j[jj];
1680 res -= A_diag_data[jj] * u_data[ii];
1681 }
1682 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1683 {
1684 ii = A_offd_j[jj];
1685 res -= A_offd_data[jj] * Vext_data[ii];
1686 }
1687 u_data[i] = res / A_diag_data[A_diag_i[i]];
1688 }
1689 }
1690 }
1691 if (num_procs > 1)
1692 MPI_Barrier(comm);
1693 }
1694 }
1695 if (num_procs > 1)
1696 {
1697 hypre_TFree(Vext_data);
1698 hypre_TFree(v_buf_data);
1699 hypre_TFree(status);
1700 hypre_TFree(requests);
1701 }
1702 }
1703 break;
1704
1705 case 2: /* Gauss-Seidel: relax interior points in parallel, boundary
1706 sequentially */
1707 {
1708 if (num_procs > 1)
1709 {
1710 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1711 num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
1712
1713 v_buf_data = hypre_CTAlloc(double,
1714 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
1715
1716 Vext_data = hypre_CTAlloc(double,num_cols_offd);
1717
1718 status = hypre_CTAlloc(MPI_Status,num_recvs+num_sends);
1719 requests= hypre_CTAlloc(MPI_Request, num_recvs+num_sends);
1720
1721 if (num_cols_offd)
1722 {
1723 A_offd_j = hypre_CSRMatrixJ(A_offd);
1724 A_offd_data = hypre_CSRMatrixData(A_offd);
1725 }
1726 }
1727
1728 /*-----------------------------------------------------------------
1729 * Copy current approximation into temporary vector.
1730 *-----------------------------------------------------------------*/
1731 /*
1732 for (i = 0; i < n; i++)
1733 {
1734 Vtemp_data[i] = u_data[i];
1735 } */
1736
1737 /*-----------------------------------------------------------------
1738 * Relax interior points first
1739 *-----------------------------------------------------------------*/
1740 if (relax_points == 0)
1741 {
1742 for (i = 0; i < n; i++)
1743 {
1744
1745 /*-----------------------------------------------------------
1746 * If diagonal is nonzero, relax point i; otherwise, skip it.
1747 *-----------------------------------------------------------*/
1748
1749 if ((A_offd_i[i+1]-A_offd_i[i]) == zero &&
1750 A_diag_data[A_diag_i[i]] != zero)
1751 {
1752 res = f_data[i];
1753 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1754 {
1755 ii = A_diag_j[jj];
1756 res -= A_diag_data[jj] * u_data[ii];
1757 }
1758 u_data[i] = res / A_diag_data[A_diag_i[i]];
1759 }
1760 }
1761 }
1762 else
1763 {
1764 for (i = 0; i < n; i++)
1765 {
1766
1767 /*-----------------------------------------------------------
1768 * If i is of the right type ( C or F ) and diagonal is
1769 * nonzero, relax point i; otherwise, skip it.
1770 *-----------------------------------------------------------*/
1771
1772 if (cf_marker[i] == relax_points
1773 && (A_offd_i[i+1]-A_offd_i[i]) == zero
1774 && A_diag_data[A_diag_i[i]] != zero)
1775 {
1776 res = f_data[i];
1777 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1778 {
1779 ii = A_diag_j[jj];
1780 res -= A_diag_data[jj] * u_data[ii];
1781 }
1782 u_data[i] = res / A_diag_data[A_diag_i[i]];
1783 }
1784 }
1785 }
1786 for (p = 0; p < num_procs; p++)
1787 {
1788 jr = 0;
1789 if (p != my_id)
1790 {
1791 for (i = 0; i < num_sends; i++)
1792 {
1793 ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i);
1794 if (ip == p)
1795 {
1796 vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1797 vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1)-vec_start;
1798 for (j=vec_start; j < vec_start+vec_len; j++)
1799 v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1800 MPI_Isend(&v_buf_data[vec_start], vec_len, MPI_DOUBLE,
1801 ip, 0, comm, &requests[jr++]);
1802 }
1803 }
1804 MPI_Waitall(jr,requests,status);
1805 MPI_Barrier(comm);
1806 }
1807 else
1808 {
1809 if (num_procs > 1)
1810 {
1811 for (i = 0; i < num_recvs; i++)
1812 {
1813 ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
1814 vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i);
1815 vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start;
1816 MPI_Irecv(&Vext_data[vec_start], vec_len, MPI_DOUBLE,
1817 ip, 0, comm, &requests[jr++]);
1818 }
1819 MPI_Waitall(jr,requests,status);
1820 }
1821 if (relax_points == 0)
1822 {
1823 for (i = 0; i < n; i++)
1824 {
1825
1826 /*-----------------------------------------------------------
1827 * If diagonal is nonzero, relax point i; otherwise, skip it.
1828 *-----------------------------------------------------------*/
1829
1830 if ((A_offd_i[i+1]-A_offd_i[i]) != zero &&
1831 A_diag_data[A_diag_i[i]] != zero)
1832 {
1833 res = f_data[i];
1834 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1835 {
1836 ii = A_diag_j[jj];
1837 res -= A_diag_data[jj] * u_data[ii];
1838 }
1839 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1840 {
1841 ii = A_offd_j[jj];
1842 res -= A_offd_data[jj] * Vext_data[ii];
1843 }
1844 u_data[i] = res / A_diag_data[A_diag_i[i]];
1845 }
1846 }
1847 }
1848
1849 /*-----------------------------------------------------------------
1850 * Relax only C or F points as determined by relax_points.
1851 *-----------------------------------------------------------------*/
1852
1853 else
1854 {
1855 for (i = 0; i < n; i++)
1856 {
1857
1858 /*-----------------------------------------------------------
1859 * If i is of the right type ( C or F ) and diagonal is
1860 * nonzero, relax point i; otherwise, skip it.
1861 *-----------------------------------------------------------*/
1862
1863 if (cf_marker[i] == relax_points
1864 && (A_offd_i[i+1]-A_offd_i[i]) != zero
1865 && A_diag_data[A_diag_i[i]] != zero)
1866 {
1867 res = f_data[i];
1868 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1869 {
1870 ii = A_diag_j[jj];
1871 res -= A_diag_data[jj] * u_data[ii];
1872 }
1873 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1874 {
1875 ii = A_offd_j[jj];
1876 res -= A_offd_data[jj] * Vext_data[ii];
1877 }
1878 u_data[i] = res / A_diag_data[A_diag_i[i]];
1879 }
1880 }
1881 }
1882 if (num_procs > 1)
1883 MPI_Barrier(comm);
1884 }
1885 }
1886 if (num_procs > 1)
1887 {
1888 hypre_TFree(Vext_data);
1889 hypre_TFree(v_buf_data);
1890 hypre_TFree(status);
1891 hypre_TFree(requests);
1892 }
1893 }
1894 break;
1895
1896 case 4: /* Hybrid: Jacobi off-processor,
1897 Gauss-Seidel/SOR on-processor
1898 (backward loop) */
1899 {
1900 if (num_procs > 1)
1901 {
1902 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1903
1904 v_buf_data = hypre_CTAlloc(double,
1905 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
1906
1907 Vext_data = hypre_CTAlloc(double,num_cols_offd);
1908
1909 if (num_cols_offd)
1910 {
1911 A_offd_j = hypre_CSRMatrixJ(A_offd);
1912 A_offd_data = hypre_CSRMatrixData(A_offd);
1913 }
1914
1915 index = 0;
1916 for (i = 0; i < num_sends; i++)
1917 {
1918 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1919 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++)
1920 v_buf_data[index++]
1921 = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1922 }
1923
1924 comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data,
1925 Vext_data);
1926
1927 /*-----------------------------------------------------------------
1928 * Copy current approximation into temporary vector.
1929 *-----------------------------------------------------------------*/
1930 hypre_ParCSRCommHandleDestroy(comm_handle);
1931 comm_handle = NULL;
1932 }
1933
1934 /*-----------------------------------------------------------------
1935 * Relax all points.
1936 *-----------------------------------------------------------------*/
1937
1938 if (relax_weight == 1 && omega == 1)
1939 {
1940 if (relax_points == 0)
1941 {
1942 if (num_threads > 1)
1943 {
1944 tmp_data = hypre_CTAlloc(double,n);
1945#define HYPRE_SMP_PRIVATE i
1946#include "../utilities/hypre_smp_forloop.h"
1947 for (i = 0; i < n; i++)
1948 tmp_data[i] = u_data[i];
1949#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
1950#include "../utilities/hypre_smp_forloop.h"
1951 for (j = 0; j < num_threads; j++)
1952 {
1953 size = n/num_threads;
1954 rest = n - size*num_threads;
1955 if (j < rest)
1956 {
1957 ns = j*size+j;
1958 ne = (j+1)*size+j+1;
1959 }
1960 else
1961 {
1962 ns = j*size+rest;
1963 ne = (j+1)*size+rest;
1964 }
1965 for (i = ne-1; i > ns-1; i--) /* interior points first */
1966 {
1967
1968 /*-----------------------------------------------------------
1969 * If diagonal is nonzero, relax point i; otherwise, skip it.
1970 *-----------------------------------------------------------*/
1971
1972 if ( A_diag_data[A_diag_i[i]] != zero)
1973 {
1974 res = f_data[i];
1975 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1976 {
1977 ii = A_diag_j[jj];
1978 if (ii >= ns && ii < ne)
1979 res -= A_diag_data[jj] * u_data[ii];
1980 else
1981 res -= A_diag_data[jj] * tmp_data[ii];
1982 }
1983 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1984 {
1985 ii = A_offd_j[jj];
1986 res -= A_offd_data[jj] * Vext_data[ii];
1987 }
1988 u_data[i] = res / A_diag_data[A_diag_i[i]];
1989 }
1990 }
1991 }
1992 hypre_TFree(tmp_data);
1993 }
1994 else
1995 {
1996 for (i = n-1; i > -1; i--) /* interior points first */
1997 {
1998
1999 /*-----------------------------------------------------------
2000 * If diagonal is nonzero, relax point i; otherwise, skip it.
2001 *-----------------------------------------------------------*/
2002
2003 if ( A_diag_data[A_diag_i[i]] != zero)
2004 {
2005 res = f_data[i];
2006 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2007 {
2008 ii = A_diag_j[jj];
2009 res -= A_diag_data[jj] * u_data[ii];
2010 }
2011 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2012 {
2013 ii = A_offd_j[jj];
2014 res -= A_offd_data[jj] * Vext_data[ii];
2015 }
2016 u_data[i] = res / A_diag_data[A_diag_i[i]];
2017 }
2018 }
2019 }
2020 }
2021
2022 /*-----------------------------------------------------------------
2023 * Relax only C or F points as determined by relax_points.
2024 *-----------------------------------------------------------------*/
2025
2026 else
2027 {
2028 if (num_threads > 1)
2029 {
2030 tmp_data = hypre_CTAlloc(double,n);
2031#define HYPRE_SMP_PRIVATE i
2032#include "../utilities/hypre_smp_forloop.h"
2033 for (i = 0; i < n; i++)
2034 tmp_data[i] = u_data[i];
2035#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
2036#include "../utilities/hypre_smp_forloop.h"
2037 for (j = 0; j < num_threads; j++)
2038 {
2039 size = n/num_threads;
2040 rest = n - size*num_threads;
2041 if (j < rest)
2042 {
2043 ns = j*size+j;
2044 ne = (j+1)*size+j+1;
2045 }
2046 else
2047 {
2048 ns = j*size+rest;
2049 ne = (j+1)*size+rest;
2050 }
2051 for (i = ne-1; i > ns-1; i--) /* relax interior points */
2052 {
2053
2054 /*-----------------------------------------------------------
2055 * If i is of the right type ( C or F ) and diagonal is
2056 * nonzero, relax point i; otherwise, skip it.
2057 *-----------------------------------------------------------*/
2058
2059 if (cf_marker[i] == relax_points
2060 && A_diag_data[A_diag_i[i]] != zero)
2061 {
2062 res = f_data[i];
2063 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2064 {
2065 ii = A_diag_j[jj];
2066 if (ii >= ns && ii < ne)
2067 res -= A_diag_data[jj] * u_data[ii];
2068 else
2069 res -= A_diag_data[jj] * tmp_data[ii];
2070 }
2071 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2072 {
2073 ii = A_offd_j[jj];
2074 res -= A_offd_data[jj] * Vext_data[ii];
2075 }
2076 u_data[i] = res / A_diag_data[A_diag_i[i]];
2077 }
2078 }
2079 }
2080 hypre_TFree(tmp_data);
2081 }
2082 else
2083 {
2084 for (i = n-1; i > -1; i--) /* relax interior points */
2085 {
2086
2087 /*-----------------------------------------------------------
2088 * If i is of the right type ( C or F ) and diagonal is
2089
2090 * nonzero, relax point i; otherwise, skip it.
2091 *-----------------------------------------------------------*/
2092
2093 if (cf_marker[i] == relax_points
2094 && A_diag_data[A_diag_i[i]] != zero)
2095 {
2096 res = f_data[i];
2097 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2098 {
2099 ii = A_diag_j[jj];
2100 res -= A_diag_data[jj] * u_data[ii];
2101 }
2102 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2103 {
2104 ii = A_offd_j[jj];
2105 res -= A_offd_data[jj] * Vext_data[ii];
2106 }
2107 u_data[i] = res / A_diag_data[A_diag_i[i]];
2108 }
2109 }
2110 }
2111 }
2112 }
2113 else
2114 {
2115#define HYPRE_SMP_PRIVATE i
2116#include "../utilities/hypre_smp_forloop.h"
2117 for (i = 0; i < n; i++)
2118 {
2119 Vtemp_data[i] = u_data[i];
2120 }
2121 prod = (1.0-relax_weight*omega);
2122 if (relax_points == 0)
2123 {
2124 if (num_threads > 1)
2125 {
2126 tmp_data = hypre_CTAlloc(double,n);
2127#define HYPRE_SMP_PRIVATE i
2128#include "../utilities/hypre_smp_forloop.h"
2129 for (i = 0; i < n; i++)
2130 tmp_data[i] = u_data[i];
2131#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
2132#include "../utilities/hypre_smp_forloop.h"
2133 for (j = 0; j < num_threads; j++)
2134 {
2135 size = n/num_threads;
2136 rest = n - size*num_threads;
2137 if (j < rest)
2138 {
2139 ns = j*size+j;
2140 ne = (j+1)*size+j+1;
2141 }
2142 else
2143 {
2144 ns = j*size+rest;
2145 ne = (j+1)*size+rest;
2146 }
2147 for (i = ne-1; i > ns-1; i--) /* interior points first */
2148 {
2149
2150 /*-----------------------------------------------------------
2151 * If diagonal is nonzero, relax point i; otherwise, skip it.
2152 *-----------------------------------------------------------*/
2153
2154 if ( A_diag_data[A_diag_i[i]] != zero)
2155 {
2156 res = f_data[i];
2157 res0 = 0.0;
2158 res2 = 0.0;
2159 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2160 {
2161 ii = A_diag_j[jj];
2162 if (ii >= ns && ii < ne)
2163 {
2164 res0 -= A_diag_data[jj] * u_data[ii];
2165 res2 += A_diag_data[jj] * Vtemp_data[ii];
2166 }
2167 else
2168 res -= A_diag_data[jj] * tmp_data[ii];
2169 }
2170 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2171 {
2172 ii = A_offd_j[jj];
2173 res -= A_offd_data[jj] * Vext_data[ii];
2174 }
2175 u_data[i] *= prod;
2176 u_data[i] += relax_weight*(omega*res + res0 +
2177 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
2178 /*u_data[i] += omega*(relax_weight*res + res0 +
2179 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
2180 }
2181 }
2182 }
2183 hypre_TFree(tmp_data);
2184 }
2185 else
2186 {
2187 for (i = n-1; i > -1; i--) /* interior points first */
2188 {
2189
2190 /*-----------------------------------------------------------
2191 * If diagonal is nonzero, relax point i; otherwise, skip it.
2192 *-----------------------------------------------------------*/
2193
2194 if ( A_diag_data[A_diag_i[i]] != zero)
2195 {
2196 res0 = 0.0;
2197 res2 = 0.0;
2198 res = f_data[i];
2199 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2200 {
2201 ii = A_diag_j[jj];
2202 res0 -= A_diag_data[jj] * u_data[ii];
2203 res2 += A_diag_data[jj] * Vtemp_data[ii];
2204 }
2205 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2206 {
2207 ii = A_offd_j[jj];
2208 res -= A_offd_data[jj] * Vext_data[ii];
2209 }
2210 u_data[i] *= prod;
2211 u_data[i] += relax_weight*(omega*res + res0 +
2212 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
2213 /*u_data[i] += omega*(relax_weight*res + res0 +
2214 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
2215 }
2216 }
2217 }
2218 }
2219
2220 /*-----------------------------------------------------------------
2221 * Relax only C or F points as determined by relax_points.
2222 *-----------------------------------------------------------------*/
2223
2224 else
2225 {
2226 if (num_threads > 1)
2227 {
2228 tmp_data = hypre_CTAlloc(double,n);
2229#define HYPRE_SMP_PRIVATE i
2230#include "../utilities/hypre_smp_forloop.h"
2231 for (i = 0; i < n; i++)
2232 tmp_data[i] = u_data[i];
2233#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
2234#include "../utilities/hypre_smp_forloop.h"
2235 for (j = 0; j < num_threads; j++)
2236 {
2237 size = n/num_threads;
2238 rest = n - size*num_threads;
2239 if (j < rest)
2240 {
2241 ns = j*size+j;
2242 ne = (j+1)*size+j+1;
2243 }
2244 else
2245 {
2246 ns = j*size+rest;
2247 ne = (j+1)*size+rest;
2248 }
2249 for (i = ne-1; i > ns-1; i--) /* relax interior points */
2250 {
2251
2252 /*-----------------------------------------------------------
2253 * If i is of the right type ( C or F ) and diagonal is
2254 * nonzero, relax point i; otherwise, skip it.
2255 *-----------------------------------------------------------*/
2256
2257 if (cf_marker[i] == relax_points
2258 && A_diag_data[A_diag_i[i]] != zero)
2259 {
2260 res0 = 0.0;
2261 res2 = 0.0;
2262 res = f_data[i];
2263 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2264 {
2265 ii = A_diag_j[jj];
2266 if (ii >= ns && ii < ne)
2267 {
2268 res0 -= A_diag_data[jj] * u_data[ii];
2269 res2 += A_diag_data[jj] * Vtemp_data[ii];
2270 }
2271 else
2272 res -= A_diag_data[jj] * tmp_data[ii];
2273 }
2274 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2275 {
2276 ii = A_offd_j[jj];
2277 res -= A_offd_data[jj] * Vext_data[ii];
2278 }
2279 u_data[i] *= prod;
2280 u_data[i] += relax_weight*(omega*res + res0 +
2281 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
2282 /*u_data[i] += omega*(relax_weight*res + res0 +
2283 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
2284 }
2285 }
2286 }
2287 hypre_TFree(tmp_data);
2288 }
2289 else
2290 {
2291 for (i = n-1; i > -1; i--) /* relax interior points */
2292 {
2293
2294 /*-----------------------------------------------------------
2295 * If i is of the right type ( C or F ) and diagonal is
2296
2297 * nonzero, relax point i; otherwise, skip it.
2298 *-----------------------------------------------------------*/
2299
2300 if (cf_marker[i] == relax_points
2301 && A_diag_data[A_diag_i[i]] != zero)
2302 {
2303 res = f_data[i];
2304 res0 = 0.0;
2305 res2 = 0.0;
2306 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2307 {
2308 ii = A_diag_j[jj];
2309 res0 -= A_diag_data[jj] * u_data[ii];
2310 res2 += A_diag_data[jj] * Vtemp_data[ii];
2311 }
2312 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2313 {
2314 ii = A_offd_j[jj];
2315 res -= A_offd_data[jj] * Vext_data[ii];
2316 }
2317 u_data[i] *= prod;
2318 u_data[i] += relax_weight*(omega*res + res0 +
2319 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
2320 /*u_data[i] += omega*(relax_weight*res + res0 +
2321 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
2322 }
2323 }
2324 }
2325 }
2326 }
2327 if (num_procs > 1)
2328 {
2329 hypre_TFree(Vext_data);
2330 hypre_TFree(v_buf_data);
2331 }
2332 }
2333 break;
2334
2335 case 6: /* Hybrid: Jacobi off-processor,
2336 Symm. Gauss-Seidel/ SSOR on-processor
2337 with outer relaxation parameter */
2338 {
2339
2340 Ztemp_local = hypre_ParVectorLocalVector(Ztemp);
2341 Ztemp_data = hypre_VectorData(Ztemp_local);
2342
2343 /*-----------------------------------------------------------------
2344 * Copy current approximation into temporary vector.
2345 *-----------------------------------------------------------------*/
2346 if (num_procs > 1)
2347 {
2348 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
2349
2350 v_buf_data = hypre_CTAlloc(double,
2351 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
2352
2353 Vext_data = hypre_CTAlloc(double,num_cols_offd);
2354
2355 if (num_cols_offd)
2356 {
2357 A_offd_j = hypre_CSRMatrixJ(A_offd);
2358 A_offd_data = hypre_CSRMatrixData(A_offd);
2359 }
2360
2361 index = 0;
2362 for (i = 0; i < num_sends; i++)
2363 {
2364 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
2365 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++)
2366 v_buf_data[index++]
2367 = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
2368 }
2369
2370 comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data,
2371 Vext_data);
2372
2373 /*-----------------------------------------------------------------
2374 * Copy current approximation into temporary vector.
2375 *-----------------------------------------------------------------*/
2376 hypre_ParCSRCommHandleDestroy(comm_handle);
2377 comm_handle = NULL;
2378 }
2379
2380 /*-----------------------------------------------------------------
2381 * Relax all points.
2382 *-----------------------------------------------------------------*/
2383
2384 if (relax_weight == 1 && omega == 1)
2385 {
2386 if (relax_points == 0)
2387 {
2388 if (num_threads > 1)
2389 {
2390 /* tmp_data = hypre_CTAlloc(double,n); */
2391 tmp_data = Ztemp_data;
2392#define HYPRE_SMP_PRIVATE i
2393#include "../utilities/hypre_smp_forloop.h"
2394 for (i = 0; i < n; i++)
2395 tmp_data[i] = u_data[i];
2396#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
2397#include "../utilities/hypre_smp_forloop.h"
2398 for (j = 0; j < num_threads; j++)
2399 {
2400 size = n/num_threads;
2401 rest = n - size*num_threads;
2402 if (j < rest)
2403 {
2404 ns = j*size+j;
2405 ne = (j+1)*size+j+1;
2406 }
2407 else
2408 {
2409 ns = j*size+rest;
2410 ne = (j+1)*size+rest;
2411 }
2412 for (i = ns; i < ne; i++) /* interior points first */
2413 {
2414
2415 /*-----------------------------------------------------------
2416 * If diagonal is nonzero, relax point i; otherwise, skip it.
2417 *-----------------------------------------------------------*/
2418
2419 if ( A_diag_data[A_diag_i[i]] != zero)
2420 {
2421 res = f_data[i];
2422 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2423 {
2424 ii = A_diag_j[jj];
2425 if (ii >= ns && ii < ne)
2426 {
2427 res -= A_diag_data[jj] * u_data[ii];
2428 }
2429 else
2430 res -= A_diag_data[jj] * tmp_data[ii];
2431 }
2432 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2433 {
2434 ii = A_offd_j[jj];
2435 res -= A_offd_data[jj] * Vext_data[ii];
2436 }
2437 u_data[i] = res / A_diag_data[A_diag_i[i]];
2438 }
2439 }
2440 for (i = ne-1; i > ns-1; i--) /* interior points first */
2441 {
2442
2443 /*-----------------------------------------------------------
2444 * If diagonal is nonzero, relax point i; otherwise, skip it.
2445 *-----------------------------------------------------------*/
2446
2447 if ( A_diag_data[A_diag_i[i]] != zero)
2448 {
2449 res = f_data[i];
2450 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2451 {
2452 ii = A_diag_j[jj];
2453 if (ii >= ns && ii < ne)
2454 {
2455 res -= A_diag_data[jj] * u_data[ii];
2456 }
2457 else
2458 res -= A_diag_data[jj] * tmp_data[ii];
2459 }
2460 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2461 {
2462 ii = A_offd_j[jj];
2463 res -= A_offd_data[jj] * Vext_data[ii];
2464 }
2465 u_data[i] = res / A_diag_data[A_diag_i[i]];
2466 }
2467 }
2468 }
2469 /* hypre_TFree(tmp_data); */
2470 }
2471 else
2472 {
2473 for (i = 0; i < n; i++) /* interior points first */
2474 {
2475
2476 /*-----------------------------------------------------------
2477 * If diagonal is nonzero, relax point i; otherwise, skip it.
2478 *-----------------------------------------------------------*/
2479
2480 if ( A_diag_data[A_diag_i[i]] != zero)
2481 {
2482 res = f_data[i];
2483 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2484 {
2485 ii = A_diag_j[jj];
2486 res -= A_diag_data[jj] * u_data[ii];
2487 }
2488 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2489 {
2490 ii = A_offd_j[jj];
2491 res -= A_offd_data[jj] * Vext_data[ii];
2492 }
2493 u_data[i] = res / A_diag_data[A_diag_i[i]];
2494 }
2495 }
2496 for (i = n-1; i > -1; i--) /* interior points first */
2497 {
2498
2499 /*-----------------------------------------------------------
2500 * If diagonal is nonzero, relax point i; otherwise, skip it.
2501 *-----------------------------------------------------------*/
2502
2503 if ( A_diag_data[A_diag_i[i]] != zero)
2504 {
2505 res = f_data[i];
2506 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2507 {
2508 ii = A_diag_j[jj];
2509 res -= A_diag_data[jj] * u_data[ii];
2510 }
2511 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2512 {
2513 ii = A_offd_j[jj];
2514 res -= A_offd_data[jj] * Vext_data[ii];
2515 }
2516 u_data[i] = res / A_diag_data[A_diag_i[i]];
2517 }
2518 }
2519 }
2520 }
2521
2522 /*-----------------------------------------------------------------
2523 * Relax only C or F points as determined by relax_points.
2524 *-----------------------------------------------------------------*/
2525
2526 else
2527 {
2528 if (num_threads > 1)
2529 {
2530 /* tmp_data = hypre_CTAlloc(double,n); */
2531 tmp_data = Ztemp_data;
2532#define HYPRE_SMP_PRIVATE i
2533#include "../utilities/hypre_smp_forloop.h"
2534 for (i = 0; i < n; i++)
2535 tmp_data[i] = u_data[i];
2536#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
2537#include "../utilities/hypre_smp_forloop.h"
2538 for (j = 0; j < num_threads; j++)
2539 {
2540 size = n/num_threads;
2541 rest = n - size*num_threads;
2542 if (j < rest)
2543 {
2544 ns = j*size+j;
2545 ne = (j+1)*size+j+1;
2546 }
2547 else
2548 {
2549 ns = j*size+rest;
2550 ne = (j+1)*size+rest;
2551 }
2552 for (i = ns; i < ne; i++) /* relax interior points */
2553 {
2554
2555 /*-----------------------------------------------------------
2556 * If i is of the right type ( C or F ) and diagonal is
2557 * nonzero, relax point i; otherwise, skip it.
2558 *-----------------------------------------------------------*/
2559
2560 if (cf_marker[i] == relax_points
2561 && A_diag_data[A_diag_i[i]] != zero)
2562 {
2563 res = f_data[i];
2564 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2565 {
2566 ii = A_diag_j[jj];
2567 if (ii >= ns && ii < ne)
2568 {
2569 res -= A_diag_data[jj] * u_data[ii];
2570 }
2571 else
2572 res -= A_diag_data[jj] * tmp_data[ii];
2573 }
2574 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2575 {
2576 ii = A_offd_j[jj];
2577 res -= A_offd_data[jj] * Vext_data[ii];
2578 }
2579 u_data[i] = res / A_diag_data[A_diag_i[i]];
2580 }
2581 }
2582 for (i = ne-1; i > ns-1; i--) /* relax interior points */
2583 {
2584
2585 /*-----------------------------------------------------------
2586 * If i is of the right type ( C or F ) and diagonal is
2587 * nonzero, relax point i; otherwise, skip it.
2588 *-----------------------------------------------------------*/
2589
2590 if (cf_marker[i] == relax_points
2591 && A_diag_data[A_diag_i[i]] != zero)
2592 {
2593 res = f_data[i];
2594 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2595 {
2596 ii = A_diag_j[jj];
2597 if (ii >= ns && ii < ne)
2598 {
2599 res -= A_diag_data[jj] * u_data[ii];
2600 }
2601 else
2602 res -= A_diag_data[jj] * tmp_data[ii];
2603 }
2604 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2605 {
2606 ii = A_offd_j[jj];
2607 res -= A_offd_data[jj] * Vext_data[ii];
2608 }
2609 u_data[i] = res / A_diag_data[A_diag_i[i]];
2610 }
2611 }
2612 }
2613 /* hypre_TFree(tmp_data);*/
2614 }
2615 else
2616 {
2617 for (i = 0; i < n; i++) /* relax interior points */
2618 {
2619
2620 /*-----------------------------------------------------------
2621 * If i is of the right type ( C or F ) and diagonal is
2622
2623 * nonzero, relax point i; otherwise, skip it.
2624 *-----------------------------------------------------------*/
2625
2626 if (cf_marker[i] == relax_points
2627 && A_diag_data[A_diag_i[i]] != zero)
2628 {
2629 res = f_data[i];
2630 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2631 {
2632 ii = A_diag_j[jj];
2633 res -= A_diag_data[jj] * u_data[ii];
2634 }
2635 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2636 {
2637 ii = A_offd_j[jj];
2638 res -= A_offd_data[jj] * Vext_data[ii];
2639 }
2640 u_data[i] = res / A_diag_data[A_diag_i[i]];
2641 }
2642 }
2643 for (i = n-1; i > -1; i--) /* relax interior points */
2644 {
2645
2646 /*-----------------------------------------------------------
2647 * If i is of the right type ( C or F ) and diagonal is
2648
2649 * nonzero, relax point i; otherwise, skip it.
2650 *-----------------------------------------------------------*/
2651
2652 if (cf_marker[i] == relax_points
2653 && A_diag_data[A_diag_i[i]] != zero)
2654 {
2655 res = f_data[i];
2656 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2657 {
2658 ii = A_diag_j[jj];
2659 res -= A_diag_data[jj] * u_data[ii];
2660 }
2661 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2662 {
2663 ii = A_offd_j[jj];
2664 res -= A_offd_data[jj] * Vext_data[ii];
2665 }
2666 u_data[i] = res / A_diag_data[A_diag_i[i]];
2667 }
2668 }
2669 }
2670 }
2671 }
2672 else
2673 {
2674#define HYPRE_SMP_PRIVATE i
2675#include "../utilities/hypre_smp_forloop.h"
2676 for (i = 0; i < n; i++)
2677 {
2678 Vtemp_data[i] = u_data[i];
2679 }
2680 prod = (1.0-relax_weight*omega);
2681 if (relax_points == 0)
2682 {
2683 if (num_threads > 1)
2684 {
2685 /* tmp_data = hypre_CTAlloc(double,n); */
2686 tmp_data = Ztemp_data;
2687#define HYPRE_SMP_PRIVATE i
2688#include "../utilities/hypre_smp_forloop.h"
2689 for (i = 0; i < n; i++)
2690 tmp_data[i] = u_data[i];
2691#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
2692#include "../utilities/hypre_smp_forloop.h"
2693 for (j = 0; j < num_threads; j++)
2694 {
2695 size = n/num_threads;
2696 rest = n - size*num_threads;
2697 if (j < rest)
2698 {
2699 ns = j*size+j;
2700 ne = (j+1)*size+j+1;
2701 }
2702 else
2703 {
2704 ns = j*size+rest;
2705 ne = (j+1)*size+rest;
2706 }
2707 for (i = ns; i < ne; i++) /* interior points first */
2708 {
2709
2710 /*-----------------------------------------------------------
2711 * If diagonal is nonzero, relax point i; otherwise, skip it.
2712 *-----------------------------------------------------------*/
2713
2714 if ( A_diag_data[A_diag_i[i]] != zero)
2715 {
2716 res0 = 0.0;
2717 res2 = 0.0;
2718 res = f_data[i];
2719 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2720 {
2721 ii = A_diag_j[jj];
2722 if (ii >= ns && ii < ne)
2723 {
2724 res0 -= A_diag_data[jj] * u_data[ii];
2725 res2 += A_diag_data[jj] * Vtemp_data[ii];
2726 }
2727 else
2728 res -= A_diag_data[jj] * tmp_data[ii];
2729 }
2730 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2731 {
2732 ii = A_offd_j[jj];
2733 res -= A_offd_data[jj] * Vext_data[ii];
2734 }
2735 u_data[i] *= prod;
2736 u_data[i] += relax_weight*(omega*res + res0 +
2737 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
2738 /*u_data[i] += omega*(relax_weight*res + res0 +
2739 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
2740 }
2741 }
2742 for (i = ne-1; i > ns-1; i--) /* interior points first */
2743 {
2744
2745 /*-----------------------------------------------------------
2746 * If diagonal is nonzero, relax point i; otherwise, skip it.
2747 *-----------------------------------------------------------*/
2748
2749 if ( A_diag_data[A_diag_i[i]] != zero)
2750 {
2751 res0 = 0.0;
2752 res2 = 0.0;
2753 res = f_data[i];
2754 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2755 {
2756 ii = A_diag_j[jj];
2757 if (ii >= ns && ii < ne)
2758 {
2759 res0 -= A_diag_data[jj] * u_data[ii];
2760 res2 += A_diag_data[jj] * Vtemp_data[ii];
2761 }
2762 else
2763 res -= A_diag_data[jj] * tmp_data[ii];
2764 }
2765 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2766 {
2767 ii = A_offd_j[jj];
2768 res -= A_offd_data[jj] * Vext_data[ii];
2769 }
2770 u_data[i] *= prod;
2771 u_data[i] += relax_weight*(omega*res + res0 +
2772 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
2773 /*u_data[i] += omega*(relax_weight*res + res0 +
2774 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
2775 }
2776 }
2777 }
2778 /* hypre_TFree(tmp_data); */
2779 }
2780 else
2781 {
2782 for (i = 0; i < n; i++) /* interior points first */
2783 {
2784
2785 /*-----------------------------------------------------------
2786 * If diagonal is nonzero, relax point i; otherwise, skip it.
2787 *-----------------------------------------------------------*/
2788
2789 if ( A_diag_data[A_diag_i[i]] != zero)
2790 {
2791 res0 = 0.0;
2792 res = f_data[i];
2793 res2 = 0.0;
2794 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2795 {
2796 ii = A_diag_j[jj];
2797 res0 -= A_diag_data[jj] * u_data[ii];
2798 res2 += A_diag_data[jj] * Vtemp_data[ii];
2799 }
2800 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2801 {
2802 ii = A_offd_j[jj];
2803 res -= A_offd_data[jj] * Vext_data[ii];
2804 }
2805 u_data[i] *= prod;
2806 u_data[i] += relax_weight*(omega*res + res0 +
2807 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
2808 /*u_data[i] += omega*(relax_weight*res + res0 +
2809 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
2810 }
2811 }
2812 for (i = n-1; i > -1; i--) /* interior points first */
2813 {
2814
2815 /*-----------------------------------------------------------
2816 * If diagonal is nonzero, relax point i; otherwise, skip it.
2817 *-----------------------------------------------------------*/
2818
2819 if ( A_diag_data[A_diag_i[i]] != zero)
2820 {
2821 res0 = 0.0;
2822 res = f_data[i];
2823 res2 = 0.0;
2824 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2825 {
2826 ii = A_diag_j[jj];
2827 res0 -= A_diag_data[jj] * u_data[ii];
2828 res2 += A_diag_data[jj] * Vtemp_data[ii];
2829 }
2830 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2831 {
2832 ii = A_offd_j[jj];
2833 res -= A_offd_data[jj] * Vext_data[ii];
2834 }
2835 u_data[i] *= prod;
2836 u_data[i] += relax_weight*(omega*res + res0 +
2837 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
2838 /*u_data[i] += omega*(relax_weight*res + res0 +
2839 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
2840 }
2841 }
2842 }
2843 }
2844
2845 /*-----------------------------------------------------------------
2846 * Relax only C or F points as determined by relax_points.
2847 *-----------------------------------------------------------------*/
2848
2849 else
2850 {
2851 if (num_threads > 1)
2852 {
2853 /* tmp_data = hypre_CTAlloc(double,n); */
2854 tmp_data = Ztemp_data;
2855#define HYPRE_SMP_PRIVATE i
2856#include "../utilities/hypre_smp_forloop.h"
2857 for (i = 0; i < n; i++)
2858 tmp_data[i] = u_data[i];
2859#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
2860#include "../utilities/hypre_smp_forloop.h"
2861 for (j = 0; j < num_threads; j++)
2862 {
2863 size = n/num_threads;
2864 rest = n - size*num_threads;
2865 if (j < rest)
2866 {
2867 ns = j*size+j;
2868 ne = (j+1)*size+j+1;
2869 }
2870 else
2871 {
2872 ns = j*size+rest;
2873 ne = (j+1)*size+rest;
2874 }
2875 for (i = ns; i < ne; i++) /* relax interior points */
2876 {
2877
2878 /*-----------------------------------------------------------
2879 * If i is of the right type ( C or F ) and diagonal is
2880 * nonzero, relax point i; otherwise, skip it.
2881 *-----------------------------------------------------------*/
2882
2883 if (cf_marker[i] == relax_points
2884 && A_diag_data[A_diag_i[i]] != zero)
2885 {
2886 res0 = 0.0;
2887 res2 = 0.0;
2888 res = f_data[i];
2889 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2890 {
2891 ii = A_diag_j[jj];
2892 if (ii >= ns && ii < ne)
2893 {
2894 res2 += A_diag_data[jj] * Vtemp_data[ii];
2895 res0 -= A_diag_data[jj] * u_data[ii];
2896 }
2897 else
2898 res -= A_diag_data[jj] * tmp_data[ii];
2899 }
2900 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2901 {
2902 ii = A_offd_j[jj];
2903 res -= A_offd_data[jj] * Vext_data[ii];
2904 }
2905 u_data[i] *= prod;
2906 u_data[i] += relax_weight*(omega*res + res0 +
2907 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
2908 /*u_data[i] += omega*(relax_weight*res + res0 +
2909 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
2910 }
2911 }
2912 for (i = ne-1; i > ns-1; i--) /* relax interior points */
2913 {
2914
2915 /*-----------------------------------------------------------
2916 * If i is of the right type ( C or F ) and diagonal is
2917 * nonzero, relax point i; otherwise, skip it.
2918 *-----------------------------------------------------------*/
2919
2920 if (cf_marker[i] == relax_points
2921 && A_diag_data[A_diag_i[i]] != zero)
2922 {
2923 res0 = 0.0;
2924 res2 = 0.0;
2925 res = f_data[i];
2926 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2927 {
2928 ii = A_diag_j[jj];
2929 if (ii >= ns && ii < ne)
2930 {
2931 res2 += A_diag_data[jj] * Vtemp_data[ii];
2932 res0 -= A_diag_data[jj] * u_data[ii];
2933 }
2934 else
2935 res -= A_diag_data[jj] * tmp_data[ii];
2936 }
2937 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2938 {
2939 ii = A_offd_j[jj];
2940 res -= A_offd_data[jj] * Vext_data[ii];
2941 }
2942 u_data[i] *= prod;
2943 u_data[i] += relax_weight*(omega*res + res0 +
2944 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
2945 /*u_data[i] += omega*(relax_weight*res + res0 +
2946 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
2947 }
2948 }
2949 }
2950 /* hypre_TFree(tmp_data); */
2951 }
2952 else
2953 {
2954 for (i = 0; i < n; i++) /* relax interior points */
2955 {
2956
2957 /*-----------------------------------------------------------
2958 * If i is of the right type ( C or F ) and diagonal is
2959
2960 * nonzero, relax point i; otherwise, skip it.
2961 *-----------------------------------------------------------*/
2962
2963 if (cf_marker[i] == relax_points
2964 && A_diag_data[A_diag_i[i]] != zero)
2965 {
2966 res = f_data[i];
2967 res0 = 0.0;
2968 res2 = 0.0;
2969 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2970 {
2971 ii = A_diag_j[jj];
2972 res0 -= A_diag_data[jj] * u_data[ii];
2973 res2 += A_diag_data[jj] * Vtemp_data[ii];
2974 }
2975 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2976 {
2977 ii = A_offd_j[jj];
2978 res -= A_offd_data[jj] * Vext_data[ii];
2979 }
2980 u_data[i] *= prod;
2981 u_data[i] += relax_weight*(omega*res + res0 +
2982 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
2983 /*u_data[i] += omega*(relax_weight*res + res0 +
2984 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
2985 }
2986 }
2987 for (i = n-1; i > -1; i--) /* relax interior points */
2988 {
2989
2990 /*-----------------------------------------------------------
2991 * If i is of the right type ( C or F ) and diagonal is
2992
2993 * nonzero, relax point i; otherwise, skip it.
2994 *-----------------------------------------------------------*/
2995
2996 if (cf_marker[i] == relax_points
2997 && A_diag_data[A_diag_i[i]] != zero)
2998 {
2999 res = f_data[i];
3000 res0 = 0.0;
3001 res2 = 0.0;
3002 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
3003 {
3004 ii = A_diag_j[jj];
3005 res0 -= A_diag_data[jj] * u_data[ii];
3006 res2 += A_diag_data[jj] * Vtemp_data[ii];
3007 }
3008 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
3009 {
3010 ii = A_offd_j[jj];
3011 res -= A_offd_data[jj] * Vext_data[ii];
3012 }
3013 u_data[i] *= prod;
3014 u_data[i] += relax_weight*(omega*res + res0 +
3015 one_minus_omega*res2) / A_diag_data[A_diag_i[i]];
3016 /*u_data[i] += omega*(relax_weight*res + res0 +
3017 one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/
3018 }
3019 }
3020 }
3021 }
3022 }
3023 if (num_procs > 1)
3024 {
3025 hypre_TFree(Vext_data);
3026 hypre_TFree(v_buf_data);
3027 }
3028 }
3029 break;
3030
3031 case 7: /* Jacobi (uses ParMatvec) */
3032 {
3033
3034 /*-----------------------------------------------------------------
3035 * Copy f into temporary vector.
3036 *-----------------------------------------------------------------*/
3037
3038 hypre_ParVectorCopy(f,Vtemp);
3039
3040 /*-----------------------------------------------------------------
3041 * Perform Matvec Vtemp=f-Au
3042 *-----------------------------------------------------------------*/
3043
3044 hypre_ParCSRMatrixMatvec(-1.0,A, u, 1.0, Vtemp);
3045 for (i = 0; i < n; i++)
3046 {
3047
3048 /*-----------------------------------------------------------
3049 * If diagonal is nonzero, relax point i; otherwise, skip it.
3050 *-----------------------------------------------------------*/
3051
3052 if (A_diag_data[A_diag_i[i]] != zero)
3053 {
3054 u_data[i] += relax_weight * Vtemp_data[i]
3055 / A_diag_data[A_diag_i[i]];
3056 }
3057 }
3058 }
3059 break;
3060
3061 case 9: /* Direct solve: use gaussian elimination */
3062 {
3063 int n_global = (int)global_num_rows;
3064 /*-----------------------------------------------------------------
3065 * Generate CSR matrix from ParCSRMatrix A
3066 *-----------------------------------------------------------------*/
3067#ifdef HYPRE_NO_GLOBAL_PARTITION
3068 /* all processors are needed for these routines */
3069 A_CSR = hypre_ParCSRMatrixToCSRMatrixAll(A);
3070 f_vector = hypre_ParVectorToVectorAll(f);
3071 if (n)
3072 {
3073
3074#else
3075 if (n)
3076 {
3077 A_CSR = hypre_ParCSRMatrixToCSRMatrixAll(A);
3078 f_vector = hypre_ParVectorToVectorAll(f);
3079#endif
3080 A_CSR_i = hypre_CSRMatrixI(A_CSR);
3081 A_CSR_j = hypre_CSRMatrixJ(A_CSR);
3082 A_CSR_data = hypre_CSRMatrixData(A_CSR);
3083 f_vector_data = hypre_VectorData(f_vector);
3084
3085 A_mat = hypre_CTAlloc(double, n_global*n_global);
3086 b_vec = hypre_CTAlloc(double, n_global);
3087
3088 /*---------------------------------------------------------------
3089 * Load CSR matrix into A_mat.
3090 *---------------------------------------------------------------*/
3091
3092 for (i = 0; i < n_global; i++)
3093 {
3094 for (jj = A_CSR_i[i]; jj < A_CSR_i[i+1]; jj++)
3095 {
3096 column = A_CSR_j[jj];
3097 A_mat[i*n_global+column] = A_CSR_data[jj];
3098 }
3099 b_vec[i] = f_vector_data[i];
3100 }
3101
3102 relax_error = gselim(A_mat,b_vec,n_global);
3103
3104 for (i = 0; i < n; i++)
3105 {
3106 u_data[i] = b_vec[first_index+i];
3107 }
3108
3109 hypre_TFree(A_mat);
3110 hypre_TFree(b_vec);
3111 hypre_CSRMatrixDestroy(A_CSR);
3112 A_CSR = NULL;
3113 hypre_SeqVectorDestroy(f_vector);
3114 f_vector = NULL;
3115
3116 }
3117#ifdef HYPRE_NO_GLOBAL_PARTITION
3118 else
3119 {
3120
3121 hypre_CSRMatrixDestroy(A_CSR);
3122 A_CSR = NULL;
3123 hypre_SeqVectorDestroy(f_vector);
3124 f_vector = NULL;
3125 }
3126#endif
3127
3128 }
3129 break;
3130
3131 }
3132
3133 return(relax_error);
3134}
3135
3136/*-------------------------------------------------------------------------
3137 *
3138 * Gaussian Elimination
3139 *
3140 *------------------------------------------------------------------------ */
3141
3142int gselim(A,x,n)
3143double *A;
3144double *x;
3145int n;
3146{
3147 int err_flag = 0;
3148 int j,k,m;
3149 double factor;
3150
3151 if (n==1) /* A is 1x1 */
3152 {
3153 if (A[0] != 0.0)
3154 {
3155 x[0] = x[0]/A[0];
3156 return(err_flag);
3157 }
3158 else
3159 {
3160 err_flag = 1;
3161 return(err_flag);
3162 }
3163 }
3164 else /* A is nxn. Forward elimination */
3165 {
3166 for (k = 0; k < n-1; k++)
3167 {
3168 if (A[k*n+k] != 0.0)
3169 {
3170 for (j = k+1; j < n; j++)
3171 {
3172 if (A[j*n+k] != 0.0)
3173 {
3174 factor = A[j*n+k]/A[k*n+k];
3175 for (m = k+1; m < n; m++)
3176 {
3177 A[j*n+m] -= factor * A[k*n+m];
3178 }
3179 /* Elimination step for rhs */
3180 x[j] -= factor * x[k];
3181 }
3182 }
3183 }
3184 }
3185 /* Back Substitution */
3186 for (k = n-1; k > 0; --k)
3187 {
3188 x[k] /= A[k*n+k];
3189 for (j = 0; j < k; j++)
3190 {
3191 if (A[j*n+k] != 0.0)
3192 {
3193 x[j] -= x[k] * A[j*n+k];
3194 }
3195 }
3196 }
3197 x[0] /= A[0];
3198 return(err_flag);
3199 }
3200}
3201
3202
3203
3204
3205
Note: See TracBrowser for help on using the repository browser.