source: CIVL/examples/mpi-omp/AMG2013/parcsr_ls/par_interp.c

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 49.5 KB
RevLine 
[2aa6644]1/*BHEADER**********************************************************************
2 * Copyright (c) 2008, Lawrence Livermore National Security, LLC.
3 * Produced at the Lawrence Livermore National Laboratory.
4 * This file is part of HYPRE. See file COPYRIGHT for details.
5 *
6 * HYPRE is free software; you can redistribute it and/or modify it under the
7 * terms of the GNU Lesser General Public License (as published by the Free
8 * Software Foundation) version 2.1 dated February 1999.
9 *
10 * $Revision: 2.4 $
11 ***********************************************************************EHEADER*/
12
13
14
15#include "headers.h"
16
17/*---------------------------------------------------------------------------
18 * hypre_BoomerAMGBuildInterp
19 *--------------------------------------------------------------------------*/
20
21int
22hypre_BoomerAMGBuildInterp( hypre_ParCSRMatrix *A,
23 int *CF_marker,
24 hypre_ParCSRMatrix *S,
25 HYPRE_BigInt *num_cpts_global,
26 int num_functions,
27 int *dof_func,
28 int debug_flag,
29 double trunc_factor,
30 int max_elmts,
31 int *col_offd_S_to_A,
32 hypre_ParCSRMatrix **P_ptr)
33{
34
35 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
36 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
37 hypre_ParCSRCommHandle *comm_handle;
38
39 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
40 double *A_diag_data = hypre_CSRMatrixData(A_diag);
41 int *A_diag_i = hypre_CSRMatrixI(A_diag);
42 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
43
44 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
45 double *A_offd_data = hypre_CSRMatrixData(A_offd);
46 int *A_offd_i = hypre_CSRMatrixI(A_offd);
47 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
48 int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
49
50 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
51 int *S_diag_i = hypre_CSRMatrixI(S_diag);
52 int *S_diag_j = hypre_CSRMatrixJ(S_diag);
53
54 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
55 int *S_offd_i = hypre_CSRMatrixI(S_offd);
56 int *S_offd_j = hypre_CSRMatrixJ(S_offd);
57
58 hypre_ParCSRMatrix *P;
59 HYPRE_BigInt *col_map_offd_P = NULL;
60 int *tmp_map_offd = NULL;
61
62 int *CF_marker_offd = NULL;
63 int *dof_func_offd = NULL;
64
65 hypre_CSRMatrix *A_ext;
66
67 double *A_ext_data;
68 int *A_ext_i;
69 int *A_ext_j;
70
71 hypre_CSRMatrix *P_diag;
72 hypre_CSRMatrix *P_offd;
73
74 double *P_diag_data = NULL;
75 int *P_diag_i;
76 int *P_diag_j = NULL;
77 double *P_offd_data = NULL;
78 int *P_offd_i;
79 int *P_offd_j = NULL;
80
81 int P_diag_size, P_offd_size;
82
83 int *P_marker = NULL;
84 int *P_marker_offd = NULL;
85
86 int jj_counter,jj_counter_offd;
87 int *jj_count, *jj_count_offd;
88 int jj_begin_row,jj_begin_row_offd;
89 int jj_end_row,jj_end_row_offd;
90
91 int start_indexing = 0; /* start indexing for P_data at 0 */
92
93 int n_fine = hypre_CSRMatrixNumRows(A_diag);
94
95 int strong_f_marker;
96
97 int *fine_to_coarse;
98 /*int *fine_to_coarse_offd;*/
99 int *coarse_counter;
100 int coarse_shift;
101 HYPRE_BigInt total_global_cpts;
102 HYPRE_BigInt my_first_cpt;
103 int num_cols_P_offd;
104
105 int i,i1,i2;
106 int j,jl,jj,jj1;
107 int start;
108 int sgn;
109 int c_num;
110
111 double diagonal;
112 double sum;
113 double distribute;
114
115 double zero = 0.0;
116 double one = 1.0;
117
118 int my_id;
119 int num_procs;
[e0b7443]120 int num_threadsID;
[2aa6644]121 int num_sends;
122 int index;
123 int ns, ne, size, rest;
124 int *int_buf_data = NULL;
125
126 double wall_time; /* for debugging instrumentation */
127
128 MPI_Comm_size(comm, &num_procs);
129 MPI_Comm_rank(comm,&my_id);
[e0b7443]130 num_threadsID = hypre_NumThreads();
[2aa6644]131
132
133#ifdef HYPRE_NO_GLOBAL_PARTITION
134 my_first_cpt = num_cpts_global[0];
135 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
136 MPI_Bcast(&total_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
137#else
138 my_first_cpt = num_cpts_global[my_id];
139 total_global_cpts = num_cpts_global[num_procs];
140#endif
141
142 /*-------------------------------------------------------------------
143 * Get the CF_marker data for the off-processor columns
144 *-------------------------------------------------------------------*/
145
146 if (debug_flag==4) wall_time = time_getWallclockSeconds();
147
148 if (num_cols_A_offd) CF_marker_offd = hypre_CTAlloc(int, num_cols_A_offd);
149 if (num_functions > 1 && num_cols_A_offd)
150 dof_func_offd = hypre_CTAlloc(int, num_cols_A_offd);
151
152 if (!comm_pkg)
153 {
154#ifdef HYPRE_NO_GLOBAL_PARTITION
155 hypre_NewCommPkgCreate(A);
156#else
157 hypre_MatvecCommPkgCreate(A);
158#endif
159 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
160 }
161
162 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
163 if (hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends))
164 int_buf_data = hypre_CTAlloc(int,
165 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
166
167 index = 0;
168 for (i = 0; i < num_sends; i++)
169 {
170 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
171 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
172 int_buf_data[index++]
173 = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
174 }
175
176 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
177 CF_marker_offd);
178
179 hypre_ParCSRCommHandleDestroy(comm_handle);
180 if (num_functions > 1)
181 {
182 index = 0;
183 for (i = 0; i < num_sends; i++)
184 {
185 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
186 for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
187 int_buf_data[index++]
188 = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
189 }
190
191 comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
192 dof_func_offd);
193
194 hypre_ParCSRCommHandleDestroy(comm_handle);
195 }
196
197 if (debug_flag==4)
198 {
199 wall_time = time_getWallclockSeconds() - wall_time;
200 printf("Proc = %d Interp: Comm 1 CF_marker = %f\n",
201 my_id, wall_time);
202 fflush(NULL);
203 }
204
205 /*----------------------------------------------------------------------
206 * Get the ghost rows of A
207 *---------------------------------------------------------------------*/
208
209 if (debug_flag==4) wall_time = time_getWallclockSeconds();
210
211 if (num_procs > 1)
212 {
213 A_ext = hypre_ParCSRMatrixExtractConvBExt(A,A,1);
214 A_ext_i = hypre_CSRMatrixI(A_ext);
215 A_ext_j = hypre_CSRMatrixJ(A_ext);
216 A_ext_data = hypre_CSRMatrixData(A_ext);
217 }
218
219 /*index = 0;
220 for (i=0; i < num_cols_A_offd; i++)
221 {
222 for (j=A_ext_i[i]; j < A_ext_i[i+1]; j++)
223 {
224 k = A_ext_j[j];
225 if (k >= col_1 && k < col_n)
226 {
227 A_ext_j[index] = k - col_1;
228 A_ext_data[index++] = A_ext_data[j];
229 }
230 else
231 {
232 kc = hypre_BinarySearch(col_map_offd,k,num_cols_A_offd);
233 if (kc > -1)
234 {
235 A_ext_j[index] = -kc-1;
236 A_ext_data[index++] = A_ext_data[j];
237 }
238 }
239 }
240 A_ext_i[i] = index;
241 }
242 for (i = num_cols_A_offd; i > 0; i--)
243 A_ext_i[i] = A_ext_i[i-1];
244 if (num_procs > 1) A_ext_i[0] = 0;*/
245
246 if (debug_flag==4)
247 {
248 wall_time = time_getWallclockSeconds() - wall_time;
249 printf("Proc = %d Interp: Comm 2 Get A_ext = %f\n",
250 my_id, wall_time);
251 fflush(NULL);
252 }
253
254
255 /*-----------------------------------------------------------------------
256 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
257 *-----------------------------------------------------------------------*/
258
259 /*-----------------------------------------------------------------------
260 * Intialize counters and allocate mapping vector.
261 *-----------------------------------------------------------------------*/
262
[e0b7443]263 coarse_counter = hypre_CTAlloc(int, num_threadsID);
264 jj_count = hypre_CTAlloc(int, num_threadsID);
265 jj_count_offd = hypre_CTAlloc(int, num_threadsID);
[2aa6644]266
267 fine_to_coarse = hypre_CTAlloc(int, n_fine);
268/*#define HYPRE_SMP_PRIVATE i
269#include "../utilities/hypre_smp_forloop.h"*/
270 for (i = 0; i < n_fine; i++) fine_to_coarse[i] = -1;
271
272 jj_counter = start_indexing;
273 jj_counter_offd = start_indexing;
274
275 /*-----------------------------------------------------------------------
276 * Loop over fine grid.
277 *-----------------------------------------------------------------------*/
278
279/* RDF: this looks a little tricky, but doable */
280#define HYPRE_SMP_PRIVATE i,j,i1,jj,ns,ne,size,rest
281#include "../utilities/hypre_smp_forloop.h"
[e0b7443]282 for (j = 0; j < num_threadsID; j++)
[2aa6644]283 {
[e0b7443]284 size = n_fine/num_threadsID;
285 rest = n_fine - size*num_threadsID;
[2aa6644]286 if (j < rest)
287 {
288 ns = j*size+j;
289 ne = (j+1)*size+j+1;
290 }
291 else
292 {
293 ns = j*size+rest;
294 ne = (j+1)*size+rest;
295 }
296 for (i = ns; i < ne; i++)
297 {
298
299 /*--------------------------------------------------------------------
300 * If i is a C-point, interpolation is the identity. Also set up
301 * mapping vector.
302 *--------------------------------------------------------------------*/
303
304 if (CF_marker[i] >= 0)
305 {
306 jj_count[j]++;
307 fine_to_coarse[i] = coarse_counter[j];
308 coarse_counter[j]++;
309 }
310
311 /*--------------------------------------------------------------------
312 * If i is an F-point, interpolation is from the C-points that
313 * strongly influence i.
314 *--------------------------------------------------------------------*/
315
316 else
317 {
318 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
319 {
320 i1 = S_diag_j[jj];
321 if (CF_marker[i1] >= 0)
322 {
323 jj_count[j]++;
324 }
325 }
326
327 if (num_procs > 1)
328 {
329 if (col_offd_S_to_A)
330 {
331 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
332 {
333 i1 = col_offd_S_to_A[S_offd_j[jj]];
334 if (CF_marker_offd[i1] >= 0)
335 {
336 jj_count_offd[j]++;
337 }
338 }
339 }
340 else
341 {
342 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
343 {
344 i1 = S_offd_j[jj];
345 if (CF_marker_offd[i1] >= 0)
346 {
347 jj_count_offd[j]++;
348 }
349 }
350 }
351 }
352 }
353 }
354 }
355
356 /*-----------------------------------------------------------------------
357 * Allocate arrays.
358 *-----------------------------------------------------------------------*/
359
[e0b7443]360 for (i=0; i < num_threadsID-1; i++)
[2aa6644]361 {
362 coarse_counter[i+1] += coarse_counter[i];
363 jj_count[i+1] += jj_count[i];
364 jj_count_offd[i+1] += jj_count_offd[i];
365 }
[e0b7443]366 i = num_threadsID-1;
[2aa6644]367 jj_counter = jj_count[i];
368 jj_counter_offd = jj_count_offd[i];
369
370 P_diag_size = jj_counter;
371
372 P_diag_i = hypre_CTAlloc(int, n_fine+1);
373 if (P_diag_size)
374 {
375 P_diag_j = hypre_CTAlloc(int, P_diag_size);
376 P_diag_data = hypre_CTAlloc(double, P_diag_size);
377 }
378
379 P_diag_i[n_fine] = jj_counter;
380
381
382 P_offd_size = jj_counter_offd;
383
384 P_offd_i = hypre_CTAlloc(int, n_fine+1);
385 if (P_offd_size)
386 {
387 P_offd_j = hypre_CTAlloc(int, P_offd_size);
388 P_offd_data = hypre_CTAlloc(double, P_offd_size);
389 }
390 /*-----------------------------------------------------------------------
391 * Intialize some stuff.
392 *-----------------------------------------------------------------------*/
393
394 jj_counter = start_indexing;
395 jj_counter_offd = start_indexing;
396
397 if (debug_flag==4)
398 {
399 wall_time = time_getWallclockSeconds() - wall_time;
400 printf("Proc = %d Interp: Internal work 1 = %f\n",
401 my_id, wall_time);
402 fflush(NULL);
403 }
404
405 /*-----------------------------------------------------------------------
406 * Send and receive fine_to_coarse info.
407 *-----------------------------------------------------------------------*/
408
409 if (debug_flag==4) wall_time = time_getWallclockSeconds();
410
411 /*fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_A_offd); */
412
413#define HYPRE_SMP_PRIVATE i,j,ns,ne,size,rest,coarse_shift
414#include "../utilities/hypre_smp_forloop.h"
[e0b7443]415 for (j = 0; j < num_threadsID; j++)
[2aa6644]416 {
417 coarse_shift = 0;
418 if (j > 0) coarse_shift = coarse_counter[j-1];
[e0b7443]419 size = n_fine/num_threadsID;
420 rest = n_fine - size*num_threadsID;
[2aa6644]421 if (j < rest)
422 {
423 ns = j*size+j;
424 ne = (j+1)*size+j+1;
425 }
426 else
427 {
428 ns = j*size+rest;
429 ne = (j+1)*size+rest;
430 }
431 for (i = ns; i < ne; i++)
432 fine_to_coarse[i] += coarse_shift;
433 }
434
435/* index = 0;
436 for (i = 0; i < num_sends; i++)
437 {
438 start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
439 for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
440 big_buf_data[index++] = my_first_cpt+
441 (HYPRE_BigInt)fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
442 }
443
444 comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, big_buf_data,
445 fine_to_coarse_offd);
446
447 hypre_ParCSRCommHandleDestroy(comm_handle);
448
449 if (debug_flag==4)
450 {
451 wall_time = time_getWallclockSeconds() - wall_time;
452 printf("Proc = %d Interp: Comm 4 FineToCoarse = %f\n",
453 my_id, wall_time);
454 fflush(NULL);
455 }
456*/
457 if (debug_flag==4) wall_time = time_getWallclockSeconds();
458
459 /*-----------------------------------------------------------------------
460 * Loop over fine grid points.
461 *-----------------------------------------------------------------------*/
462
463#define HYPRE_SMP_PRIVATE i,j,jl,i1,i2,jj,jj1,ns,ne,size,rest,sum,diagonal,distribute,P_marker,P_marker_offd,strong_f_marker,jj_counter,jj_counter_offd,sgn,c_num,jj_begin_row,jj_end_row,jj_begin_row_offd,jj_end_row_offd
464#include "../utilities/hypre_smp_forloop.h"
[e0b7443]465 for (jl = 0; jl < num_threadsID; jl++)
[2aa6644]466 {
[e0b7443]467 size = n_fine/num_threadsID;
468 rest = n_fine - size*num_threadsID;
[2aa6644]469 if (jl < rest)
470 {
471 ns = jl*size+jl;
472 ne = (jl+1)*size+jl+1;
473 }
474 else
475 {
476 ns = jl*size+rest;
477 ne = (jl+1)*size+rest;
478 }
479 jj_counter = 0;
480 if (jl > 0) jj_counter = jj_count[jl-1];
481 jj_counter_offd = 0;
482 if (jl > 0) jj_counter_offd = jj_count_offd[jl-1];
483
484 if (n_fine) P_marker = hypre_CTAlloc(int, n_fine);
485 else P_marker = NULL;
486 if (num_cols_A_offd) P_marker_offd = hypre_CTAlloc(int, num_cols_A_offd);
487 else P_marker_offd = NULL;
488
489 for (i = 0; i < n_fine; i++)
490 {
491 P_marker[i] = -1;
492 }
493 for (i = 0; i < num_cols_A_offd; i++)
494 {
495 P_marker_offd[i] = -1;
496 }
497 strong_f_marker = -2;
498
499 for (i = ns; i < ne; i++)
500 {
501
502 /*--------------------------------------------------------------------
503 * If i is a c-point, interpolation is the identity.
504 *--------------------------------------------------------------------*/
505
506 if (CF_marker[i] >= 0)
507 {
508 P_diag_i[i] = jj_counter;
509 P_diag_j[jj_counter] = fine_to_coarse[i];
510 P_diag_data[jj_counter] = one;
511 jj_counter++;
512 }
513
514 /*--------------------------------------------------------------------
515 * If i is an F-point, build interpolation.
516 *--------------------------------------------------------------------*/
517
518 else
519 {
520 /* Diagonal part of P */
521 P_diag_i[i] = jj_counter;
522 jj_begin_row = jj_counter;
523
524 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
525 {
526 i1 = S_diag_j[jj];
527
528 /*--------------------------------------------------------------
529 * If neighbor i1 is a C-point, set column number in P_diag_j
530 * and initialize interpolation weight to zero.
531 *--------------------------------------------------------------*/
532
533 if (CF_marker[i1] >= 0)
534 {
535 P_marker[i1] = jj_counter;
536 P_diag_j[jj_counter] = fine_to_coarse[i1];
537 P_diag_data[jj_counter] = zero;
538 jj_counter++;
539 }
540
541 /*--------------------------------------------------------------
542 * If neighbor i1 is an F-point, mark it as a strong F-point
543 * whose connection needs to be distributed.
544 *--------------------------------------------------------------*/
545
546 else if (CF_marker[i1] != -3)
547 {
548 P_marker[i1] = strong_f_marker;
549 }
550 }
551 jj_end_row = jj_counter;
552
553 /* Off-Diagonal part of P */
554 P_offd_i[i] = jj_counter_offd;
555 jj_begin_row_offd = jj_counter_offd;
556
557
558 if (num_procs > 1)
559 {
560 if (col_offd_S_to_A)
561 {
562 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
563 {
564 i1 = col_offd_S_to_A[S_offd_j[jj]];
565
566 /*-----------------------------------------------------------
567 * If neighbor i1 is a C-point, set column number in P_offd_j
568 * and initialize interpolation weight to zero.
569 *-----------------------------------------------------------*/
570
571 if (CF_marker_offd[i1] >= 0)
572 {
573 P_marker_offd[i1] = jj_counter_offd;
574 /*P_offd_j[jj_counter_offd] = fine_to_coarse_offd[i1];*/
575 P_offd_j[jj_counter_offd] = i1;
576 P_offd_data[jj_counter_offd] = zero;
577 jj_counter_offd++;
578 }
579
580 /*-----------------------------------------------------------
581 * If neighbor i1 is an F-point, mark it as a strong F-point
582 * whose connection needs to be distributed.
583 *-----------------------------------------------------------*/
584
585 else if (CF_marker_offd[i1] != -3)
586 {
587 P_marker_offd[i1] = strong_f_marker;
588 }
589 }
590 }
591 else
592 {
593 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
594 {
595 i1 = S_offd_j[jj];
596
597 /*-----------------------------------------------------------
598 * If neighbor i1 is a C-point, set column number in P_offd_j
599 * and initialize interpolation weight to zero.
600 *-----------------------------------------------------------*/
601
602 if (CF_marker_offd[i1] >= 0)
603 {
604 P_marker_offd[i1] = jj_counter_offd;
605 /*P_offd_j[jj_counter_offd] = fine_to_coarse_offd[i1];*/
606 P_offd_j[jj_counter_offd] = i1;
607 P_offd_data[jj_counter_offd] = zero;
608 jj_counter_offd++;
609 }
610
611 /*-----------------------------------------------------------
612 * If neighbor i1 is an F-point, mark it as a strong F-point
613 * whose connection needs to be distributed.
614 *-----------------------------------------------------------*/
615
616 else if (CF_marker_offd[i1] != -3)
617 {
618 P_marker_offd[i1] = strong_f_marker;
619 }
620 }
621 }
622 }
623
624 jj_end_row_offd = jj_counter_offd;
625
626 diagonal = A_diag_data[A_diag_i[i]];
627
628
629 /* Loop over ith row of A. First, the diagonal part of A */
630
631 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
632 {
633 i1 = A_diag_j[jj];
634
635 /*--------------------------------------------------------------
636 * Case 1: neighbor i1 is a C-point and strongly influences i,
637 * accumulate a_{i,i1} into the interpolation weight.
638 *--------------------------------------------------------------*/
639
640 if (P_marker[i1] >= jj_begin_row)
641 {
642 P_diag_data[P_marker[i1]] += A_diag_data[jj];
643 }
644
645 /*--------------------------------------------------------------
646 * Case 2: neighbor i1 is an F-point and strongly influences i,
647 * distribute a_{i,i1} to C-points that strongly infuence i.
648 * Note: currently no distribution to the diagonal in this case.
649 *--------------------------------------------------------------*/
650
651 else if (P_marker[i1] == strong_f_marker)
652 {
653 sum = zero;
654
655 /*-----------------------------------------------------------
656 * Loop over row of A for point i1 and calculate the sum
657 * of the connections to c-points that strongly influence i.
658 *-----------------------------------------------------------*/
659 sgn = 1;
660 if (A_diag_data[A_diag_i[i1]] < 0) sgn = -1;
661 /* Diagonal block part of row i1 */
662 for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
663 {
664 i2 = A_diag_j[jj1];
665 if (P_marker[i2] >= jj_begin_row &&
666 (sgn*A_diag_data[jj1]) < 0)
667 {
668 sum += A_diag_data[jj1];
669 }
670 }
671
672 /* Off-Diagonal block part of row i1 */
673 if (num_procs > 1)
674 {
675 for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
676 {
677 i2 = A_offd_j[jj1];
678 if (P_marker_offd[i2] >= jj_begin_row_offd
679 && (sgn*A_offd_data[jj1]) < 0)
680 {
681 sum += A_offd_data[jj1];
682 }
683 }
684 }
685
686 if (sum != 0)
687 {
688 distribute = A_diag_data[jj] / sum;
689
690 /*-----------------------------------------------------------
691 * Loop over row of A for point i1 and do the distribution.
692 *-----------------------------------------------------------*/
693
694 /* Diagonal block part of row i1 */
695 for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
696 {
697 i2 = A_diag_j[jj1];
698 if (P_marker[i2] >= jj_begin_row
699 && (sgn*A_diag_data[jj1]) < 0)
700 {
701 P_diag_data[P_marker[i2]]
702 += distribute * A_diag_data[jj1];
703 }
704 }
705
706 /* Off-Diagonal block part of row i1 */
707 if (num_procs > 1)
708 {
709 for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
710 {
711 i2 = A_offd_j[jj1];
712 if (P_marker_offd[i2] >= jj_begin_row_offd
713 && (sgn*A_offd_data[jj1]) < 0)
714 {
715 P_offd_data[P_marker_offd[i2]]
716 += distribute * A_offd_data[jj1];
717 }
718 }
719 }
720 }
721 else
722 {
723 if (num_functions == 1 || dof_func[i] == dof_func[i1])
724 diagonal += A_diag_data[jj];
725 }
726 }
727
728 /*--------------------------------------------------------------
729 * Case 3: neighbor i1 weakly influences i, accumulate a_{i,i1}
730 * into the diagonal.
731 *--------------------------------------------------------------*/
732
733 else if (CF_marker[i1] != -3)
734 {
735 if (num_functions == 1 || dof_func[i] == dof_func[i1])
736 diagonal += A_diag_data[jj];
737 }
738
739 }
740
741
742 /*----------------------------------------------------------------
743 * Still looping over ith row of A. Next, loop over the
744 * off-diagonal part of A
745 *---------------------------------------------------------------*/
746
747 if (num_procs > 1)
748 {
749 for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
750 {
751 i1 = A_offd_j[jj];
752
753 /*--------------------------------------------------------------
754 * Case 1: neighbor i1 is a C-point and strongly influences i,
755 * accumulate a_{i,i1} into the interpolation weight.
756 *--------------------------------------------------------------*/
757
758 if (P_marker_offd[i1] >= jj_begin_row_offd)
759 {
760 P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];
761 }
762
763 /*------------------------------------------------------------
764 * Case 2: neighbor i1 is an F-point and strongly influences i,
765 * distribute a_{i,i1} to C-points that strongly infuence i.
766 * Note: currently no distribution to the diagonal in this case.
767 *-----------------------------------------------------------*/
768
769 else if (P_marker_offd[i1] == strong_f_marker)
770 {
771 sum = zero;
772
773 /*---------------------------------------------------------
774 * Loop over row of A_ext for point i1 and calculate the sum
775 * of the connections to c-points that strongly influence i.
776 *---------------------------------------------------------*/
777
778 /* find row number */
779 c_num = A_offd_j[jj];
780
781 sgn = 1;
782 if (A_ext_data[A_ext_i[c_num]] < 0) sgn = -1;
783 for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
784 {
785 i2 = A_ext_j[jj1];
786
787 if (i2 > -1)
788 {
789 /* in the diagonal block */
790 if (P_marker[i2] >= jj_begin_row
791 && (sgn*A_ext_data[jj1]) < 0)
792 {
793 sum += A_ext_data[jj1];
794 }
795 }
796 else
797 {
798 /* in the off_diagonal block */
799 if (P_marker_offd[-i2-1] >= jj_begin_row_offd
800 && (sgn*A_ext_data[jj1]) < 0)
801 {
802 sum += A_ext_data[jj1];
803 }
804
805 }
806
807 }
808
809 if (sum != 0)
810 {
811 distribute = A_offd_data[jj] / sum;
812 /*---------------------------------------------------------
813 * Loop over row of A_ext for point i1 and do
814 * the distribution.
815 *--------------------------------------------------------*/
816
817 /* Diagonal block part of row i1 */
818
819 for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
820 {
821 i2 = A_ext_j[jj1];
822
823 if (i2 > -1) /* in the diagonal block */
824 {
825 if (P_marker[i2] >= jj_begin_row
826 && (sgn*A_ext_data[jj1]) < 0)
827 {
828 P_diag_data[P_marker[i2]]
829 += distribute * A_ext_data[jj1];
830 }
831 }
832 else
833 {
834 /* in the off_diagonal block */
835 if (P_marker_offd[-i2-1] >= jj_begin_row_offd
836 && (sgn*A_ext_data[jj1]) < 0)
837 P_offd_data[P_marker_offd[-i2-1]]
838 += distribute * A_ext_data[jj1];
839 }
840 }
841 }
842 else
843 {
844 if (num_functions == 1 || dof_func[i] == dof_func_offd[i1])
845 diagonal += A_offd_data[jj];
846 }
847 }
848
849 /*-----------------------------------------------------------
850 * Case 3: neighbor i1 weakly influences i, accumulate a_{i,i1}
851 * into the diagonal.
852 *-----------------------------------------------------------*/
853
854 else if (CF_marker_offd[i1] != -3)
855 {
856 if (num_functions == 1 || dof_func[i] == dof_func_offd[i1])
857 diagonal += A_offd_data[jj];
858 }
859
860 }
861 }
862
863 /*-----------------------------------------------------------------
864 * Set interpolation weight by dividing by the diagonal.
865 *-----------------------------------------------------------------*/
866
867 if (diagonal == 0.0)
868 {
869 printf(" Warning! zero diagonal! Proc id %d row %d\n", my_id,i);
870 diagonal = A_diag_data[A_diag_i[i]];
871 }
872
873 for (jj = jj_begin_row; jj < jj_end_row; jj++)
874 {
875 P_diag_data[jj] /= -diagonal;
876 }
877
878 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
879 {
880 P_offd_data[jj] /= -diagonal;
881 }
882
883 }
884
885 strong_f_marker--;
886
887 P_offd_i[i+1] = jj_counter_offd;
888 }
889 hypre_TFree(P_marker);
890 hypre_TFree(P_marker_offd);
891 }
892
893 P = hypre_ParCSRMatrixCreate(comm,
894 hypre_ParCSRMatrixGlobalNumRows(A),
895 total_global_cpts,
896 hypre_ParCSRMatrixColStarts(A),
897 num_cpts_global,
898 0,
899 P_diag_i[n_fine],
900 P_offd_i[n_fine]);
901
902 P_diag = hypre_ParCSRMatrixDiag(P);
903 hypre_CSRMatrixData(P_diag) = P_diag_data;
904 hypre_CSRMatrixI(P_diag) = P_diag_i;
905 hypre_CSRMatrixJ(P_diag) = P_diag_j;
906 P_offd = hypre_ParCSRMatrixOffd(P);
907 hypre_CSRMatrixData(P_offd) = P_offd_data;
908 hypre_CSRMatrixI(P_offd) = P_offd_i;
909 hypre_CSRMatrixJ(P_offd) = P_offd_j;
910 hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
911
912 /* Compress P, removing coefficients smaller than trunc_factor * Max */
913
914 if (trunc_factor != 0.0 || max_elmts > 0)
915 {
916 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
917 P_diag_data = hypre_CSRMatrixData(P_diag);
918 P_diag_i = hypre_CSRMatrixI(P_diag);
919 P_diag_j = hypre_CSRMatrixJ(P_diag);
920 P_offd_data = hypre_CSRMatrixData(P_offd);
921 P_offd_i = hypre_CSRMatrixI(P_offd);
922 P_offd_j = hypre_CSRMatrixJ(P_offd);
923 P_diag_size = P_diag_i[n_fine];
924 P_offd_size = P_offd_i[n_fine];
925 }
926
927 num_cols_P_offd = 0;
928 if (P_offd_size)
929 {
930 if (num_cols_A_offd) P_marker = hypre_CTAlloc(int, num_cols_A_offd);
931
932/*#define HYPRE_SMP_PRIVATE i
933#include "../utilities/hypre_smp_forloop.h"*/
934 for (i=0; i < num_cols_A_offd; i++)
935 P_marker[i] = 0;
936
937 num_cols_P_offd = 0;
938 for (i=0; i < P_offd_size; i++)
939 {
940 index = P_offd_j[i];
941 if (!P_marker[index])
942 {
943 num_cols_P_offd++;
944 P_marker[index] = 1;
945 }
946 }
947
948 if (num_cols_P_offd)
949 {
950 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt,num_cols_P_offd);
951 tmp_map_offd = hypre_CTAlloc(int,num_cols_P_offd);
952 }
953
954 index = 0;
955 for (i=0; i < num_cols_P_offd; i++)
956 {
957 while (P_marker[index]==0) index++;
958 tmp_map_offd[i] = index++;
959 }
960
961/*#define HYPRE_SMP_PRIVATE i
962#include "../utilities/hypre_smp_forloop.h"*/
963 for (i=0; i < P_offd_size; i++)
964 P_offd_j[i] = hypre_BinarySearch(tmp_map_offd,
965 P_offd_j[i],
966 num_cols_P_offd);
967 hypre_TFree(P_marker);
968 }
969
970 for (i=0; i < n_fine; i++)
971 if (CF_marker[i] == -3) CF_marker[i] = -1;
972
973 if (num_cols_P_offd)
974 {
975 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
976 hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
977 }
978
979 if (num_procs > 1)
980 hypre_GetCommPkgRTFromCommPkgA(P,A, fine_to_coarse, tmp_map_offd);
981
982
983 *P_ptr = P;
984
985 hypre_TFree(tmp_map_offd);
986 hypre_TFree(CF_marker_offd);
987 hypre_TFree(dof_func_offd);
988 hypre_TFree(int_buf_data);
989 hypre_TFree(fine_to_coarse);
990 /*hypre_TFree(fine_to_coarse_offd);*/
991 hypre_TFree(coarse_counter);
992 hypre_TFree(jj_count);
993 hypre_TFree(jj_count_offd);
994
995 if (num_procs > 1) hypre_CSRMatrixDestroy(A_ext);
996
997 return(0);
998
999}
1000
1001int
1002hypre_BoomerAMGInterpTruncation( hypre_ParCSRMatrix *P,
1003 double trunc_factor,
1004 int max_elmts)
1005{
1006 hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
1007 int *P_diag_i = hypre_CSRMatrixI(P_diag);
1008 int *P_diag_j = hypre_CSRMatrixJ(P_diag);
1009 double *P_diag_data = hypre_CSRMatrixData(P_diag);
1010 int *P_diag_j_new;
1011 double *P_diag_data_new;
1012
1013 hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
1014 int *P_offd_i = hypre_CSRMatrixI(P_offd);
1015 int *P_offd_j = hypre_CSRMatrixJ(P_offd);
1016 double *P_offd_data = hypre_CSRMatrixData(P_offd);
1017 int *P_offd_j_new;
1018 double *P_offd_data_new;
1019
1020 int n_fine = hypre_CSRMatrixNumRows(P_diag);
1021 int num_cols = hypre_CSRMatrixNumCols(P_diag);
1022 int i, j, start_j;
1023 int ierr = 0;
1024 double max_coef;
1025 int next_open = 0;
1026 int now_checking = 0;
1027 int next_open_offd = 0;
1028 int now_checking_offd = 0;
1029 int num_lost = 0;
1030 int num_lost_offd = 0;
1031 int num_lost_global = 0;
1032 int num_lost_global_offd = 0;
1033 int P_diag_size;
1034 int P_offd_size;
1035 int num_elmts;
1036 int cnt, cnt_diag, cnt_offd;
1037 double row_sum;
1038 double scale;
1039
1040 /* Threading variables. Entry i of num_lost_(offd_)per_thread holds the
1041 * number of dropped entries over thread i's row range. Cum_lost_per_thread
1042 * will temporarily store the cumulative number of dropped entries up to
1043 * each thread. */
[e0b7443]1044 int my_thread_num, num_threadsID, start, stop;
[2aa6644]1045 int * max_num_threads = hypre_CTAlloc(int, 1);
1046 int * cum_lost_per_thread;
1047 int * num_lost_per_thread;
1048 int * num_lost_offd_per_thread;
1049
1050 /* Initialize threading variables */
1051 max_num_threads[0] = hypre_NumThreads();
1052 cum_lost_per_thread = hypre_CTAlloc(int, max_num_threads[0]);
1053 num_lost_per_thread = hypre_CTAlloc(int, max_num_threads[0]);
1054 num_lost_offd_per_thread = hypre_CTAlloc(int, max_num_threads[0]);
1055 for(i=0; i < max_num_threads[0]; i++)
1056 {
1057 num_lost_per_thread[i] = 0;
1058 num_lost_offd_per_thread[i] = 0;
1059 }
1060
1061#ifdef HYPRE_USING_OPENMP
[e0b7443]1062#pragma omp parallel private(i,my_thread_num,num_threadsID,max_coef,j,start_j,row_sum,scale,num_lost,now_checking,next_open,num_lost_offd,now_checking_offd,next_open_offd,start,stop,cnt_diag,cnt_offd,num_elmts,cnt)
[2aa6644]1063#endif
1064 {
1065 my_thread_num = hypre_GetThreadNum();
[e0b7443]1066 num_threadsID = hypre_NumActiveThreads();
[2aa6644]1067
1068 /* Compute each thread's range of rows to truncate and compress. Note,
1069 * that i, j and data are all compressed as entries are dropped, but
1070 * that the compression only occurs locally over each thread's row
1071 * range. P_diag_i is only made globally consistent at the end of this
1072 * routine. During the dropping phases, P_diag_i[stop] will point to
1073 * the start of the next thread's row range. */
1074
1075 /* my row range */
[e0b7443]1076 start = (n_fine/num_threadsID)*my_thread_num;
1077 if (my_thread_num == num_threadsID-1)
[2aa6644]1078 { stop = n_fine; }
1079 else
[e0b7443]1080 { stop = (n_fine/num_threadsID)*(my_thread_num+1); }
[2aa6644]1081
1082 /*
1083 * Truncate based on truncation tolerance
1084 */
1085 if (trunc_factor > 0)
1086 {
1087 num_lost_offd = 0;
1088
1089 next_open = P_diag_i[start];
1090 now_checking = P_diag_i[start];
1091 next_open_offd = P_offd_i[start];;
1092 now_checking_offd = P_offd_i[start];;
1093
1094 for (i = start; i < stop; i++)
1095 {
1096 max_coef = 0;
1097 for (j = P_diag_i[i]; j < P_diag_i[i+1]; j++)
1098 max_coef = (max_coef < fabs(P_diag_data[j])) ?
1099 fabs(P_diag_data[j]) : max_coef;
1100 for (j = P_offd_i[i]; j < P_offd_i[i+1]; j++)
1101 max_coef = (max_coef < fabs(P_offd_data[j])) ?
1102 fabs(P_offd_data[j]) : max_coef;
1103 max_coef *= trunc_factor;
1104
1105 start_j = P_diag_i[i];
1106 P_diag_i[i] -= num_lost;
1107 row_sum = 0;
1108 scale = 0;
1109 for (j = start_j; j < P_diag_i[i+1]; j++)
1110 {
1111 row_sum += P_diag_data[now_checking];
1112 if (fabs(P_diag_data[now_checking]) < max_coef)
1113 {
1114 num_lost++;
1115 now_checking++;
1116 }
1117 else
1118 {
1119 scale += P_diag_data[now_checking];
1120 P_diag_data[next_open] = P_diag_data[now_checking];
1121 P_diag_j[next_open] = P_diag_j[now_checking];
1122 now_checking++;
1123 next_open++;
1124 }
1125 }
1126
1127 start_j = P_offd_i[i];
1128 P_offd_i[i] -= num_lost_offd;
1129
1130 for (j = start_j; j < P_offd_i[i+1]; j++)
1131 {
1132 row_sum += P_offd_data[now_checking_offd];
1133 if (fabs(P_offd_data[now_checking_offd]) < max_coef)
1134 {
1135 num_lost_offd++;
1136 now_checking_offd++;
1137 }
1138 else
1139 {
1140 scale += P_offd_data[now_checking_offd];
1141 P_offd_data[next_open_offd] = P_offd_data[now_checking_offd];
1142 P_offd_j[next_open_offd] = P_offd_j[now_checking_offd];
1143 now_checking_offd++;
1144 next_open_offd++;
1145 }
1146 }
1147 /* normalize row of P */
1148
1149 if (scale != 0.)
1150 {
1151 if (scale != row_sum)
1152 {
1153 scale = row_sum/scale;
1154 for (j = P_diag_i[i]; j < (P_diag_i[i+1]-num_lost); j++)
1155 P_diag_data[j] *= scale;
1156 for (j = P_offd_i[i]; j < (P_offd_i[i+1]-num_lost_offd); j++)
1157 P_offd_data[j] *= scale;
1158 }
1159 }
1160 }
1161 /* store number of dropped elements and number of threads */
1162 if(my_thread_num == 0)
[e0b7443]1163 { max_num_threads[0] = num_threadsID; }
[2aa6644]1164 num_lost_per_thread[my_thread_num] = num_lost;
1165 num_lost_offd_per_thread[my_thread_num] = num_lost_offd;
1166
1167 } /* end if (trunc_factor > 0) */
1168
1169 /*P_diag_i[n_fine] -= num_lost;
1170 P_offd_i[n_fine] -= num_lost_offd;
1171 } */
1172 if (max_elmts > 0)
1173 {
1174 int P_mxnum, cnt1, last_index, last_index_offd;
1175 int *P_aux_j;
1176 double *P_aux_data;
1177 /* find maximum row length locally over this row range */
1178 P_mxnum = 0;
1179 for (i=start; i<stop; i++)
1180 {
1181 /* Note P_diag_i[stop] is the starting point for the next thread
1182 * in j and data, not the stop point for this thread */
1183 last_index = P_diag_i[i+1];
1184 last_index_offd = P_offd_i[i+1];
1185 if(i == stop-1)
1186 {
1187 last_index -= num_lost_per_thread[my_thread_num];
1188 last_index_offd -= num_lost_offd_per_thread[my_thread_num];
1189 }
1190 cnt1 = last_index-P_diag_i[i] + last_index_offd-P_offd_i[i];
1191 if (cnt1 > P_mxnum) P_mxnum = cnt1;
1192 }
1193 /*rowlength = 0;
1194 if (n_fine)
1195 rowlength = P_diag_i[1]+P_offd_i[1];
1196 P_mxnum = rowlength;
1197 for (i=1; i<n_fine; i++)
1198 {
1199 rowlength = P_diag_i[i+1]-P_diag_i[i]+P_offd_i[i+1]-P_offd_i[i];
1200 if (rowlength > P_mxnum) P_mxnum = rowlength;
1201 }*/
1202 if (P_mxnum > max_elmts)
1203 {
1204 num_lost = 0;
1205 num_lost_offd = 0;
1206
1207 /* two temporary arrays to hold row i for temporary operations */
1208 P_aux_j = hypre_CTAlloc(int, P_mxnum);
1209 P_aux_data = hypre_CTAlloc(double, P_mxnum);
1210 cnt_diag = P_diag_i[start];
1211 cnt_offd = P_offd_i[start];
1212
1213 for (i = start; i < stop; i++)
1214 {
1215 /* Note P_diag_i[stop] is the starting point for the next thread
1216 * in j and data, not the stop point for this thread */
1217 last_index = P_diag_i[i+1];
1218 last_index_offd = P_offd_i[i+1];
1219 if(i == stop-1)
1220 {
1221 last_index -= num_lost_per_thread[my_thread_num];
1222 last_index_offd -= num_lost_offd_per_thread[my_thread_num];
1223 }
1224
1225 row_sum = 0;
1226 num_elmts = P_diag_i[i+1]-P_diag_i[i]+P_offd_i[i+1]-P_offd_i[i];
1227 if (max_elmts < num_elmts)
1228 {
1229 cnt = 0;
1230 for (j = P_diag_i[i]; j < P_diag_i[i+1]; j++)
1231 {
1232 P_aux_j[cnt] = P_diag_j[j];
1233 P_aux_data[cnt++] = P_diag_data[j];
1234 row_sum += P_diag_data[j];
1235 }
1236 num_lost += cnt;
1237 cnt1 = cnt;
1238 for (j = P_offd_i[i]; j < P_offd_i[i+1]; j++)
1239 {
1240 P_aux_j[cnt] = P_offd_j[j]+num_cols;
1241 P_aux_data[cnt++] = P_offd_data[j];
1242 row_sum += P_offd_data[j];
1243 }
1244 num_lost_offd += cnt-cnt1;
1245 /* sort data */
1246 hypre_qsort2abs(P_aux_j,P_aux_data,0,cnt-1);
1247 scale = 0;
1248 P_diag_i[i] = cnt_diag;
1249 P_offd_i[i] = cnt_offd;
1250 for (j = 0; j < max_elmts; j++)
1251 {
1252 scale += P_aux_data[j];
1253 if (P_aux_j[j] < num_cols)
1254 {
1255 P_diag_j[cnt_diag] = P_aux_j[j];
1256 P_diag_data[cnt_diag++] = P_aux_data[j];
1257 }
1258 else
1259 {
1260 P_offd_j[cnt_offd] = P_aux_j[j]-num_cols;
1261 P_offd_data[cnt_offd++] = P_aux_data[j];
1262 }
1263 }
1264 num_lost -= cnt_diag-P_diag_i[i];
1265 num_lost_offd -= cnt_offd-P_offd_i[i];
1266 /* normalize row of P */
1267
1268 if (scale != 0.)
1269 {
1270 if (scale != row_sum)
1271 {
1272 scale = row_sum/scale;
1273 for (j = P_diag_i[i]; j < cnt_diag; j++)
1274 P_diag_data[j] *= scale;
1275 for (j = P_offd_i[i]; j < cnt_offd; j++)
1276 P_offd_data[j] *= scale;
1277 }
1278 }
1279 }
1280 else
1281 {
1282 if (P_diag_i[i] != cnt_diag)
1283 {
1284 start_j = P_diag_i[i];
1285 P_diag_i[i] = cnt_diag;
1286 for (j = start_j; j < P_diag_i[i+1]; j++)
1287 {
1288 P_diag_j[cnt_diag] = P_diag_j[j];
1289 P_diag_data[cnt_diag++] = P_diag_data[j];
1290 }
1291 }
1292 else
1293 cnt_diag += P_diag_i[i+1]-P_diag_i[i];
1294 if (P_offd_i[i] != cnt_offd)
1295 {
1296 start_j = P_offd_i[i];
1297 P_offd_i[i] = cnt_offd;
1298 for (j = start_j; j < P_offd_i[i+1]; j++)
1299 {
1300 P_offd_j[cnt_offd] = P_offd_j[j];
1301 P_offd_data[cnt_offd++] = P_offd_data[j];
1302 }
1303 }
1304 else
1305 cnt_offd += P_offd_i[i+1]-P_offd_i[i];
1306 }
1307 }
1308
1309 num_lost_per_thread[my_thread_num] += num_lost;
1310 num_lost_offd_per_thread[my_thread_num] += num_lost_offd;
1311 hypre_TFree(P_aux_j);
1312 hypre_TFree(P_aux_data);
1313
1314 } /* end if (P_mxnum > max_elmts) */
1315 } /* end if (max_elmts > 0) */
1316
1317 /* Sum up num_lost_global */
1318#ifdef HYPRE_USING_OPENMP
1319#pragma omp barrier
1320#endif
1321 if(my_thread_num == 0)
1322 {
1323 num_lost_global = 0;
1324 num_lost_global_offd = 0;
1325 for(i = 0; i < max_num_threads[0]; i++)
1326 {
1327 num_lost_global += num_lost_per_thread[i];
1328 num_lost_global_offd += num_lost_offd_per_thread[i];
1329 }
1330 }
1331#ifdef HYPRE_USING_OPENMP
1332#pragma omp barrier
1333#endif
1334
1335 /*
1336 * Synchronize and create new diag data structures
1337 */
1338 if (num_lost_global)
1339 {
1340 /* Each thread has it's own locally compressed CSR matrix from rows start
1341 * to stop. Now, we have to copy each thread's chunk into the new
1342 * process-wide CSR data structures
1343 *
1344 * First, we compute the new process-wide number of nonzeros (i.e.,
1345 * P_diag_size), and compute cum_lost_per_thread[k] so that this
1346 * entry holds the cumulative sum of entries dropped up to and
1347 * including thread k. */
1348 if(my_thread_num == 0)
1349 {
1350 P_diag_size = P_diag_i[n_fine];
1351
1352 for(i = 0; i < max_num_threads[0]; i++)
1353 {
1354 P_diag_size -= num_lost_per_thread[i];
1355 if(i > 0)
1356 { cum_lost_per_thread[i] = num_lost_per_thread[i] + cum_lost_per_thread[i-1]; }
1357 else
1358 { cum_lost_per_thread[i] = num_lost_per_thread[i]; }
1359 }
1360
1361 P_diag_j_new = hypre_CTAlloc(int,P_diag_size);
1362 P_diag_data_new = hypre_CTAlloc(double,P_diag_size);
1363 }
1364#ifdef HYPRE_USING_OPENMP
1365#pragma omp barrier
1366#endif
1367
1368
1369 /* points to next open spot in new data structures for this thread */
1370 if(my_thread_num == 0)
1371 { next_open = 0; }
1372 else
1373 {
1374 /* remember, cum_lost_per_thread[k] stores the num dropped up to and
1375 * including thread k */
1376 next_open = P_diag_i[start] - cum_lost_per_thread[my_thread_num-1];
1377 }
1378 /* copy the j and data arrays over */
1379 for(i = P_diag_i[start]; i < P_diag_i[stop] - num_lost_per_thread[my_thread_num]; i++)
1380 {
1381 P_diag_j_new[next_open] = P_diag_j[i];
1382 P_diag_data_new[next_open] = P_diag_data[i];
1383 next_open += 1;
1384 }
1385
1386#ifdef HYPRE_USING_OPENMP
1387#pragma omp barrier
1388#endif
1389 /* update P_diag_i with number of dropped entries by all lower ranked
1390 * threads */
1391 if(my_thread_num > 0)
1392 {
1393 for(i=start; i<stop; i++)
1394 {
1395 P_diag_i[i] -= cum_lost_per_thread[my_thread_num-1];
1396 }
1397 }
1398
1399 if(my_thread_num == 0)
1400 {
1401 /* Set last entry */
1402 P_diag_i[n_fine] = P_diag_size ;
1403
1404 hypre_TFree(P_diag_j);
1405 hypre_TFree(P_diag_data);
1406 hypre_CSRMatrixJ(P_diag) = P_diag_j_new;
1407 hypre_CSRMatrixData(P_diag) = P_diag_data_new;
1408 hypre_CSRMatrixNumNonzeros(P_diag) = P_diag_size;
1409 }
1410 }
1411
1412
1413 /*
1414 * Synchronize and create new offd data structures
1415 */
1416#ifdef HYPRE_USING_OPENMP
1417#pragma omp barrier
1418#endif
1419 if (num_lost_global_offd)
1420 {
1421 /* Repeat process for off-diagonal */
1422 if(my_thread_num == 0)
1423 {
1424 P_offd_size = P_offd_i[n_fine];
1425 for(i = 0; i < max_num_threads[0]; i++)
1426 {
1427 P_offd_size -= num_lost_offd_per_thread[i];
1428 if(i > 0)
1429 { cum_lost_per_thread[i] = num_lost_offd_per_thread[i] + cum_lost_per_thread[i-1]; }
1430 else
1431 { cum_lost_per_thread[i] = num_lost_offd_per_thread[i]; }
1432 }
1433
1434 P_offd_j_new = hypre_CTAlloc(int,P_offd_size);
1435 P_offd_data_new = hypre_CTAlloc(double,P_offd_size);
1436 }
1437#ifdef HYPRE_USING_OPENMP
1438#pragma omp barrier
1439#endif
1440 /* points to next open spot in new data structures for this thread */
1441 if(my_thread_num == 0)
1442 { next_open = 0; }
1443 else
1444 {
1445 /* remember, cum_lost_per_thread[k] stores the num dropped up to and
1446 * including thread k */
1447 next_open = P_offd_i[start] - cum_lost_per_thread[my_thread_num-1];
1448 }
1449
1450 /* copy the j and data arrays over */
1451 for(i = P_offd_i[start]; i < P_offd_i[stop] - num_lost_offd_per_thread[my_thread_num]; i++)
1452 {
1453 P_offd_j_new[next_open] = P_offd_j[i];
1454 P_offd_data_new[next_open] = P_offd_data[i];
1455 next_open += 1;
1456 }
1457
1458#ifdef HYPRE_USING_OPENMP
1459#pragma omp barrier
1460#endif
1461 /* update P_offd_i with number of dropped entries by all lower ranked
1462 * threads */
1463 if(my_thread_num > 0)
1464 {
1465 for(i=start; i<stop; i++)
1466 {
1467 P_offd_i[i] -= cum_lost_per_thread[my_thread_num-1];
1468 }
1469 }
1470
1471 if(my_thread_num == 0)
1472 {
1473 /* Set last entry */
1474 P_offd_i[n_fine] = P_offd_size ;
1475
1476 hypre_TFree(P_offd_j);
1477 hypre_TFree(P_offd_data);
1478 hypre_CSRMatrixJ(P_offd) = P_offd_j_new;
1479 hypre_CSRMatrixData(P_offd) = P_offd_data_new;
1480 hypre_CSRMatrixNumNonzeros(P_offd) = P_offd_size;
1481 }
1482 }
1483
1484 } /* end parallel region */
1485
1486 hypre_TFree(max_num_threads);
1487 hypre_TFree(cum_lost_per_thread);
1488 hypre_TFree(num_lost_per_thread);
1489 hypre_TFree(num_lost_offd_per_thread);
1490
1491 return ierr;
1492}
1493
/* Sort the companion arrays v (column indices) and w (values) jointly
 * into order of decreasing |w| over the index range [left, right].
 * The pivot is drawn from the midpoint of the range; swap2 keeps the
 * integer and double arrays permuted in lockstep. Used to pick the
 * max_elmts largest-magnitude interpolation weights of a row. */
void hypre_qsort2abs( int *v,
                      double *w,
                      int left,
                      int right )
{
   int k, pivot_pos;

   /* ranges of length 0 or 1 are already sorted */
   if (right <= left)
      return;

   /* move the middle element to the front to serve as pivot */
   swap2( v, w, left, (left + right) / 2 );

   /* partition: entries with magnitude greater than the pivot's are
    * collected at the front of the range */
   pivot_pos = left;
   for (k = left + 1; k <= right; k++)
   {
      if (fabs(w[k]) > fabs(w[left]))
      {
         ++pivot_pos;
         swap2(v, w, pivot_pos, k);
      }
   }

   /* drop the pivot into its final slot, then sort both halves */
   swap2(v, w, left, pivot_pos);
   hypre_qsort2abs(v, w, left, pivot_pos - 1);
   hypre_qsort2abs(v, w, pivot_pos + 1, right);
}
Note: See TracBrowser for help on using the repository browser.