source: CIVL/examples/mpi-omp/AMG2013/parcsr_ls/par_lr_interp.c

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 163.5 KB
Line 
1/*BHEADER**********************************************************************
2 * Copyright (c) 2008, Lawrence Livermore National Security, LLC.
3 * Produced at the Lawrence Livermore National Laboratory.
4 * This file is part of HYPRE. See file COPYRIGHT for details.
5 *
6 * HYPRE is free software; you can redistribute it and/or modify it under the
7 * terms of the GNU Lesser General Public License (as published by the Free
8 * Software Foundation) version 2.1 dated February 1999.
9 *
10 * $Revision: 2.4 $
11 ***********************************************************************EHEADER*/
12
13
14#include "headers.h"
15#include "aux_interp.h"
16
17#define MAX_C_CONNECTIONS 100
18#define HAVE_COMMON_C 1
19
20/*---------------------------------------------------------------------------
21 * hypre_BoomerAMGBuildStdInterp
22 * Comment: The interpolatory weighting can be changed with the sep_weight
23 * variable. This can enable not separating negative and positive
24 * off diagonals in the weight formula.
25 *--------------------------------------------------------------------------*/
26
27int
28hypre_BoomerAMGBuildStdInterp(hypre_ParCSRMatrix *A, int *CF_marker,
29 hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
30 int num_functions, int *dof_func, int debug_flag,
31 double trunc_factor, int max_elmts,
32 int sep_weight, int *col_offd_S_to_A,
33 hypre_ParCSRMatrix **P_ptr)
34{
35 /* Communication Variables */
36 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
37 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
38 int my_id, num_procs;
39
40 /* Variables to store input variables */
41 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
42 double *A_diag_data = hypre_CSRMatrixData(A_diag);
43 int *A_diag_i = hypre_CSRMatrixI(A_diag);
44 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
45
46 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
47 double *A_offd_data = hypre_CSRMatrixData(A_offd);
48 int *A_offd_i = hypre_CSRMatrixI(A_offd);
49 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
50
51 int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
52 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
53 int n_fine = hypre_CSRMatrixNumRows(A_diag);
54 HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstRowIndex(A);
55 int local_numrows = hypre_CSRMatrixNumRows(A_diag);
56 HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt)local_numrows;
57 HYPRE_BigInt total_global_cpts, my_first_cpt;
58
59 /* Variables to store strong connection matrix info */
60 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
61 int *S_diag_i = hypre_CSRMatrixI(S_diag);
62 int *S_diag_j = hypre_CSRMatrixJ(S_diag);
63
64 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
65 int *S_offd_i = hypre_CSRMatrixI(S_offd);
66 int *S_offd_j = hypre_CSRMatrixJ(S_offd);
67
68 /* Interpolation matrix P */
69 hypre_ParCSRMatrix *P;
70 hypre_CSRMatrix *P_diag;
71 hypre_CSRMatrix *P_offd;
72
73 double *P_diag_data = NULL;
74 int *P_diag_i, *P_diag_j = NULL;
75 double *P_offd_data = NULL;
76 int *P_offd_i, *P_offd_j = NULL;
77
78 HYPRE_BigInt *col_map_offd_P;
79 int *tmp_map_offd;
80 int P_diag_size;
81 int P_offd_size;
82 int *P_marker = NULL;
83 int *P_marker_offd = NULL;
84 int *CF_marker_offd = NULL;
85 int *tmp_CF_marker_offd = NULL;
86 int *dof_func_offd = NULL;
87
88 /* Full row information for columns of A that are off diag*/
89 hypre_BigCSRMatrix *A_ext;
90 double *A_ext_data;
91 int *A_ext_i;
92 HYPRE_BigInt *big_A_ext_j;
93 int *A_ext_j;
94
95 int *fine_to_coarse = NULL;
96 HYPRE_BigInt *fine_to_coarse_offd = NULL;
97 HYPRE_BigInt *found;
98
99 int num_cols_P_offd;
100 int newoff, loc_col;
101 int A_ext_rows, full_off_procNodes;
102
103 hypre_BigCSRMatrix *Sop;
104 int *Sop_i;
105 HYPRE_BigInt *big_Sop_j;
106 int *Sop_j;
107
108 int Soprows;
109
110 /* Variables to keep count of interpolatory points */
111 int jj_counter, jj_counter_offd;
112 int jj_begin_row, jj_end_row;
113 int jj_begin_row_offd = 0;
114 int jj_end_row_offd = 0;
115 int coarse_counter, coarse_counter_offd;
116 int *ihat = NULL;
117 int *ihat_offd = NULL;
118 int *ipnt = NULL;
119 int *ipnt_offd = NULL;
120 int strong_f_marker = -2;
121
122 /* Interpolation weight variables */
123 double *ahat = NULL;
124 double *ahat_offd = NULL;
125 double sum_pos, sum_pos_C, sum_neg, sum_neg_C, sum, sum_C;
126 double diagonal, distribute;
127 double alfa, beta;
128
129 /* Loop variables */
130 int index;
131 int start_indexing = 0;
132 int i, i1, j1, jj, kk, k1;
133 int cnt_c, cnt_f, cnt_c_offd, cnt_f_offd, indx;
134
135 /* Definitions */
136 double zero = 0.0;
137 double one = 1.0;
138 double wall_time;
139 double wall_1 = 0;
140 double wall_2 = 0;
141 double wall_3 = 0;
142
143
144 hypre_ParCSRCommPkg *extend_comm_pkg = NULL;
145
146 if (debug_flag== 4) wall_time = time_getWallclockSeconds();
147
148 /* BEGIN */
149 MPI_Comm_size(comm, &num_procs);
150 MPI_Comm_rank(comm,&my_id);
151
152#ifdef HYPRE_NO_GLOBAL_PARTITION
153 my_first_cpt = num_cpts_global[0];
154 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
155 MPI_Bcast(&total_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
156#else
157 my_first_cpt = num_cpts_global[my_id];
158 total_global_cpts = num_cpts_global[num_procs];
159#endif
160
161 if (!comm_pkg)
162 {
163#ifdef HYPRE_NO_GLOBAL_PARTITION
164 hypre_NewCommPkgCreate(A);
165#else
166 hypre_MatvecCommPkgCreate(A);
167#endif
168 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
169 }
170
171 /* Set up off processor information (specifically for neighbors of
172 * neighbors */
173 newoff = 0;
174 full_off_procNodes = 0;
175 if (num_procs > 1)
176 {
177 /*----------------------------------------------------------------------
178 * Get the off processors rows for A and S, associated with columns in
179 * A_offd and S_offd.
180 *---------------------------------------------------------------------*/
181 A_ext = hypre_ParCSRMatrixExtractBigExt(A,A,1);
182 A_ext_i = hypre_BigCSRMatrixI(A_ext);
183 big_A_ext_j = hypre_BigCSRMatrixJ(A_ext);
184 A_ext_data = hypre_BigCSRMatrixData(A_ext);
185 A_ext_rows = hypre_BigCSRMatrixNumRows(A_ext);
186
187 Sop = hypre_ParCSRMatrixExtractBigExt(S,A,0);
188 Sop_i = hypre_BigCSRMatrixI(Sop);
189 big_Sop_j = hypre_BigCSRMatrixJ(Sop);
190 Soprows = hypre_BigCSRMatrixNumRows(Sop);
191
192 /* Find nodes that are neighbors of neighbors, not found in offd */
193 newoff = new_offd_nodes(&found, A_ext_rows, A_ext_i, big_A_ext_j,
194 &A_ext_j, Soprows, col_map_offd, col_1,
195 col_n, Sop_i, big_Sop_j, &Sop_j, CF_marker,
196 comm_pkg);
197
198 hypre_BigCSRMatrixJ(A_ext) = NULL;
199 hypre_BigCSRMatrixJ(Sop) = NULL;
200
201 if(newoff >= 0)
202 full_off_procNodes = newoff + num_cols_A_offd;
203 else
204 return(1);
205
206 /* Possibly add new points and new processors to the comm_pkg, all
207 * processors need new_comm_pkg */
208
209 /* AHB - create a new comm package just for extended info -
210 this will work better with the assumed partition*/
211 hypre_ParCSRFindExtendCommPkg(A, newoff, found,
212 &extend_comm_pkg);
213
214 CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
215
216 if (num_functions > 1 && full_off_procNodes > 0)
217 dof_func_offd = hypre_CTAlloc(int, full_off_procNodes);
218
219 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, CF_marker,
220 full_off_procNodes, CF_marker_offd);
221
222 if(num_functions > 1)
223 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, dof_func,
224 full_off_procNodes, dof_func_offd);
225 }
226
227
228 /*-----------------------------------------------------------------------
229 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
230 *-----------------------------------------------------------------------*/
231
232 /*-----------------------------------------------------------------------
233 * Intialize counters and allocate mapping vector.
234 *-----------------------------------------------------------------------*/
235 P_diag_i = hypre_CTAlloc(int, n_fine+1);
236 P_offd_i = hypre_CTAlloc(int, n_fine+1);
237
238 if (n_fine)
239 {
240 fine_to_coarse = hypre_CTAlloc(int, n_fine);
241 P_marker = hypre_CTAlloc(int, n_fine);
242 }
243
244 if (full_off_procNodes)
245 {
246 P_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
247 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, full_off_procNodes);
248 tmp_CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
249 }
250
251 for (i=0; i < full_off_procNodes; i++)
252 {
253 P_marker_offd[i] = -1;
254 tmp_CF_marker_offd[i] = -1;
255 fine_to_coarse_offd[i] = -1;
256 }
257
258 for (i=0; i < n_fine; i++)
259 {
260 P_marker[i] = -1;
261 fine_to_coarse[i] = -1;
262 }
263
264 jj_counter = start_indexing;
265 jj_counter_offd = start_indexing;
266 coarse_counter = 0;
267 coarse_counter_offd = 0;
268
269 /*-----------------------------------------------------------------------
270 * Loop over fine grid.
271 *-----------------------------------------------------------------------*/
272 for (i = 0; i < n_fine; i++)
273 {
274 P_diag_i[i] = jj_counter;
275 if (num_procs > 1)
276 P_offd_i[i] = jj_counter_offd;
277
278 if (CF_marker[i] >= 0)
279 {
280 jj_counter++;
281 fine_to_coarse[i] = coarse_counter;
282 coarse_counter++;
283 }
284
285 /*--------------------------------------------------------------------
286 * If i is an F-point, interpolation is from the C-points that
287 * strongly influence i, or C-points that stronly influence F-points
288 * that strongly influence i.
289 *--------------------------------------------------------------------*/
290 else if (CF_marker[i] != -3)
291 {
292 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
293 {
294 i1 = S_diag_j[jj];
295 if (CF_marker[i1] >= 0)
296 { /* i1 is a C point */
297 if (P_marker[i1] < P_diag_i[i])
298 {
299 P_marker[i1] = jj_counter;
300 jj_counter++;
301 }
302 }
303 else if (CF_marker[i1] != -3)
304 { /* i1 is a F point, loop through it's strong neighbors */
305 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
306 {
307 k1 = S_diag_j[kk];
308 if (CF_marker[k1] >= 0)
309 {
310 if(P_marker[k1] < P_diag_i[i])
311 {
312 P_marker[k1] = jj_counter;
313 jj_counter++;
314 }
315 }
316 }
317 if(num_procs > 1)
318 {
319 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
320 {
321 if(col_offd_S_to_A)
322 k1 = col_offd_S_to_A[S_offd_j[kk]];
323 else
324 k1 = S_offd_j[kk];
325 if (CF_marker_offd[k1] > 0)
326 {
327 if(P_marker_offd[k1] < P_offd_i[i])
328 {
329 tmp_CF_marker_offd[k1] = 1;
330 P_marker_offd[k1] = jj_counter_offd;
331 jj_counter_offd++;
332 }
333 }
334 }
335 }
336 }
337 }
338 /* Look at off diag strong connections of i */
339 if (num_procs > 1)
340 {
341 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
342 {
343 i1 = S_offd_j[jj];
344 if(col_offd_S_to_A)
345 i1 = col_offd_S_to_A[i1];
346 if (CF_marker_offd[i1] > 0)
347 {
348 if(P_marker_offd[i1] < P_offd_i[i])
349 {
350 tmp_CF_marker_offd[i1] = 1;
351 P_marker_offd[i1] = jj_counter_offd;
352 jj_counter_offd++;
353 }
354 }
355 else if (CF_marker_offd[i1] != -3)
356 { /* F point; look at neighbors of i1. Sop contains global col
357 * numbers and entries that could be in S_diag or S_offd or
358 * neither. */
359 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
360 {
361 loc_col = Sop_j[kk];
362 if(loc_col > -1)
363 { /* In S_diag */
364 if(CF_marker[loc_col] > 0)
365 {
366 if(P_marker[loc_col] < P_diag_i[i])
367 {
368 P_marker[loc_col] = jj_counter;
369 jj_counter++;
370 }
371 }
372 }
373 else
374 {
375 loc_col = - loc_col - 1;
376 if(CF_marker_offd[loc_col] > 0)
377 {
378 if(P_marker_offd[loc_col] < P_offd_i[i])
379 {
380 P_marker_offd[loc_col] = jj_counter_offd;
381 tmp_CF_marker_offd[loc_col] = 1;
382 jj_counter_offd++;
383 }
384 }
385 }
386 }
387 }
388 }
389 }
390 }
391 }
392
393 if (debug_flag==4)
394 {
395 wall_time = time_getWallclockSeconds() - wall_time;
396 printf("Proc = %d determine structure %f\n",
397 my_id, wall_time);
398 fflush(NULL);
399 }
400 /*-----------------------------------------------------------------------
401 * Allocate arrays.
402 *-----------------------------------------------------------------------*/
403
404
405 P_diag_size = jj_counter;
406 P_offd_size = jj_counter_offd;
407
408 if (P_diag_size)
409 {
410 P_diag_j = hypre_CTAlloc(int, P_diag_size);
411 P_diag_data = hypre_CTAlloc(double, P_diag_size);
412 }
413 if (P_offd_size)
414 {
415 P_offd_j = hypre_CTAlloc(int, P_offd_size);
416 P_offd_data = hypre_CTAlloc(double, P_offd_size);
417 }
418
419 P_diag_i[n_fine] = jj_counter;
420 P_offd_i[n_fine] = jj_counter_offd;
421
422 jj_counter = start_indexing;
423 jj_counter_offd = start_indexing;
424
425 /* Fine to coarse mapping */
426 if(num_procs > 1)
427 {
428 big_insert_new_nodes(comm_pkg, extend_comm_pkg, fine_to_coarse,
429 full_off_procNodes, my_first_cpt,
430 fine_to_coarse_offd);
431 }
432
433 /* Initialize ahat, which is a modification to a, used in the standard
434 * interpolation routine. */
435 if (n_fine)
436 {
437 ahat = hypre_CTAlloc(double, n_fine);
438 ihat = hypre_CTAlloc(int, n_fine);
439 ipnt = hypre_CTAlloc(int, n_fine);
440 }
441 if (full_off_procNodes)
442 {
443 ahat_offd = hypre_CTAlloc(double, full_off_procNodes);
444 ihat_offd = hypre_CTAlloc(int, full_off_procNodes);
445 ipnt_offd = hypre_CTAlloc(int, full_off_procNodes);
446 }
447
448 for (i = 0; i < n_fine; i++)
449 {
450 P_marker[i] = -1;
451 ahat[i] = 0;
452 ihat[i] = -1;
453 }
454 for (i = 0; i < full_off_procNodes; i++)
455 {
456 P_marker_offd[i] = -1;
457 ahat_offd[i] = 0;
458 ihat_offd[i] = -1;
459 }
460
461 /*-----------------------------------------------------------------------
462 * Loop over fine grid points.
463 *-----------------------------------------------------------------------*/
464 for (i = 0; i < n_fine; i++)
465 {
466 jj_begin_row = jj_counter;
467 if(num_procs > 1)
468 jj_begin_row_offd = jj_counter_offd;
469
470 /*--------------------------------------------------------------------
471 * If i is a c-point, interpolation is the identity.
472 *--------------------------------------------------------------------*/
473
474 if (CF_marker[i] >= 0)
475 {
476 P_diag_j[jj_counter] = fine_to_coarse[i];
477 P_diag_data[jj_counter] = one;
478 jj_counter++;
479 }
480
481 /*--------------------------------------------------------------------
482 * If i is an F-point, build interpolation.
483 *--------------------------------------------------------------------*/
484
485 else if (CF_marker[i] != -3)
486 {
487 if (debug_flag==4) wall_time = time_getWallclockSeconds();
488 strong_f_marker--;
489 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
490 {
491 i1 = S_diag_j[jj];
492
493 /*--------------------------------------------------------------
494 * If neighbor i1 is a C-point, set column number in P_diag_j
495 * and initialize interpolation weight to zero.
496 *--------------------------------------------------------------*/
497
498 if (CF_marker[i1] >= 0)
499 {
500 if (P_marker[i1] < jj_begin_row)
501 {
502 P_marker[i1] = jj_counter;
503 P_diag_j[jj_counter] = i1;
504 P_diag_data[jj_counter] = zero;
505 jj_counter++;
506 }
507 }
508 else if (CF_marker[i1] != -3)
509 {
510 P_marker[i1] = strong_f_marker;
511 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
512 {
513 k1 = S_diag_j[kk];
514 if (CF_marker[k1] >= 0)
515 {
516 if(P_marker[k1] < jj_begin_row)
517 {
518 P_marker[k1] = jj_counter;
519 P_diag_j[jj_counter] = k1;
520 P_diag_data[jj_counter] = zero;
521 jj_counter++;
522 }
523 }
524 }
525 if(num_procs > 1)
526 {
527 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
528 {
529 if(col_offd_S_to_A)
530 k1 = col_offd_S_to_A[S_offd_j[kk]];
531 else
532 k1 = S_offd_j[kk];
533 if(CF_marker_offd[k1] >= 0)
534 {
535 if(P_marker_offd[k1] < jj_begin_row_offd)
536 {
537 P_marker_offd[k1] = jj_counter_offd;
538 P_offd_j[jj_counter_offd] = k1;
539 P_offd_data[jj_counter_offd] = zero;
540 jj_counter_offd++;
541 }
542 }
543 }
544 }
545 }
546 }
547
548 if ( num_procs > 1)
549 {
550 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
551 {
552 i1 = S_offd_j[jj];
553 if(col_offd_S_to_A)
554 i1 = col_offd_S_to_A[i1];
555 if ( CF_marker_offd[i1] >= 0)
556 {
557 if(P_marker_offd[i1] < jj_begin_row_offd)
558 {
559 P_marker_offd[i1] = jj_counter_offd;
560 P_offd_j[jj_counter_offd]=i1;
561 P_offd_data[jj_counter_offd] = zero;
562 jj_counter_offd++;
563 }
564 }
565 else if (CF_marker_offd[i1] != -3)
566 {
567 P_marker_offd[i1] = strong_f_marker;
568 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
569 {
570 loc_col = Sop_j[kk];
571 if(loc_col > -1)
572 {
573 if(CF_marker[loc_col] > 0)
574 {
575 if(P_marker[loc_col] < jj_begin_row)
576 {
577 P_marker[loc_col] = jj_counter;
578 P_diag_j[jj_counter] = loc_col;
579 P_diag_data[jj_counter] = zero;
580 jj_counter++;
581 }
582 }
583 }
584 else
585 {
586 loc_col = -loc_col - 1;
587 if(CF_marker_offd[loc_col] > 0)
588 {
589 if(P_marker_offd[loc_col] < jj_begin_row_offd)
590 {
591 P_marker_offd[loc_col] = jj_counter_offd;
592 P_offd_j[jj_counter_offd]=loc_col;
593 P_offd_data[jj_counter_offd] = zero;
594 jj_counter_offd++;
595 }
596 }
597 }
598 }
599 }
600 }
601 }
602
603 jj_end_row = jj_counter;
604 jj_end_row_offd = jj_counter_offd;
605
606 if (debug_flag==4)
607 {
608 wall_time = time_getWallclockSeconds() - wall_time;
609 wall_1 += wall_time;
610 fflush(NULL);
611 }
612 if (debug_flag==4) wall_time = time_getWallclockSeconds();
613 cnt_c = 0;
614 cnt_f = jj_end_row-jj_begin_row;
615 cnt_c_offd = 0;
616 cnt_f_offd = jj_end_row_offd-jj_begin_row_offd;
617 ihat[i] = cnt_f;
618 ipnt[cnt_f] = i;
619 ahat[cnt_f++] = A_diag_data[A_diag_i[i]];
620 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
621 { /* i1 is direct neighbor */
622 i1 = A_diag_j[jj];
623 if (P_marker[i1] != strong_f_marker)
624 {
625 indx = ihat[i1];
626 if (indx > -1)
627 ahat[indx] += A_diag_data[jj];
628 else if (P_marker[i1] >= jj_begin_row)
629 {
630 ihat[i1] = cnt_c;
631 ipnt[cnt_c] = i1;
632 ahat[cnt_c++] += A_diag_data[jj];
633 }
634 else if (CF_marker[i1] != -3)
635 {
636 ihat[i1] = cnt_f;
637 ipnt[cnt_f] = i1;
638 ahat[cnt_f++] += A_diag_data[jj];
639 }
640 }
641 else
642 {
643 if(num_functions == 1 || dof_func[i] == dof_func[i1])
644 {
645 distribute = A_diag_data[jj]/A_diag_data[A_diag_i[i1]];
646 for (kk = A_diag_i[i1]+1; kk < A_diag_i[i1+1]; kk++)
647 {
648 k1 = A_diag_j[kk];
649 indx = ihat[k1];
650 if (indx > -1)
651 ahat[indx] -= A_diag_data[kk]*distribute;
652 else if (P_marker[k1] >= jj_begin_row)
653 {
654 ihat[k1] = cnt_c;
655 ipnt[cnt_c] = k1;
656 ahat[cnt_c++] -= A_diag_data[kk]*distribute;
657 }
658 else
659 {
660 ihat[k1] = cnt_f;
661 ipnt[cnt_f] = k1;
662 ahat[cnt_f++] -= A_diag_data[kk]*distribute;
663 }
664 }
665 if(num_procs > 1)
666 {
667 for (kk = A_offd_i[i1]; kk < A_offd_i[i1+1]; kk++)
668 {
669 k1 = A_offd_j[kk];
670 indx = ihat_offd[k1];
671 if(num_functions == 1 || dof_func[i1] == dof_func_offd[k1])
672 {
673 if (indx > -1)
674 ahat_offd[indx] -= A_offd_data[kk]*distribute;
675 else if (P_marker_offd[k1] >= jj_begin_row_offd)
676 {
677 ihat_offd[k1] = cnt_c_offd;
678 ipnt_offd[cnt_c_offd] = k1;
679 ahat_offd[cnt_c_offd++] -= A_offd_data[kk]*distribute;
680 }
681 else
682 {
683 ihat_offd[k1] = cnt_f_offd;
684 ipnt_offd[cnt_f_offd] = k1;
685 ahat_offd[cnt_f_offd++] -= A_offd_data[kk]*distribute;
686 }
687 }
688 }
689 }
690 }
691 }
692 }
693 if(num_procs > 1)
694 {
695 for(jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
696 {
697 i1 = A_offd_j[jj];
698 if(P_marker_offd[i1] != strong_f_marker)
699 {
700 indx = ihat_offd[i1];
701 if (indx > -1)
702 ahat_offd[indx] += A_offd_data[jj];
703 else if (P_marker_offd[i1] >= jj_begin_row_offd)
704 {
705 ihat_offd[i1] = cnt_c_offd;
706 ipnt_offd[cnt_c_offd] = i1;
707 ahat_offd[cnt_c_offd++] += A_offd_data[jj];
708 }
709 else if (CF_marker_offd[i1] != -3)
710 {
711 ihat_offd[i1] = cnt_f_offd;
712 ipnt_offd[cnt_f_offd] = i1;
713 ahat_offd[cnt_f_offd++] += A_offd_data[jj];
714 }
715 }
716 else
717 {
718 if(num_functions == 1 || dof_func[i] == dof_func_offd[i1])
719 {
720 distribute = A_offd_data[jj]/A_ext_data[A_ext_i[i1]];
721 for (kk = A_ext_i[i1]+1; kk < A_ext_i[i1+1]; kk++)
722 {
723 loc_col = A_ext_j[kk];
724 if(loc_col > -1)
725 { /*diag*/
726 indx = ihat[loc_col];
727 if (indx > -1)
728 ahat[indx] -= A_ext_data[kk]*distribute;
729 else if (P_marker[loc_col] >= jj_begin_row)
730 {
731 ihat[loc_col] = cnt_c;
732 ipnt[cnt_c] = loc_col;
733 ahat[cnt_c++] -= A_ext_data[kk]*distribute;
734 }
735 else
736 {
737 ihat[loc_col] = cnt_f;
738 ipnt[cnt_f] = loc_col;
739 ahat[cnt_f++] -= A_ext_data[kk]*distribute;
740 }
741 }
742 else
743 {
744 loc_col = - loc_col - 1;
745 if(num_functions == 1 ||
746 dof_func_offd[loc_col] == dof_func_offd[i1])
747 {
748 indx = ihat_offd[loc_col];
749 if (indx > -1)
750 ahat_offd[indx] -= A_ext_data[kk]*distribute;
751 else if(P_marker_offd[loc_col] >= jj_begin_row_offd)
752 {
753 ihat_offd[loc_col] = cnt_c_offd;
754 ipnt_offd[cnt_c_offd] = loc_col;
755 ahat_offd[cnt_c_offd++] -= A_ext_data[kk]*distribute;
756 }
757 else
758 {
759 ihat_offd[loc_col] = cnt_f_offd;
760 ipnt_offd[cnt_f_offd] = loc_col;
761 ahat_offd[cnt_f_offd++] -= A_ext_data[kk]*distribute;
762 }
763 }
764 }
765 }
766 }
767 }
768 }
769 }
770 if (debug_flag==4)
771 {
772 wall_time = time_getWallclockSeconds() - wall_time;
773 wall_2 += wall_time;
774 fflush(NULL);
775 }
776
777 if (debug_flag==4) wall_time = time_getWallclockSeconds();
778 diagonal = ahat[cnt_c];
779 ahat[cnt_c] = 0;
780 sum_pos = 0;
781 sum_pos_C = 0;
782 sum_neg = 0;
783 sum_neg_C = 0;
784 sum = 0;
785 sum_C = 0;
786 if(sep_weight == 1)
787 {
788 for (jj=0; jj < cnt_c; jj++)
789 {
790 if (ahat[jj] > 0)
791 {
792 sum_pos_C += ahat[jj];
793 }
794 else
795 {
796 sum_neg_C += ahat[jj];
797 }
798 }
799 if(num_procs > 1)
800 {
801 for (jj=0; jj < cnt_c_offd; jj++)
802 {
803 if (ahat_offd[jj] > 0)
804 {
805 sum_pos_C += ahat_offd[jj];
806 }
807 else
808 {
809 sum_neg_C += ahat_offd[jj];
810 }
811 }
812 }
813 sum_pos = sum_pos_C;
814 sum_neg = sum_neg_C;
815 for (jj=cnt_c+1; jj < cnt_f; jj++)
816 {
817 if (ahat[jj] > 0)
818 {
819 sum_pos += ahat[jj];
820 }
821 else
822 {
823 sum_neg += ahat[jj];
824 }
825 ahat[jj] = 0;
826 }
827 if(num_procs > 1)
828 {
829 for (jj=cnt_c_offd; jj < cnt_f_offd; jj++)
830 {
831 if (ahat_offd[jj] > 0)
832 {
833 sum_pos += ahat_offd[jj];
834 }
835 else
836 {
837 sum_neg += ahat_offd[jj];
838 }
839 ahat_offd[jj] = 0;
840 }
841 }
842 if (sum_neg_C) alfa = sum_neg/sum_neg_C/diagonal;
843 if (sum_pos_C) beta = sum_pos/sum_pos_C/diagonal;
844
845 /*-----------------------------------------------------------------
846 * Set interpolation weight by dividing by the diagonal.
847 *-----------------------------------------------------------------*/
848
849 for (jj = jj_begin_row; jj < jj_end_row; jj++)
850 {
851 j1 = ihat[P_diag_j[jj]];
852 if (ahat[j1] > 0)
853 P_diag_data[jj] = -beta*ahat[j1];
854 else
855 P_diag_data[jj] = -alfa*ahat[j1];
856
857 P_diag_j[jj] = fine_to_coarse[P_diag_j[jj]];
858 ahat[j1] = 0;
859 }
860 for (jj=0; jj < cnt_f; jj++)
861 ihat[ipnt[jj]] = -1;
862 if(num_procs > 1)
863 {
864 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
865 {
866 j1 = ihat_offd[P_offd_j[jj]];
867 if (ahat_offd[j1] > 0)
868 P_offd_data[jj] = -beta*ahat_offd[j1];
869 else
870 P_offd_data[jj] = -alfa*ahat_offd[j1];
871
872 ahat_offd[j1] = 0;
873 }
874 for (jj=0; jj < cnt_f_offd; jj++)
875 ihat_offd[ipnt_offd[jj]] = -1;
876 }
877 }
878 else
879 {
880 for (jj=0; jj < cnt_c; jj++)
881 {
882 sum_C += ahat[jj];
883 }
884 if(num_procs > 1)
885 {
886 for (jj=0; jj < cnt_c_offd; jj++)
887 {
888 sum_C += ahat_offd[jj];
889 }
890 }
891 sum = sum_C;
892 for (jj=cnt_c+1; jj < cnt_f; jj++)
893 {
894 sum += ahat[jj];
895 ahat[jj] = 0;
896 }
897 if(num_procs > 1)
898 {
899 for (jj=cnt_c_offd; jj < cnt_f_offd; jj++)
900 {
901 sum += ahat_offd[jj];
902 ahat_offd[jj] = 0;
903 }
904 }
905 if (sum_C) alfa = sum/sum_C/diagonal;
906
907 /*-----------------------------------------------------------------
908 * Set interpolation weight by dividing by the diagonal.
909 *-----------------------------------------------------------------*/
910
911 for (jj = jj_begin_row; jj < jj_end_row; jj++)
912 {
913 j1 = ihat[P_diag_j[jj]];
914 P_diag_data[jj] = -alfa*ahat[j1];
915 P_diag_j[jj] = fine_to_coarse[P_diag_j[jj]];
916 ahat[j1] = 0;
917 }
918 for (jj=0; jj < cnt_f; jj++)
919 ihat[ipnt[jj]] = -1;
920 if(num_procs > 1)
921 {
922 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
923 {
924 j1 = ihat_offd[P_offd_j[jj]];
925 P_offd_data[jj] = -alfa*ahat_offd[j1];
926 ahat_offd[j1] = 0;
927 }
928 for (jj=0; jj < cnt_f_offd; jj++)
929 ihat_offd[ipnt_offd[jj]] = -1;
930 }
931 }
932 if (debug_flag==4)
933 {
934 wall_time = time_getWallclockSeconds() - wall_time;
935 wall_3 += wall_time;
936 fflush(NULL);
937 }
938 }
939 }
940
941 if (debug_flag==4)
942 {
943 printf("Proc = %d fill part 1 %f part 2 %f part 3 %f\n",
944 my_id, wall_1, wall_2, wall_3);
945 fflush(NULL);
946 }
947 P = hypre_ParCSRMatrixCreate(comm,
948 hypre_ParCSRMatrixGlobalNumRows(A),
949 total_global_cpts,
950 hypre_ParCSRMatrixColStarts(A),
951 num_cpts_global,
952 0,
953 P_diag_i[n_fine],
954 P_offd_i[n_fine]);
955
956 P_diag = hypre_ParCSRMatrixDiag(P);
957 hypre_CSRMatrixData(P_diag) = P_diag_data;
958 hypre_CSRMatrixI(P_diag) = P_diag_i;
959 hypre_CSRMatrixJ(P_diag) = P_diag_j;
960 P_offd = hypre_ParCSRMatrixOffd(P);
961 hypre_CSRMatrixData(P_offd) = P_offd_data;
962 hypre_CSRMatrixI(P_offd) = P_offd_i;
963 hypre_CSRMatrixJ(P_offd) = P_offd_j;
964 hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
965
966 /* Compress P, removing coefficients smaller than trunc_factor * Max */
967 if (trunc_factor != 0.0 || max_elmts > 0)
968 {
969 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
970 P_diag_data = hypre_CSRMatrixData(P_diag);
971 P_diag_i = hypre_CSRMatrixI(P_diag);
972 P_diag_j = hypre_CSRMatrixJ(P_diag);
973 P_offd_data = hypre_CSRMatrixData(P_offd);
974 P_offd_i = hypre_CSRMatrixI(P_offd);
975 P_offd_j = hypre_CSRMatrixJ(P_offd);
976 P_diag_size = P_diag_i[n_fine];
977 P_offd_size = P_offd_i[n_fine];
978 }
979
980 /* This builds col_map, col_map should be monotone increasing and contain
981 * global numbers. */
982 num_cols_P_offd = 0;
983 if(P_offd_size)
984 {
985 num_cols_P_offd = 0;
986 for (i=0; i < P_offd_size; i++)
987 {
988 index = P_offd_j[i];
989 if(tmp_CF_marker_offd[index] == 1)
990 {
991 num_cols_P_offd++;
992 tmp_CF_marker_offd[index] = 2;
993 }
994 }
995
996 if (num_cols_P_offd)
997 {
998 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd);
999 tmp_map_offd = hypre_CTAlloc(int, num_cols_P_offd);
1000 }
1001
1002 index = 0;
1003 for(i = 0; i < num_cols_P_offd; i++)
1004 {
1005 while( tmp_CF_marker_offd[index] == -1) index++;
1006 col_map_offd_P[i] = fine_to_coarse_offd[index];
1007 tmp_map_offd[i] = index++;
1008 }
1009
1010 hypre_BigQsortbi(col_map_offd_P, tmp_map_offd, 0, num_cols_P_offd-1);
1011
1012 for (i = 0; i < num_cols_P_offd; i++)
1013 tmp_CF_marker_offd[tmp_map_offd[i]] = i;
1014
1015 for(i = 0; i < P_offd_size; i++)
1016 P_offd_j[i] = tmp_CF_marker_offd[P_offd_j[i]];
1017 /*index = 0;
1018 for(i = 0; i < num_cols_P_offd; i++)
1019 {
1020 while (P_marker[index] == 0) index++;
1021
1022 col_map_offd_P[i] = fine_to_coarse_offd[index];
1023 index++;
1024 }*/
1025
1026 /* Sort the col_map_offd_P and P_offd_j correctly */
1027 /* Check if sort actually changed anything */
1028 /*for(i = 0; i < num_cols_P_offd; i++)
1029 P_marker[i] = col_map_offd_P[i];
1030
1031 if(hypre_ssort(col_map_offd_P,num_cols_P_offd))
1032 {
1033 for(i = 0; i < P_offd_size; i++)
1034 for(j = 0; j < num_cols_P_offd; j++)
1035 if(P_marker[P_offd_j[i]] == col_map_offd_P[j])
1036 {
1037 P_offd_j[i] = j;
1038 j = num_cols_P_offd;
1039 }
1040 }
1041 hypre_TFree(P_marker); */
1042 }
1043
1044 if (num_cols_P_offd)
1045 {
1046 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
1047 hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
1048 hypre_TFree(tmp_map_offd);
1049 }
1050
1051#ifdef HYPRE_NO_GLOBAL_PARTITION
1052 hypre_NewCommPkgCreate(P);
1053#else
1054 hypre_MatvecCommPkgCreate(P);
1055#endif
1056
1057 for (i=0; i < n_fine; i++)
1058 if (CF_marker[i] == -3) CF_marker[i] = -1;
1059
1060 *P_ptr = P;
1061
1062 /* Deallocate memory */
1063 hypre_TFree(fine_to_coarse);
1064 hypre_TFree(P_marker);
1065 hypre_TFree(ahat);
1066 hypre_TFree(ihat);
1067 hypre_TFree(ipnt);
1068
1069 if (full_off_procNodes)
1070 {
1071 hypre_TFree(ahat_offd);
1072 hypre_TFree(ihat_offd);
1073 hypre_TFree(ipnt_offd);
1074 }
1075 if (num_procs > 1)
1076 {
1077 hypre_BigCSRMatrixDestroy(Sop);
1078 hypre_BigCSRMatrixDestroy(A_ext);
1079 hypre_TFree(fine_to_coarse_offd);
1080 hypre_TFree(P_marker_offd);
1081 hypre_TFree(CF_marker_offd);
1082 hypre_TFree(tmp_CF_marker_offd);
1083
1084 if(num_functions > 1)
1085 hypre_TFree(dof_func_offd);
1086 hypre_TFree(found);
1087
1088 hypre_MatvecCommPkgDestroy(extend_comm_pkg);
1089
1090 }
1091
1092
1093 return hypre_error_flag;
1094}
1095
1096/*---------------------------------------------------------------------------
1097 * hypre_BoomerAMGBuildExtPIInterp
1098 * Comment:
1099 *--------------------------------------------------------------------------*/
1100int
1101hypre_BoomerAMGBuildExtPIInterp(hypre_ParCSRMatrix *A, int *CF_marker,
1102 hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
1103 int num_functions, int *dof_func, int debug_flag,
1104 double trunc_factor, int max_elmts,
1105 int *col_offd_S_to_A,
1106 hypre_ParCSRMatrix **P_ptr)
1107{
1108 /* Communication Variables */
1109 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
1110 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1111
1112
1113 int my_id, num_procs;
1114
1115 /* Variables to store input variables */
1116 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1117 double *A_diag_data = hypre_CSRMatrixData(A_diag);
1118 int *A_diag_i = hypre_CSRMatrixI(A_diag);
1119 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
1120
1121 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
1122 double *A_offd_data = hypre_CSRMatrixData(A_offd);
1123 int *A_offd_i = hypre_CSRMatrixI(A_offd);
1124 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
1125
1126 int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
1127 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
1128 int n_fine = hypre_CSRMatrixNumRows(A_diag);
1129 HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstRowIndex(A);
1130 int local_numrows = hypre_CSRMatrixNumRows(A_diag);
1131 HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt) local_numrows;
1132 HYPRE_BigInt total_global_cpts, my_first_cpt;
1133
1134 /* Variables to store strong connection matrix info */
1135 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
1136 int *S_diag_i = hypre_CSRMatrixI(S_diag);
1137 int *S_diag_j = hypre_CSRMatrixJ(S_diag);
1138
1139 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
1140 int *S_offd_i = hypre_CSRMatrixI(S_offd);
1141 int *S_offd_j = hypre_CSRMatrixJ(S_offd);
1142
1143 /* Interpolation matrix P */
1144 hypre_ParCSRMatrix *P;
1145 hypre_CSRMatrix *P_diag;
1146 hypre_CSRMatrix *P_offd;
1147
1148 double *P_diag_data = NULL;
1149 int *P_diag_i, *P_diag_j = NULL;
1150 double *P_offd_data = NULL;
1151 int *P_offd_i, *P_offd_j = NULL;
1152
1153 HYPRE_BigInt *col_map_offd_P;
1154 int *tmp_map_offd;
1155 int P_diag_size;
1156 int P_offd_size;
1157 int *P_marker = NULL;
1158 int *P_marker_offd = NULL;
1159 int *CF_marker_offd = NULL;
1160 int *tmp_CF_marker_offd = NULL;
1161 int *dof_func_offd = NULL;
1162
1163 /* Full row information for columns of A that are off diag*/
1164 hypre_BigCSRMatrix *A_ext;
1165 double *A_ext_data;
1166 int *A_ext_i;
1167 HYPRE_BigInt *big_A_ext_j;
1168 int *A_ext_j;
1169
1170 int *fine_to_coarse = NULL;
1171 HYPRE_BigInt *fine_to_coarse_offd = NULL;
1172 HYPRE_BigInt *found;
1173
1174 int num_cols_P_offd;
1175 int newoff, loc_col;
1176 int A_ext_rows, full_off_procNodes;
1177
1178 hypre_BigCSRMatrix *Sop;
1179 int *Sop_i;
1180 HYPRE_BigInt *big_Sop_j;
1181 int *Sop_j;
1182
1183 int Soprows, sgn;
1184
1185 /* Variables to keep count of interpolatory points */
1186 int *jj_count, *jj_count_offd;
1187 int jj_counter, jj_counter_offd;
1188 int jj_begin_row, jj_end_row;
1189 int jj_begin_row_offd = 0;
1190 int jj_end_row_offd = 0;
1191 int *coarse_counter, coarse_counter_offd;
1192
1193 /* Interpolation weight variables */
1194 double sum, diagonal, distribute;
1195 int strong_f_marker = -2;
1196
1197 /* Loop variables */
1198 int index;
1199 int start_indexing = 0;
1200 int i, i1, i2, j, jj, kk, k1, jj1;
1201
1202 /* Definitions */
1203 double zero = 0.0;
1204 double one = 1.0;
1205 double wall_time;
1206 int size, rest, ns, ne;
1207 int num_threads;
1208 int coarse_shift;
1209
1210
1211 hypre_ParCSRCommPkg *extend_comm_pkg = NULL;
1212
1213 if (debug_flag==4) wall_time = time_getWallclockSeconds();
1214
1215 /* BEGIN */
1216 MPI_Comm_size(comm, &num_procs);
1217 MPI_Comm_rank(comm,&my_id);
1218 num_threads = hypre_NumThreads();
1219
1220
1221#ifdef HYPRE_NO_GLOBAL_PARTITION
1222 my_first_cpt = num_cpts_global[0];
1223 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
1224 MPI_Bcast(&total_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
1225#else
1226 my_first_cpt = num_cpts_global[my_id];
1227 total_global_cpts = num_cpts_global[num_procs];
1228#endif
1229
1230 if (!comm_pkg)
1231 {
1232#ifdef HYPRE_NO_GLOBAL_PARTITION
1233 hypre_NewCommPkgCreate(A);
1234#else
1235 hypre_MatvecCommPkgCreate(A);
1236#endif
1237 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1238 }
1239
1240 /* Set up off processor information (specifically for neighbors of
1241 * neighbors */
1242 newoff = 0;
1243 full_off_procNodes = 0;
1244 if (num_procs > 1)
1245 {
1246 /*----------------------------------------------------------------------
1247 * Get the off processors rows for A and S, associated with columns in
1248 * A_offd and S_offd.
1249 *---------------------------------------------------------------------*/
1250 A_ext = hypre_ParCSRMatrixExtractBigExt(A,A,1);
1251 A_ext_i = hypre_BigCSRMatrixI(A_ext);
1252 big_A_ext_j = hypre_BigCSRMatrixJ(A_ext);
1253 A_ext_data = hypre_BigCSRMatrixData(A_ext);
1254 A_ext_rows = hypre_BigCSRMatrixNumRows(A_ext);
1255
1256 Sop = hypre_ParCSRMatrixExtractBigExt(S,A,0);
1257 Sop_i = hypre_BigCSRMatrixI(Sop);
1258 big_Sop_j = hypre_BigCSRMatrixJ(Sop);
1259 Soprows = hypre_BigCSRMatrixNumRows(Sop);
1260
1261 /* Find nodes that are neighbors of neighbors, not found in offd */
1262 newoff = new_offd_nodes(&found, A_ext_rows, A_ext_i, big_A_ext_j,
1263 &A_ext_j,
1264 Soprows, col_map_offd, col_1, col_n,
1265 Sop_i, big_Sop_j, &Sop_j, CF_marker, comm_pkg);
1266
1267 hypre_BigCSRMatrixJ(A_ext) = NULL;
1268 hypre_BigCSRMatrixJ(Sop) = NULL;
1269
1270 if(newoff >= 0)
1271 full_off_procNodes = newoff + num_cols_A_offd;
1272 else
1273 return(1);
1274
1275 /* Possibly add new points and new processors to the comm_pkg, all
1276 * processors need new_comm_pkg */
1277
1278 /* AHB - create a new comm package just for extended info -
1279 this will work better with the assumed partition*/
1280 hypre_ParCSRFindExtendCommPkg(A, newoff, found,
1281 &extend_comm_pkg);
1282
1283 CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
1284
1285 if (num_functions > 1 && full_off_procNodes > 0)
1286 dof_func_offd = hypre_CTAlloc(int, full_off_procNodes);
1287
1288 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, CF_marker,
1289 full_off_procNodes, CF_marker_offd);
1290
1291 if(num_functions > 1)
1292 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, dof_func,
1293 full_off_procNodes, dof_func_offd);
1294 }
1295
1296
1297 /*-----------------------------------------------------------------------
1298 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
1299 *-----------------------------------------------------------------------*/
1300
1301 /*-----------------------------------------------------------------------
1302 * Intialize counters and allocate mapping vector.
1303 *-----------------------------------------------------------------------*/
1304 P_diag_i = hypre_CTAlloc(int, n_fine+1);
1305 P_offd_i = hypre_CTAlloc(int, n_fine+1);
1306
1307 if (n_fine)
1308 {
1309 fine_to_coarse = hypre_CTAlloc(int, n_fine);
1310 }
1311
1312 if (full_off_procNodes)
1313 {
1314 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, full_off_procNodes);
1315 tmp_CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
1316 }
1317
1318 for (i=0; i < full_off_procNodes; i++)
1319 {
1320 tmp_CF_marker_offd[i] = -1;
1321 fine_to_coarse_offd[i] = -1;
1322 }
1323
1324 coarse_counter = hypre_CTAlloc(int, num_threads);
1325 jj_count = hypre_CTAlloc(int, num_threads);
1326 jj_count_offd = hypre_CTAlloc(int, num_threads);
1327
1328 jj_counter = start_indexing;
1329 jj_counter_offd = start_indexing;
1330 coarse_counter_offd = 0;
1331
1332 /*-----------------------------------------------------------------------
1333 * Loop over fine grid.
1334 *-----------------------------------------------------------------------*/
1335#define HYPRE_SMP_PRIVATE i,j,i1,jj,kk,k1,ns,ne,size,rest,P_marker,P_marker_offd, loc_col
1336#include "../utilities/hypre_smp_forloop.h"
1337 for (j = 0; j < num_threads; j++)
1338 {
1339 size = n_fine/num_threads;
1340 rest = n_fine - size*num_threads;
1341 jj_count[j] = 0;
1342 jj_count_offd[j] = 0;
1343 coarse_counter[j] = 0;
1344 if (n_fine)
1345 {
1346 P_marker = hypre_CTAlloc(int, n_fine);
1347 for (i=0; i < n_fine; i++)
1348 P_marker[i] = -1;
1349 }
1350
1351 if (full_off_procNodes)
1352 {
1353 P_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
1354 for (i=0; i < full_off_procNodes; i++)
1355 P_marker_offd[i] = -1;
1356 }
1357
1358 if (j < rest)
1359 {
1360 ns = j*size+j;
1361 ne = (j+1)*size+j+1;
1362 }
1363 else
1364 {
1365 ns = j*size+rest;
1366 ne = (j+1)*size+rest;
1367 }
1368 for (i = ns; i < ne; i++)
1369 {
1370 P_diag_i[i] = jj_count[j];
1371 if (num_procs > 1)
1372 P_offd_i[i] = jj_count_offd[j];
1373
1374 fine_to_coarse[i] = -1;
1375 if (CF_marker[i] >= 0)
1376 {
1377 jj_count[j]++;
1378 fine_to_coarse[i] = coarse_counter[j];
1379 coarse_counter[j]++;
1380 }
1381
1382 /*--------------------------------------------------------------------
1383 * If i is an F-point, interpolation is from the C-points that
1384 * strongly influence i, or C-points that stronly influence F-points
1385 * that strongly influence i.
1386 *--------------------------------------------------------------------*/
1387 else if (CF_marker[i] != -3)
1388 {
1389 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
1390 {
1391 i1 = S_diag_j[jj];
1392 if (CF_marker[i1] > 0)
1393 { /* i1 is a C point */
1394 if (P_marker[i1] < P_diag_i[i])
1395 {
1396 P_marker[i1] = jj_count[j];
1397 jj_count[j]++;
1398 }
1399 }
1400 else if (CF_marker[i1] != -3)
1401 { /* i1 is a F point, loop through it's strong neighbors */
1402 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
1403 {
1404 k1 = S_diag_j[kk];
1405 if (CF_marker[k1] > 0)
1406 {
1407 if(P_marker[k1] < P_diag_i[i])
1408 {
1409 P_marker[k1] = jj_count[j];
1410 jj_count[j]++;
1411 }
1412 }
1413 }
1414 if(num_procs > 1)
1415 {
1416 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
1417 {
1418 if(col_offd_S_to_A)
1419 k1 = col_offd_S_to_A[S_offd_j[kk]];
1420 else
1421 k1 = S_offd_j[kk];
1422 if (CF_marker_offd[k1] > 0)
1423 {
1424 if(P_marker_offd[k1] < P_offd_i[i])
1425 {
1426 tmp_CF_marker_offd[k1] = 1;
1427 P_marker_offd[k1] = jj_count_offd[j];
1428 jj_count_offd[j]++;
1429 }
1430 }
1431 }
1432 }
1433 }
1434 }
1435 /* Look at off diag strong connections of i */
1436 if (num_procs > 1)
1437 {
1438 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
1439 {
1440 i1 = S_offd_j[jj];
1441 if(col_offd_S_to_A)
1442 i1 = col_offd_S_to_A[i1];
1443 if (CF_marker_offd[i1] > 0)
1444 {
1445 if(P_marker_offd[i1] < P_offd_i[i])
1446 {
1447 tmp_CF_marker_offd[i1] = 1;
1448 P_marker_offd[i1] = jj_count_offd[j];
1449 jj_count_offd[j]++;
1450 }
1451 }
1452 else if (CF_marker_offd[i1] != -3)
1453 { /* F point; look at neighbors of i1. Sop contains global col
1454 * numbers and entries that could be in S_diag or S_offd or
1455 * neither. */
1456 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
1457 {
1458 loc_col = Sop_j[kk];
1459 if(loc_col > -1)
1460 { /* In S_diag */
1461 if(CF_marker[loc_col] > 0)
1462 {
1463 if(P_marker[loc_col] < P_diag_i[i])
1464 {
1465 P_marker[loc_col] = jj_count[j];
1466 jj_count[j]++;
1467 }
1468 }
1469 }
1470 else
1471 {
1472 loc_col = -loc_col - 1;
1473 if(CF_marker_offd[loc_col] > 0)
1474 {
1475 if(P_marker_offd[loc_col] < P_offd_i[i])
1476 {
1477 P_marker_offd[loc_col] = jj_count_offd[j];
1478 tmp_CF_marker_offd[loc_col] = 1;
1479 jj_count_offd[j]++;
1480 }
1481 }
1482 }
1483 }
1484 }
1485 }
1486 }
1487 }
1488 }
1489 if (n_fine) hypre_TFree(P_marker);
1490 if (full_off_procNodes) hypre_TFree(P_marker_offd);
1491 }
1492
1493 for (i=0; i < num_threads-1; i++)
1494 {
1495 coarse_counter[i+1] += coarse_counter[i];
1496 jj_count[i+1] += jj_count[i];
1497 jj_count_offd[i+1] += jj_count_offd[i];
1498 }
1499 i = num_threads-1;
1500 jj_counter = jj_count[i];
1501 jj_counter_offd = jj_count_offd[i];
1502
1503 if (debug_flag==4)
1504 {
1505 wall_time = time_getWallclockSeconds() - wall_time;
1506 printf("Proc = %d determine structure %f\n",
1507 my_id, wall_time);
1508 fflush(NULL);
1509 }
1510 /*-----------------------------------------------------------------------
1511 * Allocate arrays.
1512 *-----------------------------------------------------------------------*/
1513
1514 if (debug_flag== 4) wall_time = time_getWallclockSeconds();
1515
1516 P_diag_size = jj_counter;
1517 P_offd_size = jj_counter_offd;
1518 if (P_diag_size)
1519 {
1520 P_diag_j = hypre_CTAlloc(int, P_diag_size);
1521 P_diag_data = hypre_CTAlloc(double, P_diag_size);
1522 }
1523
1524 if (P_offd_size)
1525 {
1526 P_offd_j = hypre_CTAlloc(int, P_offd_size);
1527 P_offd_data = hypre_CTAlloc(double, P_offd_size);
1528 }
1529
1530 P_diag_i[n_fine] = jj_counter;
1531 P_offd_i[n_fine] = jj_counter_offd;
1532
1533 jj_counter = start_indexing;
1534 jj_counter_offd = start_indexing;
1535
1536#define HYPRE_SMP_PRIVATE i,j,ns,ne,size,rest,coarse_shift
1537#include "../utilities/hypre_smp_forloop.h"
1538 for (j = 0; j < num_threads; j++)
1539 {
1540 coarse_shift = 0;
1541 if (j > 0) coarse_shift = coarse_counter[j-1];
1542 size = n_fine/num_threads;
1543 rest = n_fine - size*num_threads;
1544 if (j < rest)
1545 {
1546 ns = j*size+j;
1547 ne = (j+1)*size+j+1;
1548 }
1549 else
1550 {
1551 ns = j*size+rest;
1552 ne = (j+1)*size+rest;
1553 }
1554 for (i = ns; i < ne; i++)
1555 fine_to_coarse[i] += coarse_shift;
1556 }
1557
1558 /* Fine to coarse mapping */
1559 if(num_procs > 1)
1560 {
1561 big_insert_new_nodes(comm_pkg, extend_comm_pkg, fine_to_coarse,
1562 full_off_procNodes, my_first_cpt,
1563 fine_to_coarse_offd);
1564 }
1565
1566 /*-----------------------------------------------------------------------
1567 * Loop over fine grid points.
1568 *-----------------------------------------------------------------------*/
1569#define HYPRE_SMP_PRIVATE i,j,i1,i2,jj,jj1,kk,k1,ns,ne,size,rest,P_marker,P_marker_offd,loc_col,jj_counter,jj_counter_offd,strong_f_marker,sum,sgn,distribute,diagonal,jj_begin_row,jj_end_row,jj_begin_row_offd,jj_end_row_offd
1570#include "../utilities/hypre_smp_forloop.h"
1571 for (j = 0; j < num_threads; j++)
1572 {
1573 size = n_fine/num_threads;
1574 rest = n_fine - size*num_threads;
1575 strong_f_marker = -2;
1576 if (n_fine)
1577 {
1578 P_marker = hypre_CTAlloc(int, n_fine);
1579 for (i=0; i < n_fine; i++)
1580 P_marker[i] = -1;
1581 }
1582
1583 jj_counter = 0;
1584 if (j > 0) jj_counter = jj_count[j-1];
1585 jj_counter_offd = 0;
1586 if (j > 0) jj_counter_offd = jj_count_offd[j-1];
1587
1588 if (full_off_procNodes)
1589 {
1590 P_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
1591 for (i=0; i < full_off_procNodes; i++)
1592 P_marker_offd[i] = -1;
1593 }
1594
1595 if (j < rest)
1596 {
1597 ns = j*size+j;
1598 ne = (j+1)*size+j+1;
1599 }
1600 else
1601 {
1602 ns = j*size+rest;
1603 ne = (j+1)*size+rest;
1604 }
1605 for (i = ns; i < ne; i++)
1606 {
1607 /*for (i = 0; i < n_fine; i++)
1608 {*/
1609 jj_begin_row = jj_counter;
1610 jj_begin_row_offd = jj_counter_offd;
1611 P_diag_i[i] = jj_counter;
1612 if (num_procs > 1)
1613 P_offd_i[i] = jj_counter_offd;
1614
1615 /*--------------------------------------------------------------------
1616 * If i is a c-point, interpolation is the identity.
1617 *--------------------------------------------------------------------*/
1618
1619 if (CF_marker[i] >= 0)
1620 {
1621 P_diag_j[jj_counter] = fine_to_coarse[i];
1622 P_diag_data[jj_counter] = one;
1623 jj_counter++;
1624 }
1625
1626 /*--------------------------------------------------------------------
1627 * If i is an F-point, build interpolation.
1628 *--------------------------------------------------------------------*/
1629
1630 else if (CF_marker[i] != -3)
1631 {
1632 strong_f_marker--;
1633 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
1634 {
1635 i1 = S_diag_j[jj];
1636
1637 /*--------------------------------------------------------------
1638 * If neighbor i1 is a C-point, set column number in P_diag_j
1639 * and initialize interpolation weight to zero.
1640 *--------------------------------------------------------------*/
1641
1642 if (CF_marker[i1] >= 0)
1643 {
1644 if (P_marker[i1] < jj_begin_row)
1645 {
1646 P_marker[i1] = jj_counter;
1647 P_diag_j[jj_counter] = fine_to_coarse[i1];
1648 P_diag_data[jj_counter] = zero;
1649 jj_counter++;
1650 }
1651 }
1652 else if (CF_marker[i1] != -3)
1653 {
1654 P_marker[i1] = strong_f_marker;
1655 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
1656 {
1657 k1 = S_diag_j[kk];
1658 if (CF_marker[k1] > 0)
1659 {
1660 if(P_marker[k1] < jj_begin_row)
1661 {
1662 P_marker[k1] = jj_counter;
1663 P_diag_j[jj_counter] = fine_to_coarse[k1];
1664 P_diag_data[jj_counter] = zero;
1665 jj_counter++;
1666 }
1667 }
1668 }
1669 if(num_procs > 1)
1670 {
1671 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
1672 {
1673 if(col_offd_S_to_A)
1674 k1 = col_offd_S_to_A[S_offd_j[kk]];
1675 else
1676 k1 = S_offd_j[kk];
1677 if(CF_marker_offd[k1] > 0)
1678 {
1679 if(P_marker_offd[k1] < jj_begin_row_offd)
1680 {
1681 P_marker_offd[k1] = jj_counter_offd;
1682 P_offd_j[jj_counter_offd] = k1;
1683 P_offd_data[jj_counter_offd] = zero;
1684 jj_counter_offd++;
1685 }
1686 }
1687 }
1688 }
1689 }
1690 }
1691
1692 if ( num_procs > 1)
1693 {
1694 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
1695 {
1696 i1 = S_offd_j[jj];
1697 if(col_offd_S_to_A)
1698 i1 = col_offd_S_to_A[i1];
1699 if ( CF_marker_offd[i1] > 0)
1700 {
1701 if(P_marker_offd[i1] < jj_begin_row_offd)
1702 {
1703 P_marker_offd[i1] = jj_counter_offd;
1704 P_offd_j[jj_counter_offd] = i1;
1705 P_offd_data[jj_counter_offd] = zero;
1706 jj_counter_offd++;
1707 }
1708 }
1709 else if (CF_marker_offd[i1] != -3)
1710 {
1711 P_marker_offd[i1] = strong_f_marker;
1712 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
1713 {
1714 loc_col = Sop_j[kk];
1715 if(loc_col > -1)
1716 {
1717 if(CF_marker[loc_col] > 0)
1718 {
1719 if(P_marker[loc_col] < jj_begin_row)
1720 {
1721 P_marker[loc_col] = jj_counter;
1722 P_diag_j[jj_counter] = fine_to_coarse[loc_col];
1723 P_diag_data[jj_counter] = zero;
1724 jj_counter++;
1725 }
1726 }
1727 }
1728 else
1729 {
1730 loc_col = - loc_col - 1;
1731 if(CF_marker_offd[loc_col] > 0)
1732 {
1733 if(P_marker_offd[loc_col] < jj_begin_row_offd)
1734 {
1735 P_marker_offd[loc_col] = jj_counter_offd;
1736 P_offd_j[jj_counter_offd]=loc_col;
1737 P_offd_data[jj_counter_offd] = zero;
1738 jj_counter_offd++;
1739 }
1740 }
1741 }
1742 }
1743 }
1744 }
1745 }
1746
1747 jj_end_row = jj_counter;
1748 jj_end_row_offd = jj_counter_offd;
1749
1750 diagonal = A_diag_data[A_diag_i[i]];
1751
1752 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1753 { /* i1 is a c-point and strongly influences i, accumulate
1754 * a_(i,i1) into interpolation weight */
1755 i1 = A_diag_j[jj];
1756 if (P_marker[i1] >= jj_begin_row)
1757 {
1758 P_diag_data[P_marker[i1]] += A_diag_data[jj];
1759 }
1760 else if(P_marker[i1] == strong_f_marker)
1761 {
1762 sum = zero;
1763 sgn = 1;
1764 if(A_diag_data[A_diag_i[i1]] < 0) sgn = -1;
1765 /* Loop over row of A for point i1 and calculate the sum
1766 * of the connections to c-points that strongly incluence i. */
1767 for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++)
1768 {
1769 i2 = A_diag_j[jj1];
1770 if((P_marker[i2] >= jj_begin_row || i2 == i) && (sgn*A_diag_data[jj1]) < 0)
1771 sum += A_diag_data[jj1];
1772 }
1773 if(num_procs > 1)
1774 {
1775 for(jj1 = A_offd_i[i1]; jj1< A_offd_i[i1+1]; jj1++)
1776 {
1777 i2 = A_offd_j[jj1];
1778 if(P_marker_offd[i2] >= jj_begin_row_offd &&
1779 (sgn*A_offd_data[jj1]) < 0)
1780 sum += A_offd_data[jj1];
1781 }
1782 }
1783 if(sum != 0)
1784 {
1785 distribute = A_diag_data[jj]/sum;
1786 /* Loop over row of A for point i1 and do the distribution */
1787 for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++)
1788 {
1789 i2 = A_diag_j[jj1];
1790 if(P_marker[i2] >= jj_begin_row && (sgn*A_diag_data[jj1]) < 0)
1791 P_diag_data[P_marker[i2]] +=
1792 distribute*A_diag_data[jj1];
1793 if(i2 == i && (sgn*A_diag_data[jj1]) < 0)
1794 diagonal += distribute*A_diag_data[jj1];
1795 }
1796 if(num_procs > 1)
1797 {
1798 for(jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
1799 {
1800 i2 = A_offd_j[jj1];
1801 if(P_marker_offd[i2] >= jj_begin_row_offd &&
1802 (sgn*A_offd_data[jj1]) < 0)
1803 P_offd_data[P_marker_offd[i2]] +=
1804 distribute*A_offd_data[jj1];
1805 }
1806 }
1807 }
1808 else
1809 {
1810 diagonal += A_diag_data[jj];
1811 }
1812 }
1813 /* neighbor i1 weakly influences i, accumulate a_(i,i1) into
1814 * diagonal */
1815 else if (CF_marker[i1] != -3)
1816 {
1817 if(num_functions == 1 || dof_func[i] == dof_func[i1])
1818 diagonal += A_diag_data[jj];
1819 }
1820 }
1821 if(num_procs > 1)
1822 {
1823 for(jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1824 {
1825 i1 = A_offd_j[jj];
1826 if(P_marker_offd[i1] >= jj_begin_row_offd)
1827 P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];
1828 else if(P_marker_offd[i1] == strong_f_marker)
1829 {
1830 sum = zero;
1831 sgn = 1;
1832 if(A_ext_data[A_ext_i[i1]] < 0) sgn = -1;
1833 for(jj1 = A_ext_i[i1]+1; jj1 < A_ext_i[i1+1]; jj1++)
1834 {
1835 loc_col = A_ext_j[jj1];
1836 if(loc_col > -1)
1837 { /* diag */
1838 if((P_marker[loc_col] >= jj_begin_row || loc_col == i)
1839 && (sgn*A_ext_data[jj1]) < 0)
1840 sum += A_ext_data[jj1];
1841 }
1842 else
1843 {
1844 loc_col = - loc_col - 1;
1845 if(P_marker_offd[loc_col] >= jj_begin_row_offd &&
1846 (sgn*A_ext_data[jj1]) < 0)
1847 sum += A_ext_data[jj1];
1848 }
1849 }
1850 if(sum != 0)
1851 {
1852 distribute = A_offd_data[jj] / sum;
1853 for(jj1 = A_ext_i[i1]+1; jj1 < A_ext_i[i1+1]; jj1++)
1854 {
1855 loc_col = A_ext_j[jj1];
1856 if(loc_col > -1)
1857 { /* diag */
1858 if(P_marker[loc_col] >= jj_begin_row &&
1859 (sgn*A_ext_data[jj1]) < 0)
1860 P_diag_data[P_marker[loc_col]] += distribute*
1861 A_ext_data[jj1];
1862 if(loc_col == i && (sgn*A_ext_data[jj1]) < 0)
1863 diagonal += distribute*A_ext_data[jj1];
1864 }
1865 else
1866 {
1867 loc_col = - loc_col - 1;
1868 if(P_marker_offd[loc_col] >= jj_begin_row_offd &&
1869 (sgn*A_ext_data[jj1]) < 0)
1870 P_offd_data[P_marker_offd[loc_col]] += distribute*
1871 A_ext_data[jj1];
1872 }
1873 }
1874 }
1875 else
1876 {
1877 diagonal += A_offd_data[jj];
1878 }
1879 }
1880 else if (CF_marker_offd[i1] != -3)
1881 {
1882 if(num_functions == 1 || dof_func[i] == dof_func_offd[i1])
1883 diagonal += A_offd_data[jj];
1884 }
1885 }
1886 }
1887 for(jj = jj_begin_row; jj < jj_end_row; jj++)
1888 P_diag_data[jj] /= -diagonal;
1889 for(jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
1890 P_offd_data[jj] /= -diagonal;
1891 }
1892 strong_f_marker--;
1893 }
1894 if (n_fine) hypre_TFree(P_marker);
1895 if (full_off_procNodes) hypre_TFree(P_marker_offd);
1896 }
1897
1898 if (debug_flag==4)
1899 {
1900 wall_time = time_getWallclockSeconds() - wall_time;
1901 printf("Proc = %d fill structure %f\n",
1902 my_id, wall_time);
1903 fflush(NULL);
1904 }
1905 /*-----------------------------------------------------------------------
1906 * Allocate arrays.
1907 *-----------------------------------------------------------------------*/
1908
1909 P = hypre_ParCSRMatrixCreate(comm,
1910 hypre_ParCSRMatrixGlobalNumRows(A),
1911 total_global_cpts,
1912 hypre_ParCSRMatrixColStarts(A),
1913 num_cpts_global,
1914 0,
1915 P_diag_i[n_fine],
1916 P_offd_i[n_fine]);
1917
1918 P_diag = hypre_ParCSRMatrixDiag(P);
1919 hypre_CSRMatrixData(P_diag) = P_diag_data;
1920 hypre_CSRMatrixI(P_diag) = P_diag_i;
1921 hypre_CSRMatrixJ(P_diag) = P_diag_j;
1922 P_offd = hypre_ParCSRMatrixOffd(P);
1923 hypre_CSRMatrixData(P_offd) = P_offd_data;
1924 hypre_CSRMatrixI(P_offd) = P_offd_i;
1925 hypre_CSRMatrixJ(P_offd) = P_offd_j;
1926 hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
1927
1928 /* Compress P, removing coefficients smaller than trunc_factor * Max */
1929 if (trunc_factor != 0.0 || max_elmts > 0)
1930 {
1931 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
1932 P_diag_data = hypre_CSRMatrixData(P_diag);
1933 P_diag_i = hypre_CSRMatrixI(P_diag);
1934 P_diag_j = hypre_CSRMatrixJ(P_diag);
1935 P_offd_data = hypre_CSRMatrixData(P_offd);
1936 P_offd_i = hypre_CSRMatrixI(P_offd);
1937 P_offd_j = hypre_CSRMatrixJ(P_offd);
1938 P_diag_size = P_diag_i[n_fine];
1939 P_offd_size = P_offd_i[n_fine];
1940 }
1941
1942 /* This builds col_map, col_map should be monotone increasing and contain
1943 * global numbers. */
1944 num_cols_P_offd = 0;
1945 if(P_offd_size)
1946 {
1947 num_cols_P_offd = 0;
1948 for (i=0; i < P_offd_size; i++)
1949 {
1950 index = P_offd_j[i];
1951 if (tmp_CF_marker_offd[index] == 1)
1952 {
1953 num_cols_P_offd++;
1954 tmp_CF_marker_offd[index] = 2;
1955 }
1956 }
1957
1958 if (num_cols_P_offd)
1959 {
1960 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd);
1961 tmp_map_offd = hypre_CTAlloc(int, num_cols_P_offd);
1962 }
1963
1964 index = 0;
1965 for(i = 0; i < num_cols_P_offd; i++)
1966 {
1967 while( tmp_CF_marker_offd[index] != 2) index++;
1968 /*printf("index %d tmp_CF %d\n", index, tmp_CF_marker_offd[index]);*/
1969 col_map_offd_P[i] = fine_to_coarse_offd[index];
1970 tmp_map_offd[i] = index++;
1971 }
1972
1973 hypre_BigQsortbi(col_map_offd_P, tmp_map_offd, 0, num_cols_P_offd-1);
1974
1975 for (i = 0; i < num_cols_P_offd; i++)
1976 tmp_CF_marker_offd[tmp_map_offd[i]] = i;
1977
1978 for(i = 0; i < P_offd_size; i++)
1979 P_offd_j[i] = tmp_CF_marker_offd[P_offd_j[i]];
1980 }
1981
1982 if (num_cols_P_offd)
1983 {
1984 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
1985 hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
1986 hypre_TFree(tmp_map_offd);
1987 }
1988
1989#ifdef HYPRE_NO_GLOBAL_PARTITION
1990 hypre_NewCommPkgCreate(P);
1991#else
1992 hypre_MatvecCommPkgCreate(P);
1993#endif
1994
1995 for (i=0; i < n_fine; i++)
1996 if (CF_marker[i] == -3) CF_marker[i] = -1;
1997
1998 *P_ptr = P;
1999
2000 /* Deallocate memory */
2001 hypre_TFree(fine_to_coarse);
2002 hypre_TFree(jj_count);
2003 hypre_TFree(jj_count_offd);
2004 hypre_TFree(coarse_counter);
2005
2006 if (num_procs > 1)
2007 {
2008 hypre_BigCSRMatrixDestroy(Sop);
2009 hypre_BigCSRMatrixDestroy(A_ext);
2010 hypre_TFree(fine_to_coarse_offd);
2011 hypre_TFree(CF_marker_offd);
2012 hypre_TFree(tmp_CF_marker_offd);
2013 if(num_functions > 1)
2014 hypre_TFree(dof_func_offd);
2015 hypre_TFree(found);
2016
2017
2018 hypre_MatvecCommPkgDestroy(extend_comm_pkg);
2019
2020
2021 }
2022
2023 return hypre_error_flag;
2024}
2025
2026/*---------------------------------------------------------------------------
2027 * hypre_BoomerAMGBuildExtPICCInterp
2028 * Comment: Only use FF when there is no common c point.
2029 *--------------------------------------------------------------------------*/
2030int
2031hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A, int *CF_marker,
2032 hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
2033 int num_functions, int *dof_func, int debug_flag,
2034 double trunc_factor, int max_elmts,
2035 int *col_offd_S_to_A,
2036 hypre_ParCSRMatrix **P_ptr)
2037{
2038 /* Communication Variables */
2039 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
2040 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
2041
2042
2043 int my_id, num_procs;
2044
2045 /* Variables to store input variables */
2046 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
2047 double *A_diag_data = hypre_CSRMatrixData(A_diag);
2048 int *A_diag_i = hypre_CSRMatrixI(A_diag);
2049 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
2050
2051 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
2052 double *A_offd_data = hypre_CSRMatrixData(A_offd);
2053 int *A_offd_i = hypre_CSRMatrixI(A_offd);
2054 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
2055
2056 int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
2057 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
2058 int n_fine = hypre_CSRMatrixNumRows(A_diag);
2059 HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstRowIndex(A);
2060 int local_numrows = hypre_CSRMatrixNumRows(A_diag);
2061 HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt) local_numrows;
2062 HYPRE_BigInt total_global_cpts, my_first_cpt;
2063
2064 /* Variables to store strong connection matrix info */
2065 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
2066 int *S_diag_i = hypre_CSRMatrixI(S_diag);
2067 int *S_diag_j = hypre_CSRMatrixJ(S_diag);
2068
2069 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
2070 int *S_offd_i = hypre_CSRMatrixI(S_offd);
2071 int *S_offd_j = hypre_CSRMatrixJ(S_offd);
2072
2073 /* Interpolation matrix P */
2074 hypre_ParCSRMatrix *P;
2075 hypre_CSRMatrix *P_diag;
2076 hypre_CSRMatrix *P_offd;
2077
2078 double *P_diag_data = NULL;
2079 int *P_diag_i, *P_diag_j = NULL;
2080 double *P_offd_data = NULL;
2081 int *P_offd_i, *P_offd_j = NULL;
2082
2083 HYPRE_BigInt *col_map_offd_P;
2084 int *tmp_map_offd;
2085 int P_diag_size;
2086 int P_offd_size;
2087 int *P_marker = NULL;
2088 int *P_marker_offd = NULL;
2089 int *CF_marker_offd = NULL;
2090 int *tmp_CF_marker_offd = NULL;
2091 int *dof_func_offd = NULL;
2092 /*int **ext_p, **ext_p_offd;*/
2093 int ccounter_offd;
2094 /*int *clist_offd;*/
2095 int common_c;
2096
2097 /* Full row information for columns of A that are off diag*/
2098 hypre_BigCSRMatrix *A_ext;
2099 double *A_ext_data;
2100 int *A_ext_i;
2101 HYPRE_BigInt *big_A_ext_j;
2102 int *A_ext_j;
2103
2104 int *fine_to_coarse = NULL;
2105 HYPRE_BigInt *fine_to_coarse_offd = NULL;
2106 HYPRE_BigInt *found;
2107
2108 int num_cols_P_offd;
2109 int newoff, loc_col;
2110 int A_ext_rows, full_off_procNodes;
2111
2112 hypre_BigCSRMatrix *Sop;
2113 int *Sop_i;
2114 HYPRE_BigInt *big_Sop_j;
2115 int *Sop_j;
2116
2117 int Soprows, sgn;
2118
2119 /* Variables to keep count of interpolatory points */
2120 int jj_counter, jj_counter_offd;
2121 int jj_begin_row, jj_end_row;
2122 int jj_begin_row_offd = 0;
2123 int jj_end_row_offd = 0;
2124 int coarse_counter, coarse_counter_offd;
2125
2126 /* Interpolation weight variables */
2127 double sum, diagonal, distribute;
2128 int strong_f_marker = -2;
2129
2130 /* Loop variables */
2131 int index;
2132 int start_indexing = 0;
2133 int i, i1, i2, jj, kk, k1, jj1;
2134 int ccounter;
2135 /*int *clist, ccounter;*/
2136
2137 /* Definitions */
2138 double zero = 0.0;
2139 double one = 1.0;
2140
2141 hypre_ParCSRCommPkg *extend_comm_pkg = NULL;
2142
2143 /* BEGIN */
2144 MPI_Comm_size(comm, &num_procs);
2145 MPI_Comm_rank(comm,&my_id);
2146
2147#ifdef HYPRE_NO_GLOBAL_PARTITION
2148 my_first_cpt = num_cpts_global[0];
2149 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
2150 MPI_Bcast(&total_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
2151#else
2152 my_first_cpt = num_cpts_global[my_id];
2153 total_global_cpts = num_cpts_global[num_procs];
2154#endif
2155
2156 if (!comm_pkg)
2157 {
2158#ifdef HYPRE_NO_GLOBAL_PARTITION
2159 hypre_NewCommPkgCreate(A);
2160#else
2161 hypre_MatvecCommPkgCreate(A);
2162#endif
2163 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
2164 }
2165
2166 /* Set up off processor information (specifically for neighbors of
2167 * neighbors */
2168 newoff = 0;
2169 full_off_procNodes = 0;
2170 if (num_procs > 1)
2171 {
2172 /*----------------------------------------------------------------------
2173 * Get the off processors rows for A and S, associated with columns in
2174 * A_offd and S_offd.
2175 *---------------------------------------------------------------------*/
2176 A_ext = hypre_ParCSRMatrixExtractBigExt(A,A,1);
2177 A_ext_i = hypre_BigCSRMatrixI(A_ext);
2178 big_A_ext_j = hypre_BigCSRMatrixJ(A_ext);
2179 A_ext_data = hypre_BigCSRMatrixData(A_ext);
2180 A_ext_rows = hypre_BigCSRMatrixNumRows(A_ext);
2181
2182 Sop = hypre_ParCSRMatrixExtractBigExt(S,A,0);
2183 Sop_i = hypre_BigCSRMatrixI(Sop);
2184 big_Sop_j = hypre_BigCSRMatrixJ(Sop);
2185 Soprows = hypre_BigCSRMatrixNumRows(Sop);
2186
2187 /* Find nodes that are neighbors of neighbors, not found in offd */
2188 newoff = new_offd_nodes(&found, A_ext_rows, A_ext_i, big_A_ext_j,
2189 &A_ext_j,
2190 Soprows, col_map_offd, col_1, col_n,
2191 Sop_i, big_Sop_j, &Sop_j, CF_marker, comm_pkg);
2192
2193 hypre_BigCSRMatrixJ(A_ext) = NULL;
2194 hypre_BigCSRMatrixJ(Sop) = NULL;
2195
2196 if(newoff >= 0)
2197 full_off_procNodes = newoff + num_cols_A_offd;
2198 else
2199 return(1);
2200
2201 /* Possibly add new points and new processors to the comm_pkg, all
2202 * processors need new_comm_pkg */
2203
2204 /* AHB - create a new comm package just for extended info -
2205 this will work better with the assumed partition*/
2206 hypre_ParCSRFindExtendCommPkg(A, newoff, found,
2207 &extend_comm_pkg);
2208
2209 CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
2210
2211 if (num_functions > 1 && full_off_procNodes > 0)
2212 dof_func_offd = hypre_CTAlloc(int, full_off_procNodes);
2213
2214 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, CF_marker,
2215 full_off_procNodes, CF_marker_offd);
2216
2217 if(num_functions > 1)
2218 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, dof_func,
2219 full_off_procNodes, dof_func_offd);
2220
2221
2222 }
2223
2224 /*-----------------------------------------------------------------------
2225 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
2226 *-----------------------------------------------------------------------*/
2227
2228 /*-----------------------------------------------------------------------
2229 * Intialize counters and allocate mapping vector.
2230 *-----------------------------------------------------------------------*/
2231 P_diag_i = hypre_CTAlloc(int, n_fine+1);
2232 P_offd_i = hypre_CTAlloc(int, n_fine+1);
2233
2234 if (n_fine)
2235 {
2236 fine_to_coarse = hypre_CTAlloc(int, n_fine);
2237 P_marker = hypre_CTAlloc(int, n_fine);
2238 }
2239
2240 if (full_off_procNodes)
2241 {
2242 P_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
2243 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, full_off_procNodes);
2244 tmp_CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
2245 }
2246
2247 for (i=0; i < full_off_procNodes; i++)
2248 {
2249 P_marker_offd[i] = -1;
2250 tmp_CF_marker_offd[i] = -1;
2251 fine_to_coarse_offd[i] = -1;
2252 }
2253
2254 for (i=0; i < n_fine; i++)
2255 {
2256 P_marker[i] = -1;
2257 fine_to_coarse[i] = -1;
2258 }
2259
2260 jj_counter = start_indexing;
2261 jj_counter_offd = start_indexing;
2262 coarse_counter = 0;
2263 coarse_counter_offd = 0;
2264
2265 /*-----------------------------------------------------------------------
2266 * Loop over fine grid.
2267 *-----------------------------------------------------------------------*/
2268 for (i = 0; i < n_fine; i++)
2269 {
2270 P_diag_i[i] = jj_counter;
2271 if (num_procs > 1)
2272 P_offd_i[i] = jj_counter_offd;
2273
2274 if (CF_marker[i] > 0)
2275 {
2276 jj_counter++;
2277 fine_to_coarse[i] = coarse_counter;
2278 coarse_counter++;
2279 }
2280
2281 /*--------------------------------------------------------------------
2282 * If i is an F-point, interpolation is from the C-points that
2283 * strongly influence i, or C-points that stronly influence F-points
2284 * that strongly influence i.
2285 *--------------------------------------------------------------------*/
2286 else if (CF_marker[i] != -3)
2287 {
2288 /* Initialize ccounter for each f point */
2289 ccounter = 0;
2290 ccounter_offd = 0;
2291 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
2292 { /* search through diag to find all c neighbors */
2293 i1 = S_diag_j[jj];
2294 if (CF_marker[i1] > 0)
2295 { /* i1 is a C point */
2296 CF_marker[i1] = 2;
2297 if (P_marker[i1] < P_diag_i[i])
2298 {
2299 /*clist[ccounter++] = i1;*/
2300 P_marker[i1] = jj_counter;
2301 jj_counter++;
2302 }
2303 }
2304 }
2305 /*qsort0(clist,0,ccounter-1);*/
2306 if(num_procs > 1)
2307 {
2308 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
2309 { /* search through offd to find all c neighbors */
2310 if(col_offd_S_to_A)
2311 i1 = col_offd_S_to_A[S_offd_j[jj]];
2312 else
2313 i1 = S_offd_j[jj];
2314 if(CF_marker_offd[i1] > 0)
2315 { /* i1 is a C point direct neighbor */
2316 CF_marker_offd[i1] = 2;
2317 if(P_marker_offd[i1] < P_offd_i[i])
2318 {
2319 /*clist_offd[ccounter_offd++] = i1;*/
2320 tmp_CF_marker_offd[i1] = 1;
2321 P_marker_offd[i1] = jj_counter_offd;
2322 jj_counter_offd++;
2323 }
2324 }
2325 }
2326 /*qsort0(clist_offd,0,ccounter_offd-1);*/
2327 }
2328 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
2329 { /* Search diag to find f neighbors and determine if common c point */
2330 i1 = S_diag_j[jj];
2331 if (CF_marker[i1] == -1)
2332 { /* i1 is a F point, loop through it's strong neighbors */
2333 common_c = 0;
2334 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
2335 {
2336 k1 = S_diag_j[kk];
2337 if (CF_marker[k1] == 2)
2338 {
2339 /*if(hypre_BinarySearch(clist,k1,ccounter) >= 0)
2340 {*/
2341 common_c = 1;
2342 break;
2343 /*kk = S_diag_i[i1+1];
2344 }*/
2345 }
2346 }
2347 if(num_procs > 1 && common_c == 0)
2348 { /* no common c point yet, check offd */
2349 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
2350 {
2351 if(col_offd_S_to_A)
2352 k1 = col_offd_S_to_A[S_offd_j[kk]];
2353 else
2354 k1 = S_offd_j[kk];
2355
2356 if (CF_marker_offd[k1] == 2)
2357 { /* k1 is a c point check if it is common */
2358 /*if(hypre_BinarySearch(clist_offd,k1,ccounter_offd) >= 0)
2359 {*/
2360 common_c = 1;
2361 break;
2362 /*kk = S_offd_i[i1+1];
2363 }*/
2364 }
2365 }
2366 }
2367 if(!common_c)
2368 { /* No common c point, extend the interp set */
2369 for(kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
2370 {
2371 k1 = S_diag_j[kk];
2372 if(CF_marker[k1] > 0)
2373 {
2374 if(P_marker[k1] < P_diag_i[i])
2375 {
2376 P_marker[k1] = jj_counter;
2377 jj_counter++;
2378 /*break;*/
2379 }
2380 }
2381 }
2382 if(num_procs > 1)
2383 {
2384 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
2385 {
2386 if(col_offd_S_to_A)
2387 k1 = col_offd_S_to_A[S_offd_j[kk]];
2388 else
2389 k1 = S_offd_j[kk];
2390 if (CF_marker_offd[k1] > 0)
2391 {
2392 if(P_marker_offd[k1] < P_offd_i[i])
2393 {
2394 tmp_CF_marker_offd[k1] = 1;
2395 P_marker_offd[k1] = jj_counter_offd;
2396 jj_counter_offd++;
2397 /*break;*/
2398 }
2399 }
2400 }
2401 }
2402 }
2403 }
2404 }
2405 /* Look at off diag strong connections of i */
2406 if (num_procs > 1)
2407 {
2408 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
2409 {
2410 i1 = S_offd_j[jj];
2411 if(col_offd_S_to_A)
2412 i1 = col_offd_S_to_A[i1];
2413 if (CF_marker_offd[i1] == -1)
2414 { /* F point; look at neighbors of i1. Sop contains global col
2415 * numbers and entries that could be in S_diag or S_offd or
2416 * neither. */
2417 common_c = 0;
2418 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
2419 { /* Check if common c */
2420 loc_col = Sop_j[kk];
2421 if(loc_col > -1)
2422 { /* In S_diag */
2423 if(CF_marker[loc_col] == 2)
2424 {
2425 common_c = 1;
2426 break;
2427 }
2428 }
2429 else
2430 {
2431 loc_col = -loc_col - 1;
2432 if(CF_marker_offd[loc_col] == 2)
2433 {
2434 common_c = 1;
2435 break;
2436 }
2437 }
2438 }
2439 if(!common_c)
2440 {
2441 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
2442 { /* Check if common c */
2443 loc_col = Sop_j[kk];
2444 if(loc_col > -1)
2445 { /* In S_diag */
2446 if(CF_marker[loc_col] > 0)
2447 {
2448 if(P_marker[loc_col] < P_diag_i[i])
2449 {
2450 P_marker[loc_col] = jj_counter;
2451 jj_counter++;
2452 }
2453 }
2454 }
2455 else
2456 {
2457 loc_col = - loc_col - 1;
2458 if(CF_marker_offd[loc_col] > 0)
2459 {
2460 if(P_marker_offd[loc_col] < P_offd_i[i])
2461 {
2462 P_marker_offd[loc_col] = jj_counter_offd;
2463 tmp_CF_marker_offd[loc_col] = 1;
2464 jj_counter_offd++;
2465 }
2466 }
2467 }
2468 }
2469 }
2470 }
2471 }
2472 }
2473 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
2474 { /* search through diag to find all c neighbors */
2475 i1 = S_diag_j[jj];
2476 if (CF_marker[i1] == 2)
2477 CF_marker[i1] = 1;
2478 }
2479 if(num_procs > 1)
2480 {
2481 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
2482 { /* search through offd to find all c neighbors */
2483 if(col_offd_S_to_A)
2484 i1 = col_offd_S_to_A[S_offd_j[jj]];
2485 else
2486 i1 = S_offd_j[jj];
2487 if(CF_marker_offd[i1] == 2)
2488 { /* i1 is a C point direct neighbor */
2489 CF_marker_offd[i1] = 1;
2490 }
2491 }
2492 }
2493 }
2494 }
2495
2496 /*-----------------------------------------------------------------------
2497 * Allocate arrays.
2498 *-----------------------------------------------------------------------*/
2499
2500 P_diag_size = jj_counter;
2501 P_offd_size = jj_counter_offd;
2502 if (P_diag_size)
2503 {
2504 P_diag_j = hypre_CTAlloc(int, P_diag_size);
2505 P_diag_data = hypre_CTAlloc(double, P_diag_size);
2506 }
2507
2508 if (P_offd_size)
2509 {
2510 P_offd_j = hypre_CTAlloc(int, P_offd_size);
2511 P_offd_data = hypre_CTAlloc(double, P_offd_size);
2512 }
2513 P_diag_i[n_fine] = jj_counter;
2514 P_offd_i[n_fine] = jj_counter_offd;
2515
2516 jj_counter = start_indexing;
2517 jj_counter_offd = start_indexing;
2518 ccounter = start_indexing;
2519 ccounter_offd = start_indexing;
2520
2521 /* Fine to coarse mapping */
2522 if(num_procs > 1)
2523 {
2524 big_insert_new_nodes(comm_pkg, extend_comm_pkg, fine_to_coarse,
2525 full_off_procNodes, my_first_cpt,
2526 fine_to_coarse_offd);
2527 }
2528
2529 for (i = 0; i < n_fine; i++)
2530 P_marker[i] = -1;
2531
2532 for (i = 0; i < full_off_procNodes; i++)
2533 P_marker_offd[i] = -1;
2534
2535 /*-----------------------------------------------------------------------
2536 * Loop over fine grid points.
2537 *-----------------------------------------------------------------------*/
2538 for (i = 0; i < n_fine; i++)
2539 {
2540 jj_begin_row = jj_counter;
2541 if(num_procs > 1)
2542 jj_begin_row_offd = jj_counter_offd;
2543
2544 /*--------------------------------------------------------------------
2545 * If i is a c-point, interpolation is the identity.
2546 *--------------------------------------------------------------------*/
2547
2548 if (CF_marker[i] >= 0)
2549 {
2550 P_diag_j[jj_counter] = fine_to_coarse[i];
2551 P_diag_data[jj_counter] = one;
2552 jj_counter++;
2553 }
2554
2555 /*--------------------------------------------------------------------
2556 * If i is an F-point, build interpolation.
2557 *--------------------------------------------------------------------*/
2558
2559 else if (CF_marker[i] != -3)
2560 {
2561 ccounter = 0;
2562 ccounter_offd = 0;
2563 strong_f_marker--;
2564
2565 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
2566 { /* Search C points only */
2567 i1 = S_diag_j[jj];
2568
2569 /*--------------------------------------------------------------
2570 * If neighbor i1 is a C-point, set column number in P_diag_j
2571 * and initialize interpolation weight to zero.
2572 *--------------------------------------------------------------*/
2573
2574 if (CF_marker[i1] > 0)
2575 {
2576 CF_marker[i1] = 2;
2577 if (P_marker[i1] < jj_begin_row)
2578 {
2579 P_marker[i1] = jj_counter;
2580 P_diag_j[jj_counter] = fine_to_coarse[i1];
2581 P_diag_data[jj_counter] = zero;
2582 jj_counter++;
2583 }
2584 }
2585 }
2586 if ( num_procs > 1)
2587 {
2588 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
2589 {
2590 if(col_offd_S_to_A)
2591 i1 = col_offd_S_to_A[S_offd_j[jj]];
2592 else
2593 i1 = S_offd_j[jj];
2594 if ( CF_marker_offd[i1] > 0)
2595 {
2596 CF_marker_offd[i1] = 2;
2597 if(P_marker_offd[i1] < jj_begin_row_offd)
2598 {
2599 P_marker_offd[i1] = jj_counter_offd;
2600 P_offd_j[jj_counter_offd] = i1;
2601 P_offd_data[jj_counter_offd] = zero;
2602 jj_counter_offd++;
2603 }
2604 }
2605 }
2606 }
2607
2608 for(jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
2609 { /* Search through F points */
2610 i1 = S_diag_j[jj];
2611 if(CF_marker[i1] == -1)
2612 {
2613 P_marker[i1] = strong_f_marker;
2614 common_c = 0;
2615 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
2616 {
2617 k1 = S_diag_j[kk];
2618 if (CF_marker[k1] == 2)
2619 {
2620 common_c = 1;
2621 break;
2622 }
2623 }
2624 if(num_procs > 1 && common_c == 0)
2625 { /* no common c point yet, check offd */
2626 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
2627 {
2628 if(col_offd_S_to_A)
2629 k1 = col_offd_S_to_A[S_offd_j[kk]];
2630 else
2631 k1 = S_offd_j[kk];
2632
2633 if (CF_marker_offd[k1] == 2)
2634 { /* k1 is a c point check if it is common */
2635 common_c = 1;
2636 break;
2637 }
2638 }
2639 }
2640 if(!common_c)
2641 { /* No common c point, extend the interp set */
2642 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
2643 {
2644 k1 = S_diag_j[kk];
2645 if (CF_marker[k1] >= 0)
2646 {
2647 if(P_marker[k1] < jj_begin_row)
2648 {
2649 P_marker[k1] = jj_counter;
2650 P_diag_j[jj_counter] = fine_to_coarse[k1];
2651 P_diag_data[jj_counter] = zero;
2652 jj_counter++;
2653 }
2654 }
2655 }
2656 if(num_procs > 1)
2657 {
2658 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
2659 {
2660 if(col_offd_S_to_A)
2661 k1 = col_offd_S_to_A[S_offd_j[kk]];
2662 else
2663 k1 = S_offd_j[kk];
2664 if(CF_marker_offd[k1] >= 0)
2665 {
2666 if(P_marker_offd[k1] < jj_begin_row_offd)
2667 {
2668 P_marker_offd[k1] = jj_counter_offd;
2669 P_offd_j[jj_counter_offd] = k1;
2670 P_offd_data[jj_counter_offd] = zero;
2671 jj_counter_offd++;
2672 }
2673 }
2674 }
2675 }
2676 }
2677 }
2678 }
2679 if ( num_procs > 1)
2680 {
2681 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
2682 {
2683 i1 = S_offd_j[jj];
2684 if(col_offd_S_to_A)
2685 i1 = col_offd_S_to_A[i1];
2686 if(CF_marker_offd[i1] == -1)
2687 { /* F points that are off proc */
2688 P_marker_offd[i1] = strong_f_marker;
2689 common_c = 0;
2690 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
2691 { /* Check if common c */
2692 loc_col = Sop_j[kk];
2693 if(loc_col > -1)
2694 { /* In S_diag */
2695 if(CF_marker[loc_col] == 2)
2696 {
2697 common_c = 1;
2698 break;
2699 }
2700 }
2701 else
2702 {
2703 loc_col = -loc_col - 1;
2704 if(CF_marker_offd[loc_col] == 2)
2705 {
2706 common_c = 1;
2707 break;
2708 }
2709 }
2710 }
2711 if(!common_c)
2712 {
2713 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
2714 {
2715 loc_col = Sop_j[kk];
2716 /* Find local col number */
2717 if(loc_col > -1)
2718 {
2719 if(CF_marker[loc_col] > 0)
2720 {
2721 if(P_marker[loc_col] < jj_begin_row)
2722 {
2723 P_marker[loc_col] = jj_counter;
2724 P_diag_j[jj_counter] = fine_to_coarse[loc_col];
2725 P_diag_data[jj_counter] = zero;
2726 jj_counter++;
2727 }
2728 }
2729 }
2730 else
2731 {
2732 loc_col = -loc_col - 1;
2733 if(CF_marker_offd[loc_col] > 0)
2734 {
2735 if(P_marker_offd[loc_col] < jj_begin_row_offd)
2736 {
2737 P_marker_offd[loc_col] = jj_counter_offd;
2738 P_offd_j[jj_counter_offd]=loc_col;
2739 P_offd_data[jj_counter_offd] = zero;
2740 jj_counter_offd++;
2741 }
2742 }
2743 }
2744 }
2745 }
2746 }
2747 }
2748 }
2749 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
2750 { /* Search C points only */
2751 i1 = S_diag_j[jj];
2752
2753 /*--------------------------------------------------------------
2754 * If neighbor i1 is a C-point, set column number in P_diag_j
2755 * and initialize interpolation weight to zero.
2756 *--------------------------------------------------------------*/
2757
2758 if (CF_marker[i1] == 2)
2759 {
2760 CF_marker[i1] = 1;
2761 }
2762 }
2763 if ( num_procs > 1)
2764 {
2765 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
2766 {
2767 if(col_offd_S_to_A)
2768 i1 = col_offd_S_to_A[S_offd_j[jj]];
2769 else
2770 i1 = S_offd_j[jj];
2771 if ( CF_marker_offd[i1] == 2)
2772 {
2773 CF_marker_offd[i1] = 1;
2774 }
2775 }
2776 }
2777
2778
2779 jj_end_row = jj_counter;
2780 jj_end_row_offd = jj_counter_offd;
2781
2782 diagonal = A_diag_data[A_diag_i[i]];
2783 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2784 { /* i1 is a c-point and strongly influences i, accumulate
2785 * a_(i,i1) into interpolation weight */
2786 i1 = A_diag_j[jj];
2787 if (P_marker[i1] >= jj_begin_row)
2788 {
2789 P_diag_data[P_marker[i1]] += A_diag_data[jj];
2790 }
2791 else if(P_marker[i1] == strong_f_marker)
2792 {
2793 sum = zero;
2794 sgn = 1;
2795 if(A_diag_data[A_diag_i[i1]] < 0) sgn = -1;
2796 for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++)
2797 {
2798 i2 = A_diag_j[jj1];
2799 if((P_marker[i2] >= jj_begin_row || i2 == i) && (sgn*A_diag_data[jj1]) < 0)
2800 sum += A_diag_data[jj1];
2801 }
2802 if(num_procs > 1)
2803 {
2804 for(jj1 = A_offd_i[i1]; jj1< A_offd_i[i1+1]; jj1++)
2805 {
2806 i2 = A_offd_j[jj1];
2807 if(P_marker_offd[i2] >= jj_begin_row_offd &&
2808 (sgn*A_offd_data[jj1]) < 0)
2809 sum += A_offd_data[jj1];
2810 }
2811 }
2812 if(sum != 0)
2813 {
2814 distribute = A_diag_data[jj]/sum;
2815 /* Loop over row of A for point i1 and do the distribution */
2816 for(jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
2817 {
2818 i2 = A_diag_j[jj1];
2819 if(P_marker[i2] >= jj_begin_row && (sgn*A_diag_data[jj1]) < 0)
2820 P_diag_data[P_marker[i2]] +=
2821 distribute*A_diag_data[jj1];
2822 if(i2 == i && (sgn*A_diag_data[jj1]) < 0)
2823 diagonal += distribute*A_diag_data[jj1];
2824 }
2825 if(num_procs > 1)
2826 {
2827 for(jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
2828 {
2829 i2 = A_offd_j[jj1];
2830 if(P_marker_offd[i2] >= jj_begin_row_offd &&
2831 (sgn*A_offd_data[jj1]) < 0)
2832 P_offd_data[P_marker_offd[i2]] +=
2833 distribute*A_offd_data[jj1];
2834 }
2835 }
2836 }
2837 else
2838 diagonal += A_diag_data[jj];
2839 }
2840 /* neighbor i1 weakly influences i, accumulate a_(i,i1) into
2841 * diagonal */
2842 else if (CF_marker[i1] != -3)
2843 {
2844 if(num_functions == 1 || dof_func[i] == dof_func[i1])
2845 diagonal += A_diag_data[jj];
2846 }
2847 }
2848 if(num_procs > 1)
2849 {
2850 for(jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2851 {
2852 i1 = A_offd_j[jj];
2853 if(P_marker_offd[i1] >= jj_begin_row_offd)
2854 P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];
2855 else if(P_marker_offd[i1] == strong_f_marker)
2856 {
2857 sum = zero;
2858 sgn = 1;
2859 if(A_ext_data[A_ext_i[i1]] < 0) sgn = -1;
2860 for(jj1 = A_ext_i[i1]+1; jj1 < A_ext_i[i1+1]; jj1++)
2861 {
2862 loc_col = A_ext_j[jj1];
2863 if(loc_col > -1)
2864 { /* diag */
2865 if((P_marker[loc_col] >= jj_begin_row || loc_col == i)
2866 && (sgn*A_ext_data[jj1]) < 0)
2867 sum += A_ext_data[jj1];
2868 }
2869 else
2870 {
2871 loc_col = -loc_col - 1;
2872 if(P_marker_offd[loc_col] >= jj_begin_row_offd &&
2873 (sgn*A_ext_data[jj1]) < 0)
2874 sum += A_ext_data[jj1];
2875 }
2876 }
2877 if(sum != 0)
2878 {
2879 distribute = A_offd_data[jj] / sum;
2880 for(jj1 = A_ext_i[i1]+1; jj1 < A_ext_i[i1+1]; jj1++)
2881 {
2882 loc_col = A_ext_j[jj1];
2883 if(loc_col > -1)
2884 { /* diag */
2885 if(P_marker[loc_col] >= jj_begin_row &&
2886 (sgn*A_ext_data[jj1]) < 0)
2887 P_diag_data[P_marker[loc_col]] += distribute*
2888 A_ext_data[jj1];
2889 if(loc_col == i && (sgn*A_ext_data[jj1]) < 0)
2890 diagonal += distribute*A_ext_data[jj1];
2891 }
2892 else
2893 {
2894 loc_col = -loc_col - 1;
2895 if(P_marker_offd[loc_col] >= jj_begin_row_offd &&
2896 (sgn*A_ext_data[jj1]) < 0)
2897 P_offd_data[P_marker_offd[loc_col]] += distribute*
2898 A_ext_data[jj1];
2899 }
2900 }
2901 }
2902 else
2903 diagonal += A_offd_data[jj];
2904 }
2905 else if (CF_marker_offd[i1] != -3)
2906 {
2907 if(num_functions == 1 || dof_func[i] == dof_func_offd[i1])
2908 diagonal += A_offd_data[jj];
2909 }
2910 }
2911 }
2912 for(jj = jj_begin_row; jj < jj_end_row; jj++)
2913 P_diag_data[jj] /= -diagonal;
2914 for(jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
2915 P_offd_data[jj] /= -diagonal;
2916 }
2917 strong_f_marker--;
2918 }
2919
2920 P = hypre_ParCSRMatrixCreate(comm,
2921 hypre_ParCSRMatrixGlobalNumRows(A),
2922 total_global_cpts,
2923 hypre_ParCSRMatrixColStarts(A),
2924 num_cpts_global,
2925 0,
2926 P_diag_i[n_fine],
2927 P_offd_i[n_fine]);
2928
2929 P_diag = hypre_ParCSRMatrixDiag(P);
2930 hypre_CSRMatrixData(P_diag) = P_diag_data;
2931 hypre_CSRMatrixI(P_diag) = P_diag_i;
2932 hypre_CSRMatrixJ(P_diag) = P_diag_j;
2933 P_offd = hypre_ParCSRMatrixOffd(P);
2934 hypre_CSRMatrixData(P_offd) = P_offd_data;
2935 hypre_CSRMatrixI(P_offd) = P_offd_i;
2936 hypre_CSRMatrixJ(P_offd) = P_offd_j;
2937 hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
2938
2939 /* Compress P, removing coefficients smaller than trunc_factor * Max */
2940 if (trunc_factor != 0.0 || max_elmts > 0)
2941 {
2942 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
2943 P_diag_data = hypre_CSRMatrixData(P_diag);
2944 P_diag_i = hypre_CSRMatrixI(P_diag);
2945 P_diag_j = hypre_CSRMatrixJ(P_diag);
2946 P_offd_data = hypre_CSRMatrixData(P_offd);
2947 P_offd_i = hypre_CSRMatrixI(P_offd);
2948 P_offd_j = hypre_CSRMatrixJ(P_offd);
2949 P_diag_size = P_diag_i[n_fine];
2950 P_offd_size = P_offd_i[n_fine];
2951 }
2952
2953 /* This builds col_map, col_map should be monotone increasing and contain
2954 * global numbers. */
2955 num_cols_P_offd = 0;
2956 if(P_offd_size)
2957 {
2958 num_cols_P_offd = 0;
2959 for (i=0; i < P_offd_size; i++)
2960 {
2961 index = P_offd_j[i];
2962 if(tmp_CF_marker_offd[index] == 1)
2963 {
2964 num_cols_P_offd++;
2965 tmp_CF_marker_offd[index] = 2;
2966 }
2967 }
2968
2969 if (num_cols_P_offd)
2970 {
2971 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd);
2972 tmp_map_offd = hypre_CTAlloc(int, num_cols_P_offd);
2973 }
2974
2975 index = 0;
2976 for(i = 0; i < num_cols_P_offd; i++)
2977 {
2978 while( tmp_CF_marker_offd[index] != 2) index++;
2979 col_map_offd_P[i] = fine_to_coarse_offd[index];
2980 tmp_map_offd[i] = index++;
2981 }
2982
2983 hypre_BigQsortbi(col_map_offd_P, tmp_map_offd, 0, num_cols_P_offd-1);
2984
2985 for (i = 0; i < num_cols_P_offd; i++)
2986 tmp_CF_marker_offd[tmp_map_offd[i]] = i;
2987
2988 for(i = 0; i < P_offd_size; i++)
2989 P_offd_j[i] = tmp_CF_marker_offd[P_offd_j[i]];
2990 }
2991
2992 if (num_cols_P_offd)
2993 {
2994 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
2995 hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
2996 hypre_TFree(tmp_map_offd);
2997 }
2998
2999#ifdef HYPRE_NO_GLOBAL_PARTITION
3000 hypre_NewCommPkgCreate(P);
3001#else
3002 hypre_MatvecCommPkgCreate(P);
3003#endif
3004
3005 for (i=0; i < n_fine; i++)
3006 if (CF_marker[i] == -3) CF_marker[i] = -1;
3007
3008 *P_ptr = P;
3009
3010 /* Deallocate memory */
3011 hypre_TFree(fine_to_coarse);
3012 hypre_TFree(P_marker);
3013
3014 if (num_procs > 1)
3015 {
3016 /*hypre_TFree(clist_offd);*/
3017 hypre_BigCSRMatrixDestroy(Sop);
3018 hypre_BigCSRMatrixDestroy(A_ext);
3019 hypre_TFree(fine_to_coarse_offd);
3020 hypre_TFree(P_marker_offd);
3021 hypre_TFree(CF_marker_offd);
3022 hypre_TFree(tmp_CF_marker_offd);
3023 if(num_functions > 1)
3024 hypre_TFree(dof_func_offd);
3025 hypre_TFree(found);
3026
3027 hypre_MatvecCommPkgDestroy(extend_comm_pkg);
3028 }
3029
3030 return hypre_error_flag;
3031}
3032
3033/*---------------------------------------------------------------------------
3034 * hypre_BoomerAMGBuildFFInterp
3035 * Comment: Only use FF when there is no common c point.
3036 *--------------------------------------------------------------------------*/
3037int
3038hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A, int *CF_marker,
3039 hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
3040 int num_functions, int *dof_func, int debug_flag,
3041 double trunc_factor, int max_elmts,
3042 int *col_offd_S_to_A,
3043 hypre_ParCSRMatrix **P_ptr)
3044{
3045 /* Communication Variables */
3046 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
3047 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
3048
3049 int my_id, num_procs;
3050
3051 /* Variables to store input variables */
3052 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
3053 double *A_diag_data = hypre_CSRMatrixData(A_diag);
3054 int *A_diag_i = hypre_CSRMatrixI(A_diag);
3055 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
3056
3057 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
3058 double *A_offd_data = hypre_CSRMatrixData(A_offd);
3059 int *A_offd_i = hypre_CSRMatrixI(A_offd);
3060 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
3061
3062 int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
3063 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
3064 int n_fine = hypre_CSRMatrixNumRows(A_diag);
3065 HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstRowIndex(A);
3066 int local_numrows = hypre_CSRMatrixNumRows(A_diag);
3067 HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt) local_numrows;
3068 HYPRE_BigInt total_global_cpts, my_first_cpt;
3069
3070 /* Variables to store strong connection matrix info */
3071 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
3072 int *S_diag_i = hypre_CSRMatrixI(S_diag);
3073 int *S_diag_j = hypre_CSRMatrixJ(S_diag);
3074
3075 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
3076 int *S_offd_i = hypre_CSRMatrixI(S_offd);
3077 int *S_offd_j = hypre_CSRMatrixJ(S_offd);
3078
3079 /* Interpolation matrix P */
3080 hypre_ParCSRMatrix *P;
3081 hypre_CSRMatrix *P_diag;
3082 hypre_CSRMatrix *P_offd;
3083
3084 double *P_diag_data = NULL;
3085 int *P_diag_i, *P_diag_j = NULL;
3086 double *P_offd_data = NULL;
3087 int *P_offd_i, *P_offd_j = NULL;
3088
3089 HYPRE_BigInt *col_map_offd_P;
3090 int *tmp_map_offd;
3091 int P_diag_size;
3092 int P_offd_size;
3093 int *P_marker = NULL;
3094 int *P_marker_offd = NULL;
3095 int *CF_marker_offd = NULL;
3096 int *tmp_CF_marker_offd = NULL;
3097 int *dof_func_offd = NULL;
3098 int ccounter_offd;
3099 int common_c;
3100
3101 /* Full row information for columns of A that are off diag*/
3102 hypre_BigCSRMatrix *A_ext;
3103 double *A_ext_data;
3104 int *A_ext_i;
3105 HYPRE_BigInt *big_A_ext_j;
3106 int *A_ext_j;
3107
3108 int *fine_to_coarse = NULL;
3109 HYPRE_BigInt *fine_to_coarse_offd = NULL;
3110 HYPRE_BigInt *found;
3111
3112 int num_cols_P_offd;
3113 int newoff, loc_col;
3114 int A_ext_rows, full_off_procNodes;
3115
3116 hypre_BigCSRMatrix *Sop;
3117 int *Sop_i;
3118 HYPRE_BigInt *big_Sop_j;
3119 int *Sop_j;
3120
3121 int Soprows;
3122
3123 /* Variables to keep count of interpolatory points */
3124 int jj_counter, jj_counter_offd;
3125 int jj_begin_row, jj_end_row;
3126 int jj_begin_row_offd = 0;
3127 int jj_end_row_offd = 0;
3128 int coarse_counter, coarse_counter_offd;
3129
3130 /* Interpolation weight variables */
3131 double sum, diagonal, distribute;
3132 int strong_f_marker = -2;
3133 int sgn;
3134
3135 /* Loop variables */
3136 int index;
3137 int start_indexing = 0;
3138 int i, i1, i2, jj, kk, k1, jj1;
3139 int ccounter;
3140
3141 /* Definitions */
3142 double zero = 0.0;
3143 double one = 1.0;
3144
3145 hypre_ParCSRCommPkg *extend_comm_pkg = NULL;
3146
3147 /* BEGIN */
3148 MPI_Comm_size(comm, &num_procs);
3149 MPI_Comm_rank(comm,&my_id);
3150
3151#ifdef HYPRE_NO_GLOBAL_PARTITION
3152 my_first_cpt = num_cpts_global[0];
3153 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
3154 MPI_Bcast(&total_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
3155#else
3156 my_first_cpt = num_cpts_global[my_id];
3157 total_global_cpts = num_cpts_global[num_procs];
3158#endif
3159
3160 if (!comm_pkg)
3161 {
3162#ifdef HYPRE_NO_GLOBAL_PARTITION
3163 hypre_NewCommPkgCreate(A);
3164#else
3165 hypre_MatvecCommPkgCreate(A);
3166#endif
3167 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
3168 }
3169
3170 /* Set up off processor information (specifically for neighbors of
3171 * neighbors */
3172 newoff = 0;
3173 full_off_procNodes = 0;
3174 if (num_procs > 1)
3175 {
3176 /*----------------------------------------------------------------------
3177 * Get the off processors rows for A and S, associated with columns in
3178 * A_offd and S_offd.
3179 *---------------------------------------------------------------------*/
3180 A_ext = hypre_ParCSRMatrixExtractBigExt(A,A,1);
3181 A_ext_i = hypre_BigCSRMatrixI(A_ext);
3182 big_A_ext_j = hypre_BigCSRMatrixJ(A_ext);
3183 A_ext_data = hypre_BigCSRMatrixData(A_ext);
3184 A_ext_rows = hypre_BigCSRMatrixNumRows(A_ext);
3185
3186 Sop = hypre_ParCSRMatrixExtractBigExt(S,A,0);
3187 Sop_i = hypre_BigCSRMatrixI(Sop);
3188 big_Sop_j = hypre_BigCSRMatrixJ(Sop);
3189 Soprows = hypre_BigCSRMatrixNumRows(Sop);
3190
3191 /* Find nodes that are neighbors of neighbors, not found in offd */
3192 newoff = new_offd_nodes(&found, A_ext_rows, A_ext_i, big_A_ext_j,
3193 &A_ext_j,
3194 Soprows, col_map_offd, col_1, col_n,
3195 Sop_i, big_Sop_j, &Sop_j, CF_marker, comm_pkg);
3196
3197 hypre_BigCSRMatrixJ(A_ext) = NULL;
3198 hypre_BigCSRMatrixJ(Sop) = NULL;
3199
3200 if(newoff >= 0)
3201 full_off_procNodes = newoff + num_cols_A_offd;
3202 else
3203 return(1);
3204
3205 /* Possibly add new points and new processors to the comm_pkg, all
3206 * processors need new_comm_pkg */
3207
3208 /* AHB - create a new comm package just for extended info -
3209 this will work better with the assumed partition*/
3210 hypre_ParCSRFindExtendCommPkg(A, newoff, found,
3211 &extend_comm_pkg);
3212
3213 CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
3214
3215 if (num_functions > 1 && full_off_procNodes > 0)
3216 dof_func_offd = hypre_CTAlloc(int, full_off_procNodes);
3217
3218 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, CF_marker,
3219 full_off_procNodes, CF_marker_offd);
3220
3221 if(num_functions > 1)
3222 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, dof_func,
3223 full_off_procNodes, dof_func_offd);
3224
3225
3226 }
3227
3228 /*-----------------------------------------------------------------------
3229 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
3230 *-----------------------------------------------------------------------*/
3231
3232 /*-----------------------------------------------------------------------
3233 * Intialize counters and allocate mapping vector.
3234 *-----------------------------------------------------------------------*/
3235 P_diag_i = hypre_CTAlloc(int, n_fine+1);
3236 P_offd_i = hypre_CTAlloc(int, n_fine+1);
3237
3238 if (n_fine)
3239 {
3240 fine_to_coarse = hypre_CTAlloc(int, n_fine);
3241 P_marker = hypre_CTAlloc(int, n_fine);
3242 }
3243
3244 if (full_off_procNodes)
3245 {
3246 P_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
3247 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, full_off_procNodes);
3248 tmp_CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
3249 }
3250
3251 for (i=0; i < full_off_procNodes; i++)
3252 {
3253 P_marker_offd[i] = -1;
3254 tmp_CF_marker_offd[i] = -1;
3255 fine_to_coarse_offd[i] = -1;
3256 }
3257
3258 for (i=0; i < n_fine; i++)
3259 {
3260 P_marker[i] = -1;
3261 fine_to_coarse[i] = -1;
3262 }
3263
3264 jj_counter = start_indexing;
3265 jj_counter_offd = start_indexing;
3266 coarse_counter = 0;
3267 coarse_counter_offd = 0;
3268
3269 /*-----------------------------------------------------------------------
3270 * Loop over fine grid.
3271 *-----------------------------------------------------------------------*/
3272 for (i = 0; i < n_fine; i++)
3273 {
3274 P_diag_i[i] = jj_counter;
3275 if (num_procs > 1)
3276 P_offd_i[i] = jj_counter_offd;
3277
3278 if (CF_marker[i] > 0)
3279 {
3280 jj_counter++;
3281 fine_to_coarse[i] = coarse_counter;
3282 coarse_counter++;
3283 }
3284
3285 /*--------------------------------------------------------------------
3286 * If i is an F-point, interpolation is from the C-points that
3287 * strongly influence i, or C-points that stronly influence F-points
3288 * that strongly influence i.
3289 *--------------------------------------------------------------------*/
3290 else
3291 {
3292 /* Initialize ccounter for each f point */
3293 ccounter = 0;
3294 ccounter_offd = 0;
3295 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
3296 { /* search through diag to find all c neighbors */
3297 i1 = S_diag_j[jj];
3298 if (CF_marker[i1] > 0)
3299 { /* i1 is a C point */
3300 CF_marker[i1] = 2;
3301 if (P_marker[i1] < P_diag_i[i])
3302 {
3303 P_marker[i1] = jj_counter;
3304 jj_counter++;
3305 }
3306 }
3307 }
3308 if(num_procs > 1)
3309 {
3310 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
3311 { /* search through offd to find all c neighbors */
3312 if(col_offd_S_to_A)
3313 i1 = col_offd_S_to_A[S_offd_j[jj]];
3314 else
3315 i1 = S_offd_j[jj];
3316 if(CF_marker_offd[i1] > 0)
3317 { /* i1 is a C point direct neighbor */
3318 CF_marker_offd[i1] = 2;
3319 if(P_marker_offd[i1] < P_offd_i[i])
3320 {
3321 tmp_CF_marker_offd[i1] = 1;
3322 P_marker_offd[i1] = jj_counter_offd;
3323 jj_counter_offd++;
3324 }
3325 }
3326 }
3327 }
3328 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
3329 { /* Search diag to find f neighbors and determine if common c point */
3330 i1 = S_diag_j[jj];
3331 if (CF_marker[i1] < 0)
3332 { /* i1 is a F point, loop through it's strong neighbors */
3333 common_c = 0;
3334 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
3335 {
3336 k1 = S_diag_j[kk];
3337 if (CF_marker[k1] == 2)
3338 {
3339 common_c = 1;
3340 break;
3341 }
3342 }
3343 if(num_procs > 1 && common_c == 0)
3344 { /* no common c point yet, check offd */
3345 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
3346 {
3347 if(col_offd_S_to_A)
3348 k1 = col_offd_S_to_A[S_offd_j[kk]];
3349 else
3350 k1 = S_offd_j[kk];
3351
3352 if (CF_marker_offd[k1] == 2)
3353 {
3354 common_c = 1;
3355 break;
3356 }
3357 }
3358 }
3359 if(!common_c)
3360 { /* No common c point, extend the interp set */
3361 for(kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
3362 {
3363 k1 = S_diag_j[kk];
3364 if(CF_marker[k1] > 0)
3365 {
3366 if(P_marker[k1] < P_diag_i[i])
3367 {
3368 P_marker[k1] = jj_counter;
3369 jj_counter++;
3370 }
3371 }
3372 }
3373 if(num_procs > 1)
3374 {
3375 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
3376 {
3377 if(col_offd_S_to_A)
3378 k1 = col_offd_S_to_A[S_offd_j[kk]];
3379 else
3380 k1 = S_offd_j[kk];
3381 if (CF_marker_offd[k1] > 0)
3382 {
3383 if(P_marker_offd[k1] < P_offd_i[i])
3384 {
3385 tmp_CF_marker_offd[k1] = 1;
3386 P_marker_offd[k1] = jj_counter_offd;
3387 jj_counter_offd++;
3388 }
3389 }
3390 }
3391 }
3392 }
3393 }
3394 }
3395 /* Look at off diag strong connections of i */
3396 if (num_procs > 1)
3397 {
3398 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
3399 {
3400 i1 = S_offd_j[jj];
3401 if(col_offd_S_to_A)
3402 i1 = col_offd_S_to_A[i1];
3403 if (CF_marker_offd[i1] < 0)
3404 { /* F point; look at neighbors of i1. Sop contains global col
3405 * numbers and entries that could be in S_diag or S_offd or
3406 * neither. */
3407 common_c = 0;
3408 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
3409 { /* Check if common c */
3410 loc_col = Sop_j[kk];
3411 if(loc_col > -1)
3412 { /* In S_diag */
3413 if(CF_marker[loc_col] == 2)
3414 {
3415 common_c = 1;
3416 break;
3417 }
3418 }
3419 else
3420 {
3421 loc_col = -loc_col - 1;
3422 if(CF_marker_offd[loc_col] == 2)
3423 {
3424 common_c = 1;
3425 break;
3426 }
3427 }
3428 }
3429 if(!common_c)
3430 {
3431 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
3432 { /* Check if common c */
3433 loc_col = Sop_j[kk];
3434 if(loc_col > -1)
3435 { /* In S_diag */
3436 if(CF_marker[loc_col] > 0)
3437 {
3438 if(P_marker[loc_col] < P_diag_i[i])
3439 {
3440 P_marker[loc_col] = jj_counter;
3441 jj_counter++;
3442 }
3443 }
3444 }
3445 else
3446 {
3447 loc_col = -loc_col - 1;
3448 if(CF_marker_offd[loc_col] > 0)
3449 {
3450 if(P_marker_offd[loc_col] < P_offd_i[i])
3451 {
3452 P_marker_offd[loc_col] = jj_counter_offd;
3453 tmp_CF_marker_offd[loc_col] = 1;
3454 jj_counter_offd++;
3455 }
3456 }
3457 }
3458 }
3459 }
3460 }
3461 }
3462 }
3463 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
3464 { /* search through diag to find all c neighbors */
3465 i1 = S_diag_j[jj];
3466 if (CF_marker[i1] == 2)
3467 CF_marker[i1] = 1;
3468 }
3469 if(num_procs > 1)
3470 {
3471 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
3472 { /* search through offd to find all c neighbors */
3473 if(col_offd_S_to_A)
3474 i1 = col_offd_S_to_A[S_offd_j[jj]];
3475 else
3476 i1 = S_offd_j[jj];
3477 if(CF_marker_offd[i1] == 2)
3478 { /* i1 is a C point direct neighbor */
3479 CF_marker_offd[i1] = 1;
3480 }
3481 }
3482 }
3483 }
3484 }
3485
3486 /*-----------------------------------------------------------------------
3487 * Allocate arrays.
3488 *-----------------------------------------------------------------------*/
3489
3490 P_diag_size = jj_counter;
3491 P_offd_size = jj_counter_offd;
3492
3493 if (P_diag_size)
3494 {
3495 P_diag_j = hypre_CTAlloc(int, P_diag_size);
3496 P_diag_data = hypre_CTAlloc(double, P_diag_size);
3497 }
3498
3499 if (P_offd_size)
3500 {
3501 P_offd_j = hypre_CTAlloc(int, P_offd_size);
3502 P_offd_data = hypre_CTAlloc(double, P_offd_size);
3503 }
3504
3505 P_diag_i[n_fine] = jj_counter;
3506 P_offd_i[n_fine] = jj_counter_offd;
3507
3508 jj_counter = start_indexing;
3509 jj_counter_offd = start_indexing;
3510 ccounter = start_indexing;
3511 ccounter_offd = start_indexing;
3512
3513 /* Fine to coarse mapping */
3514 if(num_procs > 1)
3515 {
3516 big_insert_new_nodes(comm_pkg, extend_comm_pkg, fine_to_coarse,
3517 full_off_procNodes, my_first_cpt,
3518 fine_to_coarse_offd);
3519 }
3520
3521 for (i = 0; i < n_fine; i++)
3522 P_marker[i] = -1;
3523
3524 for (i = 0; i < full_off_procNodes; i++)
3525 P_marker_offd[i] = -1;
3526
3527 /*-----------------------------------------------------------------------
3528 * Loop over fine grid points.
3529 *-----------------------------------------------------------------------*/
3530 jj_begin_row_offd = 0;
3531 for (i = 0; i < n_fine; i++)
3532 {
3533 jj_begin_row = jj_counter;
3534 if(num_procs > 1)
3535 jj_begin_row_offd = jj_counter_offd;
3536
3537 /*--------------------------------------------------------------------
3538 * If i is a c-point, interpolation is the identity.
3539 *--------------------------------------------------------------------*/
3540
3541 if (CF_marker[i] > 0)
3542 {
3543 P_diag_j[jj_counter] = fine_to_coarse[i];
3544 P_diag_data[jj_counter] = one;
3545 jj_counter++;
3546 }
3547
3548 /*--------------------------------------------------------------------
3549 * If i is an F-point, build interpolation.
3550 *--------------------------------------------------------------------*/
3551
3552 else if (CF_marker[i] != -3)
3553 {
3554 ccounter = 0;
3555 ccounter_offd = 0;
3556 strong_f_marker--;
3557
3558 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
3559 { /* Search C points only */
3560 i1 = S_diag_j[jj];
3561
3562 /*--------------------------------------------------------------
3563 * If neighbor i1 is a C-point, set column number in P_diag_j
3564 * and initialize interpolation weight to zero.
3565 *--------------------------------------------------------------*/
3566
3567 if (CF_marker[i1] > 0)
3568 {
3569 CF_marker[i1] = 2;
3570 if (P_marker[i1] < jj_begin_row)
3571 {
3572 P_marker[i1] = jj_counter;
3573 P_diag_j[jj_counter] = fine_to_coarse[i1];
3574 P_diag_data[jj_counter] = zero;
3575 jj_counter++;
3576 }
3577 }
3578 }
3579 if ( num_procs > 1)
3580 {
3581 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
3582 {
3583 if(col_offd_S_to_A)
3584 i1 = col_offd_S_to_A[S_offd_j[jj]];
3585 else
3586 i1 = S_offd_j[jj];
3587 if ( CF_marker_offd[i1] > 0)
3588 {
3589 CF_marker_offd[i1] = 2;
3590 if(P_marker_offd[i1] < jj_begin_row_offd)
3591 {
3592 P_marker_offd[i1] = jj_counter_offd;
3593 P_offd_j[jj_counter_offd] = i1;
3594 P_offd_data[jj_counter_offd] = zero;
3595 jj_counter_offd++;
3596 }
3597 }
3598 }
3599 }
3600
3601 for(jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
3602 { /* Search through F points */
3603 i1 = S_diag_j[jj];
3604 if(CF_marker[i1] == -1)
3605 {
3606 P_marker[i1] = strong_f_marker;
3607 common_c = 0;
3608 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
3609 {
3610 k1 = S_diag_j[kk];
3611 if (CF_marker[k1] == 2)
3612 {
3613 common_c = 1;
3614 break;
3615 }
3616 }
3617 if(num_procs > 1 && common_c == 0)
3618 { /* no common c point yet, check offd */
3619 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
3620 {
3621 if(col_offd_S_to_A)
3622 k1 = col_offd_S_to_A[S_offd_j[kk]];
3623 else
3624 k1 = S_offd_j[kk];
3625
3626 if (CF_marker_offd[k1] == 2)
3627 {
3628 common_c = 1;
3629 break;
3630 }
3631 }
3632 }
3633 if(!common_c)
3634 { /* No common c point, extend the interp set */
3635 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
3636 {
3637 k1 = S_diag_j[kk];
3638 if (CF_marker[k1] >= 0)
3639 {
3640 if(P_marker[k1] < jj_begin_row)
3641 {
3642 P_marker[k1] = jj_counter;
3643 P_diag_j[jj_counter] = fine_to_coarse[k1];
3644 P_diag_data[jj_counter] = zero;
3645 jj_counter++;
3646 }
3647 }
3648 }
3649 if(num_procs > 1)
3650 {
3651 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
3652 {
3653 if(col_offd_S_to_A)
3654 k1 = col_offd_S_to_A[S_offd_j[kk]];
3655 else
3656 k1 = S_offd_j[kk];
3657 if(CF_marker_offd[k1] >= 0)
3658 {
3659 if(P_marker_offd[k1] < jj_begin_row_offd)
3660 {
3661 P_marker_offd[k1] = jj_counter_offd;
3662 P_offd_j[jj_counter_offd] = k1;
3663 P_offd_data[jj_counter_offd] = zero;
3664 jj_counter_offd++;
3665 }
3666 }
3667 }
3668 }
3669 }
3670 }
3671 }
3672 if ( num_procs > 1)
3673 {
3674 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
3675 {
3676 i1 = S_offd_j[jj];
3677 if(col_offd_S_to_A)
3678 i1 = col_offd_S_to_A[i1];
3679 if(CF_marker_offd[i1] == -1)
3680 { /* F points that are off proc */
3681 P_marker_offd[i1] = strong_f_marker;
3682 common_c = 0;
3683 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
3684 { /* Check if common c */
3685 loc_col = Sop_j[kk];
3686 if(loc_col > -1)
3687 { /* In S_diag */
3688 if(CF_marker[loc_col] == 2)
3689 {
3690 common_c = 1;
3691 break;
3692 }
3693 }
3694 else
3695 {
3696 loc_col = -loc_col - 1;
3697 if(CF_marker_offd[loc_col] == 2)
3698 {
3699 common_c = 1;
3700 break;
3701 }
3702 }
3703 }
3704 if(!common_c)
3705 {
3706 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
3707 {
3708 loc_col = Sop_j[kk];
3709 /* Find local col number */
3710 if(loc_col > -1)
3711 {
3712 if(CF_marker[loc_col] > 0)
3713 {
3714 if(P_marker[loc_col] < jj_begin_row)
3715 {
3716 P_marker[loc_col] = jj_counter;
3717 P_diag_j[jj_counter] = fine_to_coarse[loc_col];
3718 P_diag_data[jj_counter] = zero;
3719 jj_counter++;
3720 }
3721 }
3722 }
3723 else
3724 {
3725 loc_col = -loc_col - 1;
3726 if(CF_marker_offd[loc_col] > 0)
3727 {
3728 if(P_marker_offd[loc_col] < jj_begin_row_offd)
3729 {
3730 P_marker_offd[loc_col] = jj_counter_offd;
3731 P_offd_j[jj_counter_offd]=loc_col;
3732 P_offd_data[jj_counter_offd] = zero;
3733 jj_counter_offd++;
3734 }
3735 }
3736 }
3737 }
3738 }
3739 }
3740 }
3741 }
3742 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
3743 { /* Search C points only */
3744 i1 = S_diag_j[jj];
3745
3746 /*--------------------------------------------------------------
3747 * If neighbor i1 is a C-point, set column number in P_diag_j
3748 * and initialize interpolation weight to zero.
3749 *--------------------------------------------------------------*/
3750
3751 if (CF_marker[i1] == 2)
3752 {
3753 CF_marker[i1] = 1;
3754 }
3755 }
3756 if ( num_procs > 1)
3757 {
3758 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
3759 {
3760 if(col_offd_S_to_A)
3761 i1 = col_offd_S_to_A[S_offd_j[jj]];
3762 else
3763 i1 = S_offd_j[jj];
3764 if ( CF_marker_offd[i1] == 2)
3765 {
3766 CF_marker_offd[i1] = 1;
3767 }
3768 }
3769 }
3770
3771
3772 jj_end_row = jj_counter;
3773 jj_end_row_offd = jj_counter_offd;
3774
3775 diagonal = A_diag_data[A_diag_i[i]];
3776 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
3777 { /* i1 is a c-point and strongly influences i, accumulate
3778 * a_(i,i1) into interpolation weight */
3779 i1 = A_diag_j[jj];
3780 if (P_marker[i1] >= jj_begin_row)
3781 {
3782 P_diag_data[P_marker[i1]] += A_diag_data[jj];
3783 }
3784 else if(P_marker[i1] == strong_f_marker)
3785 {
3786 sum = zero;
3787 if(A_diag_data[A_diag_i[i1]] < 0) sgn = -1;
3788 /* Loop over row of A for point i1 and calculate the sum
3789 * of the connections to c-points that strongly incluence i. */
3790 for(jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
3791 {
3792 i2 = A_diag_j[jj1];
3793 if(P_marker[i2] >= jj_begin_row && (sgn*A_diag_data[jj1]) < 0)
3794 sum += A_diag_data[jj1];
3795 }
3796 if(num_procs > 1)
3797 {
3798 for(jj1 = A_offd_i[i1]; jj1< A_offd_i[i1+1]; jj1++)
3799 {
3800 i2 = A_offd_j[jj1];
3801 if(P_marker_offd[i2] >= jj_begin_row_offd &&
3802 (sgn*A_offd_data[jj1]) < 0)
3803 sum += A_offd_data[jj1];
3804 }
3805 }
3806 if(sum != 0)
3807 {
3808 distribute = A_diag_data[jj]/sum;
3809 /* Loop over row of A for point i1 and do the distribution */
3810 for(jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
3811 {
3812 i2 = A_diag_j[jj1];
3813 if(P_marker[i2] >= jj_begin_row && (sgn*A_diag_data[jj1]) < 0)
3814 P_diag_data[P_marker[i2]] +=
3815 distribute*A_diag_data[jj1];
3816 }
3817 if(num_procs > 1)
3818 {
3819 for(jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
3820 {
3821 i2 = A_offd_j[jj1];
3822 if(P_marker_offd[i2] >= jj_begin_row_offd &&
3823 (sgn*A_offd_data[jj1]) < 0)
3824 P_offd_data[P_marker_offd[i2]] +=
3825 distribute*A_offd_data[jj1];
3826 }
3827 }
3828 }
3829 else
3830 diagonal += A_diag_data[jj];
3831 }
3832 /* neighbor i1 weakly influences i, accumulate a_(i,i1) into
3833 * diagonal */
3834 else if (CF_marker[i1] != -3)
3835 {
3836 if(num_functions == 1 || dof_func[i] == dof_func[i1])
3837 diagonal += A_diag_data[jj];
3838 }
3839 }
3840 if(num_procs > 1)
3841 {
3842 for(jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
3843 {
3844 i1 = A_offd_j[jj];
3845 if(P_marker_offd[i1] >= jj_begin_row_offd)
3846 P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];
3847 else if(P_marker_offd[i1] == strong_f_marker)
3848 {
3849 sum = zero;
3850 sgn = 1;
3851 if(A_ext_data[A_ext_i[i1]] < 0) sgn = -1;
3852 for(jj1 = A_ext_i[i1]+1; jj1 < A_ext_i[i1+1]; jj1++)
3853 {
3854 loc_col = A_ext_j[jj1];
3855 if(loc_col > -1)
3856 { /* diag */
3857 if(P_marker[loc_col] >= jj_begin_row && (sgn*A_ext_data[jj1])<0)
3858 sum += A_ext_data[jj1];
3859 }
3860 else
3861 {
3862 loc_col = -loc_col - 1;
3863 if(P_marker_offd[loc_col] >= jj_begin_row_offd &&
3864 (sgn*A_ext_data[jj1]) < 0)
3865 sum += A_ext_data[jj1];
3866 }
3867 }
3868 if(sum != 0)
3869 {
3870 distribute = A_offd_data[jj] / sum;
3871 for(jj1 = A_ext_i[i1]+1; jj1 < A_ext_i[i1+1]; jj1++)
3872 {
3873 loc_col = A_ext_j[jj1];
3874 if(loc_col > -1)
3875 { /* diag */
3876 if(P_marker[loc_col] >= jj_begin_row &&
3877 (sgn*A_ext_data[jj1]) < 0)
3878 P_diag_data[P_marker[loc_col]] += distribute*
3879 A_ext_data[jj1];
3880 }
3881 else
3882 {
3883 loc_col = -loc_col - 1;
3884 if(P_marker_offd[loc_col] >= jj_begin_row_offd &&
3885 (sgn*A_ext_data[jj1]) < 0)
3886 P_offd_data[P_marker_offd[loc_col]] += distribute*
3887 A_ext_data[jj1];
3888 }
3889 }
3890 }
3891 else
3892 diagonal += A_offd_data[jj];
3893 }
3894 else if (CF_marker_offd[i1] != -3)
3895 {
3896 if(num_functions == 1 || dof_func[i] == dof_func_offd[i1])
3897 diagonal += A_offd_data[jj];
3898 }
3899 }
3900 }
3901 for(jj = jj_begin_row; jj < jj_end_row; jj++)
3902 P_diag_data[jj] /= -diagonal;
3903 for(jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
3904 P_offd_data[jj] /= -diagonal;
3905 }
3906 strong_f_marker--;
3907 }
3908
3909 P = hypre_ParCSRMatrixCreate(comm,
3910 hypre_ParCSRMatrixGlobalNumRows(A),
3911 total_global_cpts,
3912 hypre_ParCSRMatrixColStarts(A),
3913 num_cpts_global,
3914 0,
3915 P_diag_i[n_fine],
3916 P_offd_i[n_fine]);
3917
3918 P_diag = hypre_ParCSRMatrixDiag(P);
3919 hypre_CSRMatrixData(P_diag) = P_diag_data;
3920 hypre_CSRMatrixI(P_diag) = P_diag_i;
3921 hypre_CSRMatrixJ(P_diag) = P_diag_j;
3922 P_offd = hypre_ParCSRMatrixOffd(P);
3923 hypre_CSRMatrixData(P_offd) = P_offd_data;
3924 hypre_CSRMatrixI(P_offd) = P_offd_i;
3925 hypre_CSRMatrixJ(P_offd) = P_offd_j;
3926 hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
3927
3928 /* Compress P, removing coefficients smaller than trunc_factor * Max */
3929 if (trunc_factor != 0.0 || max_elmts > 0)
3930 {
3931 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
3932 P_diag_data = hypre_CSRMatrixData(P_diag);
3933 P_diag_i = hypre_CSRMatrixI(P_diag);
3934 P_diag_j = hypre_CSRMatrixJ(P_diag);
3935 P_offd_data = hypre_CSRMatrixData(P_offd);
3936 P_offd_i = hypre_CSRMatrixI(P_offd);
3937 P_offd_j = hypre_CSRMatrixJ(P_offd);
3938 P_diag_size = P_diag_i[n_fine];
3939 P_offd_size = P_offd_i[n_fine];
3940 }
3941
3942 /* This builds col_map, col_map should be monotone increasing and contain
3943 * global numbers. */
3944 num_cols_P_offd = 0;
3945 if(P_offd_size)
3946 {
3947 num_cols_P_offd = 0;
3948 for (i=0; i < P_offd_size; i++)
3949 {
3950 index = P_offd_j[i];
3951 if(tmp_CF_marker_offd[index] == 1)
3952 {
3953 num_cols_P_offd++;
3954 tmp_CF_marker_offd[index] = 2;
3955 }
3956 }
3957
3958 if (num_cols_P_offd)
3959 {
3960 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd);
3961 tmp_map_offd = hypre_CTAlloc(int, num_cols_P_offd);
3962 }
3963
3964 index = 0;
3965 for(i = 0; i < num_cols_P_offd; i++)
3966 {
3967 while( tmp_CF_marker_offd[index] == -1) index++;
3968 col_map_offd_P[i] = fine_to_coarse_offd[index];
3969 tmp_map_offd[i] = index++;
3970 }
3971
3972 hypre_BigQsortbi(col_map_offd_P, tmp_map_offd, 0, num_cols_P_offd-1);
3973
3974 for (i = 0; i < num_cols_P_offd; i++)
3975 tmp_CF_marker_offd[tmp_map_offd[i]] = i;
3976
3977 for(i = 0; i < P_offd_size; i++)
3978 P_offd_j[i] = tmp_CF_marker_offd[P_offd_j[i]];
3979 }
3980
3981 if (num_cols_P_offd)
3982 {
3983 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
3984 hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
3985 hypre_TFree(tmp_map_offd);
3986 }
3987
3988#ifdef HYPRE_NO_GLOBAL_PARTITION
3989 hypre_NewCommPkgCreate(P);
3990#else
3991 hypre_MatvecCommPkgCreate(P);
3992#endif
3993
3994 for (i=0; i < n_fine; i++)
3995 if (CF_marker[i] == -3) CF_marker[i] = -1;
3996
3997 *P_ptr = P;
3998
3999 /* Deallocate memory */
4000 hypre_TFree(fine_to_coarse);
4001 hypre_TFree(P_marker);
4002
4003 if (num_procs > 1)
4004 {
4005 hypre_BigCSRMatrixDestroy(Sop);
4006 hypre_BigCSRMatrixDestroy(A_ext);
4007 hypre_TFree(fine_to_coarse_offd);
4008 hypre_TFree(P_marker_offd);
4009 hypre_TFree(CF_marker_offd);
4010 hypre_TFree(tmp_CF_marker_offd);
4011 if(num_functions > 1)
4012 hypre_TFree(dof_func_offd);
4013 hypre_TFree(found);
4014
4015 hypre_MatvecCommPkgDestroy(extend_comm_pkg);
4016
4017 }
4018
4019 return hypre_error_flag;
4020}
4021/*---------------------------------------------------------------------------
4022 * hypre_BoomerAMGBuildFF1Interp
4023 * Comment: Only use FF when there is no common c point.
4024 *--------------------------------------------------------------------------*/
4025int
4026hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A, int *CF_marker,
4027 hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
4028 int num_functions, int *dof_func, int debug_flag,
4029 double trunc_factor, int max_elmts,
4030 int *col_offd_S_to_A,
4031 hypre_ParCSRMatrix **P_ptr)
4032{
4033 /* Communication Variables */
4034 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
4035 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
4036
4037 int my_id, num_procs;
4038
4039 /* Variables to store input variables */
4040 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
4041 double *A_diag_data = hypre_CSRMatrixData(A_diag);
4042 int *A_diag_i = hypre_CSRMatrixI(A_diag);
4043 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
4044
4045 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
4046 double *A_offd_data = hypre_CSRMatrixData(A_offd);
4047 int *A_offd_i = hypre_CSRMatrixI(A_offd);
4048 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
4049
4050 int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
4051 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
4052 int n_fine = hypre_CSRMatrixNumRows(A_diag);
4053 HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstRowIndex(A);
4054 int local_numrows = hypre_CSRMatrixNumRows(A_diag);
4055 HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt) local_numrows;
4056 HYPRE_BigInt total_global_cpts, my_first_cpt;
4057
4058 /* Variables to store strong connection matrix info */
4059 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
4060 int *S_diag_i = hypre_CSRMatrixI(S_diag);
4061 int *S_diag_j = hypre_CSRMatrixJ(S_diag);
4062
4063 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
4064 int *S_offd_i = hypre_CSRMatrixI(S_offd);
4065 int *S_offd_j = hypre_CSRMatrixJ(S_offd);
4066
4067 /* Interpolation matrix P */
4068 hypre_ParCSRMatrix *P;
4069 hypre_CSRMatrix *P_diag;
4070 hypre_CSRMatrix *P_offd;
4071
4072 double *P_diag_data = NULL;
4073 int *P_diag_i, *P_diag_j = NULL;
4074 double *P_offd_data = NULL;
4075 int *P_offd_i, *P_offd_j = NULL;
4076
4077 HYPRE_BigInt *col_map_offd_P;
4078 int *tmp_map_offd;
4079 int P_diag_size;
4080 int P_offd_size;
4081 int *P_marker = NULL;
4082 int *P_marker_offd = NULL;
4083 int *CF_marker_offd = NULL;
4084 int *tmp_CF_marker_offd = NULL;
4085 int *dof_func_offd = NULL;
4086 int ccounter_offd;
4087 int common_c;
4088
4089 /* Full row information for columns of A that are off diag*/
4090 hypre_BigCSRMatrix *A_ext;
4091 double *A_ext_data;
4092 int *A_ext_i;
4093 HYPRE_BigInt *big_A_ext_j;
4094 int *A_ext_j;
4095
4096 int *fine_to_coarse = NULL;
4097 HYPRE_BigInt *fine_to_coarse_offd = NULL;
4098 HYPRE_BigInt *found;
4099
4100 int num_cols_P_offd;
4101 int newoff, loc_col;
4102 int A_ext_rows, full_off_procNodes;
4103
4104 hypre_BigCSRMatrix *Sop;
4105 int *Sop_i;
4106 HYPRE_BigInt *big_Sop_j;
4107 int *Sop_j;
4108
4109 int Soprows;
4110
4111 /* Variables to keep count of interpolatory points */
4112 int jj_counter, jj_counter_offd;
4113 int jj_begin_row, jj_end_row;
4114 int jj_begin_row_offd = 0;
4115 int jj_end_row_offd = 0;
4116 int coarse_counter, coarse_counter_offd;
4117
4118 /* Interpolation weight variables */
4119 double sum, diagonal, distribute;
4120 int strong_f_marker = -2;
4121 int sgn;
4122
4123 /* Loop variables */
4124 int index;
4125 int start_indexing = 0;
4126 int i, i1, i2, jj, kk, k1, jj1;
4127 int ccounter;
4128 int found_c = 0;
4129
4130 /* Definitions */
4131 double zero = 0.0;
4132 double one = 1.0;
4133
4134 hypre_ParCSRCommPkg *extend_comm_pkg = NULL;
4135 /* BEGIN */
4136 MPI_Comm_size(comm, &num_procs);
4137 MPI_Comm_rank(comm,&my_id);
4138
4139#ifdef HYPRE_NO_GLOBAL_PARTITION
4140 my_first_cpt = num_cpts_global[0];
4141 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
4142 MPI_Bcast(&total_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
4143#else
4144 my_first_cpt = num_cpts_global[my_id];
4145 total_global_cpts = num_cpts_global[num_procs];
4146#endif
4147
4148 if (!comm_pkg)
4149 {
4150#ifdef HYPRE_NO_GLOBAL_PARTITION
4151 hypre_NewCommPkgCreate(A);
4152#else
4153 hypre_MatvecCommPkgCreate(A);
4154#endif
4155 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
4156 }
4157
4158 /* Set up off processor information (specifically for neighbors of
4159 * neighbors */
4160 newoff = 0;
4161 full_off_procNodes = 0;
4162 if (num_procs > 1)
4163 {
4164 /*----------------------------------------------------------------------
4165 * Get the off processors rows for A and S, associated with columns in
4166 * A_offd and S_offd.
4167 *---------------------------------------------------------------------*/
4168 A_ext = hypre_ParCSRMatrixExtractBigExt(A,A,1);
4169 A_ext_i = hypre_BigCSRMatrixI(A_ext);
4170 big_A_ext_j = hypre_BigCSRMatrixJ(A_ext);
4171 A_ext_data = hypre_BigCSRMatrixData(A_ext);
4172 A_ext_rows = hypre_BigCSRMatrixNumRows(A_ext);
4173
4174 Sop = hypre_ParCSRMatrixExtractBigExt(S,A,0);
4175 Sop_i = hypre_BigCSRMatrixI(Sop);
4176 big_Sop_j = hypre_BigCSRMatrixJ(Sop);
4177 Soprows = hypre_BigCSRMatrixNumRows(Sop);
4178
4179 /* Find nodes that are neighbors of neighbors, not found in offd */
4180 newoff = new_offd_nodes(&found, A_ext_rows, A_ext_i, big_A_ext_j,
4181 &A_ext_j,
4182 Soprows, col_map_offd, col_1, col_n,
4183 Sop_i, big_Sop_j, &Sop_j, CF_marker, comm_pkg);
4184
4185 hypre_BigCSRMatrixJ(A_ext) = NULL;
4186 hypre_BigCSRMatrixJ(Sop) = NULL;
4187
4188 if(newoff >= 0)
4189 full_off_procNodes = newoff + num_cols_A_offd;
4190 else
4191 return(1);
4192
4193 /* Possibly add new points and new processors to the comm_pkg, all
4194 * processors need new_comm_pkg */
4195
4196 /* AHB - create a new comm package just for extended info -
4197 this will work better with the assumed partition*/
4198 hypre_ParCSRFindExtendCommPkg(A, newoff, found,
4199 &extend_comm_pkg);
4200
4201 CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
4202
4203 if (num_functions > 1 && full_off_procNodes > 0)
4204 dof_func_offd = hypre_CTAlloc(int, full_off_procNodes);
4205
4206 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, CF_marker,
4207 full_off_procNodes, CF_marker_offd);
4208
4209 if(num_functions > 1)
4210 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, dof_func,
4211 full_off_procNodes, dof_func_offd);
4212 }
4213
4214 /*-----------------------------------------------------------------------
4215 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
4216 *-----------------------------------------------------------------------*/
4217
4218 /*-----------------------------------------------------------------------
4219 * Intialize counters and allocate mapping vector.
4220 *-----------------------------------------------------------------------*/
4221 P_diag_i = hypre_CTAlloc(int, n_fine+1);
4222 P_offd_i = hypre_CTAlloc(int, n_fine+1);
4223
4224 if (n_fine)
4225 {
4226 fine_to_coarse = hypre_CTAlloc(int, n_fine);
4227 P_marker = hypre_CTAlloc(int, n_fine);
4228 }
4229
4230 if (full_off_procNodes)
4231 {
4232 P_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
4233 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, full_off_procNodes);
4234 tmp_CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
4235 }
4236
4237 for (i=0; i < full_off_procNodes; i++)
4238 {
4239 P_marker_offd[i] = -1;
4240 tmp_CF_marker_offd[i] = -1;
4241 fine_to_coarse_offd[i] = -1;
4242 }
4243
4244 for (i=0; i < n_fine; i++)
4245 {
4246 P_marker[i] = -1;
4247 fine_to_coarse[i] = -1;
4248 }
4249
4250 jj_counter = start_indexing;
4251 jj_counter_offd = start_indexing;
4252 coarse_counter = 0;
4253 coarse_counter_offd = 0;
4254
4255 /*-----------------------------------------------------------------------
4256 * Loop over fine grid.
4257 *-----------------------------------------------------------------------*/
4258 for (i = 0; i < n_fine; i++)
4259 {
4260 P_diag_i[i] = jj_counter;
4261 if (num_procs > 1)
4262 P_offd_i[i] = jj_counter_offd;
4263
4264 if (CF_marker[i] >= 0)
4265 {
4266 jj_counter++;
4267 fine_to_coarse[i] = coarse_counter;
4268 coarse_counter++;
4269 }
4270
4271 /*--------------------------------------------------------------------
4272 * If i is an F-point, interpolation is from the C-points that
4273 * strongly influence i, or C-points that stronly influence F-points
4274 * that strongly influence i.
4275 *--------------------------------------------------------------------*/
4276 else
4277 {
4278 /* Initialize ccounter for each f point */
4279 ccounter = 0;
4280 ccounter_offd = 0;
4281 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
4282 { /* search through diag to find all c neighbors */
4283 i1 = S_diag_j[jj];
4284 if (CF_marker[i1] > 0)
4285 { /* i1 is a C point */
4286 CF_marker[i1] = 2;
4287 if (P_marker[i1] < P_diag_i[i])
4288 {
4289 P_marker[i1] = jj_counter;
4290 jj_counter++;
4291 }
4292 }
4293 }
4294 if(num_procs > 1)
4295 {
4296 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
4297 { /* search through offd to find all c neighbors */
4298 if(col_offd_S_to_A)
4299 i1 = col_offd_S_to_A[S_offd_j[jj]];
4300 else
4301 i1 = S_offd_j[jj];
4302 if(CF_marker_offd[i1] > 0)
4303 { /* i1 is a C point direct neighbor */
4304 CF_marker_offd[i1] = 2;
4305 if(P_marker_offd[i1] < P_offd_i[i])
4306 {
4307 tmp_CF_marker_offd[i1] = 1;
4308 P_marker_offd[i1] = jj_counter_offd;
4309 jj_counter_offd++;
4310 }
4311 }
4312 }
4313 }
4314 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
4315 { /* Search diag to find f neighbors and determine if common c point */
4316 i1 = S_diag_j[jj];
4317 if (CF_marker[i1] < 0)
4318 { /* i1 is a F point, loop through it's strong neighbors */
4319 common_c = 0;
4320 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
4321 {
4322 k1 = S_diag_j[kk];
4323 if (CF_marker[k1] == 2)
4324 {
4325 common_c = 1;
4326 break;
4327 }
4328 }
4329 if(num_procs > 1 && common_c == 0)
4330 { /* no common c point yet, check offd */
4331 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
4332 {
4333 if(col_offd_S_to_A)
4334 k1 = col_offd_S_to_A[S_offd_j[kk]];
4335 else
4336 k1 = S_offd_j[kk];
4337
4338 if (CF_marker_offd[k1] == 2)
4339 { /* k1 is a c point check if it is common */
4340 common_c = 1;
4341 break;
4342 }
4343 }
4344 }
4345 if(!common_c)
4346 { /* No common c point, extend the interp set */
4347 found_c = 0;
4348 for(kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
4349 {
4350 k1 = S_diag_j[kk];
4351 if(CF_marker[k1] > 0)
4352 {
4353 if(P_marker[k1] < P_diag_i[i])
4354 {
4355 P_marker[k1] = jj_counter;
4356 jj_counter++;
4357 found_c = 1;
4358 break;
4359 }
4360 }
4361 }
4362 if(num_procs > 1 && !found_c)
4363 {
4364 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
4365 {
4366 if(col_offd_S_to_A)
4367 k1 = col_offd_S_to_A[S_offd_j[kk]];
4368 else
4369 k1 = S_offd_j[kk];
4370 if (CF_marker_offd[k1] > 0)
4371 {
4372 if(P_marker_offd[k1] < P_offd_i[i])
4373 {
4374 tmp_CF_marker_offd[k1] = 1;
4375 P_marker_offd[k1] = jj_counter_offd;
4376 jj_counter_offd++;
4377 break;
4378 }
4379 }
4380 }
4381 }
4382 }
4383 }
4384 }
4385 /* Look at off diag strong connections of i */
4386 if (num_procs > 1)
4387 {
4388 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
4389 {
4390 i1 = S_offd_j[jj];
4391 if(col_offd_S_to_A)
4392 i1 = col_offd_S_to_A[i1];
4393 if (CF_marker_offd[i1] < 0)
4394 { /* F point; look at neighbors of i1. Sop contains global col
4395 * numbers and entries that could be in S_diag or S_offd or
4396 * neither. */
4397 common_c = 0;
4398 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
4399 { /* Check if common c */
4400 loc_col = Sop_j[kk];
4401 if(loc_col > -1)
4402 { /* In S_diag */
4403 if(CF_marker[loc_col] == 2)
4404 {
4405 common_c = 1;
4406 break;
4407 }
4408 }
4409 else
4410 {
4411 loc_col = -loc_col - 1;
4412 if(CF_marker_offd[loc_col] == 2)
4413 {
4414 common_c = 1;
4415 break;
4416 }
4417 }
4418 }
4419 if(!common_c)
4420 {
4421 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
4422 { /* Check if common c */
4423 loc_col = Sop_j[kk];
4424 if(loc_col > -1)
4425 { /* In S_diag */
4426 if(CF_marker[loc_col] > 0)
4427 {
4428 if(P_marker[loc_col] < P_diag_i[i])
4429 {
4430 P_marker[loc_col] = jj_counter;
4431 jj_counter++;
4432 break;
4433 }
4434 }
4435 }
4436 else
4437 {
4438 loc_col = -loc_col - 1;
4439 if(CF_marker_offd[loc_col] > 0)
4440 {
4441 if(P_marker_offd[loc_col] < P_offd_i[i])
4442 {
4443 P_marker_offd[loc_col] = jj_counter_offd;
4444 tmp_CF_marker_offd[loc_col] = 1;
4445 jj_counter_offd++;
4446 break;
4447 }
4448 }
4449 }
4450 }
4451 }
4452 }
4453 }
4454 }
4455 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
4456 { /* search through diag to find all c neighbors */
4457 i1 = S_diag_j[jj];
4458 if (CF_marker[i1] == 2)
4459 CF_marker[i1] = 1;
4460 }
4461 if(num_procs > 1)
4462 {
4463 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
4464 { /* search through offd to find all c neighbors */
4465 if(col_offd_S_to_A)
4466 i1 = col_offd_S_to_A[S_offd_j[jj]];
4467 else
4468 i1 = S_offd_j[jj];
4469 if(CF_marker_offd[i1] == 2)
4470 { /* i1 is a C point direct neighbor */
4471 CF_marker_offd[i1] = 1;
4472 }
4473 }
4474 }
4475 }
4476 }
4477
4478 /*-----------------------------------------------------------------------
4479 * Allocate arrays.
4480 *-----------------------------------------------------------------------*/
4481
4482 P_diag_size = jj_counter;
4483 P_offd_size = jj_counter_offd;
4484 if (P_diag_size)
4485 {
4486 P_diag_j = hypre_CTAlloc(int, P_diag_size);
4487 P_diag_data = hypre_CTAlloc(double, P_diag_size);
4488 }
4489
4490 if (P_offd_size)
4491 {
4492 P_offd_j = hypre_CTAlloc(int, P_offd_size);
4493 P_offd_data = hypre_CTAlloc(double, P_offd_size);
4494 }
4495
4496 P_diag_i[n_fine] = jj_counter;
4497 P_offd_i[n_fine] = jj_counter_offd;
4498
4499 jj_counter = start_indexing;
4500 jj_counter_offd = start_indexing;
4501 ccounter = start_indexing;
4502 ccounter_offd = start_indexing;
4503
4504 /* Fine to coarse mapping */
4505 if(num_procs > 1)
4506 {
4507 big_insert_new_nodes(comm_pkg, extend_comm_pkg, fine_to_coarse,
4508 full_off_procNodes, my_first_cpt,
4509 fine_to_coarse_offd);
4510 }
4511
4512 for (i = 0; i < n_fine; i++)
4513 P_marker[i] = -1;
4514
4515 for (i = 0; i < full_off_procNodes; i++)
4516 P_marker_offd[i] = -1;
4517
4518 /*-----------------------------------------------------------------------
4519 * Loop over fine grid points.
4520 *-----------------------------------------------------------------------*/
4521 jj_begin_row_offd = 0;
4522 for (i = 0; i < n_fine; i++)
4523 {
4524 jj_begin_row = jj_counter;
4525 if(num_procs > 1)
4526 jj_begin_row_offd = jj_counter_offd;
4527
4528 /*--------------------------------------------------------------------
4529 * If i is a c-point, interpolation is the identity.
4530 *--------------------------------------------------------------------*/
4531
4532 if (CF_marker[i] > 0)
4533 {
4534 P_diag_j[jj_counter] = fine_to_coarse[i];
4535 P_diag_data[jj_counter] = one;
4536 jj_counter++;
4537 }
4538
4539 /*--------------------------------------------------------------------
4540 * If i is an F-point, build interpolation.
4541 *--------------------------------------------------------------------*/
4542
4543 else if (CF_marker[i] != -3)
4544 {
4545 ccounter = 0;
4546 ccounter_offd = 0;
4547 strong_f_marker--;
4548
4549 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
4550 { /* Search C points only */
4551 i1 = S_diag_j[jj];
4552
4553 /*--------------------------------------------------------------
4554 * If neighbor i1 is a C-point, set column number in P_diag_j
4555 * and initialize interpolation weight to zero.
4556 *--------------------------------------------------------------*/
4557
4558 if (CF_marker[i1] > 0)
4559 {
4560 CF_marker[i1] = 2;
4561 if (P_marker[i1] < jj_begin_row)
4562 {
4563 P_marker[i1] = jj_counter;
4564 P_diag_j[jj_counter] = fine_to_coarse[i1];
4565 P_diag_data[jj_counter] = zero;
4566 jj_counter++;
4567 }
4568 }
4569 }
4570 if ( num_procs > 1)
4571 {
4572 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
4573 {
4574 if(col_offd_S_to_A)
4575 i1 = col_offd_S_to_A[S_offd_j[jj]];
4576 else
4577 i1 = S_offd_j[jj];
4578 if ( CF_marker_offd[i1] > 0)
4579 {
4580 CF_marker_offd[i1] = 2;
4581 if(P_marker_offd[i1] < jj_begin_row_offd)
4582 {
4583 P_marker_offd[i1] = jj_counter_offd;
4584 P_offd_j[jj_counter_offd] = i1;
4585 P_offd_data[jj_counter_offd] = zero;
4586 jj_counter_offd++;
4587 }
4588 }
4589 }
4590 }
4591
4592 for(jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
4593 { /* Search through F points */
4594 i1 = S_diag_j[jj];
4595 if(CF_marker[i1] == -1)
4596 {
4597 P_marker[i1] = strong_f_marker;
4598 common_c = 0;
4599 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
4600 {
4601 k1 = S_diag_j[kk];
4602 if (CF_marker[k1] == 2)
4603 {
4604 common_c = 1;
4605 break;
4606 }
4607 }
4608 if(num_procs > 1 && common_c == 0)
4609 { /* no common c point yet, check offd */
4610 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
4611 {
4612 if(col_offd_S_to_A)
4613 k1 = col_offd_S_to_A[S_offd_j[kk]];
4614 else
4615 k1 = S_offd_j[kk];
4616
4617 if (CF_marker_offd[k1] == 2)
4618 { /* k1 is a c point check if it is common */
4619 common_c = 1;
4620 break;
4621 }
4622 }
4623 }
4624 if(!common_c)
4625 { /* No common c point, extend the interp set */
4626 found_c = 0;
4627 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
4628 {
4629 k1 = S_diag_j[kk];
4630 if (CF_marker[k1] >= 0)
4631 {
4632 if(P_marker[k1] < jj_begin_row)
4633 {
4634 P_marker[k1] = jj_counter;
4635 P_diag_j[jj_counter] = fine_to_coarse[k1];
4636 P_diag_data[jj_counter] = zero;
4637 jj_counter++;
4638 found_c = 1;
4639 break;
4640 }
4641 }
4642 }
4643 if(num_procs > 1 && !found_c)
4644 {
4645 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
4646 {
4647 if(col_offd_S_to_A)
4648 k1 = col_offd_S_to_A[S_offd_j[kk]];
4649 else
4650 k1 = S_offd_j[kk];
4651 if(CF_marker_offd[k1] >= 0)
4652 {
4653 if(P_marker_offd[k1] < jj_begin_row_offd)
4654 {
4655 P_marker_offd[k1] = jj_counter_offd;
4656 P_offd_j[jj_counter_offd] = k1;
4657 P_offd_data[jj_counter_offd] = zero;
4658 jj_counter_offd++;
4659 break;
4660 }
4661 }
4662 }
4663 }
4664 }
4665 }
4666 }
4667 if ( num_procs > 1)
4668 {
4669 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
4670 {
4671 i1 = S_offd_j[jj];
4672 if(col_offd_S_to_A)
4673 i1 = col_offd_S_to_A[i1];
4674 if(CF_marker_offd[i1] == -1)
4675 { /* F points that are off proc */
4676 P_marker_offd[i1] = strong_f_marker;
4677 common_c = 0;
4678 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
4679 { /* Check if common c */
4680 loc_col = Sop_j[kk];
4681 if(loc_col > -1)
4682 { /* In S_diag */
4683 if(CF_marker[loc_col] == 2)
4684 {
4685 common_c = 1;
4686 break;
4687 }
4688 }
4689 else
4690 {
4691 loc_col = -loc_col - 1;
4692 if(CF_marker_offd[loc_col] == 2)
4693 {
4694 common_c = 1;
4695 break;
4696 }
4697 }
4698 }
4699 if(!common_c)
4700 {
4701 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
4702 {
4703 loc_col = Sop_j[kk];
4704 /* Find local col number */
4705 if(loc_col > -1)
4706 {
4707 if(CF_marker[loc_col] > 0)
4708 {
4709 if(P_marker[loc_col] < jj_begin_row)
4710 {
4711 P_marker[loc_col] = jj_counter;
4712 P_diag_j[jj_counter] = fine_to_coarse[loc_col];
4713 P_diag_data[jj_counter] = zero;
4714 jj_counter++;
4715 break;
4716 }
4717 }
4718 }
4719 else
4720 {
4721 loc_col = -loc_col - 1;
4722 if(CF_marker_offd[loc_col] > 0)
4723 {
4724 if(P_marker_offd[loc_col] < jj_begin_row_offd)
4725 {
4726 P_marker_offd[loc_col] = jj_counter_offd;
4727 P_offd_j[jj_counter_offd]=loc_col;
4728 P_offd_data[jj_counter_offd] = zero;
4729 jj_counter_offd++;
4730 break;
4731 }
4732 }
4733 }
4734 }
4735 }
4736 }
4737 }
4738 }
4739 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
4740 { /* Search C points only */
4741 i1 = S_diag_j[jj];
4742
4743 /*--------------------------------------------------------------
4744 * If neighbor i1 is a C-point, set column number in P_diag_j
4745 * and initialize interpolation weight to zero.
4746 *--------------------------------------------------------------*/
4747
4748 if (CF_marker[i1] == 2)
4749 {
4750 CF_marker[i1] = 1;
4751 }
4752 }
4753 if ( num_procs > 1)
4754 {
4755 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
4756 {
4757 if(col_offd_S_to_A)
4758 i1 = col_offd_S_to_A[S_offd_j[jj]];
4759 else
4760 i1 = S_offd_j[jj];
4761 if ( CF_marker_offd[i1] == 2)
4762 {
4763 CF_marker_offd[i1] = 1;
4764 }
4765 }
4766 }
4767
4768
4769 jj_end_row = jj_counter;
4770 jj_end_row_offd = jj_counter_offd;
4771
4772 diagonal = A_diag_data[A_diag_i[i]];
4773 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
4774 { /* i1 is a c-point and strongly influences i, accumulate
4775 * a_(i,i1) into interpolation weight */
4776 i1 = A_diag_j[jj];
4777 if (P_marker[i1] >= jj_begin_row)
4778 {
4779 P_diag_data[P_marker[i1]] += A_diag_data[jj];
4780 }
4781 else if(P_marker[i1] == strong_f_marker)
4782 {
4783 sum = zero;
4784 if(A_diag_data[A_diag_i[i1]] < 0) sgn = -1;
4785 /* Loop over row of A for point i1 and calculate the sum
4786 * of the connections to c-points that strongly incluence i. */
4787 for(jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
4788 {
4789 i2 = A_diag_j[jj1];
4790 if(P_marker[i2] >= jj_begin_row && (sgn*A_diag_data[jj1]) < 0)
4791 sum += A_diag_data[jj1];
4792 }
4793 if(num_procs > 1)
4794 {
4795 for(jj1 = A_offd_i[i1]; jj1< A_offd_i[i1+1]; jj1++)
4796 {
4797 i2 = A_offd_j[jj1];
4798 if(P_marker_offd[i2] >= jj_begin_row_offd &&
4799 (sgn*A_offd_data[jj1]) < 0)
4800 sum += A_offd_data[jj1];
4801 }
4802 }
4803 if(sum != 0)
4804 {
4805 distribute = A_diag_data[jj]/sum;
4806 /* Loop over row of A for point i1 and do the distribution */
4807 for(jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
4808 {
4809 i2 = A_diag_j[jj1];
4810 if(P_marker[i2] >= jj_begin_row && (sgn*A_diag_data[jj1]) < 0)
4811 P_diag_data[P_marker[i2]] +=
4812 distribute*A_diag_data[jj1];
4813 }
4814 if(num_procs > 1)
4815 {
4816 for(jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
4817 {
4818 i2 = A_offd_j[jj1];
4819 if(P_marker_offd[i2] >= jj_begin_row_offd &&
4820 (sgn*A_offd_data[jj1]) < 0)
4821 P_offd_data[P_marker_offd[i2]] +=
4822 distribute*A_offd_data[jj1];
4823 }
4824 }
4825 }
4826 else
4827 diagonal += A_diag_data[jj];
4828 }
4829 /* neighbor i1 weakly influences i, accumulate a_(i,i1) into
4830 * diagonal */
4831 else if (CF_marker[i1] != -3)
4832 {
4833 if(num_functions == 1 || dof_func[i] == dof_func[i1])
4834 diagonal += A_diag_data[jj];
4835 }
4836 }
4837 if(num_procs > 1)
4838 {
4839 for(jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
4840 {
4841 i1 = A_offd_j[jj];
4842 if(P_marker_offd[i1] >= jj_begin_row_offd)
4843 P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];
4844 else if(P_marker_offd[i1] == strong_f_marker)
4845 {
4846 sum = zero;
4847 sgn = 1;
4848 if(A_ext_data[A_ext_i[i1]] < 0) sgn = -1;
4849 for(jj1 = A_ext_i[i1]+1; jj1 < A_ext_i[i1+1]; jj1++)
4850 {
4851 loc_col = A_ext_j[jj1];
4852 if(loc_col > -1)
4853 { /* diag */
4854 if(P_marker[loc_col] >= jj_begin_row && (sgn*A_ext_data[jj1])<0)
4855 sum += A_ext_data[jj1];
4856 }
4857 else
4858 {
4859 loc_col = -loc_col - 1;
4860 if(P_marker_offd[loc_col] >= jj_begin_row_offd &&
4861 (sgn*A_ext_data[jj1]) < 0)
4862 sum += A_ext_data[jj1];
4863 }
4864 }
4865 if(sum != 0)
4866 {
4867 distribute = A_offd_data[jj] / sum;
4868 for(jj1 = A_ext_i[i1]+1; jj1 < A_ext_i[i1+1]; jj1++)
4869 {
4870 loc_col = A_ext_j[jj1];
4871 if(loc_col > -1)
4872 { /* diag */
4873 if(P_marker[loc_col] >= jj_begin_row &&
4874 (sgn*A_ext_data[jj1]) < 0)
4875 P_diag_data[P_marker[loc_col]] += distribute*
4876 A_ext_data[jj1];
4877 }
4878 else
4879 {
4880 loc_col = -loc_col - 1;
4881 if(P_marker_offd[loc_col] >= jj_begin_row_offd &&
4882 (sgn*A_ext_data[jj1]) < 0)
4883 P_offd_data[P_marker_offd[loc_col]] += distribute*
4884 A_ext_data[jj1];
4885 }
4886 }
4887 }
4888 else
4889 diagonal += A_offd_data[jj];
4890 }
4891 else if (CF_marker_offd[i1] != -3)
4892 {
4893 if(num_functions == 1 || dof_func[i] == dof_func_offd[i1])
4894 diagonal += A_offd_data[jj];
4895 }
4896 }
4897 }
4898 for(jj = jj_begin_row; jj < jj_end_row; jj++)
4899 P_diag_data[jj] /= -diagonal;
4900 for(jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
4901 P_offd_data[jj] /= -diagonal;
4902 }
4903 strong_f_marker--;
4904 }
4905
4906 P = hypre_ParCSRMatrixCreate(comm,
4907 hypre_ParCSRMatrixGlobalNumRows(A),
4908 total_global_cpts,
4909 hypre_ParCSRMatrixColStarts(A),
4910 num_cpts_global,
4911 0,
4912 P_diag_i[n_fine],
4913 P_offd_i[n_fine]);
4914
4915 P_diag = hypre_ParCSRMatrixDiag(P);
4916 hypre_CSRMatrixData(P_diag) = P_diag_data;
4917 hypre_CSRMatrixI(P_diag) = P_diag_i;
4918 hypre_CSRMatrixJ(P_diag) = P_diag_j;
4919 P_offd = hypre_ParCSRMatrixOffd(P);
4920 hypre_CSRMatrixData(P_offd) = P_offd_data;
4921 hypre_CSRMatrixI(P_offd) = P_offd_i;
4922 hypre_CSRMatrixJ(P_offd) = P_offd_j;
4923 hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
4924
4925 /* Compress P, removing coefficients smaller than trunc_factor * Max */
4926 if (trunc_factor != 0.0 || max_elmts > 0)
4927 {
4928 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
4929 P_diag_data = hypre_CSRMatrixData(P_diag);
4930 P_diag_i = hypre_CSRMatrixI(P_diag);
4931 P_diag_j = hypre_CSRMatrixJ(P_diag);
4932 P_offd_data = hypre_CSRMatrixData(P_offd);
4933 P_offd_i = hypre_CSRMatrixI(P_offd);
4934 P_offd_j = hypre_CSRMatrixJ(P_offd);
4935 P_diag_size = P_diag_i[n_fine];
4936 P_offd_size = P_offd_i[n_fine];
4937 }
4938
4939 /* This builds col_map, col_map should be monotone increasing and contain
4940 * global numbers. */
4941 num_cols_P_offd = 0;
4942 if(P_offd_size)
4943 {
4944 num_cols_P_offd = 0;
4945 for (i=0; i < P_offd_size; i++)
4946 {
4947 index = P_offd_j[i];
4948 if(tmp_CF_marker_offd[index] == 1)
4949 {
4950 num_cols_P_offd++;
4951 tmp_CF_marker_offd[index] = 2;
4952 }
4953 }
4954 if (num_cols_P_offd)
4955 {
4956 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd);
4957 tmp_map_offd = hypre_CTAlloc(int, num_cols_P_offd);
4958 }
4959
4960 index = 0;
4961 for(i = 0; i < num_cols_P_offd; i++)
4962 {
4963 while( tmp_CF_marker_offd[index] == -1) index++;
4964 col_map_offd_P[i] = fine_to_coarse_offd[index];
4965 tmp_map_offd[i] = index++;
4966 }
4967
4968 hypre_BigQsortbi(col_map_offd_P, tmp_map_offd, 0, num_cols_P_offd-1);
4969
4970 for (i = 0; i < num_cols_P_offd; i++)
4971 tmp_CF_marker_offd[tmp_map_offd[i]] = i;
4972
4973 for(i = 0; i < P_offd_size; i++)
4974 P_offd_j[i] = tmp_CF_marker_offd[P_offd_j[i]];
4975 }
4976
4977 if (num_cols_P_offd)
4978 {
4979 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
4980 hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
4981 hypre_TFree(tmp_map_offd);
4982 }
4983
4984#ifdef HYPRE_NO_GLOBAL_PARTITION
4985 hypre_NewCommPkgCreate(P);
4986#else
4987 hypre_MatvecCommPkgCreate(P);
4988#endif
4989
4990 for (i=0; i < n_fine; i++)
4991 if (CF_marker[i] == -3) CF_marker[i] = -1;
4992
4993 *P_ptr = P;
4994
4995 /* Deallocate memory */
4996 hypre_TFree(fine_to_coarse);
4997 hypre_TFree(P_marker);
4998
4999 if (num_procs > 1)
5000 {
5001
5002 hypre_BigCSRMatrixDestroy(Sop);
5003 hypre_BigCSRMatrixDestroy(A_ext);
5004 hypre_TFree(fine_to_coarse_offd);
5005 hypre_TFree(P_marker_offd);
5006 hypre_TFree(CF_marker_offd);
5007 hypre_TFree(tmp_CF_marker_offd);
5008 if(num_functions > 1)
5009 hypre_TFree(dof_func_offd);
5010 hypre_TFree(found);
5011
5012 hypre_MatvecCommPkgDestroy(extend_comm_pkg);
5013
5014 }
5015
5016 return hypre_error_flag;
5017}
5018
5019/*---------------------------------------------------------------------------
5020 * hypre_BoomerAMGBuildExtInterp
5021 * Comment:
5022 *--------------------------------------------------------------------------*/
5023int
5024hypre_BoomerAMGBuildExtInterp(hypre_ParCSRMatrix *A, int *CF_marker,
5025 hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
5026 int num_functions, int *dof_func, int debug_flag,
5027 double trunc_factor, int max_elmts,
5028 int *col_offd_S_to_A,
5029 hypre_ParCSRMatrix **P_ptr)
5030{
5031 /* Communication Variables */
5032 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
5033 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
5034
5035
5036 int my_id, num_procs;
5037
5038 /* Variables to store input variables */
5039 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
5040 double *A_diag_data = hypre_CSRMatrixData(A_diag);
5041 int *A_diag_i = hypre_CSRMatrixI(A_diag);
5042 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
5043
5044 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
5045 double *A_offd_data = hypre_CSRMatrixData(A_offd);
5046 int *A_offd_i = hypre_CSRMatrixI(A_offd);
5047 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
5048
5049 int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
5050 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
5051 int n_fine = hypre_CSRMatrixNumRows(A_diag);
5052 HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstRowIndex(A);
5053 int local_numrows = hypre_CSRMatrixNumRows(A_diag);
5054 HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt) local_numrows;
5055 HYPRE_BigInt total_global_cpts, my_first_cpt;
5056
5057 /* Variables to store strong connection matrix info */
5058 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
5059 int *S_diag_i = hypre_CSRMatrixI(S_diag);
5060 int *S_diag_j = hypre_CSRMatrixJ(S_diag);
5061
5062 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
5063 int *S_offd_i = hypre_CSRMatrixI(S_offd);
5064 int *S_offd_j = hypre_CSRMatrixJ(S_offd);
5065
5066 /* Interpolation matrix P */
5067 hypre_ParCSRMatrix *P;
5068 hypre_CSRMatrix *P_diag;
5069 hypre_CSRMatrix *P_offd;
5070
5071 double *P_diag_data = NULL;
5072 int *P_diag_i, *P_diag_j = NULL;
5073 double *P_offd_data = NULL;
5074 int *P_offd_i, *P_offd_j = NULL;
5075
5076 HYPRE_BigInt *col_map_offd_P;
5077 int *tmp_map_offd;
5078 int P_diag_size;
5079 int P_offd_size;
5080 int *P_marker = NULL;
5081 int *P_marker_offd = NULL;
5082 int *CF_marker_offd = NULL;
5083 int *tmp_CF_marker_offd = NULL;
5084 int *dof_func_offd = NULL;
5085
5086 /* Full row information for columns of A that are off diag*/
5087 hypre_BigCSRMatrix *A_ext;
5088 double *A_ext_data;
5089 int *A_ext_i;
5090 HYPRE_BigInt *big_A_ext_j;
5091 int *A_ext_j;
5092
5093 int *fine_to_coarse = NULL;
5094 HYPRE_BigInt *fine_to_coarse_offd = NULL;
5095 HYPRE_BigInt *found;
5096
5097 int num_cols_P_offd;
5098 int newoff, loc_col;
5099 int A_ext_rows, full_off_procNodes;
5100
5101 hypre_BigCSRMatrix *Sop;
5102 int *Sop_i;
5103 HYPRE_BigInt *big_Sop_j;
5104 int *Sop_j;
5105
5106 int Soprows, sgn;
5107
5108 /* Variables to keep count of interpolatory points */
5109 int jj_counter, jj_counter_offd;
5110 int jj_begin_row, jj_end_row;
5111 int jj_begin_row_offd = 0;
5112 int jj_end_row_offd = 0;
5113 int coarse_counter, coarse_counter_offd;
5114
5115 /* Interpolation weight variables */
5116 double sum, diagonal, distribute;
5117 int strong_f_marker = -2;
5118
5119 /* Loop variables */
5120 int index;
5121 int start_indexing = 0;
5122 int i, i1, i2, jj, kk, k1, jj1;
5123
5124 /* Definitions */
5125 double zero = 0.0;
5126 double one = 1.0;
5127 double wall_time;
5128
5129
5130 hypre_ParCSRCommPkg *extend_comm_pkg = NULL;
5131
5132 if (debug_flag==4) wall_time = time_getWallclockSeconds();
5133
5134 /* BEGIN */
5135 MPI_Comm_size(comm, &num_procs);
5136 MPI_Comm_rank(comm,&my_id);
5137
5138#ifdef HYPRE_NO_GLOBAL_PARTITION
5139 my_first_cpt = num_cpts_global[0];
5140 if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
5141 MPI_Bcast(&total_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
5142#else
5143 my_first_cpt = num_cpts_global[my_id];
5144 total_global_cpts = num_cpts_global[num_procs];
5145#endif
5146
5147 if (!comm_pkg)
5148 {
5149#ifdef HYPRE_NO_GLOBAL_PARTITION
5150 hypre_NewCommPkgCreate(A);
5151#else
5152 hypre_MatvecCommPkgCreate(A);
5153#endif
5154 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
5155 }
5156
5157 /* Set up off processor information (specifically for neighbors of
5158 * neighbors */
5159 newoff = 0;
5160 full_off_procNodes = 0;
5161 if (num_procs > 1)
5162 {
5163 /*----------------------------------------------------------------------
5164 * Get the off processors rows for A and S, associated with columns in
5165 * A_offd and S_offd.
5166 *---------------------------------------------------------------------*/
5167 A_ext = hypre_ParCSRMatrixExtractBigExt(A,A,1);
5168 A_ext_i = hypre_BigCSRMatrixI(A_ext);
5169 big_A_ext_j = hypre_BigCSRMatrixJ(A_ext);
5170 A_ext_data = hypre_BigCSRMatrixData(A_ext);
5171 A_ext_rows = hypre_BigCSRMatrixNumRows(A_ext);
5172
5173 Sop = hypre_ParCSRMatrixExtractBigExt(S,A,0);
5174 Sop_i = hypre_BigCSRMatrixI(Sop);
5175 big_Sop_j = hypre_BigCSRMatrixJ(Sop);
5176 Soprows = hypre_BigCSRMatrixNumRows(Sop);
5177
5178 /* Find nodes that are neighbors of neighbors, not found in offd */
5179 newoff = new_offd_nodes(&found, A_ext_rows, A_ext_i, big_A_ext_j,
5180 &A_ext_j,
5181 Soprows, col_map_offd, col_1, col_n,
5182 Sop_i, big_Sop_j, &Sop_j, CF_marker, comm_pkg);
5183
5184 hypre_BigCSRMatrixJ(A_ext) = NULL;
5185 hypre_BigCSRMatrixJ(Sop) = NULL;
5186
5187 if(newoff >= 0)
5188 full_off_procNodes = newoff + num_cols_A_offd;
5189 else
5190 return(1);
5191
5192 /* Possibly add new points and new processors to the comm_pkg, all
5193 * processors need new_comm_pkg */
5194
5195 /* AHB - create a new comm package just for extended info -
5196 this will work better with the assumed partition*/
5197 hypre_ParCSRFindExtendCommPkg(A, newoff, found,
5198 &extend_comm_pkg);
5199
5200 CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
5201
5202 if (num_functions > 1 && full_off_procNodes > 0)
5203 dof_func_offd = hypre_CTAlloc(int, full_off_procNodes);
5204
5205 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, CF_marker,
5206 full_off_procNodes, CF_marker_offd);
5207
5208 if(num_functions > 1)
5209 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, dof_func,
5210 full_off_procNodes, dof_func_offd);
5211 }
5212
5213
5214 /*-----------------------------------------------------------------------
5215 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
5216 *-----------------------------------------------------------------------*/
5217
5218 /*-----------------------------------------------------------------------
5219 * Intialize counters and allocate mapping vector.
5220 *-----------------------------------------------------------------------*/
5221
5222 P_diag_i = hypre_CTAlloc(int, n_fine+1);
5223 P_offd_i = hypre_CTAlloc(int, n_fine+1);
5224
5225 if (n_fine)
5226 {
5227 fine_to_coarse = hypre_CTAlloc(int, n_fine);
5228 P_marker = hypre_CTAlloc(int, n_fine);
5229 }
5230
5231 if (full_off_procNodes)
5232 {
5233 P_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
5234 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, full_off_procNodes);
5235 tmp_CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
5236 }
5237
5238 for (i=0; i < full_off_procNodes; i++)
5239 {
5240 P_marker_offd[i] = -1;
5241 tmp_CF_marker_offd[i] = -1;
5242 fine_to_coarse_offd[i] = -1;
5243 }
5244
5245 for (i=0; i < n_fine; i++)
5246 {
5247 P_marker[i] = -1;
5248 fine_to_coarse[i] = -1;
5249 }
5250
5251 jj_counter = start_indexing;
5252 jj_counter_offd = start_indexing;
5253 coarse_counter = 0;
5254 coarse_counter_offd = 0;
5255
5256 /*-----------------------------------------------------------------------
5257 * Loop over fine grid.
5258 *-----------------------------------------------------------------------*/
5259 for (i = 0; i < n_fine; i++)
5260 {
5261 P_diag_i[i] = jj_counter;
5262 if (num_procs > 1)
5263 P_offd_i[i] = jj_counter_offd;
5264
5265 if (CF_marker[i] >= 0)
5266 {
5267 jj_counter++;
5268 fine_to_coarse[i] = coarse_counter;
5269 coarse_counter++;
5270 }
5271
5272 /*--------------------------------------------------------------------
5273 * If i is an F-point, interpolation is from the C-points that
5274 * strongly influence i, or C-points that stronly influence F-points
5275 * that strongly influence i.
5276 *--------------------------------------------------------------------*/
5277 else if (CF_marker[i] != -3)
5278 {
5279 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
5280 {
5281 i1 = S_diag_j[jj];
5282 if (CF_marker[i1] > 0)
5283 { /* i1 is a C point */
5284 if (P_marker[i1] < P_diag_i[i])
5285 {
5286 P_marker[i1] = jj_counter;
5287 jj_counter++;
5288 }
5289 }
5290 else if (CF_marker[i1] != -3)
5291 { /* i1 is a F point, loop through it's strong neighbors */
5292 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
5293 {
5294 k1 = S_diag_j[kk];
5295 if (CF_marker[k1] > 0)
5296 {
5297 if(P_marker[k1] < P_diag_i[i])
5298 {
5299 P_marker[k1] = jj_counter;
5300 jj_counter++;
5301 }
5302 }
5303 }
5304 if(num_procs > 1)
5305 {
5306 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
5307 {
5308 if(col_offd_S_to_A)
5309 k1 = col_offd_S_to_A[S_offd_j[kk]];
5310 else
5311 k1 = S_offd_j[kk];
5312 if (CF_marker_offd[k1] > 0)
5313 {
5314 if(P_marker_offd[k1] < P_offd_i[i])
5315 {
5316 tmp_CF_marker_offd[k1] = 1;
5317 P_marker_offd[k1] = jj_counter_offd;
5318 jj_counter_offd++;
5319 }
5320 }
5321 }
5322 }
5323 }
5324 }
5325 /* Look at off diag strong connections of i */
5326 if (num_procs > 1)
5327 {
5328 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
5329 {
5330 i1 = S_offd_j[jj];
5331 if(col_offd_S_to_A)
5332 i1 = col_offd_S_to_A[i1];
5333 if (CF_marker_offd[i1] > 0)
5334 {
5335 if(P_marker_offd[i1] < P_offd_i[i])
5336 {
5337 tmp_CF_marker_offd[i1] = 1;
5338 P_marker_offd[i1] = jj_counter_offd;
5339 jj_counter_offd++;
5340 }
5341 }
5342 else if (CF_marker_offd[i1] != -3)
5343 { /* F point; look at neighbors of i1. Sop contains global col
5344 * numbers and entries that could be in S_diag or S_offd or
5345 * neither. */
5346 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
5347 {
5348 loc_col = Sop_j[kk];
5349 if(loc_col > -1)
5350 { /* In S_diag */
5351 if(CF_marker[loc_col] > 0)
5352 {
5353 if(P_marker[loc_col] < P_diag_i[i])
5354 {
5355 P_marker[loc_col] = jj_counter;
5356 jj_counter++;
5357 }
5358 }
5359 }
5360 else
5361 {
5362 loc_col = -loc_col - 1;
5363 if(CF_marker_offd[loc_col] > 0)
5364 {
5365 if(P_marker_offd[loc_col] < P_offd_i[i])
5366 {
5367 P_marker_offd[loc_col] = jj_counter_offd;
5368 tmp_CF_marker_offd[loc_col] = 1;
5369 jj_counter_offd++;
5370 }
5371 }
5372 }
5373 }
5374 }
5375 }
5376 }
5377 }
5378 }
5379
5380 if (debug_flag==4)
5381 {
5382 wall_time = time_getWallclockSeconds() - wall_time;
5383 printf("Proc = %d determine structure %f\n",
5384 my_id, wall_time);
5385 fflush(NULL);
5386 }
5387 /*-----------------------------------------------------------------------
5388 * Allocate arrays.
5389 *-----------------------------------------------------------------------*/
5390
5391 if (debug_flag== 4) wall_time = time_getWallclockSeconds();
5392
5393 P_diag_size = jj_counter;
5394 P_offd_size = jj_counter_offd;
5395 if (P_diag_size)
5396 {
5397 P_diag_j = hypre_CTAlloc(int, P_diag_size);
5398 P_diag_data = hypre_CTAlloc(double, P_diag_size);
5399 }
5400
5401 if (P_offd_size)
5402 {
5403 P_offd_j = hypre_CTAlloc(int, P_offd_size);
5404 P_offd_data = hypre_CTAlloc(double, P_offd_size);
5405 }
5406 P_diag_i[n_fine] = jj_counter;
5407 P_offd_i[n_fine] = jj_counter_offd;
5408
5409 jj_counter = start_indexing;
5410 jj_counter_offd = start_indexing;
5411
5412 /* Fine to coarse mapping */
5413 if(num_procs > 1)
5414 {
5415 big_insert_new_nodes(comm_pkg, extend_comm_pkg, fine_to_coarse,
5416 full_off_procNodes, my_first_cpt,
5417 fine_to_coarse_offd);
5418 }
5419
5420 for (i = 0; i < n_fine; i++)
5421 P_marker[i] = -1;
5422
5423 for (i = 0; i < full_off_procNodes; i++)
5424 P_marker_offd[i] = -1;
5425
5426 /*-----------------------------------------------------------------------
5427 * Loop over fine grid points.
5428 *-----------------------------------------------------------------------*/
5429 for (i = 0; i < n_fine; i++)
5430 {
5431 jj_begin_row = jj_counter;
5432 jj_begin_row_offd = jj_counter_offd;
5433
5434 /*--------------------------------------------------------------------
5435 * If i is a c-point, interpolation is the identity.
5436 *--------------------------------------------------------------------*/
5437
5438 if (CF_marker[i] >= 0)
5439 {
5440 P_diag_j[jj_counter] = fine_to_coarse[i];
5441 P_diag_data[jj_counter] = one;
5442 jj_counter++;
5443 }
5444
5445 /*--------------------------------------------------------------------
5446 * If i is an F-point, build interpolation.
5447 *--------------------------------------------------------------------*/
5448
5449 else if (CF_marker[i] != -3)
5450 {
5451 strong_f_marker--;
5452 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
5453 {
5454 i1 = S_diag_j[jj];
5455
5456 /*--------------------------------------------------------------
5457 * If neighbor i1 is a C-point, set column number in P_diag_j
5458 * and initialize interpolation weight to zero.
5459 *--------------------------------------------------------------*/
5460
5461 if (CF_marker[i1] >= 0)
5462 {
5463 if (P_marker[i1] < jj_begin_row)
5464 {
5465 P_marker[i1] = jj_counter;
5466 P_diag_j[jj_counter] = fine_to_coarse[i1];
5467 P_diag_data[jj_counter] = zero;
5468 jj_counter++;
5469 }
5470 }
5471 else if (CF_marker[i1] != -3)
5472 {
5473 P_marker[i1] = strong_f_marker;
5474 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
5475 {
5476 k1 = S_diag_j[kk];
5477 if (CF_marker[k1] > 0)
5478 {
5479 if(P_marker[k1] < jj_begin_row)
5480 {
5481 P_marker[k1] = jj_counter;
5482 P_diag_j[jj_counter] = fine_to_coarse[k1];
5483 P_diag_data[jj_counter] = zero;
5484 jj_counter++;
5485 }
5486 }
5487 }
5488 if(num_procs > 1)
5489 {
5490 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
5491 {
5492 if(col_offd_S_to_A)
5493 k1 = col_offd_S_to_A[S_offd_j[kk]];
5494 else
5495 k1 = S_offd_j[kk];
5496 if(CF_marker_offd[k1] > 0)
5497 {
5498 if(P_marker_offd[k1] < jj_begin_row_offd)
5499 {
5500 P_marker_offd[k1] = jj_counter_offd;
5501 P_offd_j[jj_counter_offd] = k1;
5502 P_offd_data[jj_counter_offd] = zero;
5503 jj_counter_offd++;
5504 }
5505 }
5506 }
5507 }
5508 }
5509 }
5510
5511 if ( num_procs > 1)
5512 {
5513 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
5514 {
5515 i1 = S_offd_j[jj];
5516 if(col_offd_S_to_A)
5517 i1 = col_offd_S_to_A[i1];
5518 if ( CF_marker_offd[i1] > 0)
5519 {
5520 if(P_marker_offd[i1] < jj_begin_row_offd)
5521 {
5522 P_marker_offd[i1] = jj_counter_offd;
5523 P_offd_j[jj_counter_offd] = i1;
5524 P_offd_data[jj_counter_offd] = zero;
5525 jj_counter_offd++;
5526 }
5527 }
5528 else if (CF_marker_offd[i1] != -3)
5529 {
5530 P_marker_offd[i1] = strong_f_marker;
5531 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
5532 {
5533 loc_col = Sop_j[kk];
5534 if(loc_col > -1)
5535 {
5536 if(CF_marker[loc_col] > 0)
5537 {
5538 if(P_marker[loc_col] < jj_begin_row)
5539 {
5540 P_marker[loc_col] = jj_counter;
5541 P_diag_j[jj_counter] = fine_to_coarse[loc_col];
5542 P_diag_data[jj_counter] = zero;
5543 jj_counter++;
5544 }
5545 }
5546 }
5547 else
5548 {
5549 loc_col = - loc_col - 1;
5550 if(CF_marker_offd[loc_col] > 0)
5551 {
5552 if(P_marker_offd[loc_col] < jj_begin_row_offd)
5553 {
5554 P_marker_offd[loc_col] = jj_counter_offd;
5555 P_offd_j[jj_counter_offd]=loc_col;
5556 P_offd_data[jj_counter_offd] = zero;
5557 jj_counter_offd++;
5558 }
5559 }
5560 }
5561 }
5562 }
5563 }
5564 }
5565
5566 jj_end_row = jj_counter;
5567 jj_end_row_offd = jj_counter_offd;
5568
5569 diagonal = A_diag_data[A_diag_i[i]];
5570
5571 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
5572 { /* i1 is a c-point and strongly influences i, accumulate
5573 * a_(i,i1) into interpolation weight */
5574 i1 = A_diag_j[jj];
5575 if (P_marker[i1] >= jj_begin_row)
5576 {
5577 P_diag_data[P_marker[i1]] += A_diag_data[jj];
5578 }
5579 else if(P_marker[i1] == strong_f_marker)
5580 {
5581 sum = zero;
5582 sgn = 1;
5583 if(A_diag_data[A_diag_i[i1]] < 0) sgn = -1;
5584 /* Loop over row of A for point i1 and calculate the sum
5585 * of the connections to c-points that strongly incluence i. */
5586 for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++)
5587 {
5588 i2 = A_diag_j[jj1];
5589 if((P_marker[i2] >= jj_begin_row) && (sgn*A_diag_data[jj1]) < 0)
5590 sum += A_diag_data[jj1];
5591 }
5592 if(num_procs > 1)
5593 {
5594 for(jj1 = A_offd_i[i1]; jj1< A_offd_i[i1+1]; jj1++)
5595 {
5596 i2 = A_offd_j[jj1];
5597 if(P_marker_offd[i2] >= jj_begin_row_offd &&
5598 (sgn*A_offd_data[jj1]) < 0)
5599 sum += A_offd_data[jj1];
5600 }
5601 }
5602 if(sum != 0)
5603 {
5604 distribute = A_diag_data[jj]/sum;
5605 /* Loop over row of A for point i1 and do the distribution */
5606 for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++)
5607 {
5608 i2 = A_diag_j[jj1];
5609 if(P_marker[i2] >= jj_begin_row && (sgn*A_diag_data[jj1]) < 0)
5610 P_diag_data[P_marker[i2]] +=
5611 distribute*A_diag_data[jj1];
5612 }
5613 if(num_procs > 1)
5614 {
5615 for(jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
5616 {
5617 i2 = A_offd_j[jj1];
5618 if(P_marker_offd[i2] >= jj_begin_row_offd &&
5619 (sgn*A_offd_data[jj1]) < 0)
5620 P_offd_data[P_marker_offd[i2]] +=
5621 distribute*A_offd_data[jj1];
5622 }
5623 }
5624 }
5625 else
5626 {
5627 diagonal += A_diag_data[jj];
5628 }
5629 }
5630 /* neighbor i1 weakly influences i, accumulate a_(i,i1) into
5631 * diagonal */
5632 else if (CF_marker[i1] != -3)
5633 {
5634 if(num_functions == 1 || dof_func[i] == dof_func[i1])
5635 diagonal += A_diag_data[jj];
5636 }
5637 }
5638 if(num_procs > 1)
5639 {
5640 for(jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
5641 {
5642 i1 = A_offd_j[jj];
5643 if(P_marker_offd[i1] >= jj_begin_row_offd)
5644 P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];
5645 else if(P_marker_offd[i1] == strong_f_marker)
5646 {
5647 sum = zero;
5648 sgn = 1;
5649 if(A_ext_data[A_ext_i[i1]] < 0) sgn = -1;
5650 for(jj1 = A_ext_i[i1]+1; jj1 < A_ext_i[i1+1]; jj1++)
5651 {
5652 loc_col = A_ext_j[jj1];
5653 if(loc_col > -1)
5654 { /* diag */
5655 if((P_marker[loc_col] >= jj_begin_row)
5656 && (sgn*A_ext_data[jj1]) < 0)
5657 sum += A_ext_data[jj1];
5658 }
5659 else
5660 {
5661 loc_col = - loc_col - 1;
5662 if(P_marker_offd[loc_col] >= jj_begin_row_offd &&
5663 (sgn*A_ext_data[jj1]) < 0)
5664 sum += A_ext_data[jj1];
5665 }
5666 }
5667 if(sum != 0)
5668 {
5669 distribute = A_offd_data[jj] / sum;
5670 for(jj1 = A_ext_i[i1]+1; jj1 < A_ext_i[i1+1]; jj1++)
5671 {
5672 loc_col = A_ext_j[jj1];
5673 if(loc_col > -1)
5674 { /* diag */
5675 if(P_marker[loc_col] >= jj_begin_row &&
5676 (sgn*A_ext_data[jj1]) < 0)
5677 P_diag_data[P_marker[loc_col]] += distribute*
5678 A_ext_data[jj1];
5679 }
5680 else
5681 {
5682 loc_col = - loc_col - 1;
5683 if(P_marker_offd[loc_col] >= jj_begin_row_offd &&
5684 (sgn*A_ext_data[jj1]) < 0)
5685 P_offd_data[P_marker_offd[loc_col]] += distribute*
5686 A_ext_data[jj1];
5687 }
5688 }
5689 }
5690 else
5691 {
5692 diagonal += A_offd_data[jj];
5693 }
5694 }
5695 else if (CF_marker_offd[i1] != -3)
5696 {
5697 if(num_functions == 1 || dof_func[i] == dof_func_offd[i1])
5698 diagonal += A_offd_data[jj];
5699 }
5700 }
5701 }
5702 for(jj = jj_begin_row; jj < jj_end_row; jj++)
5703 P_diag_data[jj] /= -diagonal;
5704 for(jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
5705 P_offd_data[jj] /= -diagonal;
5706 }
5707 strong_f_marker--;
5708 }
5709
5710 if (debug_flag==4)
5711 {
5712 wall_time = time_getWallclockSeconds() - wall_time;
5713 printf("Proc = %d fill structure %f\n",
5714 my_id, wall_time);
5715 fflush(NULL);
5716 }
5717 /*-----------------------------------------------------------------------
5718 * Allocate arrays.
5719 *-----------------------------------------------------------------------*/
5720
5721 P = hypre_ParCSRMatrixCreate(comm,
5722 hypre_ParCSRMatrixGlobalNumRows(A),
5723 total_global_cpts,
5724 hypre_ParCSRMatrixColStarts(A),
5725 num_cpts_global,
5726 0,
5727 P_diag_i[n_fine],
5728 P_offd_i[n_fine]);
5729
5730 P_diag = hypre_ParCSRMatrixDiag(P);
5731 hypre_CSRMatrixData(P_diag) = P_diag_data;
5732 hypre_CSRMatrixI(P_diag) = P_diag_i;
5733 hypre_CSRMatrixJ(P_diag) = P_diag_j;
5734 P_offd = hypre_ParCSRMatrixOffd(P);
5735 hypre_CSRMatrixData(P_offd) = P_offd_data;
5736 hypre_CSRMatrixI(P_offd) = P_offd_i;
5737 hypre_CSRMatrixJ(P_offd) = P_offd_j;
5738 hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
5739
5740 /* Compress P, removing coefficients smaller than trunc_factor * Max */
5741 if (trunc_factor != 0.0 || max_elmts > 0)
5742 {
5743 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
5744 P_diag_data = hypre_CSRMatrixData(P_diag);
5745 P_diag_i = hypre_CSRMatrixI(P_diag);
5746 P_diag_j = hypre_CSRMatrixJ(P_diag);
5747 P_offd_data = hypre_CSRMatrixData(P_offd);
5748 P_offd_i = hypre_CSRMatrixI(P_offd);
5749 P_offd_j = hypre_CSRMatrixJ(P_offd);
5750 P_diag_size = P_diag_i[n_fine];
5751 P_offd_size = P_offd_i[n_fine];
5752 }
5753
5754 /* This builds col_map, col_map should be monotone increasing and contain
5755 * global numbers. */
5756 num_cols_P_offd = 0;
5757 if(P_offd_size)
5758 {
5759 num_cols_P_offd = 0;
5760 for (i=0; i < P_offd_size; i++)
5761 {
5762 index = P_offd_j[i];
5763 if (tmp_CF_marker_offd[index] == 1)
5764 {
5765 num_cols_P_offd++;
5766 tmp_CF_marker_offd[index] = 2;
5767 }
5768 }
5769
5770 if (num_cols_P_offd)
5771 {
5772 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd);
5773 tmp_map_offd = hypre_CTAlloc(int, num_cols_P_offd);
5774 }
5775
5776 index = 0;
5777 for(i = 0; i < num_cols_P_offd; i++)
5778 {
5779 while( tmp_CF_marker_offd[index] != 2) index++;
5780 /*printf("index %d tmp_CF %d\n", index, tmp_CF_marker_offd[index]);*/
5781 col_map_offd_P[i] = fine_to_coarse_offd[index];
5782 tmp_map_offd[i] = index++;
5783 }
5784
5785 hypre_BigQsortbi(col_map_offd_P, tmp_map_offd, 0, num_cols_P_offd-1);
5786
5787 for (i = 0; i < num_cols_P_offd; i++)
5788 tmp_CF_marker_offd[tmp_map_offd[i]] = i;
5789
5790 for(i = 0; i < P_offd_size; i++)
5791 P_offd_j[i] = tmp_CF_marker_offd[P_offd_j[i]];
5792 }
5793
5794 if (num_cols_P_offd)
5795 {
5796 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
5797 hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
5798 hypre_TFree(tmp_map_offd);
5799 }
5800
5801#ifdef HYPRE_NO_GLOBAL_PARTITION
5802 hypre_NewCommPkgCreate(P);
5803#else
5804 hypre_MatvecCommPkgCreate(P);
5805#endif
5806
5807 for (i=0; i < n_fine; i++)
5808 if (CF_marker[i] == -3) CF_marker[i] = -1;
5809
5810 *P_ptr = P;
5811
5812 /* Deallocate memory */
5813 hypre_TFree(fine_to_coarse);
5814 hypre_TFree(P_marker);
5815
5816 if (num_procs > 1)
5817 {
5818 hypre_BigCSRMatrixDestroy(Sop);
5819 hypre_BigCSRMatrixDestroy(A_ext);
5820 hypre_TFree(fine_to_coarse_offd);
5821 hypre_TFree(P_marker_offd);
5822 hypre_TFree(CF_marker_offd);
5823 hypre_TFree(tmp_CF_marker_offd);
5824 if(num_functions > 1)
5825 hypre_TFree(dof_func_offd);
5826 hypre_TFree(found);
5827
5828
5829 hypre_MatvecCommPkgDestroy(extend_comm_pkg);
5830
5831
5832 }
5833
5834 return hypre_error_flag;
5835}
5836
Note: See TracBrowser for help on using the repository browser.