source: CIVL/examples/mpi-omp/AMG2013/parcsr_ls/partial.c

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 81.5 KB
Line 
1/*BHEADER**********************************************************************
2 * Copyright (c) 2008, Lawrence Livermore National Security, LLC.
3 * Produced at the Lawrence Livermore National Laboratory.
4 * This file is part of HYPRE. See file COPYRIGHT for details.
5 *
6 * HYPRE is free software; you can redistribute it and/or modify it under the
7 * terms of the GNU Lesser General Public License (as published by the Free
8 * Software Foundation) version 2.1 dated February 1999.
9 *
10 * $Revision: 2.4 $
11 ***********************************************************************EHEADER*/
12
13
14#include "headers.h"
15#include "aux_interp.h"
16
17#define MAX_C_CONNECTIONS 100
18#define HAVE_COMMON_C 1
19
20/*---------------------------------------------------------------------------
21 * hypre_BoomerAMGBuildPartialStdInterp
22 * Comment: The interpolatory weighting can be changed with the sep_weight
23 * variable. This can enable not separating negative and positive
24 * off diagonals in the weight formula.
25 *--------------------------------------------------------------------------*/
26
27int
28hypre_BoomerAMGBuildPartialStdInterp(hypre_ParCSRMatrix *A, int *CF_marker,
29 hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
30 HYPRE_BigInt *num_old_cpts_global,
31 int num_functions, int *dof_func, int debug_flag,
32 double trunc_factor, int max_elmts,
33 int sep_weight, int *col_offd_S_to_A,
34 hypre_ParCSRMatrix **P_ptr)
35{
36 /* Communication Variables */
37 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
38 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
39 int my_id, num_procs;
40
41 /* Variables to store input variables */
42 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
43 double *A_diag_data = hypre_CSRMatrixData(A_diag);
44 int *A_diag_i = hypre_CSRMatrixI(A_diag);
45 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
46
47 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
48 double *A_offd_data = hypre_CSRMatrixData(A_offd);
49 int *A_offd_i = hypre_CSRMatrixI(A_offd);
50 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
51
52 int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
53 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
54 int n_fine = hypre_CSRMatrixNumRows(A_diag);
55 HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstRowIndex(A);
56 int local_numrows = hypre_CSRMatrixNumRows(A_diag);
57 HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt)local_numrows;
58 HYPRE_BigInt total_global_cpts, my_first_cpt;
59 HYPRE_BigInt total_old_global_cpts, my_first_old_cpt;
60 int n_coarse, n_coarse_old;
61
62 /* Variables to store strong connection matrix info */
63 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
64 int *S_diag_i = hypre_CSRMatrixI(S_diag);
65 int *S_diag_j = hypre_CSRMatrixJ(S_diag);
66
67 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
68 int *S_offd_i = hypre_CSRMatrixI(S_offd);
69 int *S_offd_j = hypre_CSRMatrixJ(S_offd);
70
71 /* Interpolation matrix P */
72 hypre_ParCSRMatrix *P;
73 hypre_CSRMatrix *P_diag;
74 hypre_CSRMatrix *P_offd;
75
76 double *P_diag_data = NULL;
77 int *P_diag_i, *P_diag_j = NULL;
78 double *P_offd_data = NULL;
79 int *P_offd_i, *P_offd_j = NULL;
80
81 HYPRE_BigInt *col_map_offd_P;
82 int *tmp_map_offd;
83 int P_diag_size;
84 int P_offd_size;
85 int *P_marker = NULL;
86 int *P_marker_offd = NULL;
87 int *CF_marker_offd = NULL;
88 int *tmp_CF_marker_offd = NULL;
89 int *dof_func_offd = NULL;
90
91 /* Full row information for columns of A that are off diag*/
92 hypre_BigCSRMatrix *A_ext;
93 double *A_ext_data;
94 int *A_ext_i;
95 HYPRE_BigInt *big_A_ext_j;
96 int *A_ext_j;
97
98 int *fine_to_coarse = NULL;
99 int *old_coarse_to_fine = NULL;
100 HYPRE_BigInt *fine_to_coarse_offd = NULL;
101 HYPRE_BigInt *found;
102
103 int num_cols_P_offd;
104 int newoff, loc_col;
105 int A_ext_rows, full_off_procNodes;
106
107 hypre_BigCSRMatrix *Sop;
108 int *Sop_i;
109 HYPRE_BigInt *big_Sop_j;
110 int *Sop_j;
111
112 int Soprows;
113
114 /* Variables to keep count of interpolatory points */
115 int jj_counter, jj_counter_offd;
116 int jj_begin_row, jj_end_row;
117 int jj_begin_row_offd = 0;
118 int jj_end_row_offd = 0;
119 int coarse_counter, coarse_counter_offd;
120 int *ihat = NULL;
121 int *ihat_offd = NULL;
122 int *ipnt = NULL;
123 int *ipnt_offd = NULL;
124 int strong_f_marker = -2;
125
126 /* Interpolation weight variables */
127 double *ahat_offd = NULL;
128 double *ahat = NULL;
129 double sum_pos, sum_pos_C, sum_neg, sum_neg_C, sum, sum_C;
130 double diagonal, distribute;
131 double alfa, beta;
132
133 /* Loop variables */
134 int index, cnt, old_cnt;
135 int start_indexing = 0;
136 int i, ii, i1, j1, jj, kk, k1;
137 int cnt_c, cnt_f, cnt_c_offd, cnt_f_offd, indx;
138
139 /* Definitions */
140 double zero = 0.0;
141 double one = 1.0;
142 double wall_time;
143 double wall_1 = 0;
144 double wall_2 = 0;
145 double wall_3 = 0;
146
147
148 hypre_ParCSRCommPkg *extend_comm_pkg = NULL;
149
150 if (debug_flag== 4) wall_time = time_getWallclockSeconds();
151
152 /* BEGIN */
153 MPI_Comm_size(comm, &num_procs);
154 MPI_Comm_rank(comm,&my_id);
155
156#ifdef HYPRE_NO_GLOBAL_PARTITION
157 my_first_cpt = num_cpts_global[0];
158 my_first_old_cpt = num_old_cpts_global[0];
159 n_coarse_old = (int)(num_old_cpts_global[1] - num_old_cpts_global[0]);
160 n_coarse = (int)(num_cpts_global[1] - num_cpts_global[0]);
161 if (my_id == (num_procs -1))
162 {
163 total_global_cpts = num_cpts_global[1];
164 total_old_global_cpts = num_old_cpts_global[1];
165 }
166 MPI_Bcast(&total_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
167 MPI_Bcast(&total_old_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
168#else
169 my_first_cpt = num_cpts_global[my_id];
170 my_first_old_cpt = num_old_cpts_global[my_id];
171 total_global_cpts = num_cpts_global[num_procs];
172 total_old_global_cpts = num_old_cpts_global[num_procs];
173 n_coarse_old = (int)(num_old_cpts_global[my_id+1] - num_old_cpts_global[my_id]);
174 n_coarse = (int)(num_cpts_global[my_id+1] - num_cpts_global[my_id]);
175#endif
176
177 if (!comm_pkg)
178 {
179#ifdef HYPRE_NO_GLOBAL_PARTITION
180 hypre_NewCommPkgCreate(A);
181#else
182 hypre_MatvecCommPkgCreate(A);
183#endif
184 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
185 }
186
187 /* Set up off processor information (specifically for neighbors of
188 * neighbors */
189 newoff = 0;
190 full_off_procNodes = 0;
191 if (num_procs > 1)
192 {
193 /*----------------------------------------------------------------------
194 * Get the off processors rows for A and S, associated with columns in
195 * A_offd and S_offd.
196 *---------------------------------------------------------------------*/
197 A_ext = hypre_ParCSRMatrixExtractBigExt(A,A,1);
198 A_ext_i = hypre_BigCSRMatrixI(A_ext);
199 big_A_ext_j = hypre_BigCSRMatrixJ(A_ext);
200 A_ext_data = hypre_BigCSRMatrixData(A_ext);
201 A_ext_rows = hypre_BigCSRMatrixNumRows(A_ext);
202
203 Sop = hypre_ParCSRMatrixExtractBigExt(S,A,0);
204 Sop_i = hypre_BigCSRMatrixI(Sop);
205 big_Sop_j = hypre_BigCSRMatrixJ(Sop);
206 Soprows = hypre_BigCSRMatrixNumRows(Sop);
207
208 /* Find nodes that are neighbors of neighbors, not found in offd */
209 newoff = new_offd_nodes(&found, A_ext_rows, A_ext_i, big_A_ext_j,
210 &A_ext_j, Soprows, col_map_offd, col_1,
211 col_n, Sop_i, big_Sop_j, &Sop_j, CF_marker,
212 comm_pkg);
213
214 hypre_BigCSRMatrixJ(A_ext) = NULL;
215 hypre_BigCSRMatrixJ(Sop) = NULL;
216
217 if(newoff >= 0)
218 full_off_procNodes = newoff + num_cols_A_offd;
219 else
220 return(1);
221
222 /* Possibly add new points and new processors to the comm_pkg, all
223 * processors need new_comm_pkg */
224
225 /* AHB - create a new comm package just for extended info -
226 this will work better with the assumed partition*/
227 hypre_ParCSRFindExtendCommPkg(A, newoff, found,
228 &extend_comm_pkg);
229
230 CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
231
232 if (num_functions > 1 && full_off_procNodes > 0)
233 dof_func_offd = hypre_CTAlloc(int, full_off_procNodes);
234
235 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, CF_marker,
236 full_off_procNodes, CF_marker_offd);
237
238 if(num_functions > 1)
239 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, dof_func,
240 full_off_procNodes, dof_func_offd);
241 }
242
243
244 /*-----------------------------------------------------------------------
245 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
246 *-----------------------------------------------------------------------*/
247
248 /*-----------------------------------------------------------------------
249 * Intialize counters and allocate mapping vector.
250 *-----------------------------------------------------------------------*/
251 P_diag_i = hypre_CTAlloc(int, n_coarse_old+1);
252 P_offd_i = hypre_CTAlloc(int, n_coarse_old+1);
253
254 if (n_coarse_old) old_coarse_to_fine = hypre_CTAlloc(int, n_coarse_old);
255 if (n_fine)
256 {
257 fine_to_coarse = hypre_CTAlloc(int, n_fine);
258 P_marker = hypre_CTAlloc(int, n_fine);
259 }
260
261 if (full_off_procNodes)
262 {
263 P_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
264 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, full_off_procNodes);
265 tmp_CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
266 }
267
268 for (i=0; i < full_off_procNodes; i++)
269 {
270 P_marker_offd[i] = -1;
271 tmp_CF_marker_offd[i] = -1;
272 fine_to_coarse_offd[i] = -1;
273 }
274
275 for (i=0; i < n_fine; i++)
276 {
277 P_marker[i] = -1;
278 fine_to_coarse[i] = -1;
279 }
280
281 jj_counter = start_indexing;
282 jj_counter_offd = start_indexing;
283 coarse_counter = 0;
284 coarse_counter_offd = 0;
285
286 cnt = 0;
287 old_cnt = 0;
288 for (i = 0; i < n_fine; i++)
289 {
290 fine_to_coarse[i] = -1;
291 if (CF_marker[i] == 1)
292 {
293 fine_to_coarse[i] = cnt++;
294 old_coarse_to_fine[old_cnt++] = i;
295 }
296 else if (CF_marker[i] == -2)
297 {
298 old_coarse_to_fine[old_cnt++] = i;
299 }
300 }
301
302 /*-----------------------------------------------------------------------
303 * Loop over fine grid.
304 *-----------------------------------------------------------------------*/
305 for (ii = 0; ii < n_coarse_old; ii++)
306 {
307 P_diag_i[ii] = jj_counter;
308 if (num_procs > 1)
309 P_offd_i[ii] = jj_counter_offd;
310
311 i = old_coarse_to_fine[ii];
312 if (CF_marker[i] > 0)
313 {
314 jj_counter++;
315 coarse_counter++;
316 }
317
318 /*--------------------------------------------------------------------
319 * If i is an F-point, interpolation is from the C-points that
320 * strongly influence i, or C-points that stronly influence F-points
321 * that strongly influence i.
322 *--------------------------------------------------------------------*/
323 else if (CF_marker[i] == -2)
324 {
325 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
326 {
327 i1 = S_diag_j[jj];
328 if (CF_marker[i1] > 0)
329 { /* i1 is a C point */
330 if (P_marker[i1] < P_diag_i[ii])
331 {
332 P_marker[i1] = jj_counter;
333 jj_counter++;
334 }
335 }
336 else if (CF_marker[i1] != -3)
337 { /* i1 is a F point, loop through it's strong neighbors */
338 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
339 {
340 k1 = S_diag_j[kk];
341 if (CF_marker[k1] > 0)
342 {
343 if(P_marker[k1] < P_diag_i[ii])
344 {
345 P_marker[k1] = jj_counter;
346 jj_counter++;
347 }
348 }
349 }
350 if(num_procs > 1)
351 {
352 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
353 {
354 if(col_offd_S_to_A)
355 k1 = col_offd_S_to_A[S_offd_j[kk]];
356 else
357 k1 = S_offd_j[kk];
358 if (CF_marker_offd[k1] > 0)
359 {
360 if(P_marker_offd[k1] < P_offd_i[ii])
361 {
362 tmp_CF_marker_offd[k1] = 1;
363 P_marker_offd[k1] = jj_counter_offd;
364 jj_counter_offd++;
365 }
366 }
367 }
368 }
369 }
370 }
371 /* Look at off diag strong connections of i */
372 if (num_procs > 1)
373 {
374 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
375 {
376 i1 = S_offd_j[jj];
377 if(col_offd_S_to_A)
378 i1 = col_offd_S_to_A[i1];
379 if (CF_marker_offd[i1] > 0)
380 {
381 if(P_marker_offd[i1] < P_offd_i[ii])
382 {
383 tmp_CF_marker_offd[i1] = 1;
384 P_marker_offd[i1] = jj_counter_offd;
385 jj_counter_offd++;
386 }
387 }
388 else if (CF_marker_offd[i1] != -3)
389 { /* F point; look at neighbors of i1. Sop contains global col
390 * numbers and entries that could be in S_diag or S_offd or
391 * neither. */
392 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
393 {
394 loc_col = Sop_j[kk];
395 if(loc_col > -1)
396 { /* In S_diag */
397 if(CF_marker[loc_col] > 0)
398 {
399 if(P_marker[loc_col] < P_diag_i[ii])
400 {
401 P_marker[loc_col] = jj_counter;
402 jj_counter++;
403 }
404 }
405 }
406 else
407 {
408 loc_col = - loc_col - 1;
409 if(CF_marker_offd[loc_col] > 0)
410 {
411 if(P_marker_offd[loc_col] < P_offd_i[ii])
412 {
413 P_marker_offd[loc_col] = jj_counter_offd;
414 tmp_CF_marker_offd[loc_col] = 1;
415 jj_counter_offd++;
416 }
417 }
418 }
419 }
420 }
421 }
422 }
423 }
424 }
425
426 if (debug_flag==4)
427 {
428 wall_time = time_getWallclockSeconds() - wall_time;
429 printf("Proc = %d determine structure %f\n",
430 my_id, wall_time);
431 fflush(NULL);
432 }
433 /*-----------------------------------------------------------------------
434 * Allocate arrays.
435 *-----------------------------------------------------------------------*/
436
437
438 P_diag_size = jj_counter;
439 P_offd_size = jj_counter_offd;
440
441 if (P_diag_size)
442 {
443 P_diag_j = hypre_CTAlloc(int, P_diag_size);
444 P_diag_data = hypre_CTAlloc(double, P_diag_size);
445 }
446
447 if (P_offd_size)
448 {
449 P_offd_j = hypre_CTAlloc(int, P_offd_size);
450 P_offd_data = hypre_CTAlloc(double, P_offd_size);
451 }
452
453 P_diag_i[n_coarse_old] = jj_counter;
454 P_offd_i[n_coarse_old] = jj_counter_offd;
455
456 jj_counter = start_indexing;
457 jj_counter_offd = start_indexing;
458
459 /* Fine to coarse mapping */
460 if(num_procs > 1)
461 {
462 big_insert_new_nodes(comm_pkg, extend_comm_pkg, fine_to_coarse,
463 full_off_procNodes, my_first_cpt,
464 fine_to_coarse_offd);
465 }
466
467 /* Initialize ahat, which is a modification to a, used in the standard
468 * interpolation routine. */
469 if (n_fine)
470 {
471 ahat = hypre_CTAlloc(double, n_fine);
472 ihat = hypre_CTAlloc(int, n_fine);
473 ipnt = hypre_CTAlloc(int, n_fine);
474 }
475 if (full_off_procNodes)
476 {
477 ahat_offd = hypre_CTAlloc(double, full_off_procNodes);
478 ihat_offd = hypre_CTAlloc(int, full_off_procNodes);
479 ipnt_offd = hypre_CTAlloc(int, full_off_procNodes);
480 }
481
482 for (i = 0; i < n_fine; i++)
483 {
484 P_marker[i] = -1;
485 ahat[i] = 0;
486 ihat[i] = -1;
487 }
488 for (i = 0; i < full_off_procNodes; i++)
489 {
490 P_marker_offd[i] = -1;
491 ahat_offd[i] = 0;
492 ihat_offd[i] = -1;
493 }
494
495 /*-----------------------------------------------------------------------
496 * Loop over fine grid points.
497 *-----------------------------------------------------------------------*/
498 for (ii = 0; ii < n_coarse_old; ii++)
499 {
500 jj_begin_row = jj_counter;
501 jj_begin_row_offd = jj_counter_offd;
502 i = old_coarse_to_fine[ii];
503
504 /*--------------------------------------------------------------------
505 * If i is a c-point, interpolation is the identity.
506 *--------------------------------------------------------------------*/
507
508 if (CF_marker[i] > 0)
509 {
510 P_diag_j[jj_counter] = fine_to_coarse[i];
511 P_diag_data[jj_counter] = one;
512 jj_counter++;
513 }
514
515 /*--------------------------------------------------------------------
516 * If i is an F-point, build interpolation.
517 *--------------------------------------------------------------------*/
518
519 else if (CF_marker[i] == -2)
520 {
521 if (debug_flag==4) wall_time = time_getWallclockSeconds();
522 strong_f_marker--;
523 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
524 {
525 i1 = S_diag_j[jj];
526
527 /*--------------------------------------------------------------
528 * If neighbor i1 is a C-point, set column number in P_diag_j
529 * and initialize interpolation weight to zero.
530 *--------------------------------------------------------------*/
531
532 if (CF_marker[i1] > 0)
533 {
534 if (P_marker[i1] < jj_begin_row)
535 {
536 P_marker[i1] = jj_counter;
537 P_diag_j[jj_counter] = i1;
538 P_diag_data[jj_counter] = zero;
539 jj_counter++;
540 }
541 }
542 else if (CF_marker[i1] != -3)
543 {
544 P_marker[i1] = strong_f_marker;
545 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
546 {
547 k1 = S_diag_j[kk];
548 if (CF_marker[k1] > 0)
549 {
550 if(P_marker[k1] < jj_begin_row)
551 {
552 P_marker[k1] = jj_counter;
553 P_diag_j[jj_counter] = k1;
554 P_diag_data[jj_counter] = zero;
555 jj_counter++;
556 }
557 }
558 }
559 if(num_procs > 1)
560 {
561 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
562 {
563 if(col_offd_S_to_A)
564 k1 = col_offd_S_to_A[S_offd_j[kk]];
565 else
566 k1 = S_offd_j[kk];
567 if(CF_marker_offd[k1] > 0)
568 {
569 if(P_marker_offd[k1] < jj_begin_row_offd)
570 {
571 P_marker_offd[k1] = jj_counter_offd;
572 P_offd_j[jj_counter_offd] = k1;
573 P_offd_data[jj_counter_offd] = zero;
574 jj_counter_offd++;
575 }
576 }
577 }
578 }
579 }
580 }
581
582 if ( num_procs > 1)
583 {
584 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
585 {
586 i1 = S_offd_j[jj];
587 if(col_offd_S_to_A)
588 i1 = col_offd_S_to_A[i1];
589 if ( CF_marker_offd[i1] > 0)
590 {
591 if(P_marker_offd[i1] < jj_begin_row_offd)
592 {
593 P_marker_offd[i1] = jj_counter_offd;
594 P_offd_j[jj_counter_offd]=i1;
595 P_offd_data[jj_counter_offd] = zero;
596 jj_counter_offd++;
597 }
598 }
599 else if (CF_marker_offd[i1] != -3)
600 {
601 P_marker_offd[i1] = strong_f_marker;
602 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
603 {
604 loc_col = Sop_j[kk];
605 if(loc_col > -1)
606 {
607 if(CF_marker[loc_col] > 0)
608 {
609 if(P_marker[loc_col] < jj_begin_row)
610 {
611 P_marker[loc_col] = jj_counter;
612 P_diag_j[jj_counter] = loc_col;
613 P_diag_data[jj_counter] = zero;
614 jj_counter++;
615 }
616 }
617 }
618 else
619 {
620 loc_col = -loc_col - 1;
621 if(CF_marker_offd[loc_col] > 0)
622 {
623 if(P_marker_offd[loc_col] < jj_begin_row_offd)
624 {
625 P_marker_offd[loc_col] = jj_counter_offd;
626 P_offd_j[jj_counter_offd]=loc_col;
627 P_offd_data[jj_counter_offd] = zero;
628 jj_counter_offd++;
629 }
630 }
631 }
632 }
633 }
634 }
635 }
636
637 jj_end_row = jj_counter;
638 jj_end_row_offd = jj_counter_offd;
639
640 if (debug_flag==4)
641 {
642 wall_time = time_getWallclockSeconds() - wall_time;
643 wall_1 += wall_time;
644 fflush(NULL);
645 }
646 if (debug_flag==4) wall_time = time_getWallclockSeconds();
647 cnt_c = 0;
648 cnt_f = jj_end_row-jj_begin_row;
649 cnt_c_offd = 0;
650 cnt_f_offd = jj_end_row_offd-jj_begin_row_offd;
651 ihat[i] = cnt_f;
652 ipnt[cnt_f] = i;
653 ahat[cnt_f++] = A_diag_data[A_diag_i[i]];
654 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
655 { /* i1 is direct neighbor */
656 i1 = A_diag_j[jj];
657 if (P_marker[i1] != strong_f_marker)
658 {
659 indx = ihat[i1];
660 if (indx > -1)
661 ahat[indx] += A_diag_data[jj];
662 else if (P_marker[i1] >= jj_begin_row)
663 {
664 ihat[i1] = cnt_c;
665 ipnt[cnt_c] = i1;
666 ahat[cnt_c++] += A_diag_data[jj];
667 }
668 else if (CF_marker[i1] != -3)
669 {
670 ihat[i1] = cnt_f;
671 ipnt[cnt_f] = i1;
672 ahat[cnt_f++] += A_diag_data[jj];
673 }
674 }
675 else
676 {
677 if(num_functions == 1 || dof_func[i] == dof_func[i1])
678 {
679 distribute = A_diag_data[jj]/A_diag_data[A_diag_i[i1]];
680 for (kk = A_diag_i[i1]+1; kk < A_diag_i[i1+1]; kk++)
681 {
682 k1 = A_diag_j[kk];
683 indx = ihat[k1];
684 if (indx > -1)
685 ahat[indx] -= A_diag_data[kk]*distribute;
686 else if (P_marker[k1] >= jj_begin_row)
687 {
688 ihat[k1] = cnt_c;
689 ipnt[cnt_c] = k1;
690 ahat[cnt_c++] -= A_diag_data[kk]*distribute;
691 }
692 else
693 {
694 ihat[k1] = cnt_f;
695 ipnt[cnt_f] = k1;
696 ahat[cnt_f++] -= A_diag_data[kk]*distribute;
697 }
698 }
699 if(num_procs > 1)
700 {
701 for (kk = A_offd_i[i1]; kk < A_offd_i[i1+1]; kk++)
702 {
703 k1 = A_offd_j[kk];
704 indx = ihat_offd[k1];
705 if(num_functions == 1 || dof_func[i1] == dof_func_offd[k1])
706 {
707 if (indx > -1)
708 ahat_offd[indx] -= A_offd_data[kk]*distribute;
709 else if (P_marker_offd[k1] >= jj_begin_row_offd)
710 {
711 ihat_offd[k1] = cnt_c_offd;
712 ipnt_offd[cnt_c_offd] = k1;
713 ahat_offd[cnt_c_offd++] -= A_offd_data[kk]*distribute;
714 }
715 else
716 {
717 ihat_offd[k1] = cnt_f_offd;
718 ipnt_offd[cnt_f_offd] = k1;
719 ahat_offd[cnt_f_offd++] -= A_offd_data[kk]*distribute;
720 }
721 }
722 }
723 }
724 }
725 }
726 }
727 if(num_procs > 1)
728 {
729 for(jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
730 {
731 i1 = A_offd_j[jj];
732 if(P_marker_offd[i1] != strong_f_marker)
733 {
734 indx = ihat_offd[i1];
735 if (indx > -1)
736 ahat_offd[indx] += A_offd_data[jj];
737 else if (P_marker_offd[i1] >= jj_begin_row_offd)
738 {
739 ihat_offd[i1] = cnt_c_offd;
740 ipnt_offd[cnt_c_offd] = i1;
741 ahat_offd[cnt_c_offd++] += A_offd_data[jj];
742 }
743 else if (CF_marker_offd[i1] != -3)
744 {
745 ihat_offd[i1] = cnt_f_offd;
746 ipnt_offd[cnt_f_offd] = i1;
747 ahat_offd[cnt_f_offd++] += A_offd_data[jj];
748 }
749 }
750 else
751 {
752 if(num_functions == 1 || dof_func[i] == dof_func_offd[i1])
753 {
754 distribute = A_offd_data[jj]/A_ext_data[A_ext_i[i1]];
755 for (kk = A_ext_i[i1]+1; kk < A_ext_i[i1+1]; kk++)
756 {
757 loc_col = A_ext_j[kk];
758 if(loc_col > -1)
759 { /*diag*/
760 indx = ihat[loc_col];
761 if (indx > -1)
762 ahat[indx] -= A_ext_data[kk]*distribute;
763 else if (P_marker[loc_col] >= jj_begin_row)
764 {
765 ihat[loc_col] = cnt_c;
766 ipnt[cnt_c] = loc_col;
767 ahat[cnt_c++] -= A_ext_data[kk]*distribute;
768 }
769 else
770 {
771 ihat[loc_col] = cnt_f;
772 ipnt[cnt_f] = loc_col;
773 ahat[cnt_f++] -= A_ext_data[kk]*distribute;
774 }
775 }
776 else
777 {
778 loc_col = - loc_col - 1;
779 if(num_functions == 1 ||
780 dof_func_offd[loc_col] == dof_func_offd[i1])
781 {
782 indx = ihat_offd[loc_col];
783 if (indx > -1)
784 ahat_offd[indx] -= A_ext_data[kk]*distribute;
785 else if(P_marker_offd[loc_col] >= jj_begin_row_offd)
786 {
787 ihat_offd[loc_col] = cnt_c_offd;
788 ipnt_offd[cnt_c_offd] = loc_col;
789 ahat_offd[cnt_c_offd++] -= A_ext_data[kk]*distribute;
790 }
791 else
792 {
793 ihat_offd[loc_col] = cnt_f_offd;
794 ipnt_offd[cnt_f_offd] = loc_col;
795 ahat_offd[cnt_f_offd++] -= A_ext_data[kk]*distribute;
796 }
797 }
798 }
799 }
800 }
801 }
802 }
803 }
804 if (debug_flag==4)
805 {
806 wall_time = time_getWallclockSeconds() - wall_time;
807 wall_2 += wall_time;
808 fflush(NULL);
809 }
810
811 if (debug_flag==4) wall_time = time_getWallclockSeconds();
812 diagonal = ahat[cnt_c];
813 ahat[cnt_c] = 0;
814 sum_pos = 0;
815 sum_pos_C = 0;
816 sum_neg = 0;
817 sum_neg_C = 0;
818 sum = 0;
819 sum_C = 0;
820 if(sep_weight == 1)
821 {
822 for (jj=0; jj < cnt_c; jj++)
823 {
824 if (ahat[jj] > 0)
825 {
826 sum_pos_C += ahat[jj];
827 }
828 else
829 {
830 sum_neg_C += ahat[jj];
831 }
832 }
833 if(num_procs > 1)
834 {
835 for (jj=0; jj < cnt_c_offd; jj++)
836 {
837 if (ahat_offd[jj] > 0)
838 {
839 sum_pos_C += ahat_offd[jj];
840 }
841 else
842 {
843 sum_neg_C += ahat_offd[jj];
844 }
845 }
846 }
847 sum_pos = sum_pos_C;
848 sum_neg = sum_neg_C;
849 for (jj=cnt_c+1; jj < cnt_f; jj++)
850 {
851 if (ahat[jj] > 0)
852 {
853 sum_pos += ahat[jj];
854 }
855 else
856 {
857 sum_neg += ahat[jj];
858 }
859 ahat[jj] = 0;
860 }
861 if(num_procs > 1)
862 {
863 for (jj=cnt_c_offd; jj < cnt_f_offd; jj++)
864 {
865 if (ahat_offd[jj] > 0)
866 {
867 sum_pos += ahat_offd[jj];
868 }
869 else
870 {
871 sum_neg += ahat_offd[jj];
872 }
873 ahat_offd[jj] = 0;
874 }
875 }
876 if (sum_neg_C) alfa = sum_neg/sum_neg_C/diagonal;
877 if (sum_pos_C) beta = sum_pos/sum_pos_C/diagonal;
878
879 /*-----------------------------------------------------------------
880 * Set interpolation weight by dividing by the diagonal.
881 *-----------------------------------------------------------------*/
882
883 for (jj = jj_begin_row; jj < jj_end_row; jj++)
884 {
885 j1 = ihat[P_diag_j[jj]];
886 if (ahat[j1] > 0)
887 P_diag_data[jj] = -beta*ahat[j1];
888 else
889 P_diag_data[jj] = -alfa*ahat[j1];
890
891 P_diag_j[jj] = fine_to_coarse[P_diag_j[jj]];
892 ahat[j1] = 0;
893 }
894 for (jj=0; jj < cnt_f; jj++)
895 ihat[ipnt[jj]] = -1;
896 if(num_procs > 1)
897 {
898 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
899 {
900 j1 = ihat_offd[P_offd_j[jj]];
901 if (ahat_offd[j1] > 0)
902 P_offd_data[jj] = -beta*ahat_offd[j1];
903 else
904 P_offd_data[jj] = -alfa*ahat_offd[j1];
905
906 ahat_offd[j1] = 0;
907 }
908 for (jj=0; jj < cnt_f_offd; jj++)
909 ihat_offd[ipnt_offd[jj]] = -1;
910 }
911 }
912 else
913 {
914 for (jj=0; jj < cnt_c; jj++)
915 {
916 sum_C += ahat[jj];
917 }
918 if(num_procs > 1)
919 {
920 for (jj=0; jj < cnt_c_offd; jj++)
921 {
922 sum_C += ahat_offd[jj];
923 }
924 }
925 sum = sum_C;
926 for (jj=cnt_c+1; jj < cnt_f; jj++)
927 {
928 sum += ahat[jj];
929 ahat[jj] = 0;
930 }
931 if(num_procs > 1)
932 {
933 for (jj=cnt_c_offd; jj < cnt_f_offd; jj++)
934 {
935 sum += ahat_offd[jj];
936 ahat_offd[jj] = 0;
937 }
938 }
939 if (sum_C) alfa = sum/sum_C/diagonal;
940
941 /*-----------------------------------------------------------------
942 * Set interpolation weight by dividing by the diagonal.
943 *-----------------------------------------------------------------*/
944
945 for (jj = jj_begin_row; jj < jj_end_row; jj++)
946 {
947 j1 = ihat[P_diag_j[jj]];
948 P_diag_data[jj] = -alfa*ahat[j1];
949 P_diag_j[jj] = fine_to_coarse[P_diag_j[jj]];
950 ahat[j1] = 0;
951 }
952 for (jj=0; jj < cnt_f; jj++)
953 ihat[ipnt[jj]] = -1;
954 if(num_procs > 1)
955 {
956 for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
957 {
958 j1 = ihat_offd[P_offd_j[jj]];
959 P_offd_data[jj] = -alfa*ahat_offd[j1];
960 ahat_offd[j1] = 0;
961 }
962 for (jj=0; jj < cnt_f_offd; jj++)
963 ihat_offd[ipnt_offd[jj]] = -1;
964 }
965 }
966 if (debug_flag==4)
967 {
968 wall_time = time_getWallclockSeconds() - wall_time;
969 wall_3 += wall_time;
970 fflush(NULL);
971 }
972 }
973 }
974
975 if (debug_flag==4)
976 {
977 printf("Proc = %d fill part 1 %f part 2 %f part 3 %f\n",
978 my_id, wall_1, wall_2, wall_3);
979 fflush(NULL);
980 }
981 P = hypre_ParCSRMatrixCreate(comm,
982 total_old_global_cpts,
983 total_global_cpts,
984 num_old_cpts_global,
985 num_cpts_global,
986 0,
987 P_diag_i[n_coarse_old],
988 P_offd_i[n_coarse_old]);
989
990 P_diag = hypre_ParCSRMatrixDiag(P);
991 hypre_CSRMatrixData(P_diag) = P_diag_data;
992 hypre_CSRMatrixI(P_diag) = P_diag_i;
993 hypre_CSRMatrixJ(P_diag) = P_diag_j;
994 P_offd = hypre_ParCSRMatrixOffd(P);
995 hypre_CSRMatrixData(P_offd) = P_offd_data;
996 hypre_CSRMatrixI(P_offd) = P_offd_i;
997 hypre_CSRMatrixJ(P_offd) = P_offd_j;
998 hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
999
1000 /* Compress P, removing coefficients smaller than trunc_factor * Max */
1001 if (trunc_factor != 0.0 || max_elmts > 0)
1002 {
1003 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
1004 P_diag_data = hypre_CSRMatrixData(P_diag);
1005 P_diag_i = hypre_CSRMatrixI(P_diag);
1006 P_diag_j = hypre_CSRMatrixJ(P_diag);
1007 P_offd_data = hypre_CSRMatrixData(P_offd);
1008 P_offd_i = hypre_CSRMatrixI(P_offd);
1009 P_offd_j = hypre_CSRMatrixJ(P_offd);
1010 P_diag_size = P_diag_i[n_coarse_old];
1011 P_offd_size = P_offd_i[n_coarse_old];
1012 }
1013
1014 /* This builds col_map, col_map should be monotone increasing and contain
1015 * global numbers. */
1016 num_cols_P_offd = 0;
1017 if(P_offd_size)
1018 {
1019 num_cols_P_offd = 0;
1020 for (i=0; i < P_offd_size; i++)
1021 {
1022 index = P_offd_j[i];
1023 if(tmp_CF_marker_offd[index] == 1)
1024 {
1025 num_cols_P_offd++;
1026 tmp_CF_marker_offd[index] = 2;
1027 }
1028 }
1029
1030 if (num_cols_P_offd)
1031 {
1032 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd);
1033 tmp_map_offd = hypre_CTAlloc(int, num_cols_P_offd);
1034 }
1035
1036 index = 0;
1037 for(i = 0; i < num_cols_P_offd; i++)
1038 {
1039 while( tmp_CF_marker_offd[index] == -1) index++;
1040 col_map_offd_P[i] = fine_to_coarse_offd[index];
1041 tmp_map_offd[i] = index++;
1042 }
1043
1044 hypre_BigQsortbi(col_map_offd_P, tmp_map_offd, 0, num_cols_P_offd-1);
1045
1046 for (i = 0; i < num_cols_P_offd; i++)
1047 tmp_CF_marker_offd[tmp_map_offd[i]] = i;
1048
1049 for(i = 0; i < P_offd_size; i++)
1050 P_offd_j[i] = tmp_CF_marker_offd[P_offd_j[i]];
1051 /*index = 0;
1052 for(i = 0; i < num_cols_P_offd; i++)
1053 {
1054 while (P_marker[index] == 0) index++;
1055
1056 col_map_offd_P[i] = fine_to_coarse_offd[index];
1057 index++;
1058 }*/
1059
1060 /* Sort the col_map_offd_P and P_offd_j correctly */
1061 /* Check if sort actually changed anything */
1062 /*for(i = 0; i < num_cols_P_offd; i++)
1063 P_marker[i] = col_map_offd_P[i];
1064
1065 if(hypre_ssort(col_map_offd_P,num_cols_P_offd))
1066 {
1067 for(i = 0; i < P_offd_size; i++)
1068 for(j = 0; j < num_cols_P_offd; j++)
1069 if(P_marker[P_offd_j[i]] == col_map_offd_P[j])
1070 {
1071 P_offd_j[i] = j;
1072 j = num_cols_P_offd;
1073 }
1074 }
1075 hypre_TFree(P_marker); */
1076 }
1077
1078 if (num_cols_P_offd)
1079 {
1080 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
1081 hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
1082 hypre_TFree(tmp_map_offd);
1083 }
1084
1085#ifdef HYPRE_NO_GLOBAL_PARTITION
1086 hypre_NewCommPkgCreate(P);
1087#else
1088 hypre_MatvecCommPkgCreate(P);
1089#endif
1090
1091 for (i=0; i < n_fine; i++)
1092 if (CF_marker[i] < -1) CF_marker[i] = -1;
1093
1094 *P_ptr = P;
1095
1096 /* Deallocate memory */
1097 hypre_TFree(fine_to_coarse);
1098 hypre_TFree(old_coarse_to_fine);
1099 hypre_TFree(P_marker);
1100 hypre_TFree(ahat);
1101 hypre_TFree(ihat);
1102 hypre_TFree(ipnt);
1103
1104 if (full_off_procNodes)
1105 {
1106 hypre_TFree(ahat_offd);
1107 hypre_TFree(ihat_offd);
1108 hypre_TFree(ipnt_offd);
1109 }
1110 if (num_procs > 1)
1111 {
1112 hypre_BigCSRMatrixDestroy(Sop);
1113 hypre_BigCSRMatrixDestroy(A_ext);
1114 hypre_TFree(fine_to_coarse_offd);
1115 hypre_TFree(P_marker_offd);
1116 hypre_TFree(CF_marker_offd);
1117 hypre_TFree(tmp_CF_marker_offd);
1118
1119 if(num_functions > 1)
1120 hypre_TFree(dof_func_offd);
1121 hypre_TFree(found);
1122
1123 hypre_MatvecCommPkgDestroy(extend_comm_pkg);
1124
1125 }
1126
1127
1128 return hypre_error_flag;
1129}
1130
1131/*---------------------------------------------------------------------------
1132 * hypre_BoomerAMGBuildPartialExtPIInterp
1133 * Comment:
1134 *--------------------------------------------------------------------------*/
1135int
1136hypre_BoomerAMGBuildPartialExtPIInterp(hypre_ParCSRMatrix *A, int *CF_marker,
1137 hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
1138 HYPRE_BigInt *num_old_cpts_global,
1139 int num_functions, int *dof_func, int debug_flag,
1140 double trunc_factor, int max_elmts,
1141 int *col_offd_S_to_A,
1142 hypre_ParCSRMatrix **P_ptr)
1143{
1144 /* Communication Variables */
1145 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
1146 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1147
1148
1149 int my_id, num_procs;
1150
1151 /* Variables to store input variables */
1152 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1153 double *A_diag_data = hypre_CSRMatrixData(A_diag);
1154 int *A_diag_i = hypre_CSRMatrixI(A_diag);
1155 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
1156
1157 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
1158 double *A_offd_data = hypre_CSRMatrixData(A_offd);
1159 int *A_offd_i = hypre_CSRMatrixI(A_offd);
1160 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
1161
1162 int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
1163 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
1164 int n_fine = hypre_CSRMatrixNumRows(A_diag);
1165 HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstRowIndex(A);
1166 int local_numrows = hypre_CSRMatrixNumRows(A_diag);
1167 HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt) local_numrows;
1168 HYPRE_BigInt total_global_cpts, my_first_cpt;
1169 HYPRE_BigInt total_old_global_cpts, my_first_old_cpt;
1170 int n_coarse, n_coarse_old;
1171
1172 /* Variables to store strong connection matrix info */
1173 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
1174 int *S_diag_i = hypre_CSRMatrixI(S_diag);
1175 int *S_diag_j = hypre_CSRMatrixJ(S_diag);
1176
1177 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
1178 int *S_offd_i = hypre_CSRMatrixI(S_offd);
1179 int *S_offd_j = hypre_CSRMatrixJ(S_offd);
1180
1181 /* Interpolation matrix P */
1182 hypre_ParCSRMatrix *P;
1183 hypre_CSRMatrix *P_diag;
1184 hypre_CSRMatrix *P_offd;
1185
1186 double *P_diag_data = NULL;
1187 int *P_diag_i, *P_diag_j = NULL;
1188 double *P_offd_data = NULL;
1189 int *P_offd_i, *P_offd_j = NULL;
1190
1191 HYPRE_BigInt *col_map_offd_P;
1192 int *tmp_map_offd;
1193 int P_diag_size;
1194 int P_offd_size;
1195 int *P_marker = NULL;
1196 int *P_marker_offd = NULL;
1197 int *CF_marker_offd = NULL;
1198 int *tmp_CF_marker_offd = NULL;
1199 int *dof_func_offd = NULL;
1200
1201 /* Full row information for columns of A that are off diag*/
1202 hypre_BigCSRMatrix *A_ext;
1203 double *A_ext_data;
1204 int *A_ext_i;
1205 HYPRE_BigInt *big_A_ext_j;
1206 int *A_ext_j;
1207
1208 int *fine_to_coarse = NULL;
1209 int *old_coarse_to_fine = NULL;
1210 HYPRE_BigInt *fine_to_coarse_offd = NULL;
1211 HYPRE_BigInt *found;
1212
1213 int num_cols_P_offd;
1214 int newoff, loc_col;
1215 int A_ext_rows, full_off_procNodes;
1216
1217 hypre_BigCSRMatrix *Sop;
1218 int *Sop_i;
1219 HYPRE_BigInt *big_Sop_j;
1220 int *Sop_j;
1221
1222 int Soprows, sgn;
1223
1224 /* Variables to keep count of interpolatory points */
1225 int jj_counter, jj_counter_offd;
1226 int jj_begin_row, jj_end_row;
1227 int jj_begin_row_offd = 0;
1228 int jj_end_row_offd = 0;
1229 int coarse_counter, coarse_counter_offd;
1230
1231 /* Interpolation weight variables */
1232 double sum, diagonal, distribute;
1233 int strong_f_marker = -2;
1234
1235 /* Loop variables */
1236 int index, ii, cnt, old_cnt;
1237 int start_indexing = 0;
1238 int i, i1, i2, jj, kk, k1, jj1;
1239
1240 /* Definitions */
1241 double zero = 0.0;
1242 double one = 1.0;
1243 double wall_time;
1244
1245
1246 hypre_ParCSRCommPkg *extend_comm_pkg = NULL;
1247
1248 if (debug_flag==4) wall_time = time_getWallclockSeconds();
1249
1250 /* BEGIN */
1251 MPI_Comm_size(comm, &num_procs);
1252 MPI_Comm_rank(comm,&my_id);
1253
1254#ifdef HYPRE_NO_GLOBAL_PARTITION
1255 my_first_cpt = num_cpts_global[0];
1256 my_first_old_cpt = num_old_cpts_global[0];
1257 n_coarse_old = (int)(num_old_cpts_global[1] - num_old_cpts_global[0]);
1258 n_coarse = (int)(num_cpts_global[1] - num_cpts_global[0]);
1259 if (my_id == (num_procs -1))
1260 {
1261 total_global_cpts = num_cpts_global[1];
1262 total_old_global_cpts = num_old_cpts_global[1];
1263 }
1264 MPI_Bcast(&total_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
1265 MPI_Bcast(&total_old_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
1266#else
1267 my_first_cpt = num_cpts_global[my_id];
1268 my_first_old_cpt = num_old_cpts_global[my_id];
1269 total_global_cpts = num_cpts_global[num_procs];
1270 total_old_global_cpts = num_old_cpts_global[num_procs];
1271 n_coarse_old = (int)(num_old_cpts_global[my_id+1] - num_old_cpts_global[my_id]);
1272 n_coarse = (int)(num_cpts_global[my_id+1] - num_cpts_global[my_id]);
1273#endif
1274
1275 if (!comm_pkg)
1276 {
1277#ifdef HYPRE_NO_GLOBAL_PARTITION
1278 hypre_NewCommPkgCreate(A);
1279#else
1280 hypre_MatvecCommPkgCreate(A);
1281#endif
1282 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1283 }
1284
1285 /* Set up off processor information (specifically for neighbors of
1286 * neighbors */
1287 newoff = 0;
1288 full_off_procNodes = 0;
1289 if (num_procs > 1)
1290 {
1291 /*----------------------------------------------------------------------
1292 * Get the off processors rows for A and S, associated with columns in
1293 * A_offd and S_offd.
1294 *---------------------------------------------------------------------*/
1295 A_ext = hypre_ParCSRMatrixExtractBigExt(A,A,1);
1296 A_ext_i = hypre_BigCSRMatrixI(A_ext);
1297 big_A_ext_j = hypre_BigCSRMatrixJ(A_ext);
1298 A_ext_data = hypre_BigCSRMatrixData(A_ext);
1299 A_ext_rows = hypre_BigCSRMatrixNumRows(A_ext);
1300
1301 Sop = hypre_ParCSRMatrixExtractBigExt(S,A,0);
1302 Sop_i = hypre_BigCSRMatrixI(Sop);
1303 big_Sop_j = hypre_BigCSRMatrixJ(Sop);
1304 Soprows = hypre_BigCSRMatrixNumRows(Sop);
1305
1306 /* Find nodes that are neighbors of neighbors, not found in offd */
1307 newoff = new_offd_nodes(&found, A_ext_rows, A_ext_i, big_A_ext_j,
1308 &A_ext_j,
1309 Soprows, col_map_offd, col_1, col_n,
1310 Sop_i, big_Sop_j, &Sop_j, CF_marker, comm_pkg);
1311
1312 hypre_BigCSRMatrixJ(A_ext) = NULL;
1313 hypre_BigCSRMatrixJ(Sop) = NULL;
1314
1315 if(newoff >= 0)
1316 full_off_procNodes = newoff + num_cols_A_offd;
1317 else
1318 return(1);
1319
1320 /* Possibly add new points and new processors to the comm_pkg, all
1321 * processors need new_comm_pkg */
1322
1323 /* AHB - create a new comm package just for extended info -
1324 this will work better with the assumed partition*/
1325 hypre_ParCSRFindExtendCommPkg(A, newoff, found,
1326 &extend_comm_pkg);
1327
1328 CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
1329
1330 if (num_functions > 1 && full_off_procNodes > 0)
1331 dof_func_offd = hypre_CTAlloc(int, full_off_procNodes);
1332
1333 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, CF_marker,
1334 full_off_procNodes, CF_marker_offd);
1335
1336 if(num_functions > 1)
1337 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, dof_func,
1338 full_off_procNodes, dof_func_offd);
1339 }
1340
1341
1342 /*-----------------------------------------------------------------------
1343 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
1344 *-----------------------------------------------------------------------*/
1345
1346 /*-----------------------------------------------------------------------
1347 * Intialize counters and allocate mapping vector.
1348 *-----------------------------------------------------------------------*/
1349 P_diag_i = hypre_CTAlloc(int, n_fine+1);
1350 P_offd_i = hypre_CTAlloc(int, n_fine+1);
1351
1352 if (n_coarse_old) old_coarse_to_fine = hypre_CTAlloc(int, n_coarse_old);
1353 if (n_fine)
1354 {
1355 fine_to_coarse = hypre_CTAlloc(int, n_fine);
1356 P_marker = hypre_CTAlloc(int, n_fine);
1357 }
1358
1359 if (full_off_procNodes)
1360 {
1361 P_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
1362 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, full_off_procNodes);
1363 tmp_CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
1364 }
1365
1366 for (i=0; i < full_off_procNodes; i++)
1367 {
1368 P_marker_offd[i] = -1;
1369 tmp_CF_marker_offd[i] = -1;
1370 fine_to_coarse_offd[i] = -1;
1371 }
1372
1373 for (i=0; i < n_fine; i++)
1374 {
1375 P_marker[i] = -1;
1376 fine_to_coarse[i] = -1;
1377 }
1378
1379 jj_counter = start_indexing;
1380 jj_counter_offd = start_indexing;
1381 coarse_counter = 0;
1382 coarse_counter_offd = 0;
1383
1384 cnt = 0;
1385 old_cnt = 0;
1386 for (i = 0; i < n_fine; i++)
1387 {
1388 fine_to_coarse[i] = -1;
1389 if (CF_marker[i] == 1)
1390 {
1391 fine_to_coarse[i] = cnt++;
1392 old_coarse_to_fine[old_cnt++] = i;
1393 }
1394 else if (CF_marker[i] == -2)
1395 {
1396 old_coarse_to_fine[old_cnt++] = i;
1397 }
1398 }
1399
1400 /*-----------------------------------------------------------------------
1401 * Loop over fine grid.
1402 *-----------------------------------------------------------------------*/
1403 for (ii = 0; ii < n_coarse_old; ii++)
1404 {
1405 P_diag_i[ii] = jj_counter;
1406 if (num_procs > 1)
1407 P_offd_i[ii] = jj_counter_offd;
1408
1409 i = old_coarse_to_fine[ii];
1410 if (CF_marker[i] >= 0)
1411 {
1412 jj_counter++;
1413 coarse_counter++;
1414 }
1415
1416 /*--------------------------------------------------------------------
1417 * If i is an F-point, interpolation is from the C-points that
1418 * strongly influence i, or C-points that stronly influence F-points
1419 * that strongly influence i.
1420 *--------------------------------------------------------------------*/
1421 else if (CF_marker[i] == -2)
1422 {
1423 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
1424 {
1425 i1 = S_diag_j[jj];
1426 if (CF_marker[i1] > 0)
1427 { /* i1 is a C point */
1428 if (P_marker[i1] < P_diag_i[ii])
1429 {
1430 P_marker[i1] = jj_counter;
1431 jj_counter++;
1432 }
1433 }
1434 else if (CF_marker[i1] != -3)
1435 { /* i1 is a F point, loop through it's strong neighbors */
1436 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
1437 {
1438 k1 = S_diag_j[kk];
1439 if (CF_marker[k1] > 0)
1440 {
1441 if(P_marker[k1] < P_diag_i[ii])
1442 {
1443 P_marker[k1] = jj_counter;
1444 jj_counter++;
1445 }
1446 }
1447 }
1448 if(num_procs > 1)
1449 {
1450 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
1451 {
1452 if(col_offd_S_to_A)
1453 k1 = col_offd_S_to_A[S_offd_j[kk]];
1454 else
1455 k1 = S_offd_j[kk];
1456 if (CF_marker_offd[k1] > 0)
1457 {
1458 if(P_marker_offd[k1] < P_offd_i[ii])
1459 {
1460 tmp_CF_marker_offd[k1] = 1;
1461 P_marker_offd[k1] = jj_counter_offd;
1462 jj_counter_offd++;
1463 }
1464 }
1465 }
1466 }
1467 }
1468 }
1469 /* Look at off diag strong connections of i */
1470 if (num_procs > 1)
1471 {
1472 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
1473 {
1474 i1 = S_offd_j[jj];
1475 if(col_offd_S_to_A)
1476 i1 = col_offd_S_to_A[i1];
1477 if (CF_marker_offd[i1] > 0)
1478 {
1479 if(P_marker_offd[i1] < P_offd_i[ii])
1480 {
1481 tmp_CF_marker_offd[i1] = 1;
1482 P_marker_offd[i1] = jj_counter_offd;
1483 jj_counter_offd++;
1484 }
1485 }
1486 else if (CF_marker_offd[i1] != -3)
1487 { /* F point; look at neighbors of i1. Sop contains global col
1488 * numbers and entries that could be in S_diag or S_offd or
1489 * neither. */
1490 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
1491 {
1492 loc_col = Sop_j[kk];
1493 if(loc_col > -1)
1494 { /* In S_diag */
1495 if(CF_marker[loc_col] > 0)
1496 {
1497 if(P_marker[loc_col] < P_diag_i[ii])
1498 {
1499 P_marker[loc_col] = jj_counter;
1500 jj_counter++;
1501 }
1502 }
1503 }
1504 else
1505 {
1506 loc_col = -loc_col - 1;
1507 if(CF_marker_offd[loc_col] > 0)
1508 {
1509 if(P_marker_offd[loc_col] < P_offd_i[ii])
1510 {
1511 P_marker_offd[loc_col] = jj_counter_offd;
1512 tmp_CF_marker_offd[loc_col] = 1;
1513 jj_counter_offd++;
1514 }
1515 }
1516 }
1517 }
1518 }
1519 }
1520 }
1521 }
1522 }
1523
1524 if (debug_flag==4)
1525 {
1526 wall_time = time_getWallclockSeconds() - wall_time;
1527 printf("Proc = %d determine structure %f\n",
1528 my_id, wall_time);
1529 fflush(NULL);
1530 }
1531 /*-----------------------------------------------------------------------
1532 * Allocate arrays.
1533 *-----------------------------------------------------------------------*/
1534
1535 if (debug_flag== 4) wall_time = time_getWallclockSeconds();
1536
1537 P_diag_size = jj_counter;
1538 P_offd_size = jj_counter_offd;
1539
1540 if (P_diag_size)
1541 {
1542 P_diag_j = hypre_CTAlloc(int, P_diag_size);
1543 P_diag_data = hypre_CTAlloc(double, P_diag_size);
1544 }
1545
1546 if (P_offd_size)
1547 {
1548 P_offd_j = hypre_CTAlloc(int, P_offd_size);
1549 P_offd_data = hypre_CTAlloc(double, P_offd_size);
1550 }
1551
1552 P_diag_i[n_coarse_old] = jj_counter;
1553 P_offd_i[n_coarse_old] = jj_counter_offd;
1554
1555 jj_counter = start_indexing;
1556 jj_counter_offd = start_indexing;
1557
1558 /* Fine to coarse mapping */
1559 if(num_procs > 1)
1560 {
1561 big_insert_new_nodes(comm_pkg, extend_comm_pkg, fine_to_coarse,
1562 full_off_procNodes, my_first_cpt,
1563 fine_to_coarse_offd);
1564 }
1565
1566 for (i = 0; i < n_fine; i++)
1567 P_marker[i] = -1;
1568
1569 for (i = 0; i < full_off_procNodes; i++)
1570 P_marker_offd[i] = -1;
1571
1572 /*-----------------------------------------------------------------------
1573 * Loop over fine grid points.
1574 *-----------------------------------------------------------------------*/
1575 for (ii = 0; ii < n_coarse_old; ii++)
1576 {
1577 jj_begin_row = jj_counter;
1578 jj_begin_row_offd = jj_counter_offd;
1579 i = old_coarse_to_fine[ii];
1580
1581 /*--------------------------------------------------------------------
1582 * If i is a c-point, interpolation is the identity.
1583 *--------------------------------------------------------------------*/
1584
1585 if (CF_marker[i] > 0)
1586 {
1587 P_diag_j[jj_counter] = fine_to_coarse[i];
1588 P_diag_data[jj_counter] = one;
1589 jj_counter++;
1590 }
1591
1592 /*--------------------------------------------------------------------
1593 * If i is an F-point, build interpolation.
1594 *--------------------------------------------------------------------*/
1595
1596 else if (CF_marker[i] == -2)
1597 {
1598 strong_f_marker--;
1599 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
1600 {
1601 i1 = S_diag_j[jj];
1602
1603 /*--------------------------------------------------------------
1604 * If neighbor i1 is a C-point, set column number in P_diag_j
1605 * and initialize interpolation weight to zero.
1606 *--------------------------------------------------------------*/
1607
1608 if (CF_marker[i1] > 0)
1609 {
1610 if (P_marker[i1] < jj_begin_row)
1611 {
1612 P_marker[i1] = jj_counter;
1613 P_diag_j[jj_counter] = fine_to_coarse[i1];
1614 P_diag_data[jj_counter] = zero;
1615 jj_counter++;
1616 }
1617 }
1618 else if (CF_marker[i1] != -3)
1619 {
1620 P_marker[i1] = strong_f_marker;
1621 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
1622 {
1623 k1 = S_diag_j[kk];
1624 if (CF_marker[k1] > 0)
1625 {
1626 if(P_marker[k1] < jj_begin_row)
1627 {
1628 P_marker[k1] = jj_counter;
1629 P_diag_j[jj_counter] = fine_to_coarse[k1];
1630 P_diag_data[jj_counter] = zero;
1631 jj_counter++;
1632 }
1633 }
1634 }
1635 if(num_procs > 1)
1636 {
1637 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
1638 {
1639 if(col_offd_S_to_A)
1640 k1 = col_offd_S_to_A[S_offd_j[kk]];
1641 else
1642 k1 = S_offd_j[kk];
1643 if(CF_marker_offd[k1] > 0)
1644 {
1645 if(P_marker_offd[k1] < jj_begin_row_offd)
1646 {
1647 P_marker_offd[k1] = jj_counter_offd;
1648 P_offd_j[jj_counter_offd] = k1;
1649 P_offd_data[jj_counter_offd] = zero;
1650 jj_counter_offd++;
1651 }
1652 }
1653 }
1654 }
1655 }
1656 }
1657
1658 if ( num_procs > 1)
1659 {
1660 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
1661 {
1662 i1 = S_offd_j[jj];
1663 if(col_offd_S_to_A)
1664 i1 = col_offd_S_to_A[i1];
1665 if ( CF_marker_offd[i1] > 0)
1666 {
1667 if(P_marker_offd[i1] < jj_begin_row_offd)
1668 {
1669 P_marker_offd[i1] = jj_counter_offd;
1670 P_offd_j[jj_counter_offd] = i1;
1671 P_offd_data[jj_counter_offd] = zero;
1672 jj_counter_offd++;
1673 }
1674 }
1675 else if (CF_marker_offd[i1] != -3)
1676 {
1677 P_marker_offd[i1] = strong_f_marker;
1678 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
1679 {
1680 loc_col = Sop_j[kk];
1681 if(loc_col > -1)
1682 {
1683 if(CF_marker[loc_col] > 0)
1684 {
1685 if(P_marker[loc_col] < jj_begin_row)
1686 {
1687 P_marker[loc_col] = jj_counter;
1688 P_diag_j[jj_counter] = fine_to_coarse[loc_col];
1689 P_diag_data[jj_counter] = zero;
1690 jj_counter++;
1691 }
1692 }
1693 }
1694 else
1695 {
1696 loc_col = - loc_col - 1;
1697 if(CF_marker_offd[loc_col] > 0)
1698 {
1699 if(P_marker_offd[loc_col] < jj_begin_row_offd)
1700 {
1701 P_marker_offd[loc_col] = jj_counter_offd;
1702 P_offd_j[jj_counter_offd]=loc_col;
1703 P_offd_data[jj_counter_offd] = zero;
1704 jj_counter_offd++;
1705 }
1706 }
1707 }
1708 }
1709 }
1710 }
1711 }
1712
1713 jj_end_row = jj_counter;
1714 jj_end_row_offd = jj_counter_offd;
1715
1716 diagonal = A_diag_data[A_diag_i[i]];
1717
1718 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
1719 { /* i1 is a c-point and strongly influences i, accumulate
1720 * a_(i,i1) into interpolation weight */
1721 i1 = A_diag_j[jj];
1722 if (P_marker[i1] >= jj_begin_row)
1723 {
1724 P_diag_data[P_marker[i1]] += A_diag_data[jj];
1725 }
1726 else if(P_marker[i1] == strong_f_marker)
1727 {
1728 sum = zero;
1729 sgn = 1;
1730 if(A_diag_data[A_diag_i[i1]] < 0) sgn = -1;
1731 /* Loop over row of A for point i1 and calculate the sum
1732 * of the connections to c-points that strongly incluence i. */
1733 for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++)
1734 {
1735 i2 = A_diag_j[jj1];
1736 if((P_marker[i2] >= jj_begin_row || i2 == i) && (sgn*A_diag_data[jj1]) < 0)
1737 sum += A_diag_data[jj1];
1738 }
1739 if(num_procs > 1)
1740 {
1741 for(jj1 = A_offd_i[i1]; jj1< A_offd_i[i1+1]; jj1++)
1742 {
1743 i2 = A_offd_j[jj1];
1744 if(P_marker_offd[i2] >= jj_begin_row_offd &&
1745 (sgn*A_offd_data[jj1]) < 0)
1746 sum += A_offd_data[jj1];
1747 }
1748 }
1749 if(sum != 0)
1750 {
1751 distribute = A_diag_data[jj]/sum;
1752 /* Loop over row of A for point i1 and do the distribution */
1753 for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++)
1754 {
1755 i2 = A_diag_j[jj1];
1756 if(P_marker[i2] >= jj_begin_row && (sgn*A_diag_data[jj1]) < 0)
1757 P_diag_data[P_marker[i2]] +=
1758 distribute*A_diag_data[jj1];
1759 if(i2 == i && (sgn*A_diag_data[jj1]) < 0)
1760 diagonal += distribute*A_diag_data[jj1];
1761 }
1762 if(num_procs > 1)
1763 {
1764 for(jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
1765 {
1766 i2 = A_offd_j[jj1];
1767 if(P_marker_offd[i2] >= jj_begin_row_offd &&
1768 (sgn*A_offd_data[jj1]) < 0)
1769 P_offd_data[P_marker_offd[i2]] +=
1770 distribute*A_offd_data[jj1];
1771 }
1772 }
1773 }
1774 else
1775 {
1776 diagonal += A_diag_data[jj];
1777 }
1778 }
1779 /* neighbor i1 weakly influences i, accumulate a_(i,i1) into
1780 * diagonal */
1781 else if (CF_marker[i1] != -3)
1782 {
1783 if(num_functions == 1 || dof_func[i] == dof_func[i1])
1784 diagonal += A_diag_data[jj];
1785 }
1786 }
1787 if(num_procs > 1)
1788 {
1789 for(jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
1790 {
1791 i1 = A_offd_j[jj];
1792 if(P_marker_offd[i1] >= jj_begin_row_offd)
1793 P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];
1794 else if(P_marker_offd[i1] == strong_f_marker)
1795 {
1796 sum = zero;
1797 sgn = 1;
1798 if(A_ext_data[A_ext_i[i1]] < 0) sgn = -1;
1799 for(jj1 = A_ext_i[i1]+1; jj1 < A_ext_i[i1+1]; jj1++)
1800 {
1801 loc_col = A_ext_j[jj1];
1802 if(loc_col > -1)
1803 { /* diag */
1804 if((P_marker[loc_col] >= jj_begin_row || loc_col == i)
1805 && (sgn*A_ext_data[jj1]) < 0)
1806 sum += A_ext_data[jj1];
1807 }
1808 else
1809 {
1810 loc_col = - loc_col - 1;
1811 if(P_marker_offd[loc_col] >= jj_begin_row_offd &&
1812 (sgn*A_ext_data[jj1]) < 0)
1813 sum += A_ext_data[jj1];
1814 }
1815 }
1816 if(sum != 0)
1817 {
1818 distribute = A_offd_data[jj] / sum;
1819 for(jj1 = A_ext_i[i1]+1; jj1 < A_ext_i[i1+1]; jj1++)
1820 {
1821 loc_col = A_ext_j[jj1];
1822 if(loc_col > -1)
1823 { /* diag */
1824 if(P_marker[loc_col] >= jj_begin_row &&
1825 (sgn*A_ext_data[jj1]) < 0)
1826 P_diag_data[P_marker[loc_col]] += distribute*
1827 A_ext_data[jj1];
1828 if(loc_col == i && (sgn*A_ext_data[jj1]) < 0)
1829 diagonal += distribute*A_ext_data[jj1];
1830 }
1831 else
1832 {
1833 loc_col = - loc_col - 1;
1834 if(P_marker_offd[loc_col] >= jj_begin_row_offd &&
1835 (sgn*A_ext_data[jj1]) < 0)
1836 P_offd_data[P_marker_offd[loc_col]] += distribute*
1837 A_ext_data[jj1];
1838 }
1839 }
1840 }
1841 else
1842 {
1843 diagonal += A_offd_data[jj];
1844 }
1845 }
1846 else if (CF_marker_offd[i1] != -3)
1847 {
1848 if(num_functions == 1 || dof_func[i] == dof_func_offd[i1])
1849 diagonal += A_offd_data[jj];
1850 }
1851 }
1852 }
1853 for(jj = jj_begin_row; jj < jj_end_row; jj++)
1854 P_diag_data[jj] /= -diagonal;
1855 for(jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
1856 P_offd_data[jj] /= -diagonal;
1857 }
1858 strong_f_marker--;
1859 }
1860
1861 if (debug_flag==4)
1862 {
1863 wall_time = time_getWallclockSeconds() - wall_time;
1864 printf("Proc = %d fill structure %f\n",
1865 my_id, wall_time);
1866 fflush(NULL);
1867 }
1868 /*-----------------------------------------------------------------------
1869 * Allocate arrays.
1870 *-----------------------------------------------------------------------*/
1871
1872 P = hypre_ParCSRMatrixCreate(comm,
1873 total_old_global_cpts,
1874 total_global_cpts,
1875 num_old_cpts_global,
1876 num_cpts_global,
1877 0,
1878 P_diag_i[n_coarse_old],
1879 P_offd_i[n_coarse_old]);
1880
1881 P_diag = hypre_ParCSRMatrixDiag(P);
1882 hypre_CSRMatrixData(P_diag) = P_diag_data;
1883 hypre_CSRMatrixI(P_diag) = P_diag_i;
1884 hypre_CSRMatrixJ(P_diag) = P_diag_j;
1885 P_offd = hypre_ParCSRMatrixOffd(P);
1886 hypre_CSRMatrixData(P_offd) = P_offd_data;
1887 hypre_CSRMatrixI(P_offd) = P_offd_i;
1888 hypre_CSRMatrixJ(P_offd) = P_offd_j;
1889 hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
1890
1891 /* Compress P, removing coefficients smaller than trunc_factor * Max */
1892 if (trunc_factor != 0.0 || max_elmts > 0)
1893 {
1894 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
1895 P_diag_data = hypre_CSRMatrixData(P_diag);
1896 P_diag_i = hypre_CSRMatrixI(P_diag);
1897 P_diag_j = hypre_CSRMatrixJ(P_diag);
1898 P_offd_data = hypre_CSRMatrixData(P_offd);
1899 P_offd_i = hypre_CSRMatrixI(P_offd);
1900 P_offd_j = hypre_CSRMatrixJ(P_offd);
1901 P_diag_size = P_diag_i[n_coarse_old];
1902 P_offd_size = P_offd_i[n_coarse_old];
1903 }
1904
1905 /* This builds col_map, col_map should be monotone increasing and contain
1906 * global numbers. */
1907 num_cols_P_offd = 0;
1908 if(P_offd_size)
1909 {
1910 num_cols_P_offd = 0;
1911 for (i=0; i < P_offd_size; i++)
1912 {
1913 index = P_offd_j[i];
1914 if (tmp_CF_marker_offd[index] == 1)
1915 {
1916 num_cols_P_offd++;
1917 tmp_CF_marker_offd[index] = 2;
1918 }
1919 }
1920
1921 if (num_cols_P_offd)
1922 {
1923 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd);
1924 tmp_map_offd = hypre_CTAlloc(int, num_cols_P_offd);
1925 }
1926
1927 index = 0;
1928 for(i = 0; i < num_cols_P_offd; i++)
1929 {
1930 while( tmp_CF_marker_offd[index] != 2) index++;
1931 /*printf("index %d tmp_CF %d\n", index, tmp_CF_marker_offd[index]);*/
1932 col_map_offd_P[i] = fine_to_coarse_offd[index];
1933 tmp_map_offd[i] = index++;
1934 }
1935
1936 hypre_BigQsortbi(col_map_offd_P, tmp_map_offd, 0, num_cols_P_offd-1);
1937
1938 for (i = 0; i < num_cols_P_offd; i++)
1939 tmp_CF_marker_offd[tmp_map_offd[i]] = i;
1940
1941 for(i = 0; i < P_offd_size; i++)
1942 P_offd_j[i] = tmp_CF_marker_offd[P_offd_j[i]];
1943 }
1944
1945 if (num_cols_P_offd)
1946 {
1947 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
1948 hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
1949 hypre_TFree(tmp_map_offd);
1950 }
1951
1952#ifdef HYPRE_NO_GLOBAL_PARTITION
1953 hypre_NewCommPkgCreate(P);
1954#else
1955 hypre_MatvecCommPkgCreate(P);
1956#endif
1957
1958 for (i=0; i < n_fine; i++)
1959 if (CF_marker[i] < -1) CF_marker[i] = -1;
1960
1961 *P_ptr = P;
1962
1963 /* Deallocate memory */
1964 hypre_TFree(fine_to_coarse);
1965 hypre_TFree(old_coarse_to_fine);
1966 hypre_TFree(P_marker);
1967
1968 if (num_procs > 1)
1969 {
1970 hypre_BigCSRMatrixDestroy(Sop);
1971 hypre_BigCSRMatrixDestroy(A_ext);
1972 hypre_TFree(fine_to_coarse_offd);
1973 hypre_TFree(P_marker_offd);
1974 hypre_TFree(CF_marker_offd);
1975 hypre_TFree(tmp_CF_marker_offd);
1976 if(num_functions > 1)
1977 hypre_TFree(dof_func_offd);
1978 hypre_TFree(found);
1979
1980
1981 hypre_MatvecCommPkgDestroy(extend_comm_pkg);
1982
1983
1984 }
1985
1986 return hypre_error_flag;
1987}
1988
1989/*---------------------------------------------------------------------------
1990 * hypre_BoomerAMGBuildPartialExtInterp
1991 * Comment:
1992 *--------------------------------------------------------------------------*/
1993int
1994hypre_BoomerAMGBuildPartialExtInterp(hypre_ParCSRMatrix *A, int *CF_marker,
1995 hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
1996 HYPRE_BigInt *num_old_cpts_global,
1997 int num_functions, int *dof_func, int debug_flag,
1998 double trunc_factor, int max_elmts,
1999 int *col_offd_S_to_A,
2000 hypre_ParCSRMatrix **P_ptr)
2001{
2002 /* Communication Variables */
2003 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
2004 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
2005
2006
2007 int my_id, num_procs;
2008
2009 /* Variables to store input variables */
2010 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
2011 double *A_diag_data = hypre_CSRMatrixData(A_diag);
2012 int *A_diag_i = hypre_CSRMatrixI(A_diag);
2013 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
2014
2015 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
2016 double *A_offd_data = hypre_CSRMatrixData(A_offd);
2017 int *A_offd_i = hypre_CSRMatrixI(A_offd);
2018 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
2019
2020 int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
2021 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
2022 int n_fine = hypre_CSRMatrixNumRows(A_diag);
2023 HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstRowIndex(A);
2024 int local_numrows = hypre_CSRMatrixNumRows(A_diag);
2025 HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt) local_numrows;
2026 HYPRE_BigInt total_global_cpts, my_first_cpt;
2027 HYPRE_BigInt total_old_global_cpts, my_first_old_cpt;
2028 int n_coarse, n_coarse_old;
2029
2030 /* Variables to store strong connection matrix info */
2031 hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
2032 int *S_diag_i = hypre_CSRMatrixI(S_diag);
2033 int *S_diag_j = hypre_CSRMatrixJ(S_diag);
2034
2035 hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
2036 int *S_offd_i = hypre_CSRMatrixI(S_offd);
2037 int *S_offd_j = hypre_CSRMatrixJ(S_offd);
2038
2039 /* Interpolation matrix P */
2040 hypre_ParCSRMatrix *P;
2041 hypre_CSRMatrix *P_diag;
2042 hypre_CSRMatrix *P_offd;
2043
2044 double *P_diag_data = NULL;
2045 int *P_diag_i, *P_diag_j = NULL;
2046 double *P_offd_data = NULL;
2047 int *P_offd_i, *P_offd_j = NULL;
2048
2049 HYPRE_BigInt *col_map_offd_P;
2050 int *tmp_map_offd;
2051 int P_diag_size;
2052 int P_offd_size;
2053 int *P_marker = NULL;
2054 int *P_marker_offd = NULL;
2055 int *CF_marker_offd = NULL;
2056 int *tmp_CF_marker_offd = NULL;
2057 int *dof_func_offd = NULL;
2058
2059 /* Full row information for columns of A that are off diag*/
2060 hypre_BigCSRMatrix *A_ext;
2061 double *A_ext_data;
2062 int *A_ext_i;
2063 HYPRE_BigInt *big_A_ext_j;
2064 int *A_ext_j;
2065
2066 int *fine_to_coarse = NULL;
2067 int *old_coarse_to_fine = NULL;
2068 HYPRE_BigInt *fine_to_coarse_offd = NULL;
2069 HYPRE_BigInt *found;
2070
2071 int num_cols_P_offd;
2072 int newoff, loc_col;
2073 int A_ext_rows, full_off_procNodes;
2074
2075 hypre_BigCSRMatrix *Sop;
2076 int *Sop_i;
2077 HYPRE_BigInt *big_Sop_j;
2078 int *Sop_j;
2079
2080 int Soprows, sgn;
2081
2082 /* Variables to keep count of interpolatory points */
2083 int jj_counter, jj_counter_offd;
2084 int jj_begin_row, jj_end_row;
2085 int jj_begin_row_offd = 0;
2086 int jj_end_row_offd = 0;
2087 int coarse_counter, coarse_counter_offd;
2088
2089 /* Interpolation weight variables */
2090 double sum, diagonal, distribute;
2091 int strong_f_marker = -2;
2092
2093 /* Loop variables */
2094 int index, ii, cnt, old_cnt;
2095 int start_indexing = 0;
2096 int i, i1, i2, jj, kk, k1, jj1;
2097
2098 /* Definitions */
2099 double zero = 0.0;
2100 double one = 1.0;
2101 double wall_time;
2102
2103
2104 hypre_ParCSRCommPkg *extend_comm_pkg = NULL;
2105
2106 if (debug_flag==4) wall_time = time_getWallclockSeconds();
2107
2108 /* BEGIN */
2109 MPI_Comm_size(comm, &num_procs);
2110 MPI_Comm_rank(comm,&my_id);
2111
2112#ifdef HYPRE_NO_GLOBAL_PARTITION
2113 my_first_cpt = num_cpts_global[0];
2114 my_first_old_cpt = num_old_cpts_global[0];
2115 n_coarse_old = (int)(num_old_cpts_global[1] - num_old_cpts_global[0]);
2116 n_coarse = (int)(num_cpts_global[1] - num_cpts_global[0]);
2117 if (my_id == (num_procs -1))
2118 {
2119 total_global_cpts = num_cpts_global[1];
2120 total_old_global_cpts = num_old_cpts_global[1];
2121 }
2122 MPI_Bcast(&total_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
2123 MPI_Bcast(&total_old_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
2124#else
2125 my_first_cpt = num_cpts_global[my_id];
2126 my_first_old_cpt = num_old_cpts_global[my_id];
2127 total_global_cpts = num_cpts_global[num_procs];
2128 total_old_global_cpts = num_old_cpts_global[num_procs];
2129 n_coarse_old = (int)(num_old_cpts_global[my_id+1] - num_old_cpts_global[my_id]);
2130 n_coarse = (int)(num_cpts_global[my_id+1] - num_cpts_global[my_id]);
2131#endif
2132
2133 if (!comm_pkg)
2134 {
2135#ifdef HYPRE_NO_GLOBAL_PARTITION
2136 hypre_NewCommPkgCreate(A);
2137#else
2138 hypre_MatvecCommPkgCreate(A);
2139#endif
2140 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
2141 }
2142
2143 /* Set up off processor information (specifically for neighbors of
2144 * neighbors */
2145 newoff = 0;
2146 full_off_procNodes = 0;
2147 if (num_procs > 1)
2148 {
2149 /*----------------------------------------------------------------------
2150 * Get the off processors rows for A and S, associated with columns in
2151 * A_offd and S_offd.
2152 *---------------------------------------------------------------------*/
2153 A_ext = hypre_ParCSRMatrixExtractBigExt(A,A,1);
2154 A_ext_i = hypre_BigCSRMatrixI(A_ext);
2155 big_A_ext_j = hypre_BigCSRMatrixJ(A_ext);
2156 A_ext_data = hypre_BigCSRMatrixData(A_ext);
2157 A_ext_rows = hypre_BigCSRMatrixNumRows(A_ext);
2158
2159 Sop = hypre_ParCSRMatrixExtractBigExt(S,A,0);
2160 Sop_i = hypre_BigCSRMatrixI(Sop);
2161 big_Sop_j = hypre_BigCSRMatrixJ(Sop);
2162 Soprows = hypre_BigCSRMatrixNumRows(Sop);
2163
2164 /* Find nodes that are neighbors of neighbors, not found in offd */
2165 newoff = new_offd_nodes(&found, A_ext_rows, A_ext_i, big_A_ext_j,
2166 &A_ext_j,
2167 Soprows, col_map_offd, col_1, col_n,
2168 Sop_i, big_Sop_j, &Sop_j, CF_marker, comm_pkg);
2169
2170 hypre_BigCSRMatrixJ(A_ext) = NULL;
2171 hypre_BigCSRMatrixJ(Sop) = NULL;
2172
2173 if(newoff >= 0)
2174 full_off_procNodes = newoff + num_cols_A_offd;
2175 else
2176 return(1);
2177
2178 /* Possibly add new points and new processors to the comm_pkg, all
2179 * processors need new_comm_pkg */
2180
2181 /* AHB - create a new comm package just for extended info -
2182 this will work better with the assumed partition*/
2183 hypre_ParCSRFindExtendCommPkg(A, newoff, found,
2184 &extend_comm_pkg);
2185
2186 CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
2187
2188 if (num_functions > 1 && full_off_procNodes > 0)
2189 dof_func_offd = hypre_CTAlloc(int, full_off_procNodes);
2190
2191 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, CF_marker,
2192 full_off_procNodes, CF_marker_offd);
2193
2194 if(num_functions > 1)
2195 alt_insert_new_nodes(comm_pkg, extend_comm_pkg, dof_func,
2196 full_off_procNodes, dof_func_offd);
2197 }
2198
2199
2200 /*-----------------------------------------------------------------------
2201 * First Pass: Determine size of P and fill in fine_to_coarse mapping.
2202 *-----------------------------------------------------------------------*/
2203
2204 /*-----------------------------------------------------------------------
2205 * Intialize counters and allocate mapping vector.
2206 *-----------------------------------------------------------------------*/
2207 P_diag_i = hypre_CTAlloc(int, n_fine+1);
2208 P_offd_i = hypre_CTAlloc(int, n_fine+1);
2209
2210 if (n_coarse_old) old_coarse_to_fine = hypre_CTAlloc(int, n_coarse_old);
2211 if (n_fine)
2212 {
2213 fine_to_coarse = hypre_CTAlloc(int, n_fine);
2214 P_marker = hypre_CTAlloc(int, n_fine);
2215 }
2216
2217 if (full_off_procNodes)
2218 {
2219 P_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
2220 fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, full_off_procNodes);
2221 tmp_CF_marker_offd = hypre_CTAlloc(int, full_off_procNodes);
2222 }
2223
2224 for (i=0; i < full_off_procNodes; i++)
2225 {
2226 P_marker_offd[i] = -1;
2227 tmp_CF_marker_offd[i] = -1;
2228 fine_to_coarse_offd[i] = -1;
2229 }
2230
2231 for (i=0; i < n_fine; i++)
2232 {
2233 P_marker[i] = -1;
2234 fine_to_coarse[i] = -1;
2235 }
2236
2237 jj_counter = start_indexing;
2238 jj_counter_offd = start_indexing;
2239 coarse_counter = 0;
2240 coarse_counter_offd = 0;
2241
2242 cnt = 0;
2243 old_cnt = 0;
2244 for (i = 0; i < n_fine; i++)
2245 {
2246 fine_to_coarse[i] = -1;
2247 if (CF_marker[i] == 1)
2248 {
2249 fine_to_coarse[i] = cnt++;
2250 old_coarse_to_fine[old_cnt++] = i;
2251 }
2252 else if (CF_marker[i] == -2)
2253 {
2254 old_coarse_to_fine[old_cnt++] = i;
2255 }
2256 }
2257
2258 /*-----------------------------------------------------------------------
2259 * Loop over fine grid.
2260 *-----------------------------------------------------------------------*/
2261 for (ii = 0; ii < n_coarse_old; ii++)
2262 {
2263 P_diag_i[ii] = jj_counter;
2264 if (num_procs > 1)
2265 P_offd_i[ii] = jj_counter_offd;
2266
2267 i = old_coarse_to_fine[ii];
2268 if (CF_marker[i] >= 0)
2269 {
2270 jj_counter++;
2271 coarse_counter++;
2272 }
2273
2274 /*--------------------------------------------------------------------
2275 * If i is an F-point, interpolation is from the C-points that
2276 * strongly influence i, or C-points that stronly influence F-points
2277 * that strongly influence i.
2278 *--------------------------------------------------------------------*/
2279 else if (CF_marker[i] == -2)
2280 {
2281 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
2282 {
2283 i1 = S_diag_j[jj];
2284 if (CF_marker[i1] > 0)
2285 { /* i1 is a C point */
2286 if (P_marker[i1] < P_diag_i[ii])
2287 {
2288 P_marker[i1] = jj_counter;
2289 jj_counter++;
2290 }
2291 }
2292 else if (CF_marker[i1] != -3)
2293 { /* i1 is a F point, loop through it's strong neighbors */
2294 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
2295 {
2296 k1 = S_diag_j[kk];
2297 if (CF_marker[k1] > 0)
2298 {
2299 if(P_marker[k1] < P_diag_i[ii])
2300 {
2301 P_marker[k1] = jj_counter;
2302 jj_counter++;
2303 }
2304 }
2305 }
2306 if(num_procs > 1)
2307 {
2308 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
2309 {
2310 if(col_offd_S_to_A)
2311 k1 = col_offd_S_to_A[S_offd_j[kk]];
2312 else
2313 k1 = S_offd_j[kk];
2314 if (CF_marker_offd[k1] > 0)
2315 {
2316 if(P_marker_offd[k1] < P_offd_i[ii])
2317 {
2318 tmp_CF_marker_offd[k1] = 1;
2319 P_marker_offd[k1] = jj_counter_offd;
2320 jj_counter_offd++;
2321 }
2322 }
2323 }
2324 }
2325 }
2326 }
2327 /* Look at off diag strong connections of i */
2328 if (num_procs > 1)
2329 {
2330 for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
2331 {
2332 i1 = S_offd_j[jj];
2333 if(col_offd_S_to_A)
2334 i1 = col_offd_S_to_A[i1];
2335 if (CF_marker_offd[i1] > 0)
2336 {
2337 if(P_marker_offd[i1] < P_offd_i[ii])
2338 {
2339 tmp_CF_marker_offd[i1] = 1;
2340 P_marker_offd[i1] = jj_counter_offd;
2341 jj_counter_offd++;
2342 }
2343 }
2344 else if (CF_marker_offd[i1] != -3)
2345 { /* F point; look at neighbors of i1. Sop contains global col
2346 * numbers and entries that could be in S_diag or S_offd or
2347 * neither. */
2348 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
2349 {
2350 loc_col = Sop_j[kk];
2351 if(loc_col > -1)
2352 { /* In S_diag */
2353 if(CF_marker[loc_col] > 0)
2354 {
2355 if(P_marker[loc_col] < P_diag_i[ii])
2356 {
2357 P_marker[loc_col] = jj_counter;
2358 jj_counter++;
2359 }
2360 }
2361 }
2362 else
2363 {
2364 loc_col = -loc_col - 1;
2365 if(CF_marker_offd[loc_col] > 0)
2366 {
2367 if(P_marker_offd[loc_col] < P_offd_i[ii])
2368 {
2369 P_marker_offd[loc_col] = jj_counter_offd;
2370 tmp_CF_marker_offd[loc_col] = 1;
2371 jj_counter_offd++;
2372 }
2373 }
2374 }
2375 }
2376 }
2377 }
2378 }
2379 }
2380 }
2381
2382 if (debug_flag==4)
2383 {
2384 wall_time = time_getWallclockSeconds() - wall_time;
2385 printf("Proc = %d determine structure %f\n",
2386 my_id, wall_time);
2387 fflush(NULL);
2388 }
2389 /*-----------------------------------------------------------------------
2390 * Allocate arrays.
2391 *-----------------------------------------------------------------------*/
2392
2393 if (debug_flag== 4) wall_time = time_getWallclockSeconds();
2394
2395 P_diag_size = jj_counter;
2396 P_offd_size = jj_counter_offd;
2397
2398 if (P_diag_size)
2399 {
2400 P_diag_j = hypre_CTAlloc(int, P_diag_size);
2401 P_diag_data = hypre_CTAlloc(double, P_diag_size);
2402 }
2403
2404 if (P_offd_size)
2405 {
2406 P_offd_j = hypre_CTAlloc(int, P_offd_size);
2407 P_offd_data = hypre_CTAlloc(double, P_offd_size);
2408 }
2409
2410 P_diag_i[n_coarse_old] = jj_counter;
2411 P_offd_i[n_coarse_old] = jj_counter_offd;
2412
2413 jj_counter = start_indexing;
2414 jj_counter_offd = start_indexing;
2415
2416 /* Fine to coarse mapping */
2417 if(num_procs > 1)
2418 {
2419 big_insert_new_nodes(comm_pkg, extend_comm_pkg, fine_to_coarse,
2420 full_off_procNodes, my_first_cpt,
2421 fine_to_coarse_offd);
2422 }
2423
2424 for (i = 0; i < n_fine; i++)
2425 P_marker[i] = -1;
2426
2427 for (i = 0; i < full_off_procNodes; i++)
2428 P_marker_offd[i] = -1;
2429
2430 /*-----------------------------------------------------------------------
2431 * Loop over fine grid points.
2432 *-----------------------------------------------------------------------*/
2433 for (ii = 0; ii < n_coarse_old; ii++)
2434 {
2435 jj_begin_row = jj_counter;
2436 jj_begin_row_offd = jj_counter_offd;
2437 i = old_coarse_to_fine[ii];
2438
2439 /*--------------------------------------------------------------------
2440 * If i is a c-point, interpolation is the identity.
2441 *--------------------------------------------------------------------*/
2442
2443 if (CF_marker[i] > 0)
2444 {
2445 P_diag_j[jj_counter] = fine_to_coarse[i];
2446 P_diag_data[jj_counter] = one;
2447 jj_counter++;
2448 }
2449
2450 /*--------------------------------------------------------------------
2451 * If i is an F-point, build interpolation.
2452 *--------------------------------------------------------------------*/
2453
2454 else if (CF_marker[i] == -2)
2455 {
2456 strong_f_marker--;
2457 for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
2458 {
2459 i1 = S_diag_j[jj];
2460
2461 /*--------------------------------------------------------------
2462 * If neighbor i1 is a C-point, set column number in P_diag_j
2463 * and initialize interpolation weight to zero.
2464 *--------------------------------------------------------------*/
2465
2466 if (CF_marker[i1] > 0)
2467 {
2468 if (P_marker[i1] < jj_begin_row)
2469 {
2470 P_marker[i1] = jj_counter;
2471 P_diag_j[jj_counter] = fine_to_coarse[i1];
2472 P_diag_data[jj_counter] = zero;
2473 jj_counter++;
2474 }
2475 }
2476 else if (CF_marker[i1] != -3)
2477 {
2478 P_marker[i1] = strong_f_marker;
2479 for (kk = S_diag_i[i1]; kk < S_diag_i[i1+1]; kk++)
2480 {
2481 k1 = S_diag_j[kk];
2482 if (CF_marker[k1] > 0)
2483 {
2484 if(P_marker[k1] < jj_begin_row)
2485 {
2486 P_marker[k1] = jj_counter;
2487 P_diag_j[jj_counter] = fine_to_coarse[k1];
2488 P_diag_data[jj_counter] = zero;
2489 jj_counter++;
2490 }
2491 }
2492 }
2493 if(num_procs > 1)
2494 {
2495 for (kk = S_offd_i[i1]; kk < S_offd_i[i1+1]; kk++)
2496 {
2497 if(col_offd_S_to_A)
2498 k1 = col_offd_S_to_A[S_offd_j[kk]];
2499 else
2500 k1 = S_offd_j[kk];
2501 if(CF_marker_offd[k1] > 0)
2502 {
2503 if(P_marker_offd[k1] < jj_begin_row_offd)
2504 {
2505 P_marker_offd[k1] = jj_counter_offd;
2506 P_offd_j[jj_counter_offd] = k1;
2507 P_offd_data[jj_counter_offd] = zero;
2508 jj_counter_offd++;
2509 }
2510 }
2511 }
2512 }
2513 }
2514 }
2515
2516 if ( num_procs > 1)
2517 {
2518 for (jj=S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
2519 {
2520 i1 = S_offd_j[jj];
2521 if(col_offd_S_to_A)
2522 i1 = col_offd_S_to_A[i1];
2523 if ( CF_marker_offd[i1] > 0)
2524 {
2525 if(P_marker_offd[i1] < jj_begin_row_offd)
2526 {
2527 P_marker_offd[i1] = jj_counter_offd;
2528 P_offd_j[jj_counter_offd] = i1;
2529 P_offd_data[jj_counter_offd] = zero;
2530 jj_counter_offd++;
2531 }
2532 }
2533 else if (CF_marker_offd[i1] != -3)
2534 {
2535 P_marker_offd[i1] = strong_f_marker;
2536 for(kk = Sop_i[i1]; kk < Sop_i[i1+1]; kk++)
2537 {
2538 loc_col = Sop_j[kk];
2539 if(loc_col > -1)
2540 {
2541 if(CF_marker[loc_col] > 0)
2542 {
2543 if(P_marker[loc_col] < jj_begin_row)
2544 {
2545 P_marker[loc_col] = jj_counter;
2546 P_diag_j[jj_counter] = fine_to_coarse[loc_col];
2547 P_diag_data[jj_counter] = zero;
2548 jj_counter++;
2549 }
2550 }
2551 }
2552 else
2553 {
2554 loc_col = - loc_col - 1;
2555 if(CF_marker_offd[loc_col] > 0)
2556 {
2557 if(P_marker_offd[loc_col] < jj_begin_row_offd)
2558 {
2559 P_marker_offd[loc_col] = jj_counter_offd;
2560 P_offd_j[jj_counter_offd]=loc_col;
2561 P_offd_data[jj_counter_offd] = zero;
2562 jj_counter_offd++;
2563 }
2564 }
2565 }
2566 }
2567 }
2568 }
2569 }
2570
2571 jj_end_row = jj_counter;
2572 jj_end_row_offd = jj_counter_offd;
2573
2574 diagonal = A_diag_data[A_diag_i[i]];
2575
2576 for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
2577 { /* i1 is a c-point and strongly influences i, accumulate
2578 * a_(i,i1) into interpolation weight */
2579 i1 = A_diag_j[jj];
2580 if (P_marker[i1] >= jj_begin_row)
2581 {
2582 P_diag_data[P_marker[i1]] += A_diag_data[jj];
2583 }
2584 else if(P_marker[i1] == strong_f_marker)
2585 {
2586 sum = zero;
2587 sgn = 1;
2588 if(A_diag_data[A_diag_i[i1]] < 0) sgn = -1;
2589 /* Loop over row of A for point i1 and calculate the sum
2590 * of the connections to c-points that strongly incluence i. */
2591 for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++)
2592 {
2593 i2 = A_diag_j[jj1];
2594 if((P_marker[i2] >= jj_begin_row) && (sgn*A_diag_data[jj1]) < 0)
2595 sum += A_diag_data[jj1];
2596 }
2597 if(num_procs > 1)
2598 {
2599 for(jj1 = A_offd_i[i1]; jj1< A_offd_i[i1+1]; jj1++)
2600 {
2601 i2 = A_offd_j[jj1];
2602 if(P_marker_offd[i2] >= jj_begin_row_offd &&
2603 (sgn*A_offd_data[jj1]) < 0)
2604 sum += A_offd_data[jj1];
2605 }
2606 }
2607 if(sum != 0)
2608 {
2609 distribute = A_diag_data[jj]/sum;
2610 /* Loop over row of A for point i1 and do the distribution */
2611 for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++)
2612 {
2613 i2 = A_diag_j[jj1];
2614 if(P_marker[i2] >= jj_begin_row && (sgn*A_diag_data[jj1]) < 0)
2615 P_diag_data[P_marker[i2]] +=
2616 distribute*A_diag_data[jj1];
2617 }
2618 if(num_procs > 1)
2619 {
2620 for(jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
2621 {
2622 i2 = A_offd_j[jj1];
2623 if(P_marker_offd[i2] >= jj_begin_row_offd &&
2624 (sgn*A_offd_data[jj1]) < 0)
2625 P_offd_data[P_marker_offd[i2]] +=
2626 distribute*A_offd_data[jj1];
2627 }
2628 }
2629 }
2630 else
2631 {
2632 diagonal += A_diag_data[jj];
2633 }
2634 }
2635 /* neighbor i1 weakly influences i, accumulate a_(i,i1) into
2636 * diagonal */
2637 else if (CF_marker[i1] != -3)
2638 {
2639 if(num_functions == 1 || dof_func[i] == dof_func[i1])
2640 diagonal += A_diag_data[jj];
2641 }
2642 }
2643 if(num_procs > 1)
2644 {
2645 for(jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2646 {
2647 i1 = A_offd_j[jj];
2648 if(P_marker_offd[i1] >= jj_begin_row_offd)
2649 P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];
2650 else if(P_marker_offd[i1] == strong_f_marker)
2651 {
2652 sum = zero;
2653 sgn = 1;
2654 if(A_ext_data[A_ext_i[i1]] < 0) sgn = -1;
2655 for(jj1 = A_ext_i[i1]+1; jj1 < A_ext_i[i1+1]; jj1++)
2656 {
2657 loc_col = A_ext_j[jj1];
2658 if(loc_col > -1)
2659 { /* diag */
2660 if((P_marker[loc_col] >= jj_begin_row)
2661 && (sgn*A_ext_data[jj1]) < 0)
2662 sum += A_ext_data[jj1];
2663 }
2664 else
2665 {
2666 loc_col = - loc_col - 1;
2667 if(P_marker_offd[loc_col] >= jj_begin_row_offd &&
2668 (sgn*A_ext_data[jj1]) < 0)
2669 sum += A_ext_data[jj1];
2670 }
2671 }
2672 if(sum != 0)
2673 {
2674 distribute = A_offd_data[jj] / sum;
2675 for(jj1 = A_ext_i[i1]+1; jj1 < A_ext_i[i1+1]; jj1++)
2676 {
2677 loc_col = A_ext_j[jj1];
2678 if(loc_col > -1)
2679 { /* diag */
2680 if(P_marker[loc_col] >= jj_begin_row &&
2681 (sgn*A_ext_data[jj1]) < 0)
2682 P_diag_data[P_marker[loc_col]] += distribute*
2683 A_ext_data[jj1];
2684 }
2685 else
2686 {
2687 loc_col = - loc_col - 1;
2688 if(P_marker_offd[loc_col] >= jj_begin_row_offd &&
2689 (sgn*A_ext_data[jj1]) < 0)
2690 P_offd_data[P_marker_offd[loc_col]] += distribute*
2691 A_ext_data[jj1];
2692 }
2693 }
2694 }
2695 else
2696 {
2697 diagonal += A_offd_data[jj];
2698 }
2699 }
2700 else if (CF_marker_offd[i1] != -3)
2701 {
2702 if(num_functions == 1 || dof_func[i] == dof_func_offd[i1])
2703 diagonal += A_offd_data[jj];
2704 }
2705 }
2706 }
2707 for(jj = jj_begin_row; jj < jj_end_row; jj++)
2708 P_diag_data[jj] /= -diagonal;
2709 for(jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
2710 P_offd_data[jj] /= -diagonal;
2711 }
2712 strong_f_marker--;
2713 }
2714
2715 if (debug_flag==4)
2716 {
2717 wall_time = time_getWallclockSeconds() - wall_time;
2718 printf("Proc = %d fill structure %f\n",
2719 my_id, wall_time);
2720 fflush(NULL);
2721 }
2722 /*-----------------------------------------------------------------------
2723 * Allocate arrays.
2724 *-----------------------------------------------------------------------*/
2725
2726 P = hypre_ParCSRMatrixCreate(comm,
2727 total_old_global_cpts,
2728 total_global_cpts,
2729 num_old_cpts_global,
2730 num_cpts_global,
2731 0,
2732 P_diag_i[n_coarse_old],
2733 P_offd_i[n_coarse_old]);
2734
2735 P_diag = hypre_ParCSRMatrixDiag(P);
2736 hypre_CSRMatrixData(P_diag) = P_diag_data;
2737 hypre_CSRMatrixI(P_diag) = P_diag_i;
2738 hypre_CSRMatrixJ(P_diag) = P_diag_j;
2739 P_offd = hypre_ParCSRMatrixOffd(P);
2740 hypre_CSRMatrixData(P_offd) = P_offd_data;
2741 hypre_CSRMatrixI(P_offd) = P_offd_i;
2742 hypre_CSRMatrixJ(P_offd) = P_offd_j;
2743 hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
2744
2745 /* Compress P, removing coefficients smaller than trunc_factor * Max */
2746 if (trunc_factor != 0.0 || max_elmts > 0)
2747 {
2748 hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
2749 P_diag_data = hypre_CSRMatrixData(P_diag);
2750 P_diag_i = hypre_CSRMatrixI(P_diag);
2751 P_diag_j = hypre_CSRMatrixJ(P_diag);
2752 P_offd_data = hypre_CSRMatrixData(P_offd);
2753 P_offd_i = hypre_CSRMatrixI(P_offd);
2754 P_offd_j = hypre_CSRMatrixJ(P_offd);
2755 P_diag_size = P_diag_i[n_coarse_old];
2756 P_offd_size = P_offd_i[n_coarse_old];
2757 }
2758
2759 /* This builds col_map, col_map should be monotone increasing and contain
2760 * global numbers. */
2761 num_cols_P_offd = 0;
2762 if(P_offd_size)
2763 {
2764 num_cols_P_offd = 0;
2765 for (i=0; i < P_offd_size; i++)
2766 {
2767 index = P_offd_j[i];
2768 if (tmp_CF_marker_offd[index] == 1)
2769 {
2770 num_cols_P_offd++;
2771 tmp_CF_marker_offd[index] = 2;
2772 }
2773 }
2774
2775 if (num_cols_P_offd)
2776 {
2777 col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd);
2778 tmp_map_offd = hypre_CTAlloc(int, num_cols_P_offd);
2779 }
2780
2781 index = 0;
2782 for(i = 0; i < num_cols_P_offd; i++)
2783 {
2784 while( tmp_CF_marker_offd[index] != 2) index++;
2785 /*printf("index %d tmp_CF %d\n", index, tmp_CF_marker_offd[index]);*/
2786 col_map_offd_P[i] = fine_to_coarse_offd[index];
2787 tmp_map_offd[i] = index++;
2788 }
2789
2790 hypre_BigQsortbi(col_map_offd_P, tmp_map_offd, 0, num_cols_P_offd-1);
2791
2792 for (i = 0; i < num_cols_P_offd; i++)
2793 tmp_CF_marker_offd[tmp_map_offd[i]] = i;
2794
2795 for(i = 0; i < P_offd_size; i++)
2796 P_offd_j[i] = tmp_CF_marker_offd[P_offd_j[i]];
2797 }
2798
2799 if (num_cols_P_offd)
2800 {
2801 hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
2802 hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
2803 hypre_TFree(tmp_map_offd);
2804 }
2805
2806#ifdef HYPRE_NO_GLOBAL_PARTITION
2807 hypre_NewCommPkgCreate(P);
2808#else
2809 hypre_MatvecCommPkgCreate(P);
2810#endif
2811
2812 for (i=0; i < n_fine; i++)
2813 if (CF_marker[i] < -1) CF_marker[i] = -1;
2814
2815 *P_ptr = P;
2816
2817 /* Deallocate memory */
2818 hypre_TFree(fine_to_coarse);
2819 hypre_TFree(old_coarse_to_fine);
2820 hypre_TFree(P_marker);
2821
2822 if (num_procs > 1)
2823 {
2824 hypre_BigCSRMatrixDestroy(Sop);
2825 hypre_BigCSRMatrixDestroy(A_ext);
2826 hypre_TFree(fine_to_coarse_offd);
2827 hypre_TFree(P_marker_offd);
2828 hypre_TFree(CF_marker_offd);
2829 hypre_TFree(tmp_CF_marker_offd);
2830 if(num_functions > 1)
2831 hypre_TFree(dof_func_offd);
2832 hypre_TFree(found);
2833
2834
2835 hypre_MatvecCommPkgDestroy(extend_comm_pkg);
2836
2837
2838 }
2839
2840 return hypre_error_flag;
2841}
2842
Note: See TracBrowser for help on using the repository browser.