source: CIVL/examples/mpi-omp/AMG2013/parcsr_mv/par_csr_matop.c

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 39.1 KB
Line 
1/*BHEADER**********************************************************************
2 * Copyright (c) 2008, Lawrence Livermore National Security, LLC.
3 * Produced at the Lawrence Livermore National Laboratory.
4 * This file is part of HYPRE. See file COPYRIGHT for details.
5 *
6 * HYPRE is free software; you can redistribute it and/or modify it under the
7 * terms of the GNU Lesser General Public License (as published by the Free
8 * Software Foundation) version 2.1 dated February 1999.
9 *
10 * $Revision: 2.4 $
11 ***********************************************************************EHEADER*/
12
13
14
15
16#include "headers.h"
17
18
19
20
21#include "utilities.h"
22#include "../parcsr_mv/parcsr_mv.h"
23
24/* The following function was formerly part of hypre_ParMatmul
25 but was removed so it can also be used for multiplication of
26 Boolean matrices
27*/
28
29void hypre_ParMatmul_RowSizes
30( int ** C_diag_i, int ** C_offd_i, int ** B_marker,
31 int * A_diag_i, int * A_diag_j, int * A_offd_i, int * A_offd_j,
32 int * B_diag_i, int * B_diag_j, int * B_offd_i, int * B_offd_j,
33 int * B_ext_diag_i, int * B_ext_diag_j,
34 int * B_ext_offd_i, int * B_ext_offd_j, int * map_B_to_C,
35 int *C_diag_size, int *C_offd_size,
36 int num_rows_diag_A, int num_cols_offd_A, int allsquare,
37 int num_cols_diag_B, int num_cols_offd_B, int num_cols_offd_C
38)
39{
40 int i1, i2, i3, jj2, jj3;
41 int jj_count_diag, jj_count_offd, jj_row_begin_diag, jj_row_begin_offd;
42 int start_indexing = 0; /* start indexing for C_data at 0 */
43 /* First pass begins here. Computes sizes of C rows.
44 Arrays computed: C_diag_i, C_offd_i, B_marker
45 Arrays needed: (11, all int*)
46 A_diag_i, A_diag_j, A_offd_i, A_offd_j,
47 B_diag_i, B_diag_j, B_offd_i, B_offd_j,
48 B_ext_i, B_ext_j, col_map_offd_B,
49 col_map_offd_B, B_offd_i, B_offd_j, B_ext_i, B_ext_j,
50 Scalars computed: C_diag_size, C_offd_size
51 Scalars needed:
52 num_rows_diag_A, num_rows_diag_A, num_cols_offd_A, allsquare,
53 first_col_diag_B, n_cols_B, num_cols_offd_B, num_cols_diag_B
54 */
55
56 *C_diag_i = hypre_CTAlloc(int, num_rows_diag_A+1);
57 *C_offd_i = hypre_CTAlloc(int, num_rows_diag_A+1);
58
59 jj_count_diag = start_indexing;
60 jj_count_offd = start_indexing;
61 for (i1 = 0; i1 < num_cols_diag_B+num_cols_offd_C; i1++)
62 {
63 (*B_marker)[i1] = -1;
64 }
65
66 /*-----------------------------------------------------------------------
67 * Loop over rows of A
68 *-----------------------------------------------------------------------*/
69
70 for (i1 = 0; i1 < num_rows_diag_A; i1++)
71 {
72
73 /*--------------------------------------------------------------------
74 * Set marker for diagonal entry, C_{i1,i1} (for square matrices).
75 *--------------------------------------------------------------------*/
76
77 jj_row_begin_diag = jj_count_diag;
78 jj_row_begin_offd = jj_count_offd;
79 if ( allsquare ) {
80 (*B_marker)[i1] = jj_count_diag;
81 jj_count_diag++;
82 }
83
84 /*-----------------------------------------------------------------
85 * Loop over entries in row i1 of A_offd.
86 *-----------------------------------------------------------------*/
87
88 if (num_cols_offd_A)
89 {
90 for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
91 {
92 i2 = A_offd_j[jj2];
93
94 /*-----------------------------------------------------------
95 * Loop over entries in row i2 of B_ext.
96 *-----------------------------------------------------------*/
97
98 for (jj3 = B_ext_offd_i[i2]; jj3 < B_ext_offd_i[i2+1]; jj3++)
99 {
100 i3 = num_cols_diag_B+B_ext_offd_j[jj3];
101
102 /*--------------------------------------------------------
103 * Check B_marker to see that C_{i1,i3} has not already
104 * been accounted for. If it has not, mark it and increment
105 * counter.
106 *--------------------------------------------------------*/
107
108 if ((*B_marker)[i3] < jj_row_begin_offd)
109 {
110 (*B_marker)[i3] = jj_count_offd;
111 jj_count_offd++;
112 }
113 }
114 for (jj3 = B_ext_diag_i[i2]; jj3 < B_ext_diag_i[i2+1]; jj3++)
115 {
116 i3 = B_ext_diag_j[jj3];
117
118 if ((*B_marker)[i3] < jj_row_begin_diag)
119 {
120 (*B_marker)[i3] = jj_count_diag;
121 jj_count_diag++;
122 }
123 }
124 }
125 }
126 /*-----------------------------------------------------------------
127 * Loop over entries in row i1 of A_diag.
128 *-----------------------------------------------------------------*/
129
130 for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
131 {
132 i2 = A_diag_j[jj2];
133
134 /*-----------------------------------------------------------
135 * Loop over entries in row i2 of B_diag.
136 *-----------------------------------------------------------*/
137
138 for (jj3 = B_diag_i[i2]; jj3 < B_diag_i[i2+1]; jj3++)
139 {
140 i3 = B_diag_j[jj3];
141
142 /*--------------------------------------------------------
143 * Check B_marker to see that C_{i1,i3} has not already
144 * been accounted for. If it has not, mark it and increment
145 * counter.
146 *--------------------------------------------------------*/
147
148 if ((*B_marker)[i3] < jj_row_begin_diag)
149 {
150 (*B_marker)[i3] = jj_count_diag;
151 jj_count_diag++;
152 }
153 }
154 /*-----------------------------------------------------------
155 * Loop over entries in row i2 of B_offd.
156 *-----------------------------------------------------------*/
157
158 if (num_cols_offd_B)
159 {
160 for (jj3 = B_offd_i[i2]; jj3 < B_offd_i[i2+1]; jj3++)
161 {
162 i3 = num_cols_diag_B+map_B_to_C[B_offd_j[jj3]];
163
164 /*--------------------------------------------------------
165 * Check B_marker to see that C_{i1,i3} has not already
166 * been accounted for. If it has not, mark it and increment
167 * counter.
168 *--------------------------------------------------------*/
169
170 if ((*B_marker)[i3] < jj_row_begin_offd)
171 {
172 (*B_marker)[i3] = jj_count_offd;
173 jj_count_offd++;
174 }
175 }
176 }
177 }
178
179 /*--------------------------------------------------------------------
180 * Set C_diag_i and C_offd_i for this row.
181 *--------------------------------------------------------------------*/
182
183 (*C_diag_i)[i1] = jj_row_begin_diag;
184 (*C_offd_i)[i1] = jj_row_begin_offd;
185
186 }
187
188 (*C_diag_i)[num_rows_diag_A] = jj_count_diag;
189 (*C_offd_i)[num_rows_diag_A] = jj_count_offd;
190
191 /*-----------------------------------------------------------------------
192 * Allocate C_diag_data and C_diag_j arrays.
193 * Allocate C_offd_data and C_offd_j arrays.
194 *-----------------------------------------------------------------------*/
195
196 *C_diag_size = jj_count_diag;
197 *C_offd_size = jj_count_offd;
198
199 /* End of First Pass */
200}
201
202/*--------------------------------------------------------------------------
203 * hypre_ParMatmul : multiplies two ParCSRMatrices A and B and returns
204 * the product in ParCSRMatrix C
205 * Note that C does not own the partitionings since its row_starts
206 * is owned by A and col_starts by B.
207 *--------------------------------------------------------------------------*/
208
209hypre_ParCSRMatrix *hypre_ParMatmul( hypre_ParCSRMatrix *A,
210 hypre_ParCSRMatrix *B)
211{
212 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
213
214 hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
215
216 double *A_diag_data = hypre_CSRMatrixData(A_diag);
217 int *A_diag_i = hypre_CSRMatrixI(A_diag);
218 int *A_diag_j = hypre_CSRMatrixJ(A_diag);
219
220 hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
221
222 double *A_offd_data = hypre_CSRMatrixData(A_offd);
223 int *A_offd_i = hypre_CSRMatrixI(A_offd);
224 int *A_offd_j = hypre_CSRMatrixJ(A_offd);
225
226 HYPRE_BigInt *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
227 int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
228 int num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
229 int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
230
231 hypre_CSRMatrix *B_diag = hypre_ParCSRMatrixDiag(B);
232
233 double *B_diag_data = hypre_CSRMatrixData(B_diag);
234 int *B_diag_i = hypre_CSRMatrixI(B_diag);
235 int *B_diag_j = hypre_CSRMatrixJ(B_diag);
236
237 hypre_CSRMatrix *B_offd = hypre_ParCSRMatrixOffd(B);
238 HYPRE_BigInt *col_map_offd_B = hypre_ParCSRMatrixColMapOffd(B);
239
240 double *B_offd_data = hypre_CSRMatrixData(B_offd);
241 int *B_offd_i = hypre_CSRMatrixI(B_offd);
242 int *B_offd_j = hypre_CSRMatrixJ(B_offd);
243
244 HYPRE_BigInt first_col_diag_B = hypre_ParCSRMatrixFirstColDiag(B);
245 HYPRE_BigInt last_col_diag_B;
246 HYPRE_BigInt *col_starts_B = hypre_ParCSRMatrixColStarts(B);
247 int num_rows_diag_B = hypre_CSRMatrixNumRows(B_diag);
248 int num_cols_diag_B = hypre_CSRMatrixNumCols(B_diag);
249 int num_cols_offd_B = hypre_CSRMatrixNumCols(B_offd);
250
251 hypre_ParCSRMatrix *C;
252 HYPRE_BigInt *col_map_offd_C;
253 int *map_B_to_C;
254
255 hypre_CSRMatrix *C_diag;
256
257 double *C_diag_data;
258 int *C_diag_i;
259 int *C_diag_j;
260
261 hypre_CSRMatrix *C_offd;
262
263 double *C_offd_data=NULL;
264 int *C_offd_i=NULL;
265 int *C_offd_j=NULL;
266
267 int C_diag_size;
268 int C_offd_size;
269 int num_cols_offd_C = 0;
270
271 hypre_BigCSRMatrix *Bs_ext;
272
273 double *Bs_ext_data;
274 int *Bs_ext_i;
275 HYPRE_BigInt *Bs_ext_j;
276 HYPRE_BigInt *temp;
277
278 double *B_ext_diag_data;
279 int *B_ext_diag_i;
280 int *B_ext_diag_j;
281 int B_ext_diag_size;
282
283 double *B_ext_offd_data;
284 int *B_ext_offd_i;
285 int *B_ext_offd_j;
286 int B_ext_offd_size;
287
288 int *B_marker;
289
290 int i, j;
291 int i1, i2, i3;
292 int jj2, jj3;
293
294 int jj_count_diag, jj_count_offd;
295 int jj_row_begin_diag, jj_row_begin_offd;
296 int start_indexing = 0; /* start indexing for C_data at 0 */
297 HYPRE_BigInt n_rows_A, n_cols_A;
298 HYPRE_BigInt n_rows_B, n_cols_B;
299 HYPRE_BigInt value;
300 int allsquare = 0;
301 int cnt, cnt_offd, cnt_diag;
302 int num_procs;
303
304 double a_entry;
305 double a_b_product;
306
307 double zero = 0.0;
308
309 n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A);
310 n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);
311 n_rows_B = hypre_ParCSRMatrixGlobalNumRows(B);
312 n_cols_B = hypre_ParCSRMatrixGlobalNumCols(B);
313
314 if (n_cols_A != n_rows_B || num_cols_diag_A != num_rows_diag_B)
315 {
316 hypre_error_in_arg(1);
317 printf(" Error! Incompatible matrix dimensions!\n");
318 return NULL;
319 }
320 if ( num_rows_diag_A==num_cols_diag_B) allsquare = 1;
321
322 /*-----------------------------------------------------------------------
323 * Extract B_ext, i.e. portion of B that is stored on neighbor procs
324 * and needed locally for matrix matrix product
325 *-----------------------------------------------------------------------*/
326
327 MPI_Comm_size(comm, &num_procs);
328
329 if (num_procs > 1)
330 {
331 /*---------------------------------------------------------------------
332 * If there exists no CommPkg for A, a CommPkg is generated using
333 * equally load balanced partitionings within
334 * hypre_ParCSRMatrixExtractBExt
335 *--------------------------------------------------------------------*/
336 Bs_ext = hypre_ParCSRMatrixExtractBigExt(B,A,1);
337 Bs_ext_data = hypre_BigCSRMatrixData(Bs_ext);
338 Bs_ext_i = hypre_BigCSRMatrixI(Bs_ext);
339 Bs_ext_j = hypre_BigCSRMatrixJ(Bs_ext);
340 }
341 B_ext_diag_i = hypre_CTAlloc(int, num_cols_offd_A+1);
342 B_ext_offd_i = hypre_CTAlloc(int, num_cols_offd_A+1);
343 B_ext_diag_size = 0;
344 B_ext_offd_size = 0;
345 last_col_diag_B = first_col_diag_B + (HYPRE_BigInt) num_cols_diag_B -1;
346
347 for (i=0; i < num_cols_offd_A; i++)
348 {
349 for (j=Bs_ext_i[i]; j < Bs_ext_i[i+1]; j++)
350 if (Bs_ext_j[j] < first_col_diag_B || Bs_ext_j[j] > last_col_diag_B)
351 B_ext_offd_size++;
352 else
353 B_ext_diag_size++;
354 B_ext_diag_i[i+1] = B_ext_diag_size;
355 B_ext_offd_i[i+1] = B_ext_offd_size;
356 }
357
358 if (B_ext_diag_size)
359 {
360 B_ext_diag_j = hypre_CTAlloc(int, B_ext_diag_size);
361 B_ext_diag_data = hypre_CTAlloc(double, B_ext_diag_size);
362 }
363 if (B_ext_offd_size)
364 {
365 B_ext_offd_j = hypre_CTAlloc(int, B_ext_offd_size);
366 B_ext_offd_data = hypre_CTAlloc(double, B_ext_offd_size);
367 }
368
369 cnt_offd = 0;
370 cnt_diag = 0;
371 for (i=0; i < num_cols_offd_A; i++)
372 {
373 for (j=Bs_ext_i[i]; j < Bs_ext_i[i+1]; j++)
374 if (Bs_ext_j[j] < first_col_diag_B || Bs_ext_j[j] > last_col_diag_B)
375 {
376 Bs_ext_j[cnt_offd] = Bs_ext_j[j];
377 B_ext_offd_data[cnt_offd++] = Bs_ext_data[j];
378 }
379 else
380 {
381 B_ext_diag_j[cnt_diag] = (int) (Bs_ext_j[j] - first_col_diag_B);
382 B_ext_diag_data[cnt_diag++] = Bs_ext_data[j];
383 }
384 }
385
386 cnt = 0;
387 if (B_ext_offd_size || num_cols_offd_B)
388 {
389 temp = hypre_CTAlloc(HYPRE_BigInt, B_ext_offd_size+num_cols_offd_B);
390 for (i=0; i < B_ext_offd_size; i++)
391 temp[i] = Bs_ext_j[i];
392 cnt = B_ext_offd_size;
393 for (i=0; i < num_cols_offd_B; i++)
394 temp[cnt++] = col_map_offd_B[i];
395 }
396 if (cnt)
397 {
398 hypre_BigQsort0(temp, 0, cnt-1);
399
400 num_cols_offd_C = 1;
401 value = temp[0];
402 for (i=1; i < cnt; i++)
403 {
404 if (temp[i] > value)
405 {
406 value = temp[i];
407 temp[num_cols_offd_C++] = value;
408 }
409 }
410 }
411
412 if (num_cols_offd_C)
413 col_map_offd_C = hypre_CTAlloc(HYPRE_BigInt,num_cols_offd_C);
414
415 for (i=0; i < num_cols_offd_C; i++)
416 col_map_offd_C[i] = temp[i];
417
418 if (B_ext_offd_size || num_cols_offd_B)
419 hypre_TFree(temp);
420
421 for (i=0 ; i < B_ext_offd_size; i++)
422 B_ext_offd_j[i] = hypre_BigBinarySearch(col_map_offd_C,
423 Bs_ext_j[i],
424 num_cols_offd_C);
425 if (num_procs > 1)
426 {
427 hypre_BigCSRMatrixDestroy(Bs_ext);
428 Bs_ext = NULL;
429 }
430
431 if (num_cols_offd_B)
432 {
433 map_B_to_C = hypre_CTAlloc(int,num_cols_offd_B);
434
435 cnt = 0;
436 for (i=0; i < num_cols_offd_C; i++)
437 if (col_map_offd_C[i] == col_map_offd_B[cnt])
438 {
439 map_B_to_C[cnt++] = i;
440 if (cnt == num_cols_offd_B) break;
441 }
442 }
443
444 /*-----------------------------------------------------------------------
445 * Allocate marker array.
446 *-----------------------------------------------------------------------*/
447
448 B_marker = hypre_CTAlloc(int, num_cols_diag_B+num_cols_offd_C);
449
450 /*-----------------------------------------------------------------------
451 * Initialize some stuff.
452 *-----------------------------------------------------------------------*/
453
454 for (i1 = 0; i1 < num_cols_diag_B+num_cols_offd_C; i1++)
455 {
456 B_marker[i1] = -1;
457 }
458
459
460 hypre_ParMatmul_RowSizes(
461 &C_diag_i, &C_offd_i, &B_marker,
462 A_diag_i, A_diag_j, A_offd_i, A_offd_j,
463 B_diag_i, B_diag_j, B_offd_i, B_offd_j,
464 B_ext_diag_i, B_ext_diag_j, B_ext_offd_i, B_ext_offd_j,
465 map_B_to_C,
466 &C_diag_size, &C_offd_size,
467 num_rows_diag_A, num_cols_offd_A, allsquare,
468 num_cols_diag_B, num_cols_offd_B,
469 num_cols_offd_C
470 );
471
472
473 /*-----------------------------------------------------------------------
474 * Allocate C_diag_data and C_diag_j arrays.
475 * Allocate C_offd_data and C_offd_j arrays.
476 *-----------------------------------------------------------------------*/
477
478 last_col_diag_B = first_col_diag_B + (HYPRE_BigInt) num_cols_diag_B - 1;
479 C_diag_data = hypre_CTAlloc(double, C_diag_size);
480 C_diag_j = hypre_CTAlloc(int, C_diag_size);
481 if (C_offd_size)
482 {
483 C_offd_data = hypre_CTAlloc(double, C_offd_size);
484 C_offd_j = hypre_CTAlloc(int, C_offd_size);
485 }
486
487
488 /*-----------------------------------------------------------------------
489 * Second Pass: Fill in C_diag_data and C_diag_j.
490 * Second Pass: Fill in C_offd_data and C_offd_j.
491 *-----------------------------------------------------------------------*/
492
493 /*-----------------------------------------------------------------------
494 * Initialize some stuff.
495 *-----------------------------------------------------------------------*/
496
497 jj_count_diag = start_indexing;
498 jj_count_offd = start_indexing;
499 for (i1 = 0; i1 < num_cols_diag_B+num_cols_offd_C; i1++)
500 {
501 B_marker[i1] = -1;
502 }
503
504 /*-----------------------------------------------------------------------
505 * Loop over interior c-points.
506 *-----------------------------------------------------------------------*/
507
508 for (i1 = 0; i1 < num_rows_diag_A; i1++)
509 {
510
511 /*--------------------------------------------------------------------
512 * Create diagonal entry, C_{i1,i1}
513 *--------------------------------------------------------------------*/
514
515 jj_row_begin_diag = jj_count_diag;
516 jj_row_begin_offd = jj_count_offd;
517 if ( allsquare ) {
518 B_marker[i1] = jj_count_diag;
519 C_diag_data[jj_count_diag] = zero;
520 C_diag_j[jj_count_diag] = i1;
521 jj_count_diag++;
522 }
523
524 /*-----------------------------------------------------------------
525 * Loop over entries in row i1 of A_offd.
526 *-----------------------------------------------------------------*/
527
528 if (num_cols_offd_A)
529 {
530 for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
531 {
532 i2 = A_offd_j[jj2];
533 a_entry = A_offd_data[jj2];
534
535 /*-----------------------------------------------------------
536 * Loop over entries in row i2 of B_ext.
537 *-----------------------------------------------------------*/
538
539 for (jj3 = B_ext_offd_i[i2]; jj3 < B_ext_offd_i[i2+1]; jj3++)
540 {
541 i3 = num_cols_diag_B+B_ext_offd_j[jj3];
542 a_b_product = a_entry * B_ext_offd_data[jj3];
543
544 /*--------------------------------------------------------
545 * Check B_marker to see that C_{i1,i3} has not already
546 * been accounted for. If it has not, create a new entry.
547 * If it has, add new contribution.
548 *--------------------------------------------------------*/
549 if (B_marker[i3] < jj_row_begin_offd)
550 {
551 B_marker[i3] = jj_count_offd;
552 C_offd_data[jj_count_offd] = a_b_product;
553 C_offd_j[jj_count_offd] = i3-num_cols_diag_B;
554 jj_count_offd++;
555 }
556 else
557 C_offd_data[B_marker[i3]] += a_b_product;
558 }
559 for (jj3 = B_ext_diag_i[i2]; jj3 < B_ext_diag_i[i2+1]; jj3++)
560 {
561 i3 = B_ext_diag_j[jj3];
562 a_b_product = a_entry * B_ext_diag_data[jj3];
563 if (B_marker[i3] < jj_row_begin_diag)
564 {
565 B_marker[i3] = jj_count_diag;
566 C_diag_data[jj_count_diag] = a_b_product;
567 C_diag_j[jj_count_diag] = i3;
568 jj_count_diag++;
569 }
570 else
571 C_diag_data[B_marker[i3]] += a_b_product;
572 }
573 }
574 }
575
576 /*-----------------------------------------------------------------
577 * Loop over entries in row i1 of A_diag.
578 *-----------------------------------------------------------------*/
579
580 for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
581 {
582 i2 = A_diag_j[jj2];
583 a_entry = A_diag_data[jj2];
584
585 /*-----------------------------------------------------------
586 * Loop over entries in row i2 of B_diag.
587 *-----------------------------------------------------------*/
588
589 for (jj3 = B_diag_i[i2]; jj3 < B_diag_i[i2+1]; jj3++)
590 {
591 i3 = B_diag_j[jj3];
592 a_b_product = a_entry * B_diag_data[jj3];
593
594 /*--------------------------------------------------------
595 * Check B_marker to see that C_{i1,i3} has not already
596 * been accounted for. If it has not, create a new entry.
597 * If it has, add new contribution.
598 *--------------------------------------------------------*/
599
600 if (B_marker[i3] < jj_row_begin_diag)
601 {
602 B_marker[i3] = jj_count_diag;
603 C_diag_data[jj_count_diag] = a_b_product;
604 C_diag_j[jj_count_diag] = i3;
605 jj_count_diag++;
606 }
607 else
608 {
609 C_diag_data[B_marker[i3]] += a_b_product;
610 }
611 }
612 if (num_cols_offd_B)
613 {
614 for (jj3 = B_offd_i[i2]; jj3 < B_offd_i[i2+1]; jj3++)
615 {
616 i3 = num_cols_diag_B+map_B_to_C[B_offd_j[jj3]];
617 a_b_product = a_entry * B_offd_data[jj3];
618
619 /*--------------------------------------------------------
620 * Check B_marker to see that C_{i1,i3} has not already
621 * been accounted for. If it has not, create a new entry.
622 * If it has, add new contribution.
623 *--------------------------------------------------------*/
624
625 if (B_marker[i3] < jj_row_begin_offd)
626 {
627 B_marker[i3] = jj_count_offd;
628 C_offd_data[jj_count_offd] = a_b_product;
629 C_offd_j[jj_count_offd] = i3-num_cols_diag_B;
630 jj_count_offd++;
631 }
632 else
633 {
634 C_offd_data[B_marker[i3]] += a_b_product;
635 }
636 }
637 }
638 }
639 }
640
641 C = hypre_ParCSRMatrixCreate(comm, n_rows_A, n_cols_B, row_starts_A,
642 col_starts_B, num_cols_offd_C, C_diag_size, C_offd_size);
643
644/* Note that C does not own the partitionings */
645 hypre_ParCSRMatrixSetRowStartsOwner(C,0);
646 hypre_ParCSRMatrixSetColStartsOwner(C,0);
647
648 C_diag = hypre_ParCSRMatrixDiag(C);
649 hypre_CSRMatrixData(C_diag) = C_diag_data;
650 hypre_CSRMatrixI(C_diag) = C_diag_i;
651 hypre_CSRMatrixJ(C_diag) = C_diag_j;
652
653 C_offd = hypre_ParCSRMatrixOffd(C);
654 hypre_CSRMatrixI(C_offd) = C_offd_i;
655 hypre_ParCSRMatrixOffd(C) = C_offd;
656
657 if (num_cols_offd_C)
658 {
659 hypre_CSRMatrixData(C_offd) = C_offd_data;
660 hypre_CSRMatrixJ(C_offd) = C_offd_j;
661 hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;
662
663 }
664
665 /*-----------------------------------------------------------------------
666 * Free various arrays
667 *-----------------------------------------------------------------------*/
668
669 hypre_TFree(B_marker);
670 hypre_TFree(B_ext_diag_i);
671 if (B_ext_diag_size)
672 {
673 hypre_TFree(B_ext_diag_j);
674 hypre_TFree(B_ext_diag_data);
675 }
676 hypre_TFree(B_ext_offd_i);
677 if (B_ext_offd_size)
678 {
679 hypre_TFree(B_ext_offd_j);
680 hypre_TFree(B_ext_offd_data);
681 }
682 if (num_cols_offd_B) hypre_TFree(map_B_to_C);
683
684 return C;
685
686}
687
688/*--------------------------------------------------------------------------
689 * hypre_ParCSRMatrixExtractConvBExt : extracts rows from B which are located on
690 * other processors and needed for multiplication with A locally. The rows
691 * are returned as CSRMatrix. Column indices are converted to local
692 * indices with columns belonging to immediate neighbors marked as negative;
693 * indices belonging to points on neighbor processors that are more than
694 * distance one away are eliminated.
695 *--------------------------------------------------------------------------*/
696
697hypre_CSRMatrix *
698hypre_ParCSRMatrixExtractConvBExt( hypre_ParCSRMatrix *B, hypre_ParCSRMatrix *A, int data)
699{
700 MPI_Comm comm = hypre_ParCSRMatrixComm(B);
701 HYPRE_BigInt first_col_diag = hypre_ParCSRMatrixFirstColDiag(B);
702 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(B);
703
704 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
705 int num_recvs;
706 int *recv_vec_starts;
707 int num_sends;
708 int *send_map_starts;
709 int *send_map_elmts;
710
711 hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(B);
712
713 int *diag_i = hypre_CSRMatrixI(diag);
714 int *diag_j = hypre_CSRMatrixJ(diag);
715 double *diag_data = hypre_CSRMatrixData(diag);
716
717 hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(B);
718
719 int *offd_i = hypre_CSRMatrixI(offd);
720 int *offd_j = hypre_CSRMatrixJ(offd);
721 double *offd_data = hypre_CSRMatrixData(offd);
722 int num_cols_offd = hypre_CSRMatrixNumCols(offd);
723
724 int num_cols_B, num_nonzeros;
725 int num_rows_B_ext;
726
727 hypre_CSRMatrix *B_ext;
728
729 int *B_ext_i;
730 HYPRE_BigInt *B_tmp_j;
731 int *B_ext_j;
732 double *B_ext_data;
733
734 hypre_ParCSRCommHandle *comm_handle;
735 hypre_ParCSRCommPkg *tmp_comm_pkg;
736
737 int *B_int_i;
738 HYPRE_BigInt *B_int_j;
739 double * B_int_data;
740
741 int num_procs, my_id;
742 int *jdata_recv_vec_starts;
743 int *jdata_send_map_starts;
744
745 int i, j, k, counter, cnt;
746 int start_index;
747 int j_cnt, j_cnt_rm, jrow;
748 HYPRE_BigInt big_k;
749 HYPRE_BigInt col_1, col_n;
750
751 MPI_Comm_size(comm,&num_procs);
752 MPI_Comm_rank(comm,&my_id);
753
754 num_cols_B = hypre_CSRMatrixNumRows(diag);
755 col_1 = first_col_diag;
756 col_n = first_col_diag + (HYPRE_BigInt) num_cols_B;
757 /*---------------------------------------------------------------------
758 * If there exists no CommPkg for A, a CommPkg is generated using
759 * equally load balanced partitionings
760 *--------------------------------------------------------------------*/
761 if (!hypre_ParCSRMatrixCommPkg(A))
762 {
763 hypre_MatvecCommPkgCreate(A);
764 }
765
766 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
767 num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
768 recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
769 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
770 send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
771 send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
772
773 num_cols_B = hypre_ParCSRMatrixGlobalNumCols(B);
774 num_rows_B_ext = recv_vec_starts[num_recvs];
775
776 num_rows_B_ext = recv_vec_starts[num_recvs];
777 if ( num_rows_B_ext < 0 )
778 { /* no B_ext, no communication */
779 B_ext_i = NULL;
780 B_ext_j = NULL;
781 if ( data ) B_ext_data = NULL;
782 num_nonzeros = 0;
783 return 0;
784 };
785 B_int_i = hypre_CTAlloc(int, send_map_starts[num_sends]+1);
786 B_ext_i = hypre_CTAlloc(int, num_rows_B_ext+1);
787
788/*--------------------------------------------------------------------------
789 * generate B_int_i through adding number of row-elements of offd and diag
790 * for corresponding rows. B_int_i[j+1] contains the number of elements of
791 * a row j (which is determined through send_map_elmts)
792 *--------------------------------------------------------------------------*/
793 B_int_i[0] = 0;
794 j_cnt = 0;
795 j_cnt_rm = 0;
796 num_nonzeros = 0;
797 for (i=0; i < num_sends; i++)
798 {
799 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
800 {
801 jrow = send_map_elmts[j];
802 B_int_i[++j_cnt] = offd_i[jrow+1] - offd_i[jrow]
803 + diag_i[jrow+1] - diag_i[jrow];
804 num_nonzeros += B_int_i[j_cnt];
805 }
806 }
807
808/*--------------------------------------------------------------------------
809 * initialize communication
810 *--------------------------------------------------------------------------*/
811 comm_handle = hypre_ParCSRCommHandleCreate(11,comm_pkg,
812 &B_int_i[1],&(B_ext_i[1]) );
813
814 B_int_j = hypre_CTAlloc(HYPRE_BigInt, num_nonzeros);
815 if (data) B_int_data = hypre_CTAlloc(double, num_nonzeros);
816
817 jdata_send_map_starts = hypre_CTAlloc(int, num_sends+1);
818 jdata_recv_vec_starts = hypre_CTAlloc(int, num_recvs+1);
819 start_index = B_int_i[0];
820 jdata_send_map_starts[0] = start_index;
821 counter = 0;
822 for (i=0; i < num_sends; i++)
823 {
824 num_nonzeros = counter;
825 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
826 {
827 jrow = send_map_elmts[j];
828 for (k=diag_i[jrow]; k < diag_i[jrow+1]; k++)
829 {
830 B_int_j[counter] = (HYPRE_BigInt) diag_j[k]+first_col_diag;
831 if (data) B_int_data[counter] = diag_data[k];
832 counter++;
833 }
834 for (k=offd_i[jrow]; k < offd_i[jrow+1]; k++)
835 {
836 B_int_j[counter] = col_map_offd[offd_j[k]];
837 if (data) B_int_data[counter] = offd_data[k];
838 counter++;
839 }
840
841 }
842 num_nonzeros = counter - num_nonzeros;
843 start_index += num_nonzeros;
844 jdata_send_map_starts[i+1] = start_index;
845 }
846
847 tmp_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
848 hypre_ParCSRCommPkgComm(tmp_comm_pkg) = comm;
849 hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_sends;
850 hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_recvs;
851 hypre_ParCSRCommPkgSendProcs(tmp_comm_pkg) = hypre_ParCSRCommPkgSendProcs(comm_pkg);
852 hypre_ParCSRCommPkgRecvProcs(tmp_comm_pkg) = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
853 hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) = jdata_send_map_starts;
854
855 hypre_ParCSRCommHandleDestroy(comm_handle);
856 comm_handle = NULL;
857
858/*--------------------------------------------------------------------------
859 * after communication exchange B_ext_i[j+1] contains the number of elements
860 * of a row j !
861 * evaluate B_ext_i and compute *num_nonzeros for B_ext
862 *--------------------------------------------------------------------------*/
863
864 for (i=0; i < num_recvs; i++)
865 for (j = recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
866 B_ext_i[j+1] += B_ext_i[j];
867
868 num_nonzeros = B_ext_i[num_rows_B_ext];
869
870 B_tmp_j = hypre_CTAlloc(HYPRE_BigInt, num_nonzeros);
871 B_ext_j = hypre_CTAlloc(int, num_nonzeros);
872
873 if (data)
874 B_ext_data = hypre_CTAlloc(double, num_nonzeros);
875
876 for (i=0; i < num_recvs; i++)
877 {
878 start_index = B_ext_i[recv_vec_starts[i]];
879 num_nonzeros = B_ext_i[recv_vec_starts[i+1]]-start_index;
880 jdata_recv_vec_starts[i+1] = B_ext_i[recv_vec_starts[i+1]];
881 }
882
883
884 hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) = jdata_recv_vec_starts;
885
886 comm_handle = hypre_ParCSRCommHandleCreate(21,tmp_comm_pkg,B_int_j,B_tmp_j);
887 hypre_ParCSRCommHandleDestroy(comm_handle);
888 comm_handle = NULL;
889
890 if (data)
891 {
892 comm_handle = hypre_ParCSRCommHandleCreate(1,tmp_comm_pkg,B_int_data,
893 B_ext_data);
894 hypre_ParCSRCommHandleDestroy(comm_handle);
895 comm_handle = NULL;
896 }
897
898 hypre_TFree(jdata_send_map_starts);
899 hypre_TFree(jdata_recv_vec_starts);
900 hypre_TFree(tmp_comm_pkg);
901 hypre_TFree(B_int_i);
902 hypre_TFree(B_int_j);
903 if (data) hypre_TFree(B_int_data);
904
905 cnt = 0;
906 for (i=0; i < num_rows_B_ext; i++)
907 {
908 for (j=B_ext_i[i]; j < B_ext_i[i+1]; j++)
909 {
910 big_k = B_tmp_j[j];
911 if (big_k >= col_1 && big_k < col_n)
912 {
913 if (data) B_ext_data[cnt] = B_ext_data[j];
914 B_ext_j[cnt++] = (int)(big_k - col_1);
915 }
916 else
917 {
918 k = hypre_BigBinarySearch(col_map_offd,big_k,num_cols_offd);
919 if (k > -1)
920 {
921 if (data) B_ext_data[cnt] = B_ext_data[j];
922 B_ext_j[cnt++] = -k-1;
923 }
924 }
925 }
926 B_ext_i[i] = cnt;
927 }
928 for (i = num_rows_B_ext; i > 0; i--)
929 B_ext_i[i] = B_ext_i[i-1];
930 if (num_procs > 1) B_ext_i[0] = 0;
931
932
933 B_ext = hypre_CSRMatrixCreate(num_rows_B_ext,num_cols_B,num_nonzeros);
934 hypre_CSRMatrixI(B_ext) = B_ext_i;
935 hypre_CSRMatrixJ(B_ext) = B_ext_j;
936 if (data) hypre_CSRMatrixData(B_ext) = B_ext_data;
937
938 return B_ext;
939}
940
941/*--------------------------------------------------------------------------
942 * hypre_ParCSRMatrixExtractBigExt : extracts rows from B which are located on
943 * other processors and needed for multiplication with A locally. The rows
944 * are returned as BigCSRMatrix.
945 *--------------------------------------------------------------------------*/
946
947hypre_BigCSRMatrix *
948hypre_ParCSRMatrixExtractBigExt( hypre_ParCSRMatrix *B, hypre_ParCSRMatrix *A, int data)
949{
950 MPI_Comm comm = hypre_ParCSRMatrixComm(B);
951 HYPRE_BigInt first_col_diag = hypre_ParCSRMatrixFirstColDiag(B);
952 HYPRE_BigInt *col_map_offd = hypre_ParCSRMatrixColMapOffd(B);
953
954 hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
955 int num_recvs;
956 int *recv_vec_starts;
957 int num_sends;
958 int *send_map_starts;
959 int *send_map_elmts;
960
961 hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(B);
962
963 int *diag_i = hypre_CSRMatrixI(diag);
964 int *diag_j = hypre_CSRMatrixJ(diag);
965 double *diag_data = hypre_CSRMatrixData(diag);
966
967 hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(B);
968
969 int *offd_i = hypre_CSRMatrixI(offd);
970 int *offd_j = hypre_CSRMatrixJ(offd);
971 double *offd_data = hypre_CSRMatrixData(offd);
972
973 int num_cols_B, num_nonzeros;
974
975 hypre_BigCSRMatrix *B_ext;
976
977 int *B_ext_i;
978 HYPRE_BigInt *B_ext_j;
979 double *B_ext_data;
980
981 hypre_ParCSRCommHandle *comm_handle;
982 hypre_ParCSRCommPkg *tmp_comm_pkg;
983
984 int *B_int_i;
985 HYPRE_BigInt *B_int_j;
986 double * B_int_data;
987
988 int num_procs, my_id;
989 int *jdata_recv_vec_starts;
990 int *jdata_send_map_starts;
991
992 int i, j, k, counter;
993 int start_index;
994 int j_cnt, j_cnt_rm, jrow;
995 int num_rows_B_ext;
996
997 MPI_Comm_size(comm,&num_procs);
998 MPI_Comm_rank(comm,&my_id);
999 /*---------------------------------------------------------------------
1000 * If there exists no CommPkg for A, a CommPkg is generated using
1001 * equally load balanced partitionings
1002 *--------------------------------------------------------------------*/
1003 if (!hypre_ParCSRMatrixCommPkg(A))
1004 {
1005 hypre_MatvecCommPkgCreate(A);
1006 }
1007
1008 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1009 num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
1010 recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
1011 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1012 send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
1013 send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
1014
1015 num_cols_B = hypre_ParCSRMatrixGlobalNumCols(B);
1016 num_rows_B_ext = recv_vec_starts[num_recvs];
1017
1018 num_rows_B_ext = recv_vec_starts[num_recvs];
1019 if ( num_rows_B_ext < 0 )
1020 { /* no B_ext, no communication */
1021 B_ext_i = NULL;
1022 B_ext_j = NULL;
1023 if ( data ) B_ext_data = NULL;
1024 num_nonzeros = 0;
1025 return 0;
1026 };
1027 B_int_i = hypre_CTAlloc(int, send_map_starts[num_sends]+1);
1028 B_ext_i = hypre_CTAlloc(int, num_rows_B_ext+1);
1029
1030/*--------------------------------------------------------------------------
1031 * generate B_int_i through adding number of row-elements of offd and diag
1032 * for corresponding rows. B_int_i[j+1] contains the number of elements of
1033 * a row j (which is determined through send_map_elmts)
1034 *--------------------------------------------------------------------------*/
1035 B_int_i[0] = 0;
1036 j_cnt = 0;
1037 j_cnt_rm = 0;
1038 num_nonzeros = 0;
1039 for (i=0; i < num_sends; i++)
1040 {
1041 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
1042 {
1043 jrow = send_map_elmts[j];
1044 B_int_i[++j_cnt] = offd_i[jrow+1] - offd_i[jrow]
1045 + diag_i[jrow+1] - diag_i[jrow];
1046 num_nonzeros += B_int_i[j_cnt];
1047 }
1048 }
1049
1050/*--------------------------------------------------------------------------
1051 * initialize communication
1052 *--------------------------------------------------------------------------*/
1053 comm_handle = hypre_ParCSRCommHandleCreate(11,comm_pkg,
1054 &B_int_i[1],&(B_ext_i[1]) );
1055
1056 B_int_j = hypre_CTAlloc(HYPRE_BigInt, num_nonzeros);
1057 if (data) B_int_data = hypre_CTAlloc(double, num_nonzeros);
1058
1059 jdata_send_map_starts = hypre_CTAlloc(int, num_sends+1);
1060 jdata_recv_vec_starts = hypre_CTAlloc(int, num_recvs+1);
1061 start_index = B_int_i[0];
1062 jdata_send_map_starts[0] = start_index;
1063 counter = 0;
1064 for (i=0; i < num_sends; i++)
1065 {
1066 num_nonzeros = counter;
1067 for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
1068 {
1069 jrow = send_map_elmts[j];
1070 for (k=diag_i[jrow]; k < diag_i[jrow+1]; k++)
1071 {
1072 B_int_j[counter] = (HYPRE_BigInt) diag_j[k]+first_col_diag;
1073 if (data) B_int_data[counter] = diag_data[k];
1074 counter++;
1075 }
1076 for (k=offd_i[jrow]; k < offd_i[jrow+1]; k++)
1077 {
1078 B_int_j[counter] = col_map_offd[offd_j[k]];
1079 if (data) B_int_data[counter] = offd_data[k];
1080 counter++;
1081 }
1082
1083 }
1084 num_nonzeros = counter - num_nonzeros;
1085 start_index += num_nonzeros;
1086 jdata_send_map_starts[i+1] = start_index;
1087 }
1088
1089 tmp_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
1090 hypre_ParCSRCommPkgComm(tmp_comm_pkg) = comm;
1091 hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_sends;
1092 hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_recvs;
1093 hypre_ParCSRCommPkgSendProcs(tmp_comm_pkg) = hypre_ParCSRCommPkgSendProcs(comm_pkg);
1094 hypre_ParCSRCommPkgRecvProcs(tmp_comm_pkg) = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
1095 hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) = jdata_send_map_starts;
1096
1097 hypre_ParCSRCommHandleDestroy(comm_handle);
1098 comm_handle = NULL;
1099
1100/*--------------------------------------------------------------------------
1101 * after communication exchange B_ext_i[j+1] contains the number of elements
1102 * of a row j !
1103 * evaluate B_ext_i and compute *num_nonzeros for B_ext
1104 *--------------------------------------------------------------------------*/
1105
1106 for (i=0; i < num_recvs; i++)
1107 for (j = recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
1108 B_ext_i[j+1] += B_ext_i[j];
1109
1110 num_nonzeros = B_ext_i[num_rows_B_ext];
1111
1112 B_ext_j = hypre_CTAlloc(HYPRE_BigInt, num_nonzeros);
1113
1114 if (data)
1115 B_ext_data = hypre_CTAlloc(double, num_nonzeros);
1116
1117 for (i=0; i < num_recvs; i++)
1118 {
1119 start_index = B_ext_i[recv_vec_starts[i]];
1120 num_nonzeros = B_ext_i[recv_vec_starts[i+1]]-start_index;
1121 jdata_recv_vec_starts[i+1] = B_ext_i[recv_vec_starts[i+1]];
1122 }
1123
1124
1125 hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) = jdata_recv_vec_starts;
1126
1127 comm_handle = hypre_ParCSRCommHandleCreate(21,tmp_comm_pkg,B_int_j,B_ext_j);
1128 hypre_ParCSRCommHandleDestroy(comm_handle);
1129 comm_handle = NULL;
1130
1131 if (data)
1132 {
1133 comm_handle = hypre_ParCSRCommHandleCreate(1,tmp_comm_pkg,B_int_data,
1134 B_ext_data);
1135 hypre_ParCSRCommHandleDestroy(comm_handle);
1136 comm_handle = NULL;
1137 }
1138
1139 hypre_TFree(jdata_send_map_starts);
1140 hypre_TFree(jdata_recv_vec_starts);
1141 hypre_TFree(tmp_comm_pkg);
1142 hypre_TFree(B_int_i);
1143 hypre_TFree(B_int_j);
1144 if (data) hypre_TFree(B_int_data);
1145
1146 B_ext = hypre_BigCSRMatrixCreate(num_rows_B_ext,num_cols_B,num_nonzeros);
1147 hypre_CSRMatrixI(B_ext) = B_ext_i;
1148 hypre_CSRMatrixJ(B_ext) = B_ext_j;
1149 if (data) hypre_CSRMatrixData(B_ext) = B_ext_data;
1150
1151 return B_ext;
1152}
Note: See TracBrowser for help on using the repository browser.