source: CIVL/examples/mpi-omp/AMG2013/test/amg2013.c

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 105.9 KB
Line 
1#include <stdlib.h>
2#include <stdio.h>
3#include <math.h>
4
5#include "utilities.h"
6#include "HYPRE.h"
7#include "HYPRE_parcsr_mv.h"
8#include "HYPRE_sstruct_mv.h"
9#include "HYPRE_parcsr_ls.h"
10#include "krylov.h"
11#include "sstruct_mv.h"
12
13#define DEBUG 0
14
15/*--------------------------------------------------------------------------
16 * Data structures
17 *--------------------------------------------------------------------------*/
18
19char* infile_default = "sstruct.in.MG.FD";
20
21typedef int Index[3];
22
23/*------------------------------------------------------------
24 * ProblemIndex:
25 *
26 * The index has extra information stored in entries 3-8 that
27 * determine how the index gets "mapped" to finer index spaces.
28 *
29 * NOTE: For implementation convenience, the index is "pre-shifted"
30 * according to the values in entries 6,7,8. The following discussion
31 * describes how "un-shifted" indexes are mapped, because that is a
32 * more natural way to think about this mapping problem, and because
33 * that is the convention used in the input file for this code. The
34 * reason that pre-shifting is convenient is because it makes the true
35 * value of the index on the unrefined index space readily available
36 * in entries 0-2, hence, all operations on that unrefined space are
37 * straightforward. Also, the only time that the extra mapping
38 * information is needed is when an index is mapped to a new refined
39 * index space, allowing us to isolate the mapping details to the
40 * routine MapProblemIndex. The only other effected routine is
41 * SScanProblemIndex, which takes the user input and pre-shifts it.
42 *
43 * - Entries 3,4,5 have values of either 0 or 1 that indicate
44 * whether to map an index "to the left" or "to the right".
45 * Here is a 1D diagram:
46 *
47 * -- | * | unrefined index space
48 * |
49 * --> | * | . | * | refined index space (factor = 3)
50 * 0 1
51 *
52 * The '*' index on the unrefined index space gets mapped to one of
53 * the '*' indexes on the refined space based on the value (0 or 1)
54 * of the relevent entry (3,4, or 5). The actual mapping formula is
55 * as follows (with refinement factor, r):
56 *
57 * mapped_index[i] = r*index[i] + (r-1)*index[i+3]
58 *
59 * - Entries 6,7,8 contain "shift" information. The shift is
60 * simply added to the mapped index just described. So, the
61 * complete mapping formula is as follows:
62 *
63 * mapped_index[i] = r*index[i] + (r-1)*index[i+3] + index[i+6]
64 *
65 *------------------------------------------------------------*/
66
67typedef int ProblemIndex[9];
68
69typedef struct
70{
71 /* for GridSetExtents */
72 int nboxes;
73 ProblemIndex *ilowers;
74 ProblemIndex *iuppers;
75 int *boxsizes;
76 int max_boxsize;
77
78 /* for GridSetVariables */
79 int nvars;
80 HYPRE_SStructVariable *vartypes;
81
82 /* for GridAddVariables */
83 int add_nvars;
84 ProblemIndex *add_indexes;
85 HYPRE_SStructVariable *add_vartypes;
86
87 /* for GridSetNeighborBox */
88 int glue_nboxes;
89 ProblemIndex *glue_ilowers;
90 ProblemIndex *glue_iuppers;
91 int *glue_nbor_parts;
92 ProblemIndex *glue_nbor_ilowers;
93 ProblemIndex *glue_nbor_iuppers;
94 Index *glue_index_maps;
95 int *glue_primaries;
96
97 /* for GraphSetStencil */
98 int *stencil_num;
99
100 /* for GraphAddEntries */
101 int graph_nboxes;
102 ProblemIndex *graph_ilowers;
103 ProblemIndex *graph_iuppers;
104 Index *graph_strides;
105 int *graph_vars;
106 int *graph_to_parts;
107 ProblemIndex *graph_to_ilowers;
108 ProblemIndex *graph_to_iuppers;
109 Index *graph_to_strides;
110 int *graph_to_vars;
111 Index *graph_index_maps;
112 Index *graph_index_signs;
113 int *graph_entries;
114 double *graph_values;
115 int *graph_boxsizes;
116
117 /* MatrixSetValues */
118 int matset_nboxes;
119 ProblemIndex *matset_ilowers;
120 ProblemIndex *matset_iuppers;
121 Index *matset_strides;
122 int *matset_vars;
123 int *matset_entries;
124 double *matset_values;
125
126 /* MatrixAddToValues */
127 int matadd_nboxes;
128 ProblemIndex *matadd_ilowers;
129 ProblemIndex *matadd_iuppers;
130 int *matadd_vars;
131 int *matadd_nentries;
132 int **matadd_entries;
133 double **matadd_values;
134
135 Index periodic;
136
137} ProblemPartData;
138
139typedef struct
140{
141 int ndim;
142 int nparts;
143 ProblemPartData *pdata;
144 int max_boxsize;
145
146 int nstencils;
147 int *stencil_sizes;
148 Index **stencil_offsets;
149 int **stencil_vars;
150 double **stencil_values;
151
152 int symmetric_num;
153 int *symmetric_parts;
154 int *symmetric_vars;
155 int *symmetric_to_vars;
156 int *symmetric_booleans;
157
158 int ns_symmetric;
159
160 int npools;
161 int *pools; /* array of size nparts */
162 int ndists; /* number of (pool) distributions */
163 int *dist_npools;
164 int **dist_pools;
165} ProblemData;
166
167/*--------------------------------------------------------------------------
168 * definitions for IJ examples, can be replaced later when sstruct interface
169 * completely scalable
170 *--------------------------------------------------------------------------*/
171
172/*3D Laplace on a cube*/
173int BuildParLaplacian (int argc , char *argv [], double *system_size_ptr, HYPRE_ParCSRMatrix *A_ptr , HYPRE_ParVector *rhs_ptr , HYPRE_ParVector *x_ptr);
174/* Laplace type problem, with 27pt stencil on a cube */
175int BuildParLaplacian27pt (int argc , char *argv [], double *system_size_ptr, HYPRE_ParCSRMatrix *A_ptr , HYPRE_ParVector *rhs_ptr , HYPRE_ParVector *x_ptr);
176/* PDE with jumps on a cube */
177int BuildParVarDifConv (int argc , char *argv [], double *system_size_ptr, HYPRE_ParCSRMatrix *A_ptr , HYPRE_ParVector *rhs_ptr , HYPRE_ParVector *x_ptr);
178/* 2D PDE with rotated anisotropy */
179int BuildParRotate7pt (int argc , char *argv [], double *system_size_ptr , HYPRE_ParCSRMatrix *A_ptr , HYPRE_ParVector *rhs_ptr , HYPRE_ParVector *x_ptr);
180/* 2D PDE with 9pt stencil */
181int BuildParLaplacian9pt (int argc , char *argv [], double *system_size_ptr , HYPRE_ParCSRMatrix *A_ptr , HYPRE_ParVector *rhs_ptr , HYPRE_ParVector *x_ptr);
182
183
184/*--------------------------------------------------------------------------
185 * Compute new box based on variable type
186 *--------------------------------------------------------------------------*/
187
188int
189GetVariableBox( Index cell_ilower,
190 Index cell_iupper,
191 int int_vartype,
192 Index var_ilower,
193 Index var_iupper )
194{
195 int ierr = 0;
196 HYPRE_SStructVariable vartype = (HYPRE_SStructVariable) int_vartype;
197
198 var_ilower[0] = cell_ilower[0];
199 var_ilower[1] = cell_ilower[1];
200 var_ilower[2] = cell_ilower[2];
201 var_iupper[0] = cell_iupper[0];
202 var_iupper[1] = cell_iupper[1];
203 var_iupper[2] = cell_iupper[2];
204
205 switch(vartype)
206 {
207 case HYPRE_SSTRUCT_VARIABLE_CELL:
208 var_ilower[0] -= 0; var_ilower[1] -= 0; var_ilower[2] -= 0;
209 break;
210 case HYPRE_SSTRUCT_VARIABLE_NODE:
211 var_ilower[0] -= 1; var_ilower[1] -= 1; var_ilower[2] -= 1;
212 break;
213 case HYPRE_SSTRUCT_VARIABLE_XFACE:
214 var_ilower[0] -= 1; var_ilower[1] -= 0; var_ilower[2] -= 0;
215 break;
216 case HYPRE_SSTRUCT_VARIABLE_YFACE:
217 var_ilower[0] -= 0; var_ilower[1] -= 1; var_ilower[2] -= 0;
218 break;
219 case HYPRE_SSTRUCT_VARIABLE_ZFACE:
220 var_ilower[0] -= 0; var_ilower[1] -= 0; var_ilower[2] -= 1;
221 break;
222 case HYPRE_SSTRUCT_VARIABLE_XEDGE:
223 var_ilower[0] -= 0; var_ilower[1] -= 1; var_ilower[2] -= 1;
224 break;
225 case HYPRE_SSTRUCT_VARIABLE_YEDGE:
226 var_ilower[0] -= 1; var_ilower[1] -= 0; var_ilower[2] -= 1;
227 break;
228 case HYPRE_SSTRUCT_VARIABLE_ZEDGE:
229 var_ilower[0] -= 1; var_ilower[1] -= 1; var_ilower[2] -= 0;
230 break;
231 case HYPRE_SSTRUCT_VARIABLE_UNDEFINED:
232 break;
233 }
234
235 return ierr;
236}
237
238/*--------------------------------------------------------------------------
239 * Read routines
240 *--------------------------------------------------------------------------*/
241
242int
243SScanIntArray( char *sdata_ptr,
244 char **sdata_ptr_ptr,
245 int size,
246 int *array )
247{
248 int i;
249
250 sdata_ptr += strspn(sdata_ptr, " \t\n[");
251 for (i = 0; i < size; i++)
252 {
253 array[i] = strtol(sdata_ptr, &sdata_ptr, 10);
254 }
255 sdata_ptr += strcspn(sdata_ptr, "]") + 1;
256
257 *sdata_ptr_ptr = sdata_ptr;
258 return 0;
259}
260
261int
262SScanDblArray( char *sdata_ptr,
263 char **sdata_ptr_ptr,
264 int size,
265 double *array )
266{
267 int i;
268
269 sdata_ptr += strspn(sdata_ptr, " \t\n[");
270 for (i = 0; i < size; i++)
271 {
272 array[i] = strtod(sdata_ptr, &sdata_ptr);
273 }
274 sdata_ptr += strcspn(sdata_ptr, "]") + 1;
275
276 *sdata_ptr_ptr = sdata_ptr;
277 return 0;
278}
279
280int
281SScanProblemIndex( char *sdata_ptr,
282 char **sdata_ptr_ptr,
283 int ndim,
284 ProblemIndex index )
285{
286 int i;
287 char sign[3];
288
289 /* initialize index array */
290 for (i = 0; i < 9; i++)
291 {
292 index[i] = 0;
293 }
294
295 sdata_ptr += strspn(sdata_ptr, " \t\n(");
296 switch (ndim)
297 {
298 case 1:
299 sscanf(sdata_ptr, "%d%c",
300 &index[0], &sign[0]);
301 break;
302
303 case 2:
304 sscanf(sdata_ptr, "%d%c%d%c",
305 &index[0], &sign[0], &index[1], &sign[1]);
306 break;
307
308 case 3:
309 sscanf(sdata_ptr, "%d%c%d%c%d%c",
310 &index[0], &sign[0], &index[1], &sign[1], &index[2], &sign[2]);
311 break;
312 }
313 sdata_ptr += strcspn(sdata_ptr, ":)");
314 if ( *sdata_ptr == ':' )
315 {
316 /* read in optional shift */
317 sdata_ptr += 1;
318 switch (ndim)
319 {
320 case 1:
321 sscanf(sdata_ptr, "%d", &index[6]);
322 break;
323
324 case 2:
325 sscanf(sdata_ptr, "%d%d", &index[6], &index[7]);
326 break;
327
328 case 3:
329 sscanf(sdata_ptr, "%d%d%d", &index[6], &index[7], &index[8]);
330 break;
331 }
332 /* pre-shift the index */
333 for (i = 0; i < ndim; i++)
334 {
335 index[i] += index[i+6];
336 }
337 }
338 sdata_ptr += strcspn(sdata_ptr, ")") + 1;
339
340 for (i = 0; i < ndim; i++)
341 {
342 if (sign[i] == '+')
343 {
344 index[i+3] = 1;
345 }
346 }
347
348 *sdata_ptr_ptr = sdata_ptr;
349 return 0;
350}
351
352int
353ReadData( char *filename,
354 ProblemData *data_ptr )
355{
356 ProblemData data;
357 ProblemPartData pdata;
358
359 int myid;
360 FILE *file;
361
362 char *sdata = NULL;
363 char *sdata_line;
364 char *sdata_ptr;
365 int sdata_size;
366 int size;
367 int memchunk = 10000;
368 int maxline = 250;
369
370 char key[250];
371
372 int part, var, s, entry, i, il, iu;
373
374 /*-----------------------------------------------------------
375 * Read data file from process 0, then broadcast
376 *-----------------------------------------------------------*/
377
378 MPI_Comm_rank(MPI_COMM_WORLD, &myid);
379
380 if (myid == 0)
381 {
382 if ((file = fopen(filename, "r")) == NULL)
383 {
384 printf("Error: can't open input file %s\n", filename);
385 exit(1);
386 }
387
388 /* allocate initial space, and read first input line */
389 sdata_size = 0;
390 sdata = hypre_TAlloc(char, memchunk);
391 sdata_line = fgets(sdata, maxline, file);
392
393 s= 0;
394 while (sdata_line != NULL)
395 {
396 sdata_size += strlen(sdata_line) + 1;
397
398 /* allocate more space, if necessary */
399 if ((sdata_size + maxline) > s)
400 {
401 sdata = hypre_TReAlloc(sdata, char, (sdata_size + memchunk));
402 s= sdata_size + memchunk;
403 }
404
405 /* read the next input line */
406 sdata_line = fgets((sdata + sdata_size), maxline, file);
407 }
408 }
409
410 /* broadcast the data size */
411 MPI_Bcast(&sdata_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
412
413 /* broadcast the data */
414 sdata = hypre_TReAlloc(sdata, char, sdata_size);
415 MPI_Bcast(sdata, sdata_size, MPI_CHAR, 0, MPI_COMM_WORLD);
416
417 /*-----------------------------------------------------------
418 * Parse the data and fill ProblemData structure
419 *-----------------------------------------------------------*/
420
421 data.max_boxsize = 0;
422 data.symmetric_num = 0;
423 data.symmetric_parts = NULL;
424 data.symmetric_vars = NULL;
425 data.symmetric_to_vars = NULL;
426 data.symmetric_booleans = NULL;
427 data.ns_symmetric = 0;
428 data.ndists = 0;
429 data.dist_npools = NULL;
430 data.dist_pools = NULL;
431
432 sdata_line = sdata;
433 while (sdata_line < (sdata + sdata_size))
434 {
435 sdata_ptr = sdata_line;
436
437 if ( ( sscanf(sdata_ptr, "%s", key) > 0 ) && ( sdata_ptr[0] != '#' ) )
438 {
439 sdata_ptr += strcspn(sdata_ptr, " \t\n");
440
441 if ( strcmp(key, "GridCreate:") == 0 )
442 {
443 data.ndim = strtol(sdata_ptr, &sdata_ptr, 10);
444 data.nparts = strtol(sdata_ptr, &sdata_ptr, 10);
445 data.pdata = hypre_CTAlloc(ProblemPartData, data.nparts);
446 }
447 else if ( strcmp(key, "GridSetExtents:") == 0 )
448 {
449 part = strtol(sdata_ptr, &sdata_ptr, 10);
450 pdata = data.pdata[part];
451 if ((pdata.nboxes % 10) == 0)
452 {
453 size = pdata.nboxes + 10;
454 pdata.ilowers =
455 hypre_TReAlloc(pdata.ilowers, ProblemIndex, size);
456 pdata.iuppers =
457 hypre_TReAlloc(pdata.iuppers, ProblemIndex, size);
458 pdata.boxsizes =
459 hypre_TReAlloc(pdata.boxsizes, int, size);
460 }
461 SScanProblemIndex(sdata_ptr, &sdata_ptr, data.ndim,
462 pdata.ilowers[pdata.nboxes]);
463 SScanProblemIndex(sdata_ptr, &sdata_ptr, data.ndim,
464 pdata.iuppers[pdata.nboxes]);
465 /* check use of +- in GridSetExtents */
466 il = 1;
467 iu = 1;
468 for (i = 0; i < data.ndim; i++)
469 {
470 il *= pdata.ilowers[pdata.nboxes][i+3];
471 iu *= pdata.iuppers[pdata.nboxes][i+3];
472 }
473 if ( (il != 0) || (iu != 1) )
474 {
475 printf("Error: Invalid use of `+-' in GridSetExtents\n");
476 exit(1);
477 }
478 pdata.boxsizes[pdata.nboxes] = 1;
479 for (i = 0; i < 3; i++)
480 {
481 pdata.boxsizes[pdata.nboxes] *=
482 (pdata.iuppers[pdata.nboxes][i] -
483 pdata.ilowers[pdata.nboxes][i] + 2);
484 }
485 pdata.max_boxsize =
486 hypre_max(pdata.max_boxsize, pdata.boxsizes[pdata.nboxes]);
487 pdata.nboxes++;
488 data.pdata[part] = pdata;
489 }
490 else if ( strcmp(key, "GridSetVariables:") == 0 )
491 {
492 part = strtol(sdata_ptr, &sdata_ptr, 10);
493 pdata = data.pdata[part];
494 pdata.nvars = strtol(sdata_ptr, &sdata_ptr, 10);
495 pdata.vartypes = hypre_CTAlloc(HYPRE_SStructVariable, pdata.nvars);
496 SScanIntArray(sdata_ptr, &sdata_ptr,
497 pdata.nvars, (int *) pdata.vartypes);
498 data.pdata[part] = pdata;
499 }
500 else if ( strcmp(key, "GridAddVariables:") == 0 )
501 {
502 /* TODO */
503 printf("GridAddVariables not yet implemented!\n");
504 exit(1);
505 }
506 else if ( strcmp(key, "GridSetNeighborBox:") == 0 )
507 {
508 part = strtol(sdata_ptr, &sdata_ptr, 10);
509 pdata = data.pdata[part];
510 if ((pdata.glue_nboxes % 10) == 0)
511 {
512 size = pdata.glue_nboxes + 10;
513 pdata.glue_ilowers =
514 hypre_TReAlloc(pdata.glue_ilowers, ProblemIndex, size);
515 pdata.glue_iuppers =
516 hypre_TReAlloc(pdata.glue_iuppers, ProblemIndex, size);
517 pdata.glue_nbor_parts =
518 hypre_TReAlloc(pdata.glue_nbor_parts, int, size);
519 pdata.glue_nbor_ilowers =
520 hypre_TReAlloc(pdata.glue_nbor_ilowers, ProblemIndex, size);
521 pdata.glue_nbor_iuppers =
522 hypre_TReAlloc(pdata.glue_nbor_iuppers, ProblemIndex, size);
523 pdata.glue_index_maps =
524 hypre_TReAlloc(pdata.glue_index_maps, Index, size);
525 pdata.glue_primaries =
526 hypre_TReAlloc(pdata.glue_primaries, int, size);
527 }
528 SScanProblemIndex(sdata_ptr, &sdata_ptr, data.ndim,
529 pdata.glue_ilowers[pdata.glue_nboxes]);
530 SScanProblemIndex(sdata_ptr, &sdata_ptr, data.ndim,
531 pdata.glue_iuppers[pdata.glue_nboxes]);
532 pdata.glue_nbor_parts[pdata.glue_nboxes] =
533 strtol(sdata_ptr, &sdata_ptr, 10);
534 SScanProblemIndex(sdata_ptr, &sdata_ptr, data.ndim,
535 pdata.glue_nbor_ilowers[pdata.glue_nboxes]);
536 SScanProblemIndex(sdata_ptr, &sdata_ptr, data.ndim,
537 pdata.glue_nbor_iuppers[pdata.glue_nboxes]);
538 SScanIntArray(sdata_ptr, &sdata_ptr, data.ndim,
539 pdata.glue_index_maps[pdata.glue_nboxes]);
540 for (i = data.ndim; i < 3; i++)
541 {
542 pdata.glue_index_maps[pdata.glue_nboxes][i] = i;
543 }
544 sdata_ptr += strcspn(sdata_ptr, ":\t\n");
545 if ( *sdata_ptr == ':' )
546 {
547 /* read in optional primary indicator */
548 sdata_ptr += 1;
549 pdata.glue_primaries[pdata.glue_nboxes] =
550 strtol(sdata_ptr, &sdata_ptr, 10);
551 }
552 else
553 {
554 pdata.glue_primaries[pdata.glue_nboxes] = -1;
555 sdata_ptr -= 1;
556 }
557 pdata.glue_nboxes++;
558 data.pdata[part] = pdata;
559 }
560 else if ( strcmp(key, "GridSetPeriodic:") == 0 )
561 {
562 part = strtol(sdata_ptr, &sdata_ptr, 10);
563 pdata = data.pdata[part];
564 SScanIntArray(sdata_ptr, &sdata_ptr, data.ndim, pdata.periodic);
565 for (i = data.ndim; i < 3; i++)
566 {
567 pdata.periodic[i] = 0;
568 }
569 data.pdata[part] = pdata;
570 }
571 else if ( strcmp(key, "StencilCreate:") == 0 )
572 {
573 data.nstencils = strtol(sdata_ptr, &sdata_ptr, 10);
574 data.stencil_sizes = hypre_CTAlloc(int, data.nstencils);
575 data.stencil_offsets = hypre_CTAlloc(Index *, data.nstencils);
576 data.stencil_vars = hypre_CTAlloc(int *, data.nstencils);
577 data.stencil_values = hypre_CTAlloc(double *, data.nstencils);
578 SScanIntArray(sdata_ptr, &sdata_ptr,
579 data.nstencils, data.stencil_sizes);
580 for (s = 0; s < data.nstencils; s++)
581 {
582 data.stencil_offsets[s] =
583 hypre_CTAlloc(Index, data.stencil_sizes[s]);
584 data.stencil_vars[s] =
585 hypre_CTAlloc(int, data.stencil_sizes[s]);
586 data.stencil_values[s] =
587 hypre_CTAlloc(double, data.stencil_sizes[s]);
588 }
589 }
590 else if ( strcmp(key, "StencilSetEntry:") == 0 )
591 {
592 s = strtol(sdata_ptr, &sdata_ptr, 10);
593 entry = strtol(sdata_ptr, &sdata_ptr, 10);
594 SScanIntArray(sdata_ptr, &sdata_ptr,
595 data.ndim, data.stencil_offsets[s][entry]);
596 for (i = data.ndim; i < 3; i++)
597 {
598 data.stencil_offsets[s][entry][i] = 0;
599 }
600 data.stencil_vars[s][entry] = strtol(sdata_ptr, &sdata_ptr, 10);
601 data.stencil_values[s][entry] = strtod(sdata_ptr, &sdata_ptr);
602 }
603 else if ( strcmp(key, "GraphSetStencil:") == 0 )
604 {
605 part = strtol(sdata_ptr, &sdata_ptr, 10);
606 var = strtol(sdata_ptr, &sdata_ptr, 10);
607 s = strtol(sdata_ptr, &sdata_ptr, 10);
608 pdata = data.pdata[part];
609 if (pdata.stencil_num == NULL)
610 {
611 pdata.stencil_num = hypre_CTAlloc(int, pdata.nvars);
612 }
613 pdata.stencil_num[var] = s;
614 data.pdata[part] = pdata;
615 }
616 else if ( strcmp(key, "GraphAddEntries:") == 0 )
617 {
618 part = strtol(sdata_ptr, &sdata_ptr, 10);
619 pdata = data.pdata[part];
620 if ((pdata.graph_nboxes % 10) == 0)
621 {
622 size = pdata.graph_nboxes + 10;
623 pdata.graph_ilowers =
624 hypre_TReAlloc(pdata.graph_ilowers, ProblemIndex, size);
625 pdata.graph_iuppers =
626 hypre_TReAlloc(pdata.graph_iuppers, ProblemIndex, size);
627 pdata.graph_strides =
628 hypre_TReAlloc(pdata.graph_strides, Index, size);
629 pdata.graph_vars =
630 hypre_TReAlloc(pdata.graph_vars, int, size);
631 pdata.graph_to_parts =
632 hypre_TReAlloc(pdata.graph_to_parts, int, size);
633 pdata.graph_to_ilowers =
634 hypre_TReAlloc(pdata.graph_to_ilowers, ProblemIndex, size);
635 pdata.graph_to_iuppers =
636 hypre_TReAlloc(pdata.graph_to_iuppers, ProblemIndex, size);
637 pdata.graph_to_strides =
638 hypre_TReAlloc(pdata.graph_to_strides, Index, size);
639 pdata.graph_to_vars =
640 hypre_TReAlloc(pdata.graph_to_vars, int, size);
641 pdata.graph_index_maps =
642 hypre_TReAlloc(pdata.graph_index_maps, Index, size);
643 pdata.graph_index_signs =
644 hypre_TReAlloc(pdata.graph_index_signs, Index, size);
645 pdata.graph_entries =
646 hypre_TReAlloc(pdata.graph_entries, int, size);
647 pdata.graph_values =
648 hypre_TReAlloc(pdata.graph_values, double, size);
649 pdata.graph_boxsizes =
650 hypre_TReAlloc(pdata.graph_boxsizes, int, size);
651 }
652 SScanProblemIndex(sdata_ptr, &sdata_ptr, data.ndim,
653 pdata.graph_ilowers[pdata.graph_nboxes]);
654 SScanProblemIndex(sdata_ptr, &sdata_ptr, data.ndim,
655 pdata.graph_iuppers[pdata.graph_nboxes]);
656 SScanIntArray(sdata_ptr, &sdata_ptr, data.ndim,
657 pdata.graph_strides[pdata.graph_nboxes]);
658 for (i = data.ndim; i < 3; i++)
659 {
660 pdata.graph_strides[pdata.graph_nboxes][i] = 1;
661 }
662 pdata.graph_vars[pdata.graph_nboxes] =
663 strtol(sdata_ptr, &sdata_ptr, 10);
664 pdata.graph_to_parts[pdata.graph_nboxes] =
665 strtol(sdata_ptr, &sdata_ptr, 10);
666 SScanProblemIndex(sdata_ptr, &sdata_ptr, data.ndim,
667 pdata.graph_to_ilowers[pdata.graph_nboxes]);
668 SScanProblemIndex(sdata_ptr, &sdata_ptr, data.ndim,
669 pdata.graph_to_iuppers[pdata.graph_nboxes]);
670 SScanIntArray(sdata_ptr, &sdata_ptr, data.ndim,
671 pdata.graph_to_strides[pdata.graph_nboxes]);
672 for (i = data.ndim; i < 3; i++)
673 {
674 pdata.graph_to_strides[pdata.graph_nboxes][i] = 1;
675 }
676 pdata.graph_to_vars[pdata.graph_nboxes] =
677 strtol(sdata_ptr, &sdata_ptr, 10);
678 SScanIntArray(sdata_ptr, &sdata_ptr, data.ndim,
679 pdata.graph_index_maps[pdata.graph_nboxes]);
680 for (i = data.ndim; i < 3; i++)
681 {
682 pdata.graph_index_maps[pdata.graph_nboxes][i] = i;
683 }
684 for (i = 0; i < 3; i++)
685 {
686 pdata.graph_index_signs[pdata.graph_nboxes][i] = 1;
687 if ( pdata.graph_to_iuppers[pdata.graph_nboxes][i] <
688 pdata.graph_to_ilowers[pdata.graph_nboxes][i] )
689 {
690 pdata.graph_index_signs[pdata.graph_nboxes][i] = -1;
691 }
692 }
693 pdata.graph_entries[pdata.graph_nboxes] =
694 strtol(sdata_ptr, &sdata_ptr, 10);
695 pdata.graph_values[pdata.graph_nboxes] =
696 strtod(sdata_ptr, &sdata_ptr);
697 pdata.graph_boxsizes[pdata.graph_nboxes] = 1;
698 for (i = 0; i < 3; i++)
699 {
700 pdata.graph_boxsizes[pdata.graph_nboxes] *=
701 (pdata.graph_iuppers[pdata.graph_nboxes][i] -
702 pdata.graph_ilowers[pdata.graph_nboxes][i] + 1);
703 }
704 pdata.graph_nboxes++;
705 data.pdata[part] = pdata;
706 }
707 else if ( strcmp(key, "MatrixSetSymmetric:") == 0 )
708 {
709 if ((data.symmetric_num % 10) == 0)
710 {
711 size = data.symmetric_num + 10;
712 data.symmetric_parts =
713 hypre_TReAlloc(data.symmetric_parts, int, size);
714 data.symmetric_vars =
715 hypre_TReAlloc(data.symmetric_vars, int, size);
716 data.symmetric_to_vars =
717 hypre_TReAlloc(data.symmetric_to_vars, int, size);
718 data.symmetric_booleans =
719 hypre_TReAlloc(data.symmetric_booleans, int, size);
720 }
721 data.symmetric_parts[data.symmetric_num] =
722 strtol(sdata_ptr, &sdata_ptr, 10);
723 data.symmetric_vars[data.symmetric_num] =
724 strtol(sdata_ptr, &sdata_ptr, 10);
725 data.symmetric_to_vars[data.symmetric_num] =
726 strtol(sdata_ptr, &sdata_ptr, 10);
727 data.symmetric_booleans[data.symmetric_num] =
728 strtol(sdata_ptr, &sdata_ptr, 10);
729 data.symmetric_num++;
730 }
731 else if ( strcmp(key, "MatrixSetNSSymmetric:") == 0 )
732 {
733 data.ns_symmetric = strtol(sdata_ptr, &sdata_ptr, 10);
734 }
735 else if ( strcmp(key, "MatrixSetValues:") == 0 )
736 {
737 part = strtol(sdata_ptr, &sdata_ptr, 10);
738 pdata = data.pdata[part];
739 if ((pdata.matset_nboxes % 10) == 0)
740 {
741 size = pdata.matset_nboxes + 10;
742 pdata.matset_ilowers =
743 hypre_TReAlloc(pdata.matset_ilowers, ProblemIndex, size);
744 pdata.matset_iuppers =
745 hypre_TReAlloc(pdata.matset_iuppers, ProblemIndex, size);
746 pdata.matset_strides =
747 hypre_TReAlloc(pdata.matset_strides, Index, size);
748 pdata.matset_vars =
749 hypre_TReAlloc(pdata.matset_vars, int, size);
750 pdata.matset_entries =
751 hypre_TReAlloc(pdata.matset_entries, int, size);
752 pdata.matset_values =
753 hypre_TReAlloc(pdata.matset_values, double, size);
754 }
755 SScanProblemIndex(sdata_ptr, &sdata_ptr, data.ndim,
756 pdata.matset_ilowers[pdata.matset_nboxes]);
757 SScanProblemIndex(sdata_ptr, &sdata_ptr, data.ndim,
758 pdata.matset_iuppers[pdata.matset_nboxes]);
759 SScanIntArray(sdata_ptr, &sdata_ptr, data.ndim,
760 pdata.matset_strides[pdata.matset_nboxes]);
761 for (i = data.ndim; i < 3; i++)
762 {
763 pdata.matset_strides[pdata.matset_nboxes][i] = 1;
764 }
765 pdata.matset_vars[pdata.matset_nboxes] =
766 strtol(sdata_ptr, &sdata_ptr, 10);
767 pdata.matset_entries[pdata.matset_nboxes] =
768 strtol(sdata_ptr, &sdata_ptr, 10);
769 pdata.matset_values[pdata.matset_nboxes] =
770 strtod(sdata_ptr, &sdata_ptr);
771 pdata.matset_nboxes++;
772 data.pdata[part] = pdata;
773 }
774 else if ( strcmp(key, "MatrixAddToValues:") == 0 )
775 {
776 part = strtol(sdata_ptr, &sdata_ptr, 10);
777 pdata = data.pdata[part];
778 if ((pdata.matadd_nboxes% 10) == 0)
779 {
780 size = pdata.matadd_nboxes+10;
781 pdata.matadd_ilowers=
782 hypre_TReAlloc(pdata.matadd_ilowers, ProblemIndex, size);
783 pdata.matadd_iuppers=
784 hypre_TReAlloc(pdata.matadd_iuppers, ProblemIndex, size);
785 pdata.matadd_vars=
786 hypre_TReAlloc(pdata.matadd_vars, int, size);
787 pdata.matadd_nentries=
788 hypre_TReAlloc(pdata.matadd_nentries, int, size);
789 pdata.matadd_entries=
790 hypre_TReAlloc(pdata.matadd_entries, int *, size);
791 pdata.matadd_values=
792 hypre_TReAlloc(pdata.matadd_values, double *, size);
793 }
794 SScanProblemIndex(sdata_ptr, &sdata_ptr, data.ndim,
795 pdata.matadd_ilowers[pdata.matadd_nboxes]);
796 SScanProblemIndex(sdata_ptr, &sdata_ptr, data.ndim,
797 pdata.matadd_iuppers[pdata.matadd_nboxes]);
798 pdata.matadd_vars[pdata.matadd_nboxes]=
799 strtol(sdata_ptr, &sdata_ptr, 10);
800 i= strtol(sdata_ptr, &sdata_ptr, 10);
801 pdata.matadd_nentries[pdata.matadd_nboxes]= i;
802 pdata.matadd_entries[pdata.matadd_nboxes] =
803 hypre_TAlloc(int, i);
804 SScanIntArray(sdata_ptr, &sdata_ptr, i,
805 (int*) pdata.matadd_entries[pdata.matadd_nboxes]);
806 pdata.matadd_values[pdata.matadd_nboxes] =
807 hypre_TAlloc(double, i);
808 SScanDblArray(sdata_ptr, &sdata_ptr, i,
809 (double *) pdata.matadd_values[pdata.matadd_nboxes]);
810 pdata.matadd_nboxes++;
811 data.pdata[part] = pdata;
812 }
813 else if ( strcmp(key, "ProcessPoolCreate:") == 0 )
814 {
815 data.ndists++;
816 data.dist_npools= hypre_TReAlloc(data.dist_npools, int, data.ndists);
817 data.dist_pools= hypre_TReAlloc(data.dist_pools, int *, data.ndists);
818 data.dist_npools[data.ndists-1] = strtol(sdata_ptr, &sdata_ptr, 10);
819 data.dist_pools[data.ndists-1] = hypre_CTAlloc(int, data.nparts);
820#if 0
821 data.npools = strtol(sdata_ptr, &sdata_ptr, 10);
822 data.pools = hypre_CTAlloc(int, data.nparts);
823#endif
824 }
825 else if ( strcmp(key, "ProcessPoolSetPart:") == 0 )
826 {
827 i = strtol(sdata_ptr, &sdata_ptr, 10);
828 part = strtol(sdata_ptr, &sdata_ptr, 10);
829 data.dist_pools[data.ndists-1][part] = i;
830 /*data.pools[part] = i; */
831 }
832 }
833
834 sdata_line += strlen(sdata_line) + 1;
835 }
836
837 data.max_boxsize = 0;
838 for (part = 0; part < data.nparts; part++)
839 {
840 data.max_boxsize =
841 hypre_max(data.max_boxsize, data.pdata[part].max_boxsize);
842 }
843
844 hypre_TFree(sdata);
845
846 *data_ptr = data;
847 return 0;
848}
849
850/*--------------------------------------------------------------------------
851 * Distribute routines
852 *--------------------------------------------------------------------------*/
853
854int
855MapProblemIndex( ProblemIndex index,
856 Index m )
857{
858 /* un-shift the index */
859 index[0] -= index[6];
860 index[1] -= index[7];
861 index[2] -= index[8];
862 /* map the index */
863 index[0] = m[0]*index[0] + (m[0]-1)*index[3];
864 index[1] = m[1]*index[1] + (m[1]-1)*index[4];
865 index[2] = m[2]*index[2] + (m[2]-1)*index[5];
866 /* pre-shift the new mapped index */
867 index[0] += index[6];
868 index[1] += index[7];
869 index[2] += index[8];
870
871 return 0;
872}
873
874int
875IntersectBoxes( ProblemIndex ilower1,
876 ProblemIndex iupper1,
877 ProblemIndex ilower2,
878 ProblemIndex iupper2,
879 ProblemIndex int_ilower,
880 ProblemIndex int_iupper )
881{
882 int d, size;
883
884 size = 1;
885 for (d = 0; d < 3; d++)
886 {
887 int_ilower[d] = hypre_max(ilower1[d], ilower2[d]);
888 int_iupper[d] = hypre_min(iupper1[d], iupper2[d]);
889 size *= hypre_max(0, (int_iupper[d] - int_ilower[d] + 1));
890 }
891
892 return size;
893}
894
895int
896DistributeData( ProblemData global_data,
897 int pooldist,
898 Index *refine,
899 Index *distribute,
900 Index *block,
901 int num_procs,
902 int myid,
903 ProblemData *data_ptr )
904{
905 ProblemData data = global_data;
906 ProblemPartData pdata;
907 int *pool_procs;
908 int np, pid;
909 int pool, part, box, b, p, q, r, i, d;
910 int dmap, sign, size;
911 Index m, mmap, n;
912 ProblemIndex ilower, iupper, int_ilower, int_iupper;
913
914 /* set default pool distribution */
915 data.npools = data.dist_npools[pooldist];
916 data.pools = data.dist_pools[pooldist];
917
918 /* determine first process number in each pool */
919 pool_procs = hypre_CTAlloc(int, (data.npools+1));
920 for (part = 0; part < data.nparts; part++)
921 {
922 pool = data.pools[part] + 1;
923 np = distribute[part][0] * distribute[part][1] * distribute[part][2];
924 pool_procs[pool] = hypre_max(pool_procs[pool], np);
925
926 }
927 pool_procs[0] = 0;
928 for (pool = 1; pool < (data.npools + 1); pool++)
929 {
930 pool_procs[pool] = pool_procs[pool - 1] + pool_procs[pool];
931 }
932
933 /* check number of processes */
934 if (pool_procs[data.npools] != num_procs)
935 {
936 printf("Error: Invalid number of processes or process topology\n");
937 exit(1);
938 }
939
940 /* modify part data */
941 for (part = 0; part < data.nparts; part++)
942 {
943 pdata = data.pdata[part];
944 pool = data.pools[part];
945 np = distribute[part][0] * distribute[part][1] * distribute[part][2];
946 pid = myid - pool_procs[pool];
947
948 if ( (pid < 0) || (pid >= np) )
949 {
950 /* none of this part data lives on this process */
951 pdata.nboxes = 0;
952 pdata.glue_nboxes = 0;
953 pdata.graph_nboxes = 0;
954 pdata.matset_nboxes = 0;
955 pdata.matadd_nboxes = 0;
956 }
957 else
958 {
959 /* refine boxes */
960 m[0] = refine[part][0];
961 m[1] = refine[part][1];
962 m[2] = refine[part][2];
963 if ( (m[0] * m[1] * m[2]) > 1)
964 {
965 for (box = 0; box < pdata.nboxes; box++)
966 {
967 MapProblemIndex(pdata.ilowers[box], m);
968 MapProblemIndex(pdata.iuppers[box], m);
969 }
970
971 for (box = 0; box < pdata.graph_nboxes; box++)
972 {
973 MapProblemIndex(pdata.graph_ilowers[box], m);
974 MapProblemIndex(pdata.graph_iuppers[box], m);
975 mmap[0] = m[pdata.graph_index_maps[box][0]];
976 mmap[1] = m[pdata.graph_index_maps[box][1]];
977 mmap[2] = m[pdata.graph_index_maps[box][2]];
978 MapProblemIndex(pdata.graph_to_ilowers[box], mmap);
979 MapProblemIndex(pdata.graph_to_iuppers[box], mmap);
980 }
981 for (box = 0; box < pdata.matset_nboxes; box++)
982 {
983 MapProblemIndex(pdata.matset_ilowers[box], m);
984 MapProblemIndex(pdata.matset_iuppers[box], m);
985 }
986 for (box = 0; box < pdata.matadd_nboxes; box++)
987 {
988 MapProblemIndex(pdata.matadd_ilowers[box], m);
989 MapProblemIndex(pdata.matadd_iuppers[box], m);
990 }
991 }
992
993 /* refine and distribute boxes */
994 m[0] = distribute[part][0];
995 m[1] = distribute[part][1];
996 m[2] = distribute[part][2];
997 if ( (m[0] * m[1] * m[2]) > 1)
998 {
999 p = pid % m[0];
1000 q = ((pid - p) / m[0]) % m[1];
1001 r = (pid - p - q*m[0]) / (m[0]*m[1]);
1002
1003 for (box = 0; box < pdata.nboxes; box++)
1004 {
1005 n[0] = pdata.iuppers[box][0] - pdata.ilowers[box][0] + 1;
1006 n[1] = pdata.iuppers[box][1] - pdata.ilowers[box][1] + 1;
1007 n[2] = pdata.iuppers[box][2] - pdata.ilowers[box][2] + 1;
1008
1009 MapProblemIndex(pdata.ilowers[box], m);
1010 MapProblemIndex(pdata.iuppers[box], m);
1011 pdata.iuppers[box][0] = pdata.ilowers[box][0] + n[0] - 1;
1012 pdata.iuppers[box][1] = pdata.ilowers[box][1] + n[1] - 1;
1013 pdata.iuppers[box][2] = pdata.ilowers[box][2] + n[2] - 1;
1014
1015 pdata.ilowers[box][0] = pdata.ilowers[box][0] + p*n[0];
1016 pdata.ilowers[box][1] = pdata.ilowers[box][1] + q*n[1];
1017 pdata.ilowers[box][2] = pdata.ilowers[box][2] + r*n[2];
1018 pdata.iuppers[box][0] = pdata.iuppers[box][0] + p*n[0];
1019 pdata.iuppers[box][1] = pdata.iuppers[box][1] + q*n[1];
1020 pdata.iuppers[box][2] = pdata.iuppers[box][2] + r*n[2];
1021 }
1022
1023 i = 0;
1024 for (box = 0; box < pdata.graph_nboxes; box++)
1025 {
1026 MapProblemIndex(pdata.graph_ilowers[box], m);
1027 MapProblemIndex(pdata.graph_iuppers[box], m);
1028 mmap[0] = m[pdata.graph_index_maps[box][0]];
1029 mmap[1] = m[pdata.graph_index_maps[box][1]];
1030 mmap[2] = m[pdata.graph_index_maps[box][2]];
1031 MapProblemIndex(pdata.graph_to_ilowers[box], mmap);
1032 MapProblemIndex(pdata.graph_to_iuppers[box], mmap);
1033
1034 for (b = 0; b < pdata.nboxes; b++)
1035 {
1036 /* first convert the box extents based on vartype */
1037 GetVariableBox(pdata.ilowers[b], pdata.iuppers[b],
1038 pdata.vartypes[pdata.graph_vars[box]],
1039 ilower, iupper);
1040 size = IntersectBoxes(pdata.graph_ilowers[box],
1041 pdata.graph_iuppers[box],
1042 ilower, iupper,
1043 int_ilower, int_iupper);
1044 if (size > 0)
1045 {
1046 /* if there is an intersection, it is the only one */
1047 for (d = 0; d < 3; d++)
1048 {
1049 dmap = pdata.graph_index_maps[box][d];
1050 sign = pdata.graph_index_signs[box][d];
1051 pdata.graph_to_ilowers[i][dmap] =
1052 pdata.graph_to_ilowers[box][dmap] +
1053 sign * pdata.graph_to_strides[box][d] *
1054 ((int_ilower[d] - pdata.graph_ilowers[box][d]) /
1055 pdata.graph_strides[box][d]);
1056 pdata.graph_to_iuppers[i][dmap] =
1057 pdata.graph_to_iuppers[box][dmap] +
1058 sign * pdata.graph_to_strides[box][d] *
1059 ((int_iupper[d] - pdata.graph_iuppers[box][d]) /
1060 pdata.graph_strides[box][d]);
1061 pdata.graph_ilowers[i][d] = int_ilower[d];
1062 pdata.graph_iuppers[i][d] = int_iupper[d];
1063 pdata.graph_strides[i][d] =
1064 pdata.graph_strides[box][d];
1065 pdata.graph_to_strides[i][d] =
1066 pdata.graph_to_strides[box][d];
1067 pdata.graph_index_maps[i][d] = dmap;
1068 pdata.graph_index_signs[i][d] = sign;
1069 }
1070 for (d = 3; d < 9; d++)
1071 {
1072 pdata.graph_ilowers[i][d] =
1073 pdata.graph_ilowers[box][d];
1074 pdata.graph_iuppers[i][d] =
1075 pdata.graph_iuppers[box][d];
1076 pdata.graph_to_ilowers[i][d] =
1077 pdata.graph_to_ilowers[box][d];
1078 pdata.graph_to_iuppers[i][d] =
1079 pdata.graph_to_iuppers[box][d];
1080 }
1081 pdata.graph_vars[i] = pdata.graph_vars[box];
1082 pdata.graph_to_parts[i] = pdata.graph_to_parts[box];
1083 pdata.graph_to_vars[i] = pdata.graph_to_vars[box];
1084 pdata.graph_entries[i] = pdata.graph_entries[box];
1085 pdata.graph_values[i] = pdata.graph_values[box];
1086 i++;
1087 break;
1088 }
1089 }
1090 }
1091 pdata.graph_nboxes = i;
1092
1093 i = 0;
1094 for (box = 0; box < pdata.matset_nboxes; box++)
1095 {
1096 MapProblemIndex(pdata.matset_ilowers[box], m);
1097 MapProblemIndex(pdata.matset_iuppers[box], m);
1098
1099 for (b = 0; b < pdata.nboxes; b++)
1100 {
1101 /* first convert the box extents based on vartype */
1102 GetVariableBox(pdata.ilowers[b], pdata.iuppers[b],
1103 pdata.vartypes[pdata.matset_vars[box]],
1104 ilower, iupper);
1105 size = IntersectBoxes(pdata.matset_ilowers[box],
1106 pdata.matset_iuppers[box],
1107 ilower, iupper,
1108 int_ilower, int_iupper);
1109 if (size > 0)
1110 {
1111 /* if there is an intersection, it is the only one */
1112 for (d = 0; d < 3; d++)
1113 {
1114 pdata.matset_ilowers[i][d] = int_ilower[d];
1115 pdata.matset_iuppers[i][d] = int_iupper[d];
1116 pdata.matset_strides[i][d] =
1117 pdata.matset_strides[box][d];
1118 }
1119 for (d = 3; d < 9; d++)
1120 {
1121 pdata.matset_ilowers[i][d] =
1122 pdata.matset_ilowers[box][d];
1123 pdata.matset_iuppers[i][d] =
1124 pdata.matset_iuppers[box][d];
1125 }
1126 pdata.matset_vars[i] = pdata.matset_vars[box];
1127 pdata.matset_entries[i] = pdata.matset_entries[box];
1128 pdata.matset_values[i] = pdata.matset_values[box];
1129 i++;
1130 break;
1131 }
1132 }
1133 }
1134 pdata.matset_nboxes = i;
1135
1136 i = 0;
1137 for (box = 0; box < pdata.matadd_nboxes; box++)
1138 {
1139 MapProblemIndex(pdata.matadd_ilowers[box], m);
1140 MapProblemIndex(pdata.matadd_iuppers[box], m);
1141
1142 for (b = 0; b < pdata.nboxes; b++)
1143 {
1144 /* first convert the box extents based on vartype */
1145 GetVariableBox(pdata.ilowers[b], pdata.iuppers[b],
1146 pdata.vartypes[pdata.matadd_vars[box]],
1147 ilower, iupper);
1148 size = IntersectBoxes(pdata.matadd_ilowers[box],
1149 pdata.matadd_iuppers[box],
1150 ilower, iupper,
1151 int_ilower, int_iupper);
1152 if (size > 0)
1153 {
1154 /* if there is an intersection, it is the only one */
1155 for (d = 0; d < 3; d++)
1156 {
1157 pdata.matadd_ilowers[i][d] = int_ilower[d];
1158 pdata.matadd_iuppers[i][d] = int_iupper[d];
1159 }
1160 for (d = 3; d < 9; d++)
1161 {
1162 pdata.matadd_ilowers[i][d] =
1163 pdata.matadd_ilowers[box][d];
1164 pdata.matadd_iuppers[i][d] =
1165 pdata.matadd_iuppers[box][d];
1166 }
1167 pdata.matadd_vars[i] = pdata.matadd_vars[box];
1168 pdata.matadd_entries[i] = pdata.matadd_entries[box];
1169 pdata.matadd_values[i] = pdata.matadd_values[box];
1170 i++;
1171 break;
1172 }
1173 }
1174 }
1175 pdata.matadd_nboxes = i;
1176 }
1177
1178 /* refine and block boxes */
1179 m[0] = block[part][0];
1180 m[1] = block[part][1];
1181 m[2] = block[part][2];
1182 if ( (m[0] * m[1] * m[2]) > 1)
1183 {
1184 pdata.ilowers = hypre_TReAlloc(pdata.ilowers, ProblemIndex,
1185 m[0]*m[1]*m[2]*pdata.nboxes);
1186 pdata.iuppers = hypre_TReAlloc(pdata.iuppers, ProblemIndex,
1187 m[0]*m[1]*m[2]*pdata.nboxes);
1188 pdata.boxsizes = hypre_TReAlloc(pdata.boxsizes, int,
1189 m[0]*m[1]*m[2]*pdata.nboxes);
1190 for (box = 0; box < pdata.nboxes; box++)
1191 {
1192 n[0] = pdata.iuppers[box][0] - pdata.ilowers[box][0] + 1;
1193 n[1] = pdata.iuppers[box][1] - pdata.ilowers[box][1] + 1;
1194 n[2] = pdata.iuppers[box][2] - pdata.ilowers[box][2] + 1;
1195
1196 MapProblemIndex(pdata.ilowers[box], m);
1197
1198 MapProblemIndex(pdata.iuppers[box], m);
1199 pdata.iuppers[box][0] = pdata.ilowers[box][0] + n[0] - 1;
1200 pdata.iuppers[box][1] = pdata.ilowers[box][1] + n[1] - 1;
1201 pdata.iuppers[box][2] = pdata.ilowers[box][2] + n[2] - 1;
1202
1203 i = box;
1204 for (r = 0; r < m[2]; r++)
1205 {
1206 for (q = 0; q < m[1]; q++)
1207 {
1208 for (p = 0; p < m[0]; p++)
1209 {
1210 pdata.ilowers[i][0] = pdata.ilowers[box][0] + p*n[0];
1211 pdata.ilowers[i][1] = pdata.ilowers[box][1] + q*n[1];
1212 pdata.ilowers[i][2] = pdata.ilowers[box][2] + r*n[2];
1213 pdata.iuppers[i][0] = pdata.iuppers[box][0] + p*n[0];
1214 pdata.iuppers[i][1] = pdata.iuppers[box][1] + q*n[1];
1215 pdata.iuppers[i][2] = pdata.iuppers[box][2] + r*n[2];
1216 for (d = 3; d < 9; d++)
1217 {
1218 pdata.ilowers[i][d] = pdata.ilowers[box][d];
1219 pdata.iuppers[i][d] = pdata.iuppers[box][d];
1220 }
1221 i += pdata.nboxes;
1222 }
1223 }
1224 }
1225 }
1226 pdata.nboxes *= m[0]*m[1]*m[2];
1227
1228 for (box = 0; box < pdata.graph_nboxes; box++)
1229 {
1230 MapProblemIndex(pdata.graph_ilowers[box], m);
1231 MapProblemIndex(pdata.graph_iuppers[box], m);
1232 mmap[0] = m[pdata.graph_index_maps[box][0]];
1233 mmap[1] = m[pdata.graph_index_maps[box][1]];
1234 mmap[2] = m[pdata.graph_index_maps[box][2]];
1235 MapProblemIndex(pdata.graph_to_ilowers[box], mmap);
1236 MapProblemIndex(pdata.graph_to_iuppers[box], mmap);
1237 }
1238 for (box = 0; box < pdata.matset_nboxes; box++)
1239 {
1240 MapProblemIndex(pdata.matset_ilowers[box], m);
1241 MapProblemIndex(pdata.matset_iuppers[box], m);
1242 }
1243 for (box = 0; box < pdata.matadd_nboxes; box++)
1244 {
1245 MapProblemIndex(pdata.matadd_ilowers[box], m);
1246 MapProblemIndex(pdata.matadd_iuppers[box], m);
1247 }
1248 }
1249
1250 /* map remaining ilowers & iuppers */
1251 m[0] = refine[part][0] * block[part][0] * distribute[part][0];
1252 m[1] = refine[part][1] * block[part][1] * distribute[part][1];
1253 m[2] = refine[part][2] * block[part][2] * distribute[part][2];
1254 if ( (m[0] * m[1] * m[2]) > 1)
1255 {
1256 for (box = 0; box < pdata.glue_nboxes; box++)
1257 {
1258 MapProblemIndex(pdata.glue_ilowers[box], m);
1259 MapProblemIndex(pdata.glue_iuppers[box], m);
1260 mmap[0] = m[pdata.glue_index_maps[box][0]];
1261 mmap[1] = m[pdata.glue_index_maps[box][1]];
1262 mmap[2] = m[pdata.glue_index_maps[box][2]];
1263 MapProblemIndex(pdata.glue_nbor_ilowers[box], mmap);
1264 MapProblemIndex(pdata.glue_nbor_iuppers[box], mmap);
1265 }
1266 }
1267
1268 /* compute box sizes, etc. */
1269 pdata.max_boxsize = 0;
1270 for (box = 0; box < pdata.nboxes; box++)
1271 {
1272 pdata.boxsizes[box] = 1;
1273 for (i = 0; i < 3; i++)
1274 {
1275 pdata.boxsizes[box] *=
1276 (pdata.iuppers[box][i] - pdata.ilowers[box][i] + 2);
1277 }
1278 pdata.max_boxsize =
1279 hypre_max(pdata.max_boxsize, pdata.boxsizes[box]);
1280 }
1281 for (box = 0; box < pdata.graph_nboxes; box++)
1282 {
1283 pdata.graph_boxsizes[box] = 1;
1284 for (i = 0; i < 3; i++)
1285 {
1286 pdata.graph_boxsizes[box] *=
1287 (pdata.graph_iuppers[box][i] -
1288 pdata.graph_ilowers[box][i] + 1);
1289 }
1290 }
1291 for (box = 0; box < pdata.matset_nboxes; box++)
1292 {
1293 size = 1;
1294 for (i = 0; i < 3; i++)
1295 {
1296 size*= (pdata.matset_iuppers[box][i] -
1297 pdata.matset_ilowers[box][i] + 1);
1298 }
1299 pdata.max_boxsize = hypre_max(pdata.max_boxsize, size);
1300 }
1301 for (box = 0; box < pdata.matadd_nboxes; box++)
1302 {
1303 size = 1;
1304 for (i = 0; i < 3; i++)
1305 {
1306 size*= (pdata.matadd_iuppers[box][i] -
1307 pdata.matadd_ilowers[box][i] + 1);
1308 }
1309 pdata.max_boxsize = hypre_max(pdata.max_boxsize, size);
1310 }
1311 }
1312
1313 if (pdata.nboxes == 0)
1314 {
1315 hypre_TFree(pdata.ilowers);
1316 hypre_TFree(pdata.iuppers);
1317 hypre_TFree(pdata.boxsizes);
1318 pdata.max_boxsize = 0;
1319 }
1320
1321 if (pdata.glue_nboxes == 0)
1322 {
1323 hypre_TFree(pdata.glue_ilowers);
1324 hypre_TFree(pdata.glue_iuppers);
1325 hypre_TFree(pdata.glue_nbor_parts);
1326 hypre_TFree(pdata.glue_nbor_ilowers);
1327 hypre_TFree(pdata.glue_nbor_iuppers);
1328 hypre_TFree(pdata.glue_index_maps);
1329 hypre_TFree(pdata.glue_primaries);
1330 }
1331
1332 if (pdata.graph_nboxes == 0)
1333 {
1334 hypre_TFree(pdata.graph_ilowers);
1335 hypre_TFree(pdata.graph_iuppers);
1336 hypre_TFree(pdata.graph_strides);
1337 hypre_TFree(pdata.graph_vars);
1338 hypre_TFree(pdata.graph_to_parts);
1339 hypre_TFree(pdata.graph_to_ilowers);
1340 hypre_TFree(pdata.graph_to_iuppers);
1341 hypre_TFree(pdata.graph_to_strides);
1342 hypre_TFree(pdata.graph_to_vars);
1343 hypre_TFree(pdata.graph_index_maps);
1344 hypre_TFree(pdata.graph_index_signs);
1345 hypre_TFree(pdata.graph_entries);
1346 hypre_TFree(pdata.graph_values);
1347 hypre_TFree(pdata.graph_boxsizes);
1348 }
1349
1350 if (pdata.matset_nboxes == 0)
1351 {
1352 hypre_TFree(pdata.matset_ilowers);
1353 hypre_TFree(pdata.matset_iuppers);
1354 hypre_TFree(pdata.matset_strides);
1355 hypre_TFree(pdata.matset_vars);
1356 hypre_TFree(pdata.matset_entries);
1357 hypre_TFree(pdata.matset_values);
1358 }
1359
1360 if (pdata.matadd_nboxes == 0)
1361 {
1362 hypre_TFree(pdata.matadd_ilowers);
1363 hypre_TFree(pdata.matadd_iuppers);
1364 hypre_TFree(pdata.matadd_vars);
1365 hypre_TFree(pdata.matadd_nentries);
1366 for (box = 0; box < pdata.matadd_nboxes; box++)
1367 {
1368 hypre_TFree(pdata.matadd_entries[box]);
1369 hypre_TFree(pdata.matadd_values[box]);
1370 }
1371 hypre_TFree(pdata.matadd_entries);
1372 hypre_TFree(pdata.matadd_values);
1373 }
1374
1375 data.pdata[part] = pdata;
1376 }
1377
1378 data.max_boxsize = 0;
1379 for (part = 0; part < data.nparts; part++)
1380 {
1381 data.max_boxsize =
1382 hypre_max(data.max_boxsize, data.pdata[part].max_boxsize);
1383 }
1384
1385 hypre_TFree(pool_procs);
1386
1387 *data_ptr = data;
1388 return 0;
1389}
1390
1391/*--------------------------------------------------------------------------
1392 * Destroy data
1393 *--------------------------------------------------------------------------*/
1394
1395int
1396DestroyData( ProblemData data )
1397{
1398 ProblemPartData pdata;
1399 int part, box, s;
1400
1401 for (part = 0; part < data.nparts; part++)
1402 {
1403 pdata = data.pdata[part];
1404
1405 if (pdata.nboxes > 0)
1406 {
1407 hypre_TFree(pdata.ilowers);
1408 hypre_TFree(pdata.iuppers);
1409 hypre_TFree(pdata.boxsizes);
1410 }
1411
1412 if (pdata.nvars > 0)
1413 {
1414 hypre_TFree(pdata.vartypes);
1415 }
1416
1417 if (pdata.add_nvars > 0)
1418 {
1419 hypre_TFree(pdata.add_indexes);
1420 hypre_TFree(pdata.add_vartypes);
1421 }
1422
1423 if (pdata.glue_nboxes > 0)
1424 {
1425 hypre_TFree(pdata.glue_ilowers);
1426 hypre_TFree(pdata.glue_iuppers);
1427 hypre_TFree(pdata.glue_nbor_parts);
1428 hypre_TFree(pdata.glue_nbor_ilowers);
1429 hypre_TFree(pdata.glue_nbor_iuppers);
1430 hypre_TFree(pdata.glue_index_maps);
1431 hypre_TFree(pdata.glue_primaries);
1432 }
1433
1434 if (pdata.nvars > 0)
1435 {
1436 hypre_TFree(pdata.stencil_num);
1437 }
1438
1439 if (pdata.graph_nboxes > 0)
1440 {
1441 hypre_TFree(pdata.graph_ilowers);
1442 hypre_TFree(pdata.graph_iuppers);
1443 hypre_TFree(pdata.graph_strides);
1444 hypre_TFree(pdata.graph_vars);
1445 hypre_TFree(pdata.graph_to_parts);
1446 hypre_TFree(pdata.graph_to_ilowers);
1447 hypre_TFree(pdata.graph_to_iuppers);
1448 hypre_TFree(pdata.graph_to_strides);
1449 hypre_TFree(pdata.graph_to_vars);
1450 hypre_TFree(pdata.graph_index_maps);
1451 hypre_TFree(pdata.graph_index_signs);
1452 hypre_TFree(pdata.graph_entries);
1453 hypre_TFree(pdata.graph_values);
1454 hypre_TFree(pdata.graph_boxsizes);
1455 }
1456
1457 if (pdata.matset_nboxes > 0)
1458 {
1459 hypre_TFree(pdata.matset_ilowers);
1460 hypre_TFree(pdata.matset_iuppers);
1461 hypre_TFree(pdata.matset_strides);
1462 hypre_TFree(pdata.matset_vars);
1463 hypre_TFree(pdata.matset_entries);
1464 hypre_TFree(pdata.matset_values);
1465 }
1466
1467 if (pdata.matadd_nboxes > 0)
1468 {
1469 hypre_TFree(pdata.matadd_ilowers);
1470 hypre_TFree(pdata.matadd_iuppers);
1471 hypre_TFree(pdata.matadd_vars);
1472 hypre_TFree(pdata.matadd_nentries);
1473 for (box = 0; box < pdata.matadd_nboxes; box++)
1474 {
1475 hypre_TFree(pdata.matadd_entries[box]);
1476 hypre_TFree(pdata.matadd_values[box]);
1477 }
1478 hypre_TFree(pdata.matadd_entries);
1479 hypre_TFree(pdata.matadd_values);
1480 }
1481 }
1482 hypre_TFree(data.pdata);
1483
1484 for (s = 0; s < data.nstencils; s++)
1485 {
1486 hypre_TFree(data.stencil_offsets[s]);
1487 hypre_TFree(data.stencil_vars[s]);
1488 hypre_TFree(data.stencil_values[s]);
1489 }
1490 hypre_TFree(data.stencil_sizes);
1491 hypre_TFree(data.stencil_offsets);
1492 hypre_TFree(data.stencil_vars);
1493 hypre_TFree(data.stencil_values);
1494
1495 if (data.symmetric_num > 0)
1496 {
1497 hypre_TFree(data.symmetric_parts);
1498 hypre_TFree(data.symmetric_vars);
1499 hypre_TFree(data.symmetric_to_vars);
1500 hypre_TFree(data.symmetric_booleans);
1501 }
1502
1503 hypre_TFree(data.pools);
1504
1505 return 0;
1506}
1507
1508/*--------------------------------------------------------------------------
1509 * Routine to load cosine function
1510 *--------------------------------------------------------------------------*/
1511
1512int
1513SetCosineVector( double scale,
1514 Index ilower,
1515 Index iupper,
1516 double *values)
1517{
1518 int i, j, k;
1519 int count = 0;
1520
1521 for (k = ilower[2]; k <= iupper[2]; k++)
1522 {
1523 for (j = ilower[1]; j <= iupper[1]; j++)
1524 {
1525 for (i = ilower[0]; i <= iupper[0]; i++)
1526 {
1527 values[count] = scale * cos((i+j+k)/10.0);
1528 count++;
1529 }
1530 }
1531 }
1532
1533 return(0);
1534}
1535
1536/*--------------------------------------------------------------------------
1537 * Print usage info
1538 *--------------------------------------------------------------------------*/
1539
1540int
1541PrintUsage( char *progname,
1542 int myid )
1543{
1544 if ( myid == 0 )
1545 {
1546 printf("\n");
1547 printf("Usage: %s [<options>]\n", progname);
1548 printf("\n");
1549 printf(" -in <filename> : input file (default is `%s').\n", infile_default);
1550 printf(" NOTE: -in must come become before parameters that modify the problem (-P, etc.).\n");
1551 printf("\n");
1552 printf(" -P <Px> <Py> <Pz> : define processor topology\n\n");
1553 printf(" -pooldist <p> : pool distribution to use\n");
1554 printf(" -r <rx> <ry> <rz> : refine part(s) for default problem\n");
1555 printf(" -b <bx> <by> <bz> : refine and block part(s) for default problem\n\n");
1556 printf(" -n <nx> <ny> <nz> : define size per processor for problems on cube\n");
1557 printf(" -c <cx> <cy> <cz> : define anisotropies for Laplace problem\n\n");
1558 printf(" -laplace : 3D Laplace problem on a cube\n");
1559 printf(" -27pt : Problem with 27-point stencil on a cube\n");
1560 printf(" -9pt : build 9pt 2D laplacian problem\n");
1561 printf(" -jumps : PDE with jumps on a cube\n\n");
1562 printf(" -solver <ID> : solver ID (default = 0)\n");
1563 printf(" 0 - PCG with AMG (low complexity) precond\n");
1564 printf(" 1 - PCG with diagonal scaling\n");
1565 printf(" 2 - GMRES(10) with AMG (low complexity) precond\n");
1566 printf(" 3 - GMRES(10) with diagonal scaling\n\n");
1567 printf(" -printstats : print out detailed info on AMG preconditioner\n\n");
1568 printf(" -printsystem : print out the system\n\n");
1569 printf(" -rhsfromcosine : solution is cosine function (default), can be used for\n");
1570 printf(" default problem only\n");
1571 printf(" -rhsone : rhs is vector with unit components \n");
1572 printf("\n");
1573 }
1574
1575 return 0;
1576}
1577
1578/*--------------------------------------------------------------------------
1579 * Test driver for semi-structured matrix interface
1580 *--------------------------------------------------------------------------*/
1581
1582int
1583main( int argc,
1584 char *argv[] )
1585{
1586 char *infile;
1587 ProblemData global_data;
1588 ProblemData data;
1589 ProblemPartData pdata;
1590 int nparts;
1591 int pooldist;
1592 int *parts;
1593 Index *refine;
1594 Index *distribute;
1595 Index *block;
1596 int object_type;
1597 int solver_id = 0 ;
1598 int print_system = 0;
1599 int print_level = 0;
1600 int cosine, struct_cosine;
1601 double scale;
1602
1603 HYPRE_SStructGrid grid;
1604 HYPRE_SStructStencil *stencils;
1605 HYPRE_SStructGraph graph;
1606 HYPRE_SStructMatrix A;
1607 HYPRE_SStructVector b;
1608 HYPRE_SStructVector x;
1609
1610 HYPRE_ParCSRMatrix par_A;
1611 HYPRE_ParVector par_b;
1612 HYPRE_ParVector par_x;
1613 HYPRE_Solver par_solver;
1614 HYPRE_Solver par_precond;
1615
1616
1617 Index ilower, iupper;
1618 Index index, to_index;
1619 double *values;
1620
1621 int num_iterations;
1622 double final_res_norm;
1623
1624 int num_procs, myid;
1625 int time_index;
1626 double wall_time;
1627
1628 int sym;
1629
1630 int arg_index, part, var, box, s, entry, i, j, k, size;
1631 int build_matrix_type;
1632 int build_rhs_type;
1633
1634 double system_size;
1635 int *P_xyz, *r_xyz;
1636
1637 /*int max_levels = 25;
1638 int num_sweeps = 1;
1639 int ns_coarse = 1;
1640 int verbose = 0; */
1641 double tol = 1.e-6;
1642 int maxit_prec = 100;
1643 int maxit_sol = 500;
1644 /*int coarse_threshold = 9;
1645 int seq_threshold = 0;
1646
1647
1648 int mc_err;
1649
1650 int cheby_order = 2;
1651 double cheby_eig_ratio = .3; */
1652
1653 int wall = 0;
1654
1655
1656 int pde_index, mesh_index, discr_index;
1657 int sref, pref, vis, spm;
1658
1659
1660
1661 /*-----------------------------------------------------------
1662 * Initialize some stuff
1663 *-----------------------------------------------------------*/
1664
1665 /* Initialize MPI */
1666 MPI_Init(&argc, &argv);
1667
1668 MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
1669 MPI_Comm_rank(MPI_COMM_WORLD, &myid);
1670
1671
1672
1673 hypre_InitMemoryDebug(myid);
1674
1675 /* afem defaults */
1676 mesh_index = pde_index =discr_index = 0;
1677 sref = pref = vis = 0;
1678 spm = 4; /* This used to be 2 - changed to be a "more stable" option */
1679
1680
1681 /*-----------------------------------------------------------
1682 * Parse command line
1683 *-----------------------------------------------------------*/
1684
1685 arg_index = 1;
1686 build_matrix_type = 1;
1687 build_rhs_type = -1;
1688
1689 while ( (arg_index < argc))
1690 {
1691 if ( strcmp(argv[arg_index], "-laplace") == 0 )
1692 {
1693 arg_index++;
1694 build_matrix_type = 2;
1695 }
1696
1697 else if ( strcmp(argv[arg_index], "-27pt") == 0 )
1698 {
1699 arg_index++;
1700 build_matrix_type = 3;
1701 }
1702 else if ( strcmp(argv[arg_index], "-jumps") == 0 )
1703 {
1704 arg_index++;
1705 build_matrix_type = 4;
1706 }
1707 else if ( strcmp(argv[arg_index], "-solver") == 0 )
1708 {
1709 arg_index++;
1710 solver_id = atoi(argv[arg_index++]);
1711 if (solver_id < 0 || solver_id > 3)
1712 {
1713 printf("Wrong solver id ! Code will exit!!\n");
1714 return(1);
1715 }
1716 }
1717 else if ( strcmp(argv[arg_index], "-printsystem") == 0 )
1718 {
1719 arg_index++;
1720 print_system = 1;
1721 }
1722 else if ( strcmp(argv[arg_index], "-printstats") == 0 )
1723 {
1724 arg_index++;
1725 print_level = 1;
1726 }
1727 else
1728 arg_index++;
1729 }
1730
1731 if (build_matrix_type > 1 && build_matrix_type < 8)
1732 {
1733 time_index = hypre_InitializeTiming("Setup matrix and rhs");
1734 hypre_BeginTiming(time_index);
1735
1736 if ( build_matrix_type == 2 )
1737 BuildParLaplacian(argc, argv, &system_size, &par_A, &par_b, &par_x);
1738 else if ( build_matrix_type == 3 )
1739 BuildParLaplacian27pt(argc, argv, &system_size, &par_A, &par_b, &par_x);
1740 else if ( build_matrix_type == 4 )
1741 BuildParVarDifConv(argc, argv, &system_size, &par_A, &par_b, &par_x);
1742 hypre_EndTiming(time_index);
1743 hypre_PrintTiming("Setup matrix and rhs", &wall_time, MPI_COMM_WORLD);
1744 hypre_FinalizeTiming(time_index);
1745 hypre_ClearTiming();
1746 fflush(NULL);
1747
1748 if (print_system)
1749 {
1750 HYPRE_ParCSRMatrixPrintIJ(par_A, 0, 0, "parcsr.out.A");
1751 HYPRE_ParVectorPrintIJ(par_b, 0, "parcsr.out.b");
1752 HYPRE_ParVectorPrintIJ(par_x, 0, "parcsr.out.x0");
1753 }
1754 }
1755 else
1756 {
1757 /*-----------------------------------------------------------
1758 * Read input file
1759 *-----------------------------------------------------------*/
1760
1761 arg_index = 1;
1762
1763 /* parse command line for input file name */
1764 infile = infile_default;
1765 if (argc > 1)
1766 {
1767 if ( strcmp(argv[arg_index], "-in") == 0 )
1768 {
1769 arg_index++;
1770 infile = argv[arg_index++];
1771 }
1772 }
1773
1774 ReadData(infile, &global_data);
1775
1776 /*-----------------------------------------------------------
1777 * Set defaults
1778 *-----------------------------------------------------------*/
1779
1780 sym = 1;
1781
1782 nparts = global_data.nparts;
1783 pooldist = 0;
1784
1785 parts = hypre_TAlloc(int, nparts);
1786 refine = hypre_TAlloc(Index, nparts);
1787 distribute = hypre_TAlloc(Index, nparts);
1788 block = hypre_TAlloc(Index, nparts);
1789 P_xyz = hypre_TAlloc(int, 3);
1790 r_xyz = hypre_TAlloc(int, 3);
1791 for (part = 0; part < nparts; part++)
1792 {
1793 parts[part] = part;
1794 for (j = 0; j < 3; j++)
1795 {
1796 refine[part][j] = 1;
1797 distribute[part][j] = 1;
1798 block[part][j] = 1;
1799 }
1800 }
1801
1802 print_system = 0;
1803 cosine = 1;
1804 struct_cosine = 0;
1805 system_size = 512.;
1806 for (j = 0; j < 3; j++)
1807 {
1808 r_xyz[j] = 1;
1809 P_xyz[j] = 1;
1810 }
1811 /*-----------------------------------------------------------
1812 * Parse command line
1813 *-----------------------------------------------------------*/
1814
1815 while (arg_index < argc)
1816 {
1817 /*if ( strcmp(argv[arg_index], "-pt") == 0 )
1818 {
1819 arg_index++;
1820 nparts = 0;
1821 while ( strncmp(argv[arg_index], "-", 1) != 0 )
1822 {
1823 parts[nparts++] = atoi(argv[arg_index++]);
1824 }
1825 }
1826 else*/ if ( strcmp(argv[arg_index], "-pooldist") == 0 )
1827 {
1828 arg_index++;
1829 pooldist = atoi(argv[arg_index++]);
1830 }
1831 else if ( strcmp(argv[arg_index], "-r") == 0 )
1832 {
1833 arg_index++;
1834 for (j = 0; j < 3; j++)
1835 r_xyz[j] = atoi(argv[arg_index++]);
1836 for (i = 0; i < nparts; i++)
1837 {
1838 part = parts[i];
1839 for (j = 0; j < 3; j++)
1840 refine[part][j] = r_xyz[j];
1841 }
1842 system_size *= (double)r_xyz[0]*(double)r_xyz[1]*(double)r_xyz[2];
1843 hypre_TFree(r_xyz);
1844 }
1845 else if ( strcmp(argv[arg_index], "-P") == 0 )
1846 {
1847 arg_index++;
1848 for (j = 0; j < 3; j++)
1849 P_xyz[j] = atoi(argv[arg_index++]);
1850 for (i = 0; i < nparts; i++)
1851 {
1852 part = parts[i];
1853 for (j = 0; j < 3; j++)
1854 distribute[part][j] = P_xyz[j];
1855 }
1856 system_size *= (double)P_xyz[0]*(double)P_xyz[1]*(double)P_xyz[2];
1857 hypre_TFree(P_xyz);
1858 }
1859 else if ( strcmp(argv[arg_index], "-b") == 0 )
1860 {
1861 arg_index++;
1862 for (i = 0; i < nparts; i++)
1863 {
1864 part = parts[i];
1865 k = arg_index;
1866 for (j = 0; j < 3; j++)
1867 {
1868 block[part][j] = atoi(argv[k++]);
1869 }
1870 }
1871 arg_index += 3;
1872 }
1873 else if ( strcmp(argv[arg_index], "-solver") == 0 )
1874 {
1875 arg_index++;
1876 solver_id = atoi(argv[arg_index++]);
1877 }
1878 else if ( strcmp(argv[arg_index], "-rhsone") == 0 )
1879 {
1880 arg_index++;
1881 cosine = 0;
1882 }
1883 else if ( strcmp(argv[arg_index], "-rhsfromcosine") == 0 )
1884 {
1885 arg_index++;
1886 cosine = 1;
1887 struct_cosine = 1;
1888 }
1889 else if ( strcmp(argv[arg_index], "-printsystem") == 0 )
1890 {
1891 arg_index++;
1892 print_system = 1;
1893 }
1894 else if ( strcmp(argv[arg_index], "-help") == 0 )
1895 {
1896 PrintUsage(argv[0], myid);
1897 exit(1);
1898 break;
1899 }
1900 else
1901 arg_index++;
1902 }
1903
1904 /*-----------------------------------------------------------
1905 * Distribute data
1906 *-----------------------------------------------------------*/
1907
1908 DistributeData(global_data, pooldist, refine, distribute, block,
1909 num_procs, myid, &data);
1910
1911 /*-----------------------------------------------------------
1912 * Synchronize so that timings make sense
1913 *-----------------------------------------------------------*/
1914
1915 MPI_Barrier(MPI_COMM_WORLD);
1916
1917 /*-----------------------------------------------------------
1918 * Set up the grid
1919 *-----------------------------------------------------------*/
1920
1921 time_index = hypre_InitializeTiming("SStruct Interface");
1922 hypre_BeginTiming(time_index);
1923
1924 HYPRE_SStructGridCreate(MPI_COMM_WORLD, data.ndim, data.nparts, &grid);
1925 for (part = 0; part < data.nparts; part++)
1926 {
1927 pdata = data.pdata[part];
1928 for (box = 0; box < pdata.nboxes; box++)
1929 {
1930 HYPRE_SStructGridSetExtents(grid, part,
1931 pdata.ilowers[box], pdata.iuppers[box]);
1932 }
1933
1934 HYPRE_SStructGridSetVariables(grid, part, pdata.nvars, pdata.vartypes);
1935
1936 /* GridAddVariabes */
1937
1938 /* GridSetNeighborBox */
1939 for (box = 0; box < pdata.glue_nboxes; box++)
1940 {
1941#if 1 /* will add primary to the interface soon */
1942 HYPRE_SStructGridSetNeighborBox(grid, part,
1943 pdata.glue_ilowers[box],
1944 pdata.glue_iuppers[box],
1945 pdata.glue_nbor_parts[box],
1946 pdata.glue_nbor_ilowers[box],
1947 pdata.glue_nbor_iuppers[box],
1948 pdata.glue_index_maps[box]);
1949#else
1950 HYPRE_SStructGridSetNeighborBoxZ(grid, part,
1951 pdata.glue_ilowers[box],
1952 pdata.glue_iuppers[box],
1953 pdata.glue_nbor_parts[box],
1954 pdata.glue_nbor_ilowers[box],
1955 pdata.glue_nbor_iuppers[box],
1956 pdata.glue_index_maps[box],
1957 pdata.glue_primaries[box]);
1958#endif
1959 }
1960
1961 HYPRE_SStructGridSetPeriodic(grid, part, pdata.periodic);
1962 }
1963 HYPRE_SStructGridAssemble(grid);
1964
1965 /*-----------------------------------------------------------
1966 * Set up the stencils
1967 *-----------------------------------------------------------*/
1968
1969 stencils = hypre_CTAlloc(HYPRE_SStructStencil, data.nstencils);
1970 for (s = 0; s < data.nstencils; s++)
1971 {
1972 HYPRE_SStructStencilCreate(data.ndim, data.stencil_sizes[s],
1973 &stencils[s]);
1974 for (entry = 0; entry < data.stencil_sizes[s]; entry++)
1975 {
1976 HYPRE_SStructStencilSetEntry(stencils[s], entry,
1977 data.stencil_offsets[s][entry],
1978 data.stencil_vars[s][entry]);
1979 }
1980 }
1981
1982 /*-----------------------------------------------------------
1983 * Set object type
1984 *-----------------------------------------------------------*/
1985
1986 object_type = HYPRE_PARCSR;
1987
1988 /*-----------------------------------------------------------
1989 * Set up the graph
1990 *-----------------------------------------------------------*/
1991
1992 HYPRE_SStructGraphCreate(MPI_COMM_WORLD, grid, &graph);
1993
1994 /* HYPRE_SSTRUCT is the default, so we don't have to call SetObjectType */
1995 if ( object_type != HYPRE_SSTRUCT )
1996 {
1997 HYPRE_SStructGraphSetObjectType(graph, object_type);
1998 }
1999
2000 for (part = 0; part < data.nparts; part++)
2001 {
2002 pdata = data.pdata[part];
2003
2004 /* set stencils */
2005 for (var = 0; var < pdata.nvars; var++)
2006 {
2007 HYPRE_SStructGraphSetStencil(graph, part, var,
2008 stencils[pdata.stencil_num[var]]);
2009 }
2010
2011 /* add entries */
2012 for (box = 0; box < pdata.graph_nboxes; box++)
2013 {
2014 for (index[2] = pdata.graph_ilowers[box][2];
2015 index[2] <= pdata.graph_iuppers[box][2];
2016 index[2] += pdata.graph_strides[box][2])
2017 {
2018 for (index[1] = pdata.graph_ilowers[box][1];
2019 index[1] <= pdata.graph_iuppers[box][1];
2020 index[1] += pdata.graph_strides[box][1])
2021 {
2022 for (index[0] = pdata.graph_ilowers[box][0];
2023 index[0] <= pdata.graph_iuppers[box][0];
2024 index[0] += pdata.graph_strides[box][0])
2025 {
2026 for (i = 0; i < 3; i++)
2027 {
2028 j = pdata.graph_index_maps[box][i];
2029 k = index[i] - pdata.graph_ilowers[box][i];
2030 k /= pdata.graph_strides[box][i];
2031 k *= pdata.graph_index_signs[box][i];
2032 to_index[j] = pdata.graph_to_ilowers[box][j] +
2033 k * pdata.graph_to_strides[box][j];
2034 }
2035 HYPRE_SStructGraphAddEntries(graph, part, index,
2036 pdata.graph_vars[box],
2037 pdata.graph_to_parts[box],
2038 to_index,
2039 pdata.graph_to_vars[box]);
2040 }
2041 }
2042 }
2043 }
2044 }
2045
2046 HYPRE_SStructGraphAssemble(graph);
2047
2048 /*-----------------------------------------------------------
2049 * Set up the matrix
2050 *-----------------------------------------------------------*/
2051
2052 values = hypre_TAlloc(double, data.max_boxsize);
2053
2054 HYPRE_SStructMatrixCreate(MPI_COMM_WORLD, graph, &A);
2055
2056 /* TODO HYPRE_SStructMatrixSetSymmetric(A, 1); */
2057 for (i = 0; i < data.symmetric_num; i++)
2058 {
2059 HYPRE_SStructMatrixSetSymmetric(A, data.symmetric_parts[i],
2060 data.symmetric_vars[i],
2061 data.symmetric_to_vars[i],
2062 data.symmetric_booleans[i]);
2063 }
2064 HYPRE_SStructMatrixSetNSSymmetric(A, data.ns_symmetric);
2065
2066 /* HYPRE_SSTRUCT is the default, so we don't have to call SetObjectType */
2067 if ( object_type != HYPRE_SSTRUCT )
2068 {
2069 HYPRE_SStructMatrixSetObjectType(A, object_type);
2070 }
2071
2072 HYPRE_SStructMatrixInitialize(A);
2073
2074 for (part = 0; part < data.nparts; part++)
2075 {
2076 pdata = data.pdata[part];
2077
2078 /* set stencil values */
2079 for (var = 0; var < pdata.nvars; var++)
2080 {
2081 s = pdata.stencil_num[var];
2082 for (i = 0; i < data.stencil_sizes[s]; i++)
2083 {
2084 for (j = 0; j < pdata.max_boxsize; j++)
2085 {
2086 values[j] = data.stencil_values[s][i];
2087 }
2088 for (box = 0; box < pdata.nboxes; box++)
2089 {
2090 GetVariableBox(pdata.ilowers[box], pdata.iuppers[box],
2091 pdata.vartypes[var], ilower, iupper);
2092 HYPRE_SStructMatrixSetBoxValues(A, part, ilower, iupper,
2093 var, 1, &i, values);
2094 }
2095 }
2096 }
2097
2098 /* set non-stencil entries */
2099 for (box = 0; box < pdata.graph_nboxes; box++)
2100 {
2101 /*
2102 * RDF NOTE: Add a separate interface routine for setting non-stencil
2103 * entries. It would be more efficient to set boundary values a box
2104 * at a time, but AMR may require striding, and some codes may already
2105 * have a natural values array to pass in, but can't because it uses
2106 * ghost values.
2107 *
2108 * Example new interface routine:
2109 * SetNSBoxValues(matrix, part, ilower, iupper, stride, entry
2110 * values_ilower, values_iupper, values);
2111 */
2112
2113/* since we have already tested SetBoxValues above, use SetValues here */
2114#if 0
2115 for (j = 0; j < pdata.graph_boxsizes[box]; j++)
2116 {
2117 values[j] = pdata.graph_values[box];
2118 }
2119 HYPRE_SStructMatrixSetBoxValues(A, part,
2120 pdata.graph_ilowers[box],
2121 pdata.graph_iuppers[box],
2122 pdata.graph_vars[box],
2123 1, &pdata.graph_entries[box],
2124 values);
2125#else
2126 for (index[2] = pdata.graph_ilowers[box][2];
2127 index[2] <= pdata.graph_iuppers[box][2];
2128 index[2] += pdata.graph_strides[box][2])
2129 {
2130 for (index[1] = pdata.graph_ilowers[box][1];
2131 index[1] <= pdata.graph_iuppers[box][1];
2132 index[1] += pdata.graph_strides[box][1])
2133 {
2134 for (index[0] = pdata.graph_ilowers[box][0];
2135 index[0] <= pdata.graph_iuppers[box][0];
2136 index[0] += pdata.graph_strides[box][0])
2137 {
2138 HYPRE_SStructMatrixSetValues(A, part, index,
2139 pdata.graph_vars[box],
2140 1, &pdata.graph_entries[box],
2141 &pdata.graph_values[box]);
2142 }
2143 }
2144 }
2145#endif
2146 }
2147
2148 /* reset some matrix values */
2149 for (box = 0; box < pdata.matset_nboxes; box++)
2150 {
2151 size= 1;
2152 for (j = 0; j < 3; j++)
2153 {
2154 size*= (pdata.matset_iuppers[box][j] -
2155 pdata.matset_ilowers[box][j] + 1);
2156 }
2157 for (j = 0; j < size; j++)
2158 {
2159 values[j] = pdata.matset_values[box];
2160 }
2161 HYPRE_SStructMatrixSetBoxValues(A, part,
2162 pdata.matset_ilowers[box],
2163 pdata.matset_iuppers[box],
2164 pdata.matset_vars[box],
2165 1, &pdata.matset_entries[box],
2166 values);
2167 }
2168
2169 /* add to some matrix values */
2170 for (box = 0; box < pdata.matadd_nboxes; box++)
2171 {
2172 size = 1;
2173 for (j = 0; j < 3; j++)
2174 {
2175 size*= (pdata.matadd_iuppers[box][j] -
2176 pdata.matadd_ilowers[box][j] + 1);
2177 }
2178
2179 for (entry = 0; entry < pdata.matadd_nentries[box]; entry++)
2180 {
2181 for (j = 0; j < size; j++)
2182 {
2183 values[j] = pdata.matadd_values[box][entry];
2184 }
2185
2186 HYPRE_SStructMatrixAddToBoxValues(A, part,
2187 pdata.matadd_ilowers[box],
2188 pdata.matadd_iuppers[box],
2189 pdata.matadd_vars[box],
2190 1, &pdata.matadd_entries[box][entry],
2191 values);
2192 }
2193 }
2194 }
2195
2196 HYPRE_SStructMatrixAssemble(A);
2197
2198 /*-----------------------------------------------------------
2199 * Set up the linear system
2200 *-----------------------------------------------------------*/
2201
2202 HYPRE_SStructVectorCreate(MPI_COMM_WORLD, grid, &b);
2203
2204 /* HYPRE_SSTRUCT is the default, so we don't have to call SetObjectType */
2205 if ( object_type != HYPRE_SSTRUCT )
2206 {
2207 HYPRE_SStructVectorSetObjectType(b, object_type);
2208 }
2209
2210 HYPRE_SStructVectorInitialize(b);
2211 for (j = 0; j < data.max_boxsize; j++)
2212 {
2213 values[j] = 1.0;
2214 }
2215 for (part = 0; part < data.nparts; part++)
2216 {
2217 pdata = data.pdata[part];
2218 for (var = 0; var < pdata.nvars; var++)
2219 {
2220 for (box = 0; box < pdata.nboxes; box++)
2221 {
2222 GetVariableBox(pdata.ilowers[box], pdata.iuppers[box],
2223 pdata.vartypes[var], ilower, iupper);
2224 HYPRE_SStructVectorSetBoxValues(b, part, ilower, iupper,
2225 var, values);
2226 }
2227 }
2228 }
2229 HYPRE_SStructVectorAssemble(b);
2230
2231 HYPRE_SStructVectorCreate(MPI_COMM_WORLD, grid, &x);
2232
2233 /* HYPRE_SSTRUCT is the default, so we don't have to call SetObjectType */
2234 if ( object_type != HYPRE_SSTRUCT )
2235 {
2236 HYPRE_SStructVectorSetObjectType(x, object_type);
2237 }
2238
2239 HYPRE_SStructVectorInitialize(x);
2240 /*-----------------------------------------------------------
2241 * If requested, reset linear system so that it has
2242 * exact solution:
2243 *
2244 * u(part,var,i,j,k) = (part+1)*(var+1)*cosine[(i+j+k)/10]
2245 *
2246 *-----------------------------------------------------------*/
2247
2248 if (cosine)
2249 {
2250 for (part = 0; part < data.nparts; part++)
2251 {
2252 pdata = data.pdata[part];
2253 for (var = 0; var < pdata.nvars; var++)
2254 {
2255 scale = (part+1.0)*(var+1.0);
2256 for (box = 0; box < pdata.nboxes; box++)
2257 {
2258 GetVariableBox(pdata.ilowers[box], pdata.iuppers[box], var,
2259 ilower, iupper);
2260 SetCosineVector(scale, ilower, iupper, values);
2261 HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper,
2262 var, values);
2263 }
2264 }
2265 }
2266 }
2267 HYPRE_SStructVectorAssemble(x);
2268
2269 hypre_EndTiming(time_index);
2270 hypre_PrintTiming("SStruct Interface", &wall_time, MPI_COMM_WORLD);
2271 hypre_FinalizeTiming(time_index);
2272 hypre_ClearTiming();
2273 fflush(NULL);
2274
2275 /*-----------------------------------------------------------
2276 * If requested, reset linear system so that it has
2277 * exact solution:
2278 *
2279 * u(part,var,i,j,k) = (part+1)*(var+1)*cosine[(i+j+k)/10]
2280 *
2281 *-----------------------------------------------------------*/
2282
2283 if (cosine)
2284 {
2285 for (part = 0; part < data.nparts; part++)
2286 {
2287 pdata = data.pdata[part];
2288 for (var = 0; var < pdata.nvars; var++)
2289 {
2290 scale = (part+1.0)*(var+1.0);
2291 for (box = 0; box < pdata.nboxes; box++)
2292 {
2293 GetVariableBox(pdata.ilowers[box], pdata.iuppers[box], var,
2294 ilower, iupper);
2295 SetCosineVector(scale, ilower, iupper, values);
2296 HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper,
2297 var, values);
2298 }
2299 }
2300 }
2301 }
2302
2303 /*-----------------------------------------------------------
2304 * Get the objects out
2305 * NOTE: This should go after the cosine part, but for the bug
2306 *-----------------------------------------------------------*/
2307
2308 HYPRE_SStructMatrixGetObject(A, (void **) &par_A);
2309 HYPRE_SStructVectorGetObject(b, (void **) &par_b);
2310 HYPRE_SStructVectorGetObject(x, (void **) &par_x);
2311
2312 /*-----------------------------------------------------------
2313 * Finish resetting the linear system
2314 *-----------------------------------------------------------*/
2315
2316 if (cosine)
2317 {
2318 /* This if/else is due to a bug in SStructMatvec */
2319 if (object_type != HYPRE_PARCSR)
2320 {
2321 HYPRE_SStructVectorAssemble(x);
2322 /* Apply A to cosine vector to yield righthand side */
2323 hypre_SStructMatvec(1.0, A, x, 0.0, b);
2324 /* Reset initial guess to zero */
2325 hypre_SStructMatvec(0.0, A, b, 0.0, x);
2326 }
2327 else
2328 {
2329 /* Apply A to cosine vector to yield righthand side */
2330 HYPRE_ParCSRMatrixMatvec(1.0, par_A, par_x, 0.0, par_b );
2331 /* Reset initial guess to zero */
2332 HYPRE_ParCSRMatrixMatvec(0.0, par_A, par_b, 0.0, par_x );
2333 }
2334 }
2335
2336 /*-----------------------------------------------------------
2337 * Print out the system and initial guess
2338 *-----------------------------------------------------------*/
2339 /*hypre_SStructMatvec(1.0, A, x, 2.0, b);
2340 HYPRE_ParCSRMatrixMatvec(1.0, par_A, par_x, 2.0, par_b );*/
2341
2342 if (print_system)
2343 {
2344 HYPRE_SStructVectorGather(b);
2345 HYPRE_SStructVectorGather(x);
2346 HYPRE_SStructMatrixPrint("sstruct.out.A", A, 0);
2347 HYPRE_SStructVectorPrint("sstruct.out.b", b, 0);
2348 HYPRE_SStructVectorPrint("sstruct.out.x0", x, 0);
2349 }
2350
2351 /*-----------------------------------------------------------
2352 * Debugging code
2353 *-----------------------------------------------------------*/
2354
2355#if DEBUG
2356 {
2357 FILE *file;
2358 char filename[255];
2359
2360 /* result is 1's on the interior of the grid */
2361 hypre_SStructMatvec(1.0, A, b, 0.0, x);
2362 HYPRE_SStructVectorPrint("sstruct.out.matvec", x, 0);
2363
2364 /* result is all 1's */
2365 hypre_SStructCopy(b, x);
2366 HYPRE_SStructVectorPrint("sstruct.out.copy", x, 0);
2367
2368 /* result is all 2's */
2369 hypre_SStructScale(2.0, x);
2370 HYPRE_SStructVectorPrint("sstruct.out.scale", x, 0);
2371
2372 /* result is all 0's */
2373 hypre_SStructAxpy(-2.0, b, x);
2374 HYPRE_SStructVectorPrint("sstruct.out.axpy", x, 0);
2375
2376 /* result is 1's with 0's on some boundaries */
2377 hypre_SStructCopy(b, x);
2378 sprintf(filename, "sstruct.out.gatherpre.%05d", myid);
2379 file = fopen(filename, "w");
2380 for (part = 0; part < data.nparts; part++)
2381 {
2382 pdata = data.pdata[part];
2383 for (var = 0; var < pdata.nvars; var++)
2384 {
2385 for (box = 0; box < pdata.nboxes; box++)
2386 {
2387 GetVariableBox(pdata.ilowers[box], pdata.iuppers[box], var,
2388 ilower, iupper);
2389 HYPRE_SStructVectorGetBoxValues(x, part, ilower, iupper,
2390 var, values);
2391 fprintf(file, "\nPart %d, var %d, box %d:\n", part, var, box);
2392 for (i = 0; i < pdata.boxsizes[box]; i++)
2393 {
2394 fprintf(file, "%e\n", values[i]);
2395 }
2396 }
2397 }
2398 }
2399 fclose(file);
2400
2401 /* result is all 1's */
2402 HYPRE_SStructVectorGather(x);
2403 sprintf(filename, "sstruct.out.gatherpost.%05d", myid);
2404 file = fopen(filename, "w");
2405 for (part = 0; part < data.nparts; part++)
2406 {
2407 pdata = data.pdata[part];
2408 for (var = 0; var < pdata.nvars; var++)
2409 {
2410 for (box = 0; box < pdata.nboxes; box++)
2411 {
2412 GetVariableBox(pdata.ilowers[box], pdata.iuppers[box], var,
2413 ilower, iupper);
2414 HYPRE_SStructVectorGetBoxValues(x, part, ilower, iupper,
2415 var, values);
2416 fprintf(file, "\nPart %d, var %d, box %d:\n", part, var, box);
2417 for (i = 0; i < pdata.boxsizes[box]; i++)
2418 {
2419 fprintf(file, "%e\n", values[i]);
2420 }
2421 }
2422 }
2423 }
2424
2425 /* re-initializes x to 0 */
2426 hypre_SStructAxpy(-1.0, b, x);
2427 }
2428#endif
2429
2430 hypre_TFree(values);
2431 }
2432
2433 /*-----------------------------------------------------------
2434 * Solve the system using ParCSR version of PCG
2435 *-----------------------------------------------------------*/
2436
2437 if ((solver_id > -1) && (solver_id < 2))
2438 {
2439 time_index = hypre_InitializeTiming("PCG Setup");
2440 hypre_BeginTiming(time_index);
2441
2442 HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &par_solver);
2443 HYPRE_PCGSetTol( par_solver, tol );
2444 HYPRE_PCGSetTwoNorm( par_solver, 1 );
2445 HYPRE_PCGSetRelChange( par_solver, 0 );
2446 HYPRE_PCGSetPrintLevel( par_solver, 0 );
2447
2448 if (solver_id == 0)
2449 {
2450 /* use BoomerAMG as preconditioner */
2451 HYPRE_PCGSetMaxIter( par_solver, maxit_prec);
2452 HYPRE_BoomerAMGCreate(&par_precond);
2453 HYPRE_BoomerAMGSetCoarsenType(par_precond, 10);
2454 HYPRE_BoomerAMGSetStrongThreshold(par_precond, 0.25);
2455 HYPRE_BoomerAMGSetAggNumLevels(par_precond, 1);
2456 HYPRE_BoomerAMGSetInterpType(par_precond, 6);
2457 HYPRE_BoomerAMGSetPMaxElmts(par_precond, 4);
2458 HYPRE_BoomerAMGSetTol(par_precond, 0.0);
2459 HYPRE_BoomerAMGSetRelaxType(par_precond, 8);
2460 HYPRE_BoomerAMGSetCycleRelaxType(par_precond, 8, 3);
2461 HYPRE_BoomerAMGSetCycleNumSweeps(par_precond, 1, 3);
2462 HYPRE_BoomerAMGSetRelaxOrder(par_precond, 0);
2463 HYPRE_BoomerAMGSetPrintLevel(par_precond, print_level);
2464 HYPRE_BoomerAMGSetPrintFileName(par_precond, "sstruct.out.log");
2465 HYPRE_BoomerAMGSetMaxIter(par_precond, 1);
2466 HYPRE_PCGSetPrecond( par_solver,
2467 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
2468 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
2469 par_precond );
2470 }
2471 else if (solver_id == 1)
2472 {
2473 /* use diagonal scaling as preconditioner */
2474 HYPRE_PCGSetMaxIter( par_solver, maxit_sol);
2475 par_precond = NULL;
2476 HYPRE_PCGSetPrecond( par_solver,
2477 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScale,
2478 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScaleSetup,
2479 par_precond );
2480 }
2481
2482 HYPRE_PCGSetup( par_solver, (HYPRE_Matrix) par_A,
2483 (HYPRE_Vector) par_b, (HYPRE_Vector) par_x );
2484
2485 hypre_EndTiming(time_index);
2486
2487 hypre_PrintTiming("Setup phase times", &wall_time, MPI_COMM_WORLD);
2488 hypre_FinalizeTiming(time_index);
2489 hypre_ClearTiming();
2490 fflush(NULL);
2491
2492 if (myid == 0)
2493 printf ("\nSystem Size / Setup Phase Time: %e\n\n", (system_size/ wall_time));
2494
2495 time_index = hypre_InitializeTiming("PCG Solve");
2496 hypre_BeginTiming(time_index);
2497
2498 HYPRE_PCGSolve( par_solver, (HYPRE_Matrix) par_A,
2499 (HYPRE_Vector) par_b, (HYPRE_Vector) par_x );
2500
2501 hypre_EndTiming(time_index);
2502 hypre_PrintTiming("Solve phase times", &wall_time, MPI_COMM_WORLD);
2503 hypre_FinalizeTiming(time_index);
2504 hypre_ClearTiming();
2505 fflush(NULL);
2506
2507 HYPRE_PCGGetNumIterations( par_solver, &num_iterations );
2508 HYPRE_PCGGetFinalRelativeResidualNorm( par_solver, &final_res_norm );
2509 HYPRE_ParCSRPCGDestroy(par_solver);
2510
2511 if (solver_id == 0)
2512 {
2513 HYPRE_BoomerAMGDestroy(par_precond);
2514 }
2515 }
2516 /*-----------------------------------------------------------
2517 * Solve the system using GMRES
2518 *-----------------------------------------------------------*/
2519
2520 /*-----------------------------------------------------------
2521 * Solve the system using ParCSR version of GMRES
2522 *-----------------------------------------------------------*/
2523
2524 if ((solver_id > 1) && (solver_id < 4))
2525 {
2526 time_index = hypre_InitializeTiming("GMRES Setup");
2527 hypre_BeginTiming(time_index);
2528
2529 HYPRE_ParCSRGMRESCreate(MPI_COMM_WORLD, &par_solver);
2530 HYPRE_GMRESSetKDim(par_solver, 10);
2531 HYPRE_GMRESSetTol(par_solver, tol);
2532 HYPRE_GMRESSetPrintLevel(par_solver, 0);
2533 HYPRE_GMRESSetLogging(par_solver, 1);
2534
2535 if (solver_id == 2)
2536 {
2537 /* use BoomerAMG as preconditioner */
2538 HYPRE_GMRESSetMaxIter(par_solver, maxit_prec);
2539 HYPRE_BoomerAMGCreate(&par_precond);
2540 HYPRE_BoomerAMGSetCoarsenType(par_precond, 10);
2541 HYPRE_BoomerAMGSetStrongThreshold(par_precond, 0.25);
2542 HYPRE_BoomerAMGSetAggNumLevels(par_precond, 1);
2543 HYPRE_BoomerAMGSetInterpType(par_precond, 6);
2544 HYPRE_BoomerAMGSetPMaxElmts(par_precond, 4);
2545 HYPRE_BoomerAMGSetTol(par_precond, 0.0);
2546 HYPRE_BoomerAMGSetRelaxOrder(par_precond, 0);
2547 HYPRE_BoomerAMGSetRelaxType(par_precond, 8);
2548 HYPRE_BoomerAMGSetCycleRelaxType(par_precond, 8, 3);
2549 HYPRE_BoomerAMGSetCycleNumSweeps(par_precond, 1, 3);
2550 HYPRE_BoomerAMGSetPrintLevel(par_precond, print_level);
2551 HYPRE_BoomerAMGSetPrintFileName(par_precond, "sstruct.out.log");
2552 HYPRE_BoomerAMGSetMaxIter(par_precond, 1);
2553 HYPRE_GMRESSetPrecond( par_solver,
2554 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
2555 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
2556 par_precond );
2557 }
2558 else if (solver_id == 3)
2559 {
2560 /* use diagonal scaling as preconditioner */
2561 HYPRE_GMRESSetMaxIter(par_solver, maxit_sol);
2562 par_precond = NULL;
2563 HYPRE_GMRESSetPrecond( par_solver,
2564 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScale,
2565 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScaleSetup,
2566 par_precond );
2567 }
2568
2569 HYPRE_GMRESSetup( par_solver, (HYPRE_Matrix) par_A,
2570 (HYPRE_Vector) par_b, (HYPRE_Vector) par_x);
2571
2572 hypre_EndTiming(time_index);
2573
2574
2575 hypre_PrintTiming("Setup phase times", &wall_time, MPI_COMM_WORLD);
2576 hypre_FinalizeTiming(time_index);
2577 hypre_ClearTiming();
2578 fflush(NULL);
2579
2580 if (myid == 0)
2581 printf ("\nSystem Size / Setup Phase Time: %e\n\n", (system_size/ wall_time));
2582
2583 time_index = hypre_InitializeTiming("GMRES Solve");
2584 hypre_BeginTiming(time_index);
2585
2586 HYPRE_GMRESSolve( par_solver, (HYPRE_Matrix) par_A,
2587 (HYPRE_Vector) par_b, (HYPRE_Vector) par_x);
2588
2589 hypre_EndTiming(time_index);
2590 hypre_PrintTiming("Solve phase times", &wall_time, MPI_COMM_WORLD);
2591 hypre_FinalizeTiming(time_index);
2592 hypre_ClearTiming();
2593 fflush(NULL);
2594
2595 HYPRE_GMRESGetNumIterations( par_solver, &num_iterations);
2596 HYPRE_GMRESGetFinalRelativeResidualNorm( par_solver,
2597 &final_res_norm);
2598 HYPRE_ParCSRGMRESDestroy(par_solver);
2599
2600 if (solver_id == 2)
2601 {
2602 HYPRE_BoomerAMGDestroy(par_precond);
2603 }
2604 }
2605
2606 /*-----------------------------------------------------------
2607 * Gather the solution vector
2608 *-----------------------------------------------------------*/
2609
2610 if (build_matrix_type == 1)
2611 {
2612 HYPRE_SStructVectorGather(x);
2613
2614 /*-----------------------------------------------------------
2615 * Print the solution and other info
2616 *-----------------------------------------------------------*/
2617
2618 if (print_system)
2619 {
2620 HYPRE_SStructVectorPrint("sstruct.out.x", x, 0);
2621 }
2622 }
2623
2624 if (myid == 0 )
2625 {
2626 printf("\n");
2627 printf("AMG2013 Benchmark version 1.0\n");
2628 printf("Iterations = %d\n", num_iterations);
2629 printf("Final Relative Residual Norm = %e\n", final_res_norm);
2630 printf ("\nSystem Size * Iterations / Solve Phase Time: %e\n",
2631 (system_size*(double)num_iterations/ wall_time));
2632 if (wall)
2633 printf ("\nSolve Time: %e\n", wall_time);
2634 printf("\n");
2635 }
2636
2637 /*-----------------------------------------------------------
2638 * Finalize things
2639 *-----------------------------------------------------------*/
2640 if (build_matrix_type == 1)
2641 {
2642 HYPRE_SStructGridDestroy(grid);
2643 for (s = 0; s < data.nstencils; s++)
2644 {
2645 HYPRE_SStructStencilDestroy(stencils[s]);
2646 }
2647 hypre_TFree(stencils);
2648 HYPRE_SStructGraphDestroy(graph);
2649 HYPRE_SStructMatrixDestroy(A);
2650 HYPRE_SStructVectorDestroy(b);
2651 HYPRE_SStructVectorDestroy(x);
2652
2653
2654 DestroyData(data);
2655
2656 hypre_TFree(parts);
2657 hypre_TFree(refine);
2658 hypre_TFree(distribute);
2659 hypre_TFree(block);
2660 }
2661 else
2662 {
2663 HYPRE_ParCSRMatrixDestroy(par_A);
2664 HYPRE_ParVectorDestroy(par_b);
2665 HYPRE_ParVectorDestroy(par_x);
2666 }
2667
2668 hypre_FinalizeMemoryDebug();
2669
2670
2671 /* Finalize MPI */
2672 MPI_Finalize();
2673
2674
2675 return (0);
2676}
2677
2678/*----------------------------------------------------------------------
2679 * Build standard 7-point laplacian in 3D with grid and anisotropy.
2680 * Parameters given in command line.
2681 *----------------------------------------------------------------------*/
2682
2683int
2684BuildParLaplacian( int argc,
2685 char *argv[],
2686 double *system_size_ptr,
2687 HYPRE_ParCSRMatrix *A_ptr,
2688 HYPRE_ParVector *rhs_ptr,
2689 HYPRE_ParVector *x_ptr)
2690{
2691 int nx, ny, nz;
2692 HYPRE_BigInt g_nx, g_ny, g_nz;
2693 int P, Q, R;
2694 double cx, cy, cz;
2695
2696 HYPRE_ParCSRMatrix A;
2697 HYPRE_ParVector rhs;
2698 HYPRE_ParVector x;
2699
2700 int num_procs, myid;
2701 int p, q, r;
2702 double *values;
2703
2704 int arg_index;
2705
2706 /*-----------------------------------------------------------
2707 * Initialize some stuff
2708 *-----------------------------------------------------------*/
2709
2710 MPI_Comm_size(MPI_COMM_WORLD, &num_procs );
2711 MPI_Comm_rank(MPI_COMM_WORLD, &myid );
2712
2713 /*-----------------------------------------------------------
2714 * Set defaults
2715 *-----------------------------------------------------------*/
2716 g_nx = 0;
2717 g_ny = 0;
2718 g_nz = 0;
2719
2720 nx = 10;
2721 ny = 10;
2722 nz = 10;
2723
2724 P = 1;
2725 Q = num_procs;
2726 R = 1;
2727
2728 cx = 1.;
2729 cy = 1.;
2730 cz = 1.;
2731
2732 /*-----------------------------------------------------------
2733 * Parse command line
2734 *-----------------------------------------------------------*/
2735 arg_index = 0;
2736 while (arg_index < argc)
2737 {
2738 if ( strcmp(argv[arg_index], "-n") == 0 )
2739 {
2740 arg_index++;
2741 nx = atoi(argv[arg_index++]);
2742 ny = atoi(argv[arg_index++]);
2743 nz = atoi(argv[arg_index++]);
2744 }
2745 else if ( strcmp(argv[arg_index], "-P") == 0 )
2746 {
2747 arg_index++;
2748 P = atoi(argv[arg_index++]);
2749 Q = atoi(argv[arg_index++]);
2750 R = atoi(argv[arg_index++]);
2751 }
2752 else if ( strcmp(argv[arg_index], "-c") == 0 )
2753 {
2754 arg_index++;
2755 cx = atof(argv[arg_index++]);
2756 cy = atof(argv[arg_index++]);
2757 cz = atof(argv[arg_index++]);
2758 }
2759 else
2760 {
2761 arg_index++;
2762 }
2763 }
2764
2765 g_nx = nx*P;
2766 g_ny = ny*Q;
2767 g_nz = nz*R;
2768
2769 /*-----------------------------------------------------------
2770 * Check a few things
2771 *-----------------------------------------------------------*/
2772
2773 if ((P*Q*R) != num_procs)
2774 {
2775 printf("Error: Invalid number of processors or processor topology\n");
2776 exit(1);
2777 }
2778
2779 /*-----------------------------------------------------------
2780 * Print driver parameters
2781 *-----------------------------------------------------------*/
2782
2783 if (myid == 0)
2784 {
2785 printf(" 3D 7-point Laplace problem on a cube\n");
2786#ifdef HYPRE_LONG_LONG
2787 printf(" (nx_global, ny_global, nz_global) = (%lld, %lld, %lld)\n", g_nx, g_ny, g_nz);
2788#else
2789 printf(" (nx_global, ny_global, nz_global) = (%d, %d, %d)\n", g_nx, g_ny, g_nz);
2790#endif
2791 printf(" (Px, Py, Pz) = (%d, %d, %d)\n", P, Q, R);
2792 printf(" (cx, cy, cz) = (%f, %f, %f)\n\n", cx, cy, cz);
2793 }
2794
2795 /*-----------------------------------------------------------
2796 * Set up the grid structure
2797 *-----------------------------------------------------------*/
2798
2799 /* compute p,q,r from P,Q,R and myid */
2800 p = myid % P;
2801 q = (( myid - p)/P) % Q;
2802 r = ( myid - p - P*q)/( P*Q );
2803
2804 /*-----------------------------------------------------------
2805 * Generate the matrix
2806 *-----------------------------------------------------------*/
2807
2808 values = hypre_CTAlloc(double, 4);
2809
2810 values[1] = -cx;
2811 values[2] = -cy;
2812 values[3] = -cz;
2813
2814 values[0] = 0.;
2815 if (nx > 1)
2816 {
2817 values[0] += 2.0*cx;
2818 }
2819 if (ny > 1)
2820 {
2821 values[0] += 2.0*cy;
2822 }
2823 if (nz > 1)
2824 {
2825 values[0] += 2.0*cz;
2826 }
2827
2828 A = (HYPRE_ParCSRMatrix) GenerateLaplacian(MPI_COMM_WORLD,
2829 g_nx, g_ny, g_nz, P, Q, R, p, q, r, values, &rhs, &x);
2830
2831 hypre_TFree(values);
2832
2833 *system_size_ptr = (double)g_nx*(double)g_ny*(double)g_nz;
2834 *A_ptr = A;
2835 *rhs_ptr = rhs;
2836 *x_ptr = x;
2837
2838 return (0);
2839}
2840
2841/*----------------------------------------------------------------------
2842 * Build 27-point laplacian in 3D,
2843 * Parameters given in command line.
2844 *----------------------------------------------------------------------*/
2845
2846int
2847BuildParLaplacian27pt( int argc,
2848 char *argv[],
2849 double *system_size_ptr,
2850 HYPRE_ParCSRMatrix *A_ptr,
2851 HYPRE_ParVector *rhs_ptr,
2852 HYPRE_ParVector *x_ptr)
2853{
2854 int nx, ny, nz;
2855 HYPRE_BigInt g_nx, g_ny, g_nz;
2856 int P, Q, R;
2857
2858 HYPRE_ParCSRMatrix A;
2859 HYPRE_ParVector rhs;
2860 HYPRE_ParVector x;
2861
2862 int num_procs, myid;
2863 int p, q, r;
2864 double *values;
2865 int arg_index;
2866
2867 /*-----------------------------------------------------------
2868 * Initialize some stuff
2869 *-----------------------------------------------------------*/
2870
2871 MPI_Comm_size(MPI_COMM_WORLD, &num_procs );
2872 MPI_Comm_rank(MPI_COMM_WORLD, &myid );
2873
2874 /*-----------------------------------------------------------
2875 * Set defaults
2876 *-----------------------------------------------------------*/
2877 g_nx = 0;
2878 g_ny = 0;
2879 g_nz = 0;
2880
2881 nx = 10;
2882 ny = 10;
2883 nz = 10;
2884
2885 P = 1;
2886 Q = num_procs;
2887 R = 1;
2888
2889 /*-----------------------------------------------------------
2890 * Parse command line
2891 *-----------------------------------------------------------*/
2892 arg_index = 0;
2893 while (arg_index < argc)
2894 {
2895 if ( strcmp(argv[arg_index], "-n") == 0 )
2896 {
2897 arg_index++;
2898 nx = atoi(argv[arg_index++]);
2899 ny = atoi(argv[arg_index++]);
2900 nz = atoi(argv[arg_index++]);
2901 }
2902 else if ( strcmp(argv[arg_index], "-P") == 0 )
2903 {
2904 arg_index++;
2905 P = atoi(argv[arg_index++]);
2906 Q = atoi(argv[arg_index++]);
2907 R = atoi(argv[arg_index++]);
2908 }
2909 else
2910 {
2911 arg_index++;
2912 }
2913 }
2914
2915 g_nx = nx*P;
2916 g_ny = ny*Q;
2917 g_nz = nz*R;
2918
2919 /*-----------------------------------------------------------
2920 * Check a few things
2921 *-----------------------------------------------------------*/
2922
2923 if ((P*Q*R) != num_procs)
2924 {
2925 printf("Error: Invalid number of processors or processor topology\n");
2926 exit(1);
2927 }
2928
2929 /*-----------------------------------------------------------
2930 * Print driver parameters
2931 *-----------------------------------------------------------*/
2932
2933 if (myid == 0)
2934 {
2935 printf(" Laplace type problem with a 27-point stencil \n");
2936#ifdef HYPRE_LONG_LONG
2937 printf(" (nx_global, ny_global, nz_global) = (%lld, %lld, %lld)\n", g_nx, g_ny, g_nz);
2938#else
2939 printf(" (nx_global, ny_global, nz_global) = (%d, %d, %d)\n", g_nx, g_ny, g_nz);
2940#endif
2941 printf(" (Px, Py, Pz) = (%d, %d, %d)\n\n", P, Q, R);
2942 }
2943
2944 /*-----------------------------------------------------------
2945 * Set up the grid structure
2946 *-----------------------------------------------------------*/
2947
2948 /* compute p,q,r from P,Q,R and myid */
2949 p = myid % P;
2950 q = (( myid - p)/P) % Q;
2951 r = ( myid - p - P*q)/( P*Q );
2952
2953 /*-----------------------------------------------------------
2954 * Generate the matrix
2955 *-----------------------------------------------------------*/
2956
2957 values = hypre_CTAlloc(double, 2);
2958
2959 values[0] = 26.0;
2960 if (nx == 1 || ny == 1 || nz == 1)
2961 values[0] = 8.0;
2962 if (nx*ny == 1 || nx*nz == 1 || ny*nz == 1)
2963 values[0] = 2.0;
2964 values[1] = -1.;
2965
2966 A = (HYPRE_ParCSRMatrix) GenerateLaplacian27pt(MPI_COMM_WORLD,
2967 g_nx, g_ny, g_nz, P, Q, R, p, q, r, values, &rhs, &x);
2968
2969 hypre_TFree(values);
2970
2971 /* *system_size_ptr = (double)P*(double)Q*(double)R*
2972 (double)nx*(double)ny*(double)nz; */
2973 *system_size_ptr = (double)g_nx*(double)g_ny*(double)g_nz;
2974 *rhs_ptr = rhs;
2975 *x_ptr = x;
2976 *A_ptr = A;
2977
2978 return (0);
2979}
2980
2981int
2982BuildParVarDifConv( int argc,
2983 char *argv[],
2984 double *system_size_ptr,
2985 HYPRE_ParCSRMatrix *A_ptr ,
2986 HYPRE_ParVector *rhs_ptr,
2987 HYPRE_ParVector *x_ptr)
2988{
2989 int nx, ny, nz;
2990 HYPRE_BigInt g_nx, g_ny, g_nz;
2991 int P, Q, R;
2992
2993 HYPRE_ParCSRMatrix A;
2994 HYPRE_ParVector rhs;
2995 HYPRE_ParVector x;
2996
2997 int num_procs, myid;
2998 int p, q, r;
2999 int arg_index = 0;
3000 double eps = 1;
3001
3002 /*-----------------------------------------------------------
3003 * Initialize some stuff
3004 *-----------------------------------------------------------*/
3005
3006 MPI_Comm_size(MPI_COMM_WORLD, &num_procs );
3007 MPI_Comm_rank(MPI_COMM_WORLD, &myid );
3008
3009 /*-----------------------------------------------------------
3010 * Set defaults
3011 *-----------------------------------------------------------*/
3012 g_nx = 0;
3013 g_ny = 0;
3014 g_nz = 0;
3015
3016 nx = 10;
3017 ny = 10;
3018 nz = 10;
3019 P = 1;
3020 Q = num_procs;
3021 R = 1;
3022
3023 /*-----------------------------------------------------------
3024 * Parse command line
3025 *-----------------------------------------------------------*/
3026 while (arg_index < argc)
3027 {
3028 if ( strcmp(argv[arg_index], "-n") == 0 )
3029 {
3030 arg_index++;
3031 nx = atoi(argv[arg_index++]);
3032 ny = atoi(argv[arg_index++]);
3033 nz = atoi(argv[arg_index++]);
3034 }
3035 else if ( strcmp(argv[arg_index], "-P") == 0 )
3036 {
3037 arg_index++;
3038 P = atoi(argv[arg_index++]);
3039 Q = atoi(argv[arg_index++]);
3040 R = atoi(argv[arg_index++]);
3041 }
3042 else
3043 {
3044 arg_index++;
3045 }
3046 }
3047
3048 g_nx = nx*P;
3049 g_ny = ny*Q;
3050 g_nz = nz*R;
3051
3052 /*-----------------------------------------------------------
3053 * Check a few things
3054 *-----------------------------------------------------------*/
3055
3056 if ((P*Q*R) != num_procs)
3057 {
3058 printf("Error: Invalid number of processors or processor topology\n");
3059 exit(1);
3060 }
3061
3062 /*-----------------------------------------------------------
3063 * Print driver parameters
3064 *-----------------------------------------------------------*/
3065
3066 if (myid == 0)
3067 {
3068 printf(" elliptic PDE on a cube with jumps\n");
3069#ifdef HYPRE_LONG_LONG
3070 printf(" (nx_global, ny_global, nz_global) = (%lld, %lld, %lld)\n", g_nx, g_ny, g_nz);
3071#else
3072 printf(" (nx_global, ny_global, nz_global) = (%d, %d, %d)\n", g_nx, g_ny, g_nz);
3073#endif
3074 printf(" (Px, Py, Pz) = (%d, %d, %d)\n", P, Q, R);
3075 }
3076 /*-----------------------------------------------------------
3077 * Set up the grid structure
3078 *-----------------------------------------------------------*/
3079
3080 /* compute p,q,r from P,Q,R and myid */
3081 p = myid % P;
3082 q = (( myid - p)/P) % Q;
3083 r = ( myid - p - P*q)/( P*Q );
3084
3085 /*-----------------------------------------------------------
3086 * Generate the matrix
3087 *-----------------------------------------------------------*/
3088
3089 A = (HYPRE_ParCSRMatrix) GenerateVarDifConv(MPI_COMM_WORLD,
3090 g_nx, g_ny, g_nz, P, Q, R, p, q, r, eps, &rhs, &x);
3091
3092 /* *system_size_ptr = (double)P*(double)Q*(double)R*
3093 (double)nx*(double)ny*(double)nz; */
3094 *system_size_ptr = (double)g_nx*(double)g_ny*(double)g_nz;
3095 *A_ptr = A;
3096 *rhs_ptr = rhs;
3097 *x_ptr = x;
3098
3099 return (0);
3100}
Note: See TracBrowser for help on using the repository browser.