source: CIVL/examples/fortran/nek5000/core/crs_amg.c

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100755
File size: 39.6 KB
Line 
1#include <stddef.h>
2#include <stdlib.h>
3#include <stdio.h>
4#include <string.h>
5#include <math.h>
6#include <sys/stat.h>
7#include <limits.h>
8#include "gslib.h"
9
10#define crs_setup PREFIXED_NAME(crs_amg_setup)
11#define crs_solve PREFIXED_NAME(crs_amg_solve)
12#define crs_stats PREFIXED_NAME(crs_amg_stats)
13#define crs_free PREFIXED_NAME(crs_amg_free )
14
15#ifndef AMG_BLOCK_ROWS
16# define AMG_BLOCK_ROWS 2400
17#endif
18
19#ifndef AMG_MAX_ROWS
20# define AMG_MAX_ROWS 12000
21#endif
22
23#include "crs_amg_io.h"
24
25static double get_time(void)
26{
27#ifdef GS_TIMING
28 return comm_time();
29#else
30 return 0;
31#endif
32}
33
34static void barrier(const struct comm *c)
35{
36#ifdef GS_BARRIER
37 comm_barrier(c);
38#endif
39}
40
41/* sparse matrix, condensed sparse row */
42struct csr_mat {
43 uint rn, cn, *row_off, *col;
44 double *a;
45};
46
47/* z = alpha y + beta M x */
48static double apply_M(
49 double *z, const double alpha, const double *y,
50 const double beta, const struct csr_mat *const M, const double *x)
51{
52 uint i; const uint rn=M->rn;
53 const uint *const row_off = M->row_off, *col = M->col;
54 const double *a = M->a;
55 const double t0 = get_time();
56 for(i=0;i<rn;++i) {
57 uint j; const uint je=row_off[i+1]; double t = 0;
58 for(j=row_off[i]; j<je; ++j) t += (*a++) * x[*col++];
59 *z++ = alpha*(*y++) + beta*t;
60 }
61 return get_time()-t0;
62}
63
64/* z = M^t x */
65static double apply_Mt(
66 double *const z, const struct csr_mat *const M, const double *x)
67{
68 uint i; const uint rn=M->rn,cn=M->cn;
69 const uint *const row_off = M->row_off, *col = M->col;
70 const double *a = M->a;
71 const double t0 = get_time();
72 for(i=0;i<cn;++i) z[i]=0;
73 for(i=0;i<rn;++i) {
74 uint j; const uint je=row_off[i+1]; const double xi = *x++;
75 for(j=row_off[i]; j<je; ++j) z[*col++] += (*a++) * xi;
76 }
77 return get_time()-t0;
78}
79
80struct Q { uint nloc; struct gs_data *gsh; };
81
82/* ve = Q v */
83static double apply_Q(
84 double *const ve, const struct Q *const Q, const double *const v,
85 const struct comm *comm)
86{
87 double t0;
88 memcpy(ve,v,Q->nloc*sizeof(double));
89 barrier(comm); t0=get_time();
90 gs(ve,gs_double,gs_add,0,Q->gsh,0);
91 return get_time()-t0;
92}
93
94/* z := alpha y + beta Q^t x
95 (x := Q Q^t x as a side effect)
96 */
97static double apply_Qt(
98 double *z, const double alpha, const double *y,
99 const double beta, const struct Q *Q, double *x,
100 const struct comm *comm)
101{
102 double t0,t1;
103 uint i; const uint nloc=Q->nloc;
104 barrier(comm); t0=get_time();
105 gs(x,gs_double,gs_add,1,Q->gsh,0);
106 t1 = get_time() - t0;
107 for(i=0;i<nloc;++i) *z++ = alpha*(*y++) + beta*(*x++);
108 return t1;
109}
110
111struct crs_data {
112 struct comm comm;
113 struct gs_data *gs_top;
114 uint un, *umap; /* number of unique id's on this proc, map to user ids */
115 double tni; /* 1 / (total number of unique ids) ... for computing mean */
116 int null_space;
117 unsigned levels;
118 unsigned *cheb_m; /* cheb_m [levels-1] : smoothing steps */
119 double *cheb_rho; /* cheb_rho[levels-1] : spectral radius of smoother */
120 uint *lvl_offset;
121 double *Dff; /* diagonal smoother, F-relaxation */
122 struct Q *Q_W, *Q_AfP, *Q_Aff;
123 struct csr_mat *W, *AfP, *Aff;
124 double *b, *x, *c, *c_old, *r, *buf;
125 double *timing; uint timing_n;
126};
127
128static void amg_exec(struct crs_data *const amg)
129{
130 unsigned lvl; const unsigned levels=amg->levels;
131 const uint *const off = amg->lvl_offset;
132 double *const b = amg->b, *const x = amg->x;
133 double *c = amg->c, *c_old = amg->c_old, *r = amg->r;
134 double *timing = amg->timing;
135 /* restrict down all levels */
136 for(lvl=0;lvl<levels-1;++lvl,timing+=6) {
137 double *const b_l = b+off[lvl], *const b_lp1 = b+off[lvl+1];
138 /* b_{l+1} += W^t b_l */
139 timing[1]+=apply_Mt(amg->buf, &amg->W[lvl],b_l);
140 timing[0]+=apply_Qt(b_lp1, 1,b_lp1, 1,&amg->Q_W[lvl],amg->buf, &amg->comm);
141 }
142 /* solve bottom equation (1 dof) */
143 { const uint i=off[levels-1];
144 if(off[levels]-i) x[i]=amg->Dff[i]*b[i]; }
145 for(lvl=levels-1;lvl--;) {
146 double *const b_l = b+off[lvl];
147 double *const x_l = x+off[lvl], *const x_lp1 = x+off[lvl+1];
148 const double *const d_l = amg->Dff+off[lvl];
149 const uint n = off[lvl+1]-off[lvl]; uint i;
150 const unsigned m = amg->cheb_m[lvl]; unsigned ci;
151 double alpha, beta, gamma;
152 timing = amg->timing + lvl*6;
153 /* buf = Q x_{l+1} */
154 timing[2]+=apply_Q(amg->buf, &amg->Q_AfP[lvl],x_lp1, &amg->comm);
155 /* x_l = W x_{l+1} */
156 timing[3]+=apply_M(x_l, 0,b_l, 1,&amg->W [lvl], amg->buf);
157 /* b_l -= AfP x_{l+1} */
158 timing[3]+=apply_M(b_l, 1,b_l, -1,&amg->AfP[lvl], amg->buf);
159 /* c_1 = Dff b_l */
160 for(i=0;i<n;++i) c[i]=d_l[i]*b_l[i];
161 if(m>1) {
162 alpha = amg->cheb_rho[lvl]/2, alpha*=alpha;
163 gamma = 2*alpha/(1-2*alpha), beta = 1 + gamma;
164 /* r_1 = b_l - Aff c_1 */
165 timing[4]+=apply_Q(amg->buf, &amg->Q_Aff[lvl],c, &amg->comm);
166 timing[5]+=apply_M(r, 1,b_l, -1,&amg->Aff[lvl],amg->buf);
167 /* c_2 = (1+gamma)(c_1 + Dff r_1) */
168 { double *const temp = c; c=c_old,c_old=temp; }
169 for(i=0;i<n;++i) c[i] = beta*(c_old[i]+d_l[i]*r[i]);
170 }
171 for(ci=3;ci<=m;++ci) {
172 gamma = alpha*beta, gamma = gamma/(1-gamma), beta = 1 + gamma;
173 /* r_i = b_l - Aff c_i */
174 timing[4]+=apply_Q(amg->buf, &amg->Q_Aff[lvl],c, &amg->comm);
175 timing[5]+=apply_M(r, 1,b_l, -1,&amg->Aff[lvl],amg->buf);
176 /* c_{i+1} = (1+gamma)*(c_i+D r_i) - gamma c_{i-1} */
177 { double *const temp = c; c=c_old,c_old=temp; }
178 for(i=0;i<n;++i) c[i] = beta*(c_old[i]+d_l[i]*r[i]) - gamma*c[i];
179 }
180 for(i=0;i<n;++i) x_l[i]+=c[i];
181 }
182 amg->timing_n++;
183}
184
185void crs_solve(double *x, struct crs_data *data, double *b)
186{
187 uint i; const uint un = data->un; const uint *const umap = data->umap;
188 double *const ub = data->b, *const ux = data->x;
189
190 gs(b, gs_double,gs_add, 1, data->gs_top, 0);
191 for(i=0;i<un;++i) ub[i]=b[umap[i]];
192
193 amg_exec(data);
194
195 if(data->null_space) {
196 const double avg = data->tni*comm_reduce_double(&data->comm, gs_add, ux,un);
197 for(i=0;i<un;++i) ux[i] -= avg;
198 }
199
200 for(i=0;i<un;++i) x[umap[i]]=ux[i];
201 gs(x, gs_double,gs_add, 0, data->gs_top, 0);
202}
203
204void crs_stats(const struct crs_data *const data)
205{
206 const unsigned lm1 = data->levels-1;
207 double *avg = tmalloc(double, 2*6*lm1);
208 double ni = 1/((double)data->timing_n * data->comm.np);
209 uint i;
210 for(i=0;i<6*lm1;++i) avg[i] = ni*data->timing[i];
211 comm_allreduce(&data->comm,gs_double,gs_add, avg,6*lm1, avg+6*lm1);
212 if(data->comm.id==0) {
213 double *t = avg; unsigned lvl;
214 printf("AMG stats:\n");
215 for(lvl=0;lvl<lm1;++lvl,t+=6)
216 printf(" [lvl %02u] comm: Wt=%0.3e W,AfP=%0.3e Aff=%0.3e\n"
217 " [lvl %02u] work: Wt=%0.3e W,AfP=%0.3e Aff=%0.3e\n",
218 lvl, t[0],t[2],t[4], lvl, t[1],t[3],t[5]);
219 }
220 free(avg);
221}
222
223/*==========================================================================
224
225 Setup
226
227 ==========================================================================*/
228
229
230/* remote id - designates the i-th uknown owned by proc p */
231struct rid { uint p,i; };
232
233/* the data for each id to be read from amg.dat */
234struct id_data {
235 double D;
236 ulong id;
237 unsigned level;
238};
239
240/* global non-zero (matrix entry) */
241struct gnz { ulong i,j; double a; };
242
243/*
244 The user ids id[n] are assigned to unique procs.
245
246 Output
247 uid --- ulong array; uid[0 ... uid.n-1] are the ids owned by this proc
248 rid_map --- id[i] is uid[ rid_map[i].i ] on proc rid_map[i].p
249 map = assign_dofs(...) --- uid[i] is id[map[i]]
250
251 map is returned only when rid_map is null, and vice versa
252*/
253static uint *assign_dofs(struct array *const uid,
254 struct rid *const rid_map,
255 const ulong *const id, const uint n,
256 const uint p,
257 struct gs_data *gs_top, buffer *const buf)
258{
259 uint i,count, *map=0;
260 struct rid *const rid = rid_map?rid_map:tmalloc(struct rid,n);
261 for(i=0;i<n;++i) rid[i].p=p, rid[i].i=i;
262 gs_vec(n?&rid[0].p:0,2, gs_sint,gs_add,0, gs_top,buf);
263 for(count=0,i=0;i<n;++i) {
264 if(rid[i].i==i && id[i]!=0 && rid[i].p==p) ++count; }
265 array_init(ulong,uid,count); uid->n=count;
266 if(rid_map==0) {
267 map=tmalloc(uint,count);
268 for(count=0,i=0;i<n;++i) {
269 if(rid[i].i==i && id[i]!=0 && rid[i].p==p) map[count++]=i; }
270 }
271 { ulong *const uid_p = uid->ptr;
272 for(count=0,i=0;i<n;++i) if(rid[i].i==i && id[i]!=0 && rid[i].p==p)
273 uid_p[count]=id[i], rid[i].i=count, ++count;
274 }
275 if(rid_map) gs_vec(n?&rid[0].p:0,2, gs_sint,gs_add,0, gs_top,buf);
276 else free(rid);
277 return map;
278}
279
280static void localize_rows(struct gnz *p, uint nz,
281 const ulong *const uid, const uint *const id_perm)
282{
283 uint i=0; while(nz--) {
284 const ulong id=p->i;
285 while(uid[i]<id) ++i;
286 (p++)->i=id_perm[i];
287 }
288}
289
290static void find_mat_offs(uint *const off, const int levels,
291 const struct gnz *const p, const uint nz,
292 const struct id_data *const id)
293{
294 int lvl=-1; uint i; for(i=0;i<nz;++i) {
295 const int lvl_i = id[p[i].i].level;
296 while(lvl<lvl_i) off[++lvl]=i;
297 }
298 while(lvl<levels-1) off[++lvl]=i;
299}
300
301static void compress_mat(
302 uint *const row_off, const int rn, uint *const col, double *const a,
303 const struct gnz *const p, const uint nz)
304{
305 int r = -1; uint k;
306 for(r=-1, k=0;k<nz;++k) {
307 const int row = p[k].i;
308 while(r<row) row_off[++r]=k;
309 col[k] = p[k].j;
310 a[k] = p[k].a;
311 }
312 while(r<rn) row_off[++r]=k;
313}
314
315static void amg_setup_mat(
316 struct array *ids, const uint nloc, struct csr_mat *const mat,
317 const ulong *uid, const uint uid_n, const uint *id_perm,
318 const uint i0, const uint j0, struct gnz *const p, const uint nz,
319 buffer *buf)
320{
321 slong *id = ids->ptr; ulong last_id = ULONG_MAX;
322 const uint ne = ids->n;
323 uint k,i=0,ie=nloc;
324 sarray_sort(struct gnz,p,nz, j,1, buf); /* sort by col */
325 for(k=0;k<nz;++k) {
326 const ulong j_id = p[k].j;
327 p[k].i = p[k].i - i0;
328 if(j_id == last_id) { p[k].j = p[k-1].j; continue; }
329 last_id = j_id;
330 /* check local ids */
331 while(i<uid_n && uid[i]<j_id) ++i;
332 if(i<uid_n && uid[i]==j_id) { p[k].j=id_perm[i]-j0; continue; }
333 /* check old group of remote ids */
334 while(ie<ne && (ulong)(-id[ie])<j_id) ++ie;
335 if(ie<ne && (ulong)(-id[ie])==j_id) { p[k].j=ie; continue; }
336 /* not found, add to new group of remote ids */
337 id = array_reserve(slong, ids, ids->n+1);
338 id[ids->n] = -(slong)j_id, p[k].j = ids->n++;
339 }
340 sarray_sort_2(struct gnz,p,nz, i,1, j,1, buf);
341
342 mat->cn = ids->n;
343 compress_mat(mat->row_off,mat->rn, mat->col,mat->a, p,nz);
344}
345
346static uint amg_setup_mats(
347 struct crs_data *const data,
348 const ulong *const uid, const uint uid_n,
349 const uint *const id_perm,
350 const struct id_data *const id,
351 struct array *mat,
352 buffer *buf)
353{
354 const uint *const off = data->lvl_offset;
355 struct array ide = null_array; uint max_e=0;
356 const unsigned levels=data->levels;
357 unsigned lvl, m;
358 uint *mat_off[3]; struct csr_mat *csr_mat[3];
359 data->Q_W = tmalloc(struct Q, (levels-1)*3);
360 data->Q_AfP = data->Q_W + (levels-1);
361 data->Q_Aff = data->Q_AfP + (levels-1);
362 csr_mat[0] = data->W = tmalloc(struct csr_mat, (levels-1)*3);
363 csr_mat[1] = data->AfP = data->W + (levels-1);
364 csr_mat[2] = data->Aff = data->AfP + (levels-1);
365 mat_off[0] = tmalloc(uint, levels*3);
366 mat_off[1] = mat_off[0]+levels;
367 mat_off[2] = mat_off[1]+levels;
368 for(m=0;m<3;++m) {
369 /* change row from uid to local index */
370 localize_rows(mat[m].ptr,mat[m].n, uid,id_perm);
371 /* sort by row */
372 sarray_sort(struct gnz,mat[m].ptr,mat[m].n, i,1, buf);
373 /* find offsets of each level */
374 find_mat_offs(mat_off[m],levels, mat[m].ptr,mat[m].n, id);
375 }
376 /* allocate CSR arrays */
377 if(levels>1) {
378 uint *ui = tmalloc(uint, 3*((off[levels-1]-off[0])+(levels-1))
379 +mat[0].n+mat[1].n+mat[2].n);
380 double *a = tmalloc(double, mat[0].n+mat[1].n+mat[2].n);
381 for(m=0;m<3;++m) for(lvl=0;lvl<levels-1;++lvl) {
382 const uint rn = off[lvl+1]-off[lvl];
383 const uint nz = mat_off[m][lvl+1]-mat_off[m][lvl];
384 csr_mat[m][lvl].rn = rn;
385 csr_mat[m][lvl].row_off = ui, ui += rn+1;
386 csr_mat[m][lvl].col = ui, ui += nz;
387 csr_mat[m][lvl].a = a, a += nz;
388 }
389 }
390 for(lvl=0;lvl<levels-1;++lvl) {
391 uint i; const uint j0=off[lvl+1], nloc = off[levels]-j0;
392 slong *p = array_reserve(slong, &ide, nloc);
393 for(i=0;i<nloc;++i) p[i] = id[i+j0].id;
394 data->Q_W[lvl].nloc = data->Q_AfP[lvl].nloc = ide.n = nloc;
395 amg_setup_mat(&ide,nloc, &data->W[lvl],
396 uid, uid_n, id_perm, off[lvl],j0,
397 (struct gnz*)mat[0].ptr + mat_off[0][lvl],
398 mat_off[0][lvl+1]-mat_off[0][lvl], buf);
399 data->Q_W[lvl].gsh = gs_setup(ide.ptr,ide.n, &data->comm, 0,gs_auto,1);
400 amg_setup_mat(&ide,nloc, &data->AfP[lvl],
401 uid, uid_n, id_perm, off[lvl],j0,
402 (struct gnz*)mat[1].ptr + mat_off[1][lvl],
403 mat_off[1][lvl+1]-mat_off[1][lvl], buf);
404 data->Q_AfP[lvl].gsh = gs_setup(ide.ptr,ide.n, &data->comm, 0,gs_auto,1);
405 if(ide.n>max_e) max_e=ide.n;
406 }
407 for(lvl=0;lvl<levels-1;++lvl) {
408 uint i; const uint j0=off[lvl], nloc = off[lvl+1]-j0;
409 slong *p = array_reserve(slong, &ide, nloc);
410 for(i=0;i<nloc;++i) p[i] = id[i+j0].id;
411 data->Q_Aff[lvl].nloc = ide.n = nloc;
412 amg_setup_mat(&ide,nloc, &data->Aff[lvl],
413 uid, uid_n, id_perm, j0,j0,
414 (struct gnz*)mat[2].ptr + mat_off[2][lvl],
415 mat_off[2][lvl+1]-mat_off[2][lvl], buf);
416 data->Q_Aff[lvl].gsh = gs_setup(ide.ptr,ide.n, &data->comm, 0,gs_auto,1);
417 if(ide.n>max_e) max_e=ide.n;
418 }
419 free(mat_off[0]);
420 array_free(&ide);
421 return max_e;
422}
423
424static uint *compute_offsets(
425 const struct id_data *const id, const uint n,
426 const int levels)
427{
428 uint i, *off = tmalloc(uint, levels+1);
429 int lvl = -1;
430 for(i=0;i<n;++i) {
431 const int lvl_i = id[i].level;
432 while(lvl<lvl_i) off[++lvl]=i;
433 }
434 while(lvl<levels) off[++lvl]=i;
435 return off;
436}
437
438static void read_data(
439 struct crs_data *const data,
440 struct array *ids, struct array mats[3],
441 struct crystal *const cr,
442 const ulong *uid, const uint uid_n, const char *datafname);
443
444static void read_data_mpiio(
445 struct crs_data *const data,
446 struct array *ids, struct array mats[3],
447 struct crystal *const cr,
448 const ulong *uid, const uint uid_n, const char *datafname);
449
450static void amg_setup_aux(struct crs_data *data, uint n, const ulong *id, const char *datafname)
451{
452 struct crystal cr;
453 struct array uid; uint *id_perm;
454 struct array ids, mat[3];
455 uint max_e; ulong temp_long;
456
457 crystal_init(&cr, &data->comm);
458 data->umap = assign_dofs(&uid,0, id,n,data->comm.id,data->gs_top,&cr.data);
459 data->un = uid.n;
460
461 sortp_long(&cr.data,0, uid.ptr,uid.n,sizeof(ulong));
462 sarray_permute(ulong,uid.ptr ,uid.n, cr.data.ptr, &temp_long);
463 sarray_permute(uint ,data->umap,uid.n, cr.data.ptr, &max_e);
464
465#ifdef USEMPIIO
466 read_data_mpiio(data, &ids, mat, &cr, uid.ptr,uid.n, datafname);
467#else
468 read_data(data, &ids, mat, &cr, uid.ptr,uid.n, datafname);
469#endif
470
471 /* we should have data for every uid;
472 if not, then the data is for a smaller problem than we were given */
473 { int not_happy = ids.n==uid.n ? 0 : 1;
474 if(comm_reduce_int(&data->comm, gs_max, &not_happy,1)) {
475 comm_barrier(&data->comm);
476 if(data->comm.id==0)
477 fail(1,__FILE__,__LINE__,"AMG: missing data for some rows");
478 else die(1);
479 }
480 }
481
482 sarray_sort(struct id_data,ids.ptr,ids.n, id,1, &cr.data);
483 sarray_sort(struct id_data,ids.ptr,ids.n, level,0, &cr.data);
484 sarray_permute(uint,data->umap,ids.n, cr.data.ptr, &max_e);
485 id_perm = tmalloc(uint, uid.n);
486 sarray_perm_invert(id_perm, cr.data.ptr, uid.n);
487 /* the global id uid[i] will have local index id_perm[i]
488 (the local storage is sorted by level) */
489
490 data->lvl_offset = compute_offsets(ids.ptr,ids.n, data->levels);
491
492 max_e = amg_setup_mats(data,uid.ptr,uid.n,id_perm,ids.ptr,mat, &cr.data);
493
494 { const unsigned levels=data->levels;
495 const uint *const off = data->lvl_offset;
496 struct id_data *const id = ids.ptr; const uint n = ids.n;
497 double *d;
498 uint i,max_f=0;
499 for(i=0;i<levels-1;++i) {
500 const uint nf = off[i+1]-off[i];
501 if(nf>max_f) max_f=nf;
502 }
503 d = data->Dff = tmalloc(double, 3*n + 3*max_f + max_e + 6*(levels-1));
504 data->x = data->Dff + n;
505 data->b = data->x + n;
506 data->c = data->b + n;
507 data->c_old = data->c + max_f;
508 data->r = data->c_old + max_f;
509 data->buf = data->r + max_f;
510 data->timing = data->buf + max_e;
511 for(i=0;i<n;++i) d[i]=id[i].D;
512 for(i=0;i<6*(levels-1);++i) data->timing[i]=0;
513 data->timing_n=0;
514 }
515
516 free(id_perm);
517 array_free(&ids);
518 array_free(&mat[0]);
519 array_free(&mat[1]);
520 array_free(&mat[2]);
521 array_free(&uid);
522 crystal_free(&cr);
523}
524
525static void amg_dump(
526 uint n, const ulong *id,
527 uint nz, const uint *Ai, const uint *Aj, const double *A,
528 struct crs_data *data, const char *datafname);
529
530struct crs_data *crs_setup(
531 uint n, const ulong *id,
532 uint nz, const uint *Ai, const uint *Aj, const double *A,
533 uint null_space, const struct comm *comm, const char *datafname, uint *ierr)
534{
535 struct crs_data *data = tmalloc(struct crs_data,1);
536 struct stat info;
537 int dump;
538
539 comm_dup(&data->comm,comm);
540
541 dump = 0;
542 if(data->comm.id==0) {
543 char str1[132];
544 char str2[132];
545 char str3[132];
546 char str4[132];
547 sprintf(str1,"%s.amg.dat",datafname);
548 sprintf(str2,"%s.amg_W.dat",datafname);
549 sprintf(str3,"%s.amg_AfP.dat",datafname);
550 sprintf(str4,"%s.amg_Aff.dat",datafname);
551 if(stat(str1,&info)!=0) dump++;
552 if(stat(str2,&info)!=0) dump++;
553 if(stat(str3,&info)!=0) dump++;
554 if(stat(str4,&info)!=0) dump++;
555 if(dump!=0) printf("AMG files not found, dumping data ...\n"), fflush(stdout);
556 }
557 comm_bcast(&data->comm,&dump,sizeof(int),0);
558
559 data->gs_top = gs_setup((const slong*)id,n, &data->comm, 1,
560 dump?gs_crystal_router:gs_auto, !dump);
561
562 if(dump) {
563 amg_dump(n,id,nz,Ai,Aj,A,data,datafname);
564 gs_free(data->gs_top);
565
566 if(data->comm.id==0) {
567 printf("AMG dump successful. Run AMG setup tool to continue.\n");
568 fflush(stdout);
569 }
570 comm_barrier(&data->comm);
571 comm_free(&data->comm);
572 free(data);
573 *ierr=1;
574 } else {
575 data->null_space = null_space;
576 amg_setup_aux(data, n,id, datafname);
577 *ierr=0;
578 }
579 return data;
580}
581
582void crs_free(struct crs_data *data)
583{
584 const unsigned levels = data->levels;
585
586 free(data->Dff);
587
588 if(levels>1) {
589 unsigned lvl;
590 for(lvl=0;lvl<levels-1;++lvl)
591 gs_free(data->Q_Aff[lvl].gsh),
592 gs_free(data->Q_AfP[lvl].gsh),
593 gs_free(data->Q_W[lvl].gsh);
594 free(data->W[0].a);
595 free(data->W[0].row_off);
596 }
597
598 free(data->W);
599 free(data->Q_W);
600
601 free(data->lvl_offset);
602
603 free(data->cheb_m);
604 free(data->cheb_rho);
605
606 free(data->umap);
607 gs_free(data->gs_top);
608 comm_free(&data->comm);
609
610 free(data);
611}
612
613/*==========================================================================
614
615 Find ID
616
617 As proc 0 reads in the files, it needs to know where to send the data.
618 The find_id function below takes a sorted list of id's (with no repeats)
619 and outputs the corresponding list of owning procs.
620
621 ==========================================================================*/
622
623struct find_id_map { ulong id; uint p; };
624
625struct find_id_data {
626 struct array map;
627 struct array work;
628 struct crystal *cr;
629};
630
631static void find_id_setup(
632 struct find_id_data *const data,
633 const ulong *id, uint n,
634 struct crystal *const cr)
635{
636 const uint np = cr->comm.np;
637 uint i; struct find_id_map *q;
638
639 data->cr = cr;
640 data->work.ptr=0, data->work.n=0, data->work.max=0;
641 array_init(struct find_id_map, &data->map, n);
642 data->map.n=n;
643 for(q=data->map.ptr,i=0;i<n;++i)
644 q[i].id = id[i], q[i].p = id[i] % np;
645 sarray_transfer(struct find_id_map,&data->map,p,1,cr);
646 sarray_sort(struct find_id_map,data->map.ptr,data->map.n, id,1, &cr->data);
647}
648
649static void find_id_free(struct find_id_data *const data)
650{
651 array_free(&data->map);
652 array_free(&data->work);
653}
654
655struct find_id_work { ulong id; uint p; uint wp; };
656
657static int find_id(
658 uint *p_out, const unsigned p_stride,
659 struct find_id_data *const data,
660 const ulong *id, const unsigned id_stride, const uint n)
661{
662 struct find_id_work *p, *p_end;
663 const struct find_id_map *q=data->map.ptr, *const q_end = q+data->map.n;
664 const uint np = data->cr->comm.np;
665 int not_found = 0;
666
667 uint nn;
668 p = array_reserve(struct find_id_work, &data->work, n);
669 for(nn=n;nn;--nn,id=(const ulong*)((const char*)id+id_stride))
670 p->id=*id, p->p=0, p->wp = (*id)%np, ++p;
671 data->work.n=n;
672
673 /* send to work proc */
674 sarray_transfer(struct find_id_work,&data->work,wp,1,data->cr);
675
676 /* match id's with rid's */
677 sarray_sort(struct find_id_work,data->work.ptr,data->work.n, id,1,
678 &data->cr->data);
679 for(p=data->work.ptr,p_end=p+data->work.n;p!=p_end;++p) {
680 while(q!=q_end && q->id<p->id) ++q;
681 if(q==q_end) break;
682 if(q->id!=p->id) not_found=1,p->p=UINT_MAX;
683 else p->p=q->p;
684 }
685 for(;p!=p_end;++p) not_found=1,p->p=UINT_MAX;
686
687 /* send back */
688 sarray_transfer(struct find_id_work,&data->work,wp,0,data->cr);
689
690 /* map back to user data */
691 sarray_sort(struct find_id_work,data->work.ptr,data->work.n, id,1,
692 &data->cr->data);
693 p = data->work.ptr;
694 for(nn=n;nn;--nn,p_out=(uint*)((char*)p_out+p_stride))
695 *p_out = p->p, ++p;
696
697 return comm_reduce_int(&data->cr->comm, gs_max, &not_found,1);
698}
699
700
701/*==========================================================================
702
703 Read amg.dat, amg_*.dat
704
705 The function read_data is responsible for reading these files,
706 and creating arrays
707 ids of struct id_data
708 mat[3] of struct gnz (W, AfP, Aff)
709 distributed to the appropriate procs
710
711 ==========================================================================*/
712
713static ulong read_level_data(struct crs_data *data, struct file f)
714{
715 unsigned i,n; ulong tn; double *buf;
716 int updt=1;
717 int code=0;
718 if(data->comm.id==0) {
719 double hdr; dread(&hdr,1,f,&code);
720 if(code==0) {
721 printf("AMG version %3.2f\n",hdr);
722 if(hdr!=2.01) {
723 printf("Outdates AMG dat files\n");
724 updt=0;
725 }
726 double t; dread(&t,1,f,&code);
727 data->levels = t;
728 printf("AMG: %u levels\n", data->levels);
729 }
730 }
731 comm_bcast(&data->comm, &data->levels,sizeof(unsigned), 0);
732 comm_bcast(&data->comm, &updt,sizeof(int), 0);
733 comm_bcast(&data->comm, &code,sizeof(int), 0);
734 if(!updt||code!=0) die(1);
735
736 n = data->levels-1;
737 data->cheb_m = tmalloc(unsigned,n);
738 data->cheb_rho = tmalloc(double ,n);
739
740 buf = tmalloc(double, 2*n+1);
741 if(data->comm.id==0) dread(buf,2*n+1,f,&code);
742 comm_bcast(&data->comm, &code,sizeof(int), 0);
743 if(code!=0) die(1);
744
745 comm_bcast(&data->comm, buf,(2*n+1)*sizeof(double), 0);
746 for(i=0;i<n;++i) data->cheb_m[i] = buf[i];
747 for(i=0;i<n;++i) data->cheb_rho[i] = buf[n+i];
748 tn = buf[2*n];
749 data->tni = 1/(double)tn;
750 free(buf);
751
752 if(data->comm.id==0) {
753 printf("AMG Chebyshev smoother data:\n");
754 for(i=0;i<n;++i)
755 printf("AMG level %u: %u iterations with rho = %g\n",
756 i+1, data->cheb_m[i], (double)data->cheb_rho[i]);
757 printf("AMG: %lu rows\n", (unsigned long)tn);
758 }
759
760 return tn;
761}
762
763static void read_data(
764 struct crs_data *const data,
765 struct array *ids, struct array mat[3],
766 struct crystal *const cr,
767 const ulong *uid, const uint uid_n, const char *datafname)
768{
769 int code;
770 struct find_id_data fid;
771 const uint pid = data->comm.id;
772 ulong r,tn;
773 struct array read_buffer=null_array;
774 struct array id_buffer = null_array, mat_buffer = null_array;
775 uint *row_lens=0, *id_proc=0, *id_perm=0;
776 struct array mat_proc = null_array;
777 struct file f={0,0}, fm[3]={{0,0},{0,0},{0,0}};
778 unsigned m;
779 ids->ptr=0, ids->n=0, ids->max=0;
780 for(m=0;m<3;++m) mat[m].ptr=0,mat[m].n=0,mat[m].max=0;
781 find_id_setup(&fid, uid,uid_n, cr);
782 code=0;
783 if(pid==0) {
784 char str1[132];
785 char str2[132];
786 char str3[132];
787 char str4[132];
788 sprintf(str1,"%s.amg.dat",datafname);
789 sprintf(str2,"%s.amg_W.dat",datafname);
790 sprintf(str3,"%s.amg_AfP.dat",datafname);
791 sprintf(str4,"%s.amg_Aff.dat",datafname);
792 f = dopen(str1,'r',&code);
793 fm[0] = dopen(str2,'r',&code);
794 fm[1] = dopen(str3,'r',&code);
795 fm[2] = dopen(str4,'r',&code);
796 if(code==0) {
797 array_init(double, &read_buffer, 6*AMG_BLOCK_ROWS);
798 row_lens = tmalloc(uint, 5*AMG_BLOCK_ROWS);
799 id_proc = row_lens + 3*AMG_BLOCK_ROWS;
800 id_perm = id_proc + AMG_BLOCK_ROWS;
801 array_reserve(struct id_data, &id_buffer, AMG_BLOCK_ROWS);
802 }
803 }
804 comm_bcast(&data->comm,&code,sizeof(int),0);
805 if(code!=0) die(1);
806
807 tn = read_level_data(data,f);
808 for(r=0;r<tn;r+=AMG_BLOCK_ROWS) {
809 unsigned nr = tn-r>AMG_BLOCK_ROWS ? AMG_BLOCK_ROWS : (unsigned)(tn-r);
810 uint mat_size[3]={0,0,0};
811
812 /* read id data */
813 if(pid==0) {
814 unsigned i;
815 struct id_data *idp = id_buffer.ptr;
816 double *b = read_buffer.ptr;
817 dread(b,nr*6, f,&code);
818 if(code==0) {
819 printf("AMG: reading through row %lu, pass %u/%u\n",
820 (unsigned long)b[(nr-1)*6],
821 (unsigned)(r/AMG_BLOCK_ROWS+1),
822 (unsigned)((tn+AMG_BLOCK_ROWS-1)/AMG_BLOCK_ROWS));
823 for(i=0;i<nr;++i) {
824 idp[i].id = *b++;
825 idp[i].level = (uint)(*b++) - 1;
826 mat_size[0] += row_lens[3*i+0] = *b++; /* W row length */
827 mat_size[1] += row_lens[3*i+1] = *b++; /* AfP row length */
828 mat_size[2] += row_lens[3*i+2] = *b++; /* Aff row length */
829 idp[i].D = *b++;
830 }
831 id_buffer.n=nr;
832
833 /* sort id_buffer, remember how to undo */
834 sarray_sort(struct id_data,idp,nr, id,1, &cr->data);
835 sarray_perm_invert(id_perm, cr->data.ptr, nr);
836 }
837 } else
838 id_buffer.n=0;
839
840 comm_bcast(&data->comm,&code,sizeof(int),0);
841 if(code!=0)die(1);
842
843 /* find who owns each row */
844 if(find_id(id_proc,sizeof(uint), &fid,
845 (const ulong*)((const char*)id_buffer.ptr+offsetof(struct id_data,id)),
846 sizeof(struct id_data), id_buffer.n)) {
847 if(pid==0)
848 fail(1,__FILE__,__LINE__,"AMG: data has more rows than given problem");
849 else die(1);
850 }
851 if(pid==0) { /* undo sorting of id_buffer */
852 buffer_reserve(&cr->data,sizeof(struct id_data)+sizeof(uint));
853 sarray_permute(struct id_data,id_buffer.ptr,nr, id_perm, cr->data.ptr);
854 sarray_permute(uint, id_proc, nr, id_perm, cr->data.ptr);
855 }
856 /* read matrix data */
857 for(m=0;m<3;++m) {
858 if(pid==0) {
859 const struct id_data *const idp = id_buffer.ptr;
860 unsigned i; uint j,rl;
861 struct gnz *p = array_reserve(struct gnz, &mat_buffer, mat_size[m]);
862 double *b = array_reserve(double, &read_buffer, 2*mat_size[m]);
863 uint *proc = array_reserve(uint, &mat_proc, mat_size[m]);
864 dread(b,2*mat_size[m], fm[m],&code);
865 if(code==0) {
866 for(i=0;i<nr;++i) {
867 const ulong i_id = idp[i].id;
868 const uint i_p = id_proc[i];
869 for(rl=row_lens[3*i+m],j=0;j<rl;++j)
870 p->i = i_id, p->j = *b++, p->a = *b++, *proc++ = i_p, ++p;
871 }
872 mat_buffer.n = mat_size[m];
873 }
874 } else
875 mat_buffer.n = 0;
876 comm_bcast(&data->comm,&code,sizeof(int),0);
877 if(code!=0)die(1);
878 sarray_transfer_ext(struct gnz,&mat_buffer,mat_proc.ptr,sizeof(uint),cr);
879 array_cat(struct gnz,&mat[m],mat_buffer.ptr,mat_buffer.n);
880 }
881 /* send id_data to owner */
882 sarray_transfer_ext(struct id_data,&id_buffer,id_proc,sizeof(uint),cr);
883 array_cat(struct id_data,ids,id_buffer.ptr,id_buffer.n);
884 }
885 array_free(&id_buffer);
886 array_free(&mat_buffer);
887 if(pid==0) {
888 array_free(&read_buffer);
889 free(row_lens);
890 array_free(&mat_proc);
891 dclose(fm[2]);
892 dclose(fm[1]);
893 dclose(fm[0]);
894 dclose(f);
895 }
896 find_id_free(&fid);
897}
898
899#ifdef USEMPIIO
900static void read_data_mpiio(struct crs_data *const data,
901 struct array *ids, struct array mat[3],
902 struct crystal *const cr,
903 const ulong *uid, const uint uid_n, const char *datafname)
904{
905 int code;
906 struct find_id_data fid;
907 const uint pid = data->comm.id;
908 struct array read_buffer=null_array;
909 struct array id_buffer = null_array, mat_buffer = null_array;
910 uint *row_lens=0, *id_proc=0, *id_perm=0;
911 struct array mat_proc = null_array;
912 ulong scan_in, scan_buf[2];
913 uint mat_size[3] = {0,0,0};
914 unsigned i,m;
915
916 ids->ptr = 0;
917 ids->n = 0;
918 ids->max = 0;
919 for(m = 0; m < 3; ++m) {
920 mat[m].ptr = 0;
921 mat[m].n = 0;
922 mat[m].max = 0;
923 }
924 find_id_setup(&fid, uid,uid_n, cr);
925 code = 0;
926
927 struct file f = {0,0};
928 f = dopen("amg.dat", 'r', &code);
929 if(code != 0) die(1);
930 ulong tn = read_level_data(data, f);
931 dclose(f);
932
933 if(pid==0)
934 printf("AMG: preading through rows ... ");
935
936 struct sfile fs = {0,0};
937 struct sfile fsm[3] = {{0,0},{0,0},{0,0}};
938 char str1[132];
939 char str2[132];
940 char str3[132];
941 char str4[132];
942 sprintf(str1,"%s.amg.dat",datafname);
943 sprintf(str2,"%s.amg_W.dat",datafname);
944 sprintf(str3,"%s.amg_AfP.dat",datafname);
945 sprintf(str4,"%s.amg_Aff.dat",datafname);
946 fs = dopen_mpi(str1,'r', &data->comm, &code);
947 fsm[0] = dopen_mpi(str2,'r', &data->comm, &code);
948 fsm[1] = dopen_mpi(str3,'r', &data->comm, &code);
949 fsm[2] = dopen_mpi(str4,'r', &data->comm, &code);
950 code = comm_reduce_int(&data->comm, gs_max, &code, 1);
951 if(code != 0)
952 die(1);
953
954 array_init(double, &read_buffer, 6*AMG_MAX_ROWS);
955 row_lens = tmalloc(uint, 5*AMG_MAX_ROWS);
956 id_proc = row_lens + 3*AMG_MAX_ROWS;
957 id_perm = id_proc + AMG_MAX_ROWS;
958 array_reserve(struct id_data, &id_buffer, AMG_MAX_ROWS);
959
960 /* assign rows to proc */
961 unsigned nr = tn/data->comm.np;
962 for(i = 0; i < tn%data->comm.np;++i)
963 if(i == pid) nr++;
964
965 if(nr>AMG_MAX_ROWS) {
966 if(pid==0)
967 fail(1,__FILE__,__LINE__,"AMG: AMG_MAX_ROWS too small!");
968 die(1);
969 }
970
971 /* read id data */
972 ulong nr_off[2];
973 scan_in = (ulong)nr;
974 comm_scan(nr_off, &data->comm, gs_long, gs_add, &scan_in, 1, scan_buf);
975 const ulong off0 = 1+1+1+2*(data->levels-1)+1;
976 dview_mpi(off0+nr_off[0]*6, fs, &code);
977 code = comm_reduce_int(&data->comm, gs_max, &code,1);
978 if(code != 0)
979 die(1);
980
981 double *b = read_buffer.ptr;
982 dread_mpi(b, nr*6, fs, &code);
983 code = comm_reduce_int(&data->comm, gs_max, &code,1);
984 if (code != 0)
985 die(1);
986
987 struct id_data *idp = id_buffer.ptr;
988 for (i = 0; i < nr; ++i) {
989 idp[i].id = *b++;
990 idp[i].level = (uint)(*b++) - 1;
991 mat_size[0] += row_lens[3*i+0] = *b++; /* W row length */
992 mat_size[1] += row_lens[3*i+1] = *b++; /* AfP row length */
993 mat_size[2] += row_lens[3*i+2] = *b++; /* Aff row length */
994 idp[i].D = *b++;
995 }
996 id_buffer.n = nr;
997
998 /* sort id_buffer, remember how to undo */
999 sarray_sort(struct id_data,idp,nr, id,1, &cr->data);
1000 sarray_perm_invert(id_perm, cr->data.ptr, nr);
1001 if (find_id(id_proc,sizeof(uint), &fid,
1002 (const ulong*)((const char*)id_buffer.ptr+offsetof(struct id_data,id)),
1003 sizeof(struct id_data), id_buffer.n))
1004 code = 1;
1005 if (comm_reduce_int(&data->comm, gs_max, &code, 1)) {
1006 if (pid==0)
1007 fail(1,__FILE__,__LINE__,"AMG: data has more rows than given problem");
1008 die(1);
1009 }
1010
1011 /* undo sorting of id_buffer */
1012 buffer_reserve(&cr->data,sizeof(struct id_data)+sizeof(uint));
1013 sarray_permute(struct id_data,id_buffer.ptr,nr, id_perm, cr->data.ptr);
1014 sarray_permute(uint, id_proc, nr, id_perm, cr->data.ptr);
1015
1016 /* read matrix data */
1017 for (m = 0; m < 3; ++m) {
1018 ulong mat_size_off[2];
1019 scan_in = (ulong)mat_size[m];
1020 comm_scan(mat_size_off, &data->comm, gs_long, gs_add, &scan_in, 1, scan_buf);
1021 dview_mpi(1+2*mat_size_off[0], fsm[m], &code);
1022 code = comm_reduce_int(&data->comm, gs_max, &code, 1);
1023 if (code != 0)
1024 die(1);
1025
1026 double *b = array_reserve(double, &read_buffer, 2*mat_size[m]);
1027 dread_mpi(b, 2*mat_size[m], fsm[m], &code);
1028 code = comm_reduce_int(&data->comm, gs_max, &code, 1);
1029 if (code != 0)
1030 die(1);
1031
1032 const struct id_data *const idp = id_buffer.ptr;
1033 struct gnz *p = array_reserve(struct gnz, &mat_buffer, mat_size[m]);
1034 uint *proc = array_reserve(uint, &mat_proc, mat_size[m]);
1035 unsigned i;
1036 for (i = 0; i < nr; ++i) {
1037 const ulong i_id = idp[i].id;
1038 const uint i_p = id_proc[i];
1039 unsigned j,rl;
1040 for (rl = row_lens[3*i+m], j=0; j < rl; ++j) {
1041 p->i = i_id;
1042 p->j = *b++;
1043 p->a = *b++;
1044 *proc++ = i_p;
1045 ++p;
1046 }
1047 }
1048 mat_buffer.n = mat_size[m];
1049 sarray_transfer_ext(struct gnz, &mat_buffer, mat_proc.ptr, sizeof(uint), cr);
1050 array_cat(struct gnz, &mat[m], mat_buffer.ptr, mat_buffer.n);
1051 sarray_sort(struct gnz, (struct gnz*)mat[m].ptr, mat[m].n, i, 1, &cr->data);
1052 }
1053
1054 /* send id_data to owner */
1055 sarray_transfer_ext(struct id_data, &id_buffer, id_proc, sizeof(uint), cr);
1056 array_cat(struct id_data,ids, id_buffer.ptr, id_buffer.n);
1057 sarray_sort(struct id_data, ids->ptr, ids->n, id, 1, &cr->data);
1058
1059 /* clean-up */
1060 array_free(&id_buffer);
1061 array_free(&mat_buffer);
1062 array_free(&read_buffer);
1063 free(row_lens);
1064 array_free(&mat_proc);
1065 dclose_mpi(fsm[2]);
1066 dclose_mpi(fsm[1]);
1067 dclose_mpi(fsm[0]);
1068 dclose_mpi(fs);
1069 find_id_free(&fid);
1070
1071 if (pid == 0)
1072 printf("done\n");
1073}
1074#endif
1075
1076
1077
1078/*==========================================================================
1079
1080 Write amgdmp_*.dat
1081
1082 The user's matrix is first assembled, then written out.
1083
1084 ==========================================================================*/
1085
1086
1087enum mat_order { row_major, col_major };
1088enum distr { row_distr, col_distr };
1089
1090#define rid_equal(a,b) ((a).p==(b).p && (a).i==(b).i)
1091
1092/* rnz is a mnemonic for remote non zero */
1093struct rnz {
1094 double v; struct rid i,j;
1095};
1096#define nz_pos_equal(a,b) \
1097 (rid_equal((a).i,(b).i) && rid_equal((a).j,(b).j))
1098
1099static void mat_sort(struct array *const mat,
1100 const enum mat_order order, buffer *const buf)
1101{
1102 switch(order) {
1103 case col_major: sarray_sort_4(struct rnz,mat->ptr,mat->n,
1104 j.p,0,j.i,0, i.p,0,i.i,0, buf); break;
1105 case row_major: sarray_sort_4(struct rnz,mat->ptr,mat->n,
1106 i.p,0,i.i,0, j.p,0,j.i,0, buf); break;
1107 }
1108}
1109
1110/* assumes matrix is already sorted */
1111static void mat_condense_sorted(struct array *const mat)
1112{
1113 struct rnz *dst,*src, *const end=(struct rnz*)mat->ptr+mat->n;
1114 if(mat->n<=1) return;
1115 for(dst=mat->ptr;;++dst) {
1116 if(dst+1==end) return;
1117 if(nz_pos_equal(dst[0],dst[1])) break;
1118 }
1119 for(src=dst+1;src!=end;++src) {
1120 if(nz_pos_equal(*dst,*src))
1121 dst->v += src->v;
1122 else
1123 *(++dst) = *src;
1124 }
1125 mat->n = (dst+1)-(struct rnz*)mat->ptr;
1126}
1127
1128static void mat_condense(
1129 struct array *const mat, const enum mat_order order, buffer *const buf)
1130{
1131 mat_sort(mat,order,buf); mat_condense_sorted(mat);
1132}
1133
1134static void mat_distribute(
1135 struct array *const mat, const enum distr d, const enum mat_order o,
1136 struct crystal *const cr)
1137{
1138 switch(d) {
1139 case row_distr: mat_condense(mat,row_major,&cr->data);
1140 sarray_transfer(struct rnz,mat, i.p,0, cr); break;
1141 case col_distr: mat_condense(mat,col_major,&cr->data);
1142 sarray_transfer(struct rnz,mat, j.p,0, cr); break;
1143 }
1144 mat_condense(mat,o,&cr->data);
1145}
1146
1147struct labelled_rid {
1148 struct rid rid; ulong id;
1149};
1150
1151static void mat_list_nonlocal_sorted(
1152 struct array *const nonlocal_id,
1153 const struct array *const mat, const enum distr d,
1154 const ulong *uid, struct crystal *const cr)
1155{
1156 const uint pid = cr->comm.id;
1157 struct rid last_rid;
1158 const struct rnz *p, *const e=(const struct rnz*)mat->ptr+mat->n;
1159 uint count; struct labelled_rid *out, *end;
1160 #define BUILD_LIST(k) do { \
1161 last_rid.p=UINT_MAX,last_rid.i=UINT_MAX; \
1162 for(count=0,p=mat->ptr;p!=e;++p) { \
1163 if(p->k.p==pid || rid_equal(last_rid,p->k)) continue; \
1164 last_rid=p->k; ++count; \
1165 } \
1166 array_init(struct labelled_rid, nonlocal_id, count); \
1167 nonlocal_id->n=count; out=nonlocal_id->ptr; \
1168 last_rid.p=UINT_MAX,last_rid.i=UINT_MAX; \
1169 for(p=mat->ptr;p!=e;++p) { \
1170 if(p->k.p==pid || rid_equal(last_rid,p->k)) continue; \
1171 (out++)->rid=last_rid=p->k; \
1172 } \
1173 } while(0)
1174 switch(d) {
1175 case row_distr: BUILD_LIST(j); break;
1176 case col_distr: BUILD_LIST(i); break;
1177 }
1178 #undef BUILD_LIST
1179 sarray_transfer(struct labelled_rid,nonlocal_id,rid.p,1,cr);
1180 for(out=nonlocal_id->ptr,end=out+nonlocal_id->n;out!=end;++out)
1181 out->id=uid[out->rid.i];
1182 sarray_transfer(struct labelled_rid,nonlocal_id,rid.p,1,cr);
1183 sarray_sort_2(struct labelled_rid,nonlocal_id->ptr,nonlocal_id->n,
1184 rid.p,0, rid.i,0, &cr->data);
1185}
1186
1187static void mat_list_nonlocal(
1188 struct array *const nonlocal_id,
1189 struct array *const mat, const enum distr d,
1190 const ulong *uid, struct crystal *const cr)
1191{
1192 switch(d) {
1193 case row_distr: mat_sort(mat,col_major,&cr->data); break;
1194 case col_distr: mat_sort(mat,row_major,&cr->data); break;
1195 }
1196 mat_list_nonlocal_sorted(nonlocal_id,mat,d,uid,cr);
1197}
1198
1199static uint dump_matrix_setdata(
1200 buffer *const buf, /* output; ok if this is one of cr's buffers */
1201 struct array *const mat, const ulong *const uid,
1202 struct crystal *const cr)
1203{
1204 const uint pid = cr->comm.id;
1205 struct array nonlocal_id;
1206 double *vi, *vj, *va; uint n;
1207 const struct rnz *nz, *enz;
1208 const struct labelled_rid *rlbl;
1209
1210 mat_distribute(mat,row_distr,col_major,cr);
1211 n = mat->n;
1212
1213 mat_list_nonlocal_sorted(&nonlocal_id,mat,row_distr,uid,cr);
1214
1215 buffer_reserve(buf,3*n*sizeof(double));
1216 vi=buf->ptr, vj=vi+n, va=vj+n;
1217 rlbl = nonlocal_id.ptr;
1218 for(nz=mat->ptr,enz=nz+n;nz!=enz;++nz) {
1219 *vi++ = uid[nz->i.i];
1220 *va++ = nz->v;
1221 if(nz->j.p==pid)
1222 *vj++ = uid[nz->j.i];
1223 else {
1224 const uint jp = nz->j.p, ji = nz->j.i;
1225 while(rlbl->rid.p<jp) ++rlbl;
1226 if(rlbl->rid.p!=jp) printf("dump_matrix: FAIL!!!\n");
1227 while(rlbl->rid.i<ji) ++rlbl;
1228 if(rlbl->rid.i!=ji) printf("dump_matrix: FAIL!!!\n");
1229 *vj++ = rlbl->id;
1230 }
1231 }
1232 array_free(&nonlocal_id);
1233 return n;
1234}
1235
1236static void dump_matrix(
1237 struct array *const mat, const ulong *const uid,
1238 struct crystal *const cr, const char *datafname)
1239{
1240 const struct comm *comm = &cr->comm;
1241 const uint pid = comm->id, np = comm->np;
1242 buffer *const buf = &cr->data;
1243 struct file fi={0,0}, fj={0,0}, fp={0,0};
1244 uint i,n;
1245 int code;
1246
1247 n = dump_matrix_setdata(buf, mat,uid,cr);
1248
1249 code = 0;
1250 if(pid==0) {
1251 char str1[132];
1252 char str2[132];
1253 char str3[132];
1254 sprintf(str1,"%s.amgdmp_i.dat",datafname);
1255 sprintf(str2,"%s.amgdmp_j.dat",datafname);
1256 sprintf(str3,"%s.amgdmp_p.dat",datafname);
1257 fi=dopen(str1,'w',&code);
1258 fj=dopen(str2,'w',&code);
1259 fp=dopen(str3,'w',&code);
1260 }
1261 comm_bcast(comm,&code,sizeof(int),0);
1262 if(code!=0) die(1);
1263
1264 for(i=0;i<np;++i) {
1265 comm_barrier(comm);
1266 if(pid!=0 && i==pid)
1267 comm_send(comm, &n,sizeof(uint), 0,i),
1268 comm_send(comm, buf->ptr,3*n*sizeof(double), 0,np+i);
1269 else if(pid==0) {
1270 double *v;
1271 printf("AMG writing data from proc %u\n",(unsigned)i),fflush(stdout);
1272 if(i!=0) {
1273 comm_recv(comm, &n,sizeof(uint), i,i);
1274 buffer_reserve(buf,3*n*sizeof(double));
1275 comm_recv(comm, buf->ptr,3*n*sizeof(double), i,np+i);
1276 }
1277 v = buf->ptr;
1278 if(code==0) {
1279 dwrite(fi, v , n,&code);
1280 dwrite(fj, v+ n, n,&code);
1281 dwrite(fp, v+2*n, n,&code);
1282 }
1283 }
1284 }
1285 if(pid==0) dclose(fi),dclose(fj),dclose(fp);
1286 comm_bcast(comm,&code,sizeof(int),0);
1287 if(code!=0) die(1);
1288 comm_barrier(comm);
1289}
1290
1291/* assumes data->comm and data->gs_top are set */
1292static void amg_dump(
1293 uint n, const ulong *id,
1294 uint nz, const uint *Ai, const uint *Aj, const double *A,
1295 struct crs_data *data, const char *datafname)
1296{
1297 struct crystal cr;
1298 struct array uid; struct rid *rid_map = tmalloc(struct rid,n);
1299
1300 struct array mat;
1301 uint k; struct rnz *out;
1302
1303 crystal_init(&cr, &data->comm);
1304 assign_dofs(&uid,rid_map, id,n,data->comm.id,data->gs_top,&cr.data);
1305
1306 array_init(struct rnz,&mat,nz);
1307 for(out=mat.ptr,k=0;k<nz;++k) {
1308 uint i=Ai[k], j=Aj[k]; double a=A[k];
1309 if(id[i]==0 || id[j]==0 || fabs(a)==0) continue;
1310 out->v = a, out->i=rid_map[i], out->j=rid_map[j];
1311 ++out;
1312 }
1313 mat.n = out-(struct rnz*)mat.ptr;
1314 free(rid_map);
1315
1316 dump_matrix(&mat,uid.ptr,&cr,datafname);
1317
1318 array_free(&uid);
1319 crystal_free(&cr);
1320}
Note: See TracBrowser for help on using the repository browser.