| 1 | #include <stddef.h>
|
|---|
| 2 | #include <stdlib.h>
|
|---|
| 3 | #include <stdio.h>
|
|---|
| 4 | #include <string.h>
|
|---|
| 5 | #include <math.h>
|
|---|
| 6 | #include <sys/stat.h>
|
|---|
| 7 | #include <limits.h>
|
|---|
| 8 | #include "gslib.h"
|
|---|
| 9 |
|
|---|
| 10 | #define crs_setup PREFIXED_NAME(crs_amg_setup)
|
|---|
| 11 | #define crs_solve PREFIXED_NAME(crs_amg_solve)
|
|---|
| 12 | #define crs_stats PREFIXED_NAME(crs_amg_stats)
|
|---|
| 13 | #define crs_free PREFIXED_NAME(crs_amg_free )
|
|---|
| 14 |
|
|---|
| 15 | #ifndef AMG_BLOCK_ROWS
|
|---|
| 16 | # define AMG_BLOCK_ROWS 2400
|
|---|
| 17 | #endif
|
|---|
| 18 |
|
|---|
| 19 | #ifndef AMG_MAX_ROWS
|
|---|
| 20 | # define AMG_MAX_ROWS 12000
|
|---|
| 21 | #endif
|
|---|
| 22 |
|
|---|
| 23 | #include "crs_amg_io.h"
|
|---|
| 24 |
|
|---|
| 25 | static double get_time(void)
|
|---|
| 26 | {
|
|---|
| 27 | #ifdef GS_TIMING
|
|---|
| 28 | return comm_time();
|
|---|
| 29 | #else
|
|---|
| 30 | return 0;
|
|---|
| 31 | #endif
|
|---|
| 32 | }
|
|---|
| 33 |
|
|---|
| 34 | static void barrier(const struct comm *c)
|
|---|
| 35 | {
|
|---|
| 36 | #ifdef GS_BARRIER
|
|---|
| 37 | comm_barrier(c);
|
|---|
| 38 | #endif
|
|---|
| 39 | }
|
|---|
| 40 |
|
|---|
| 41 | /* sparse matrix, condensed sparse row */
|
|---|
| 42 | struct csr_mat {
|
|---|
| 43 | uint rn, cn, *row_off, *col;
|
|---|
| 44 | double *a;
|
|---|
| 45 | };
|
|---|
| 46 |
|
|---|
| 47 | /* z = alpha y + beta M x */
|
|---|
| 48 | static double apply_M(
|
|---|
| 49 | double *z, const double alpha, const double *y,
|
|---|
| 50 | const double beta, const struct csr_mat *const M, const double *x)
|
|---|
| 51 | {
|
|---|
| 52 | uint i; const uint rn=M->rn;
|
|---|
| 53 | const uint *const row_off = M->row_off, *col = M->col;
|
|---|
| 54 | const double *a = M->a;
|
|---|
| 55 | const double t0 = get_time();
|
|---|
| 56 | for(i=0;i<rn;++i) {
|
|---|
| 57 | uint j; const uint je=row_off[i+1]; double t = 0;
|
|---|
| 58 | for(j=row_off[i]; j<je; ++j) t += (*a++) * x[*col++];
|
|---|
| 59 | *z++ = alpha*(*y++) + beta*t;
|
|---|
| 60 | }
|
|---|
| 61 | return get_time()-t0;
|
|---|
| 62 | }
|
|---|
| 63 |
|
|---|
| 64 | /* z = M^t x */
|
|---|
| 65 | static double apply_Mt(
|
|---|
| 66 | double *const z, const struct csr_mat *const M, const double *x)
|
|---|
| 67 | {
|
|---|
| 68 | uint i; const uint rn=M->rn,cn=M->cn;
|
|---|
| 69 | const uint *const row_off = M->row_off, *col = M->col;
|
|---|
| 70 | const double *a = M->a;
|
|---|
| 71 | const double t0 = get_time();
|
|---|
| 72 | for(i=0;i<cn;++i) z[i]=0;
|
|---|
| 73 | for(i=0;i<rn;++i) {
|
|---|
| 74 | uint j; const uint je=row_off[i+1]; const double xi = *x++;
|
|---|
| 75 | for(j=row_off[i]; j<je; ++j) z[*col++] += (*a++) * xi;
|
|---|
| 76 | }
|
|---|
| 77 | return get_time()-t0;
|
|---|
| 78 | }
|
|---|
| 79 |
|
|---|
| 80 | struct Q { uint nloc; struct gs_data *gsh; };
|
|---|
| 81 |
|
|---|
| 82 | /* ve = Q v */
|
|---|
| 83 | static double apply_Q(
|
|---|
| 84 | double *const ve, const struct Q *const Q, const double *const v,
|
|---|
| 85 | const struct comm *comm)
|
|---|
| 86 | {
|
|---|
| 87 | double t0;
|
|---|
| 88 | memcpy(ve,v,Q->nloc*sizeof(double));
|
|---|
| 89 | barrier(comm); t0=get_time();
|
|---|
| 90 | gs(ve,gs_double,gs_add,0,Q->gsh,0);
|
|---|
| 91 | return get_time()-t0;
|
|---|
| 92 | }
|
|---|
| 93 |
|
|---|
| 94 | /* z := alpha y + beta Q^t x
|
|---|
| 95 | (x := Q Q^t x as a side effect)
|
|---|
| 96 | */
|
|---|
| 97 | static double apply_Qt(
|
|---|
| 98 | double *z, const double alpha, const double *y,
|
|---|
| 99 | const double beta, const struct Q *Q, double *x,
|
|---|
| 100 | const struct comm *comm)
|
|---|
| 101 | {
|
|---|
| 102 | double t0,t1;
|
|---|
| 103 | uint i; const uint nloc=Q->nloc;
|
|---|
| 104 | barrier(comm); t0=get_time();
|
|---|
| 105 | gs(x,gs_double,gs_add,1,Q->gsh,0);
|
|---|
| 106 | t1 = get_time() - t0;
|
|---|
| 107 | for(i=0;i<nloc;++i) *z++ = alpha*(*y++) + beta*(*x++);
|
|---|
| 108 | return t1;
|
|---|
| 109 | }
|
|---|
| 110 |
|
|---|
| 111 | struct crs_data {
|
|---|
| 112 | struct comm comm;
|
|---|
| 113 | struct gs_data *gs_top;
|
|---|
| 114 | uint un, *umap; /* number of unique id's on this proc, map to user ids */
|
|---|
| 115 | double tni; /* 1 / (total number of unique ids) ... for computing mean */
|
|---|
| 116 | int null_space;
|
|---|
| 117 | unsigned levels;
|
|---|
| 118 | unsigned *cheb_m; /* cheb_m [levels-1] : smoothing steps */
|
|---|
| 119 | double *cheb_rho; /* cheb_rho[levels-1] : spectral radius of smoother */
|
|---|
| 120 | uint *lvl_offset;
|
|---|
| 121 | double *Dff; /* diagonal smoother, F-relaxation */
|
|---|
| 122 | struct Q *Q_W, *Q_AfP, *Q_Aff;
|
|---|
| 123 | struct csr_mat *W, *AfP, *Aff;
|
|---|
| 124 | double *b, *x, *c, *c_old, *r, *buf;
|
|---|
| 125 | double *timing; uint timing_n;
|
|---|
| 126 | };
|
|---|
| 127 |
|
|---|
| 128 | static void amg_exec(struct crs_data *const amg)
|
|---|
| 129 | {
|
|---|
| 130 | unsigned lvl; const unsigned levels=amg->levels;
|
|---|
| 131 | const uint *const off = amg->lvl_offset;
|
|---|
| 132 | double *const b = amg->b, *const x = amg->x;
|
|---|
| 133 | double *c = amg->c, *c_old = amg->c_old, *r = amg->r;
|
|---|
| 134 | double *timing = amg->timing;
|
|---|
| 135 | /* restrict down all levels */
|
|---|
| 136 | for(lvl=0;lvl<levels-1;++lvl,timing+=6) {
|
|---|
| 137 | double *const b_l = b+off[lvl], *const b_lp1 = b+off[lvl+1];
|
|---|
| 138 | /* b_{l+1} += W^t b_l */
|
|---|
| 139 | timing[1]+=apply_Mt(amg->buf, &amg->W[lvl],b_l);
|
|---|
| 140 | timing[0]+=apply_Qt(b_lp1, 1,b_lp1, 1,&amg->Q_W[lvl],amg->buf, &amg->comm);
|
|---|
| 141 | }
|
|---|
| 142 | /* solve bottom equation (1 dof) */
|
|---|
| 143 | { const uint i=off[levels-1];
|
|---|
| 144 | if(off[levels]-i) x[i]=amg->Dff[i]*b[i]; }
|
|---|
| 145 | for(lvl=levels-1;lvl--;) {
|
|---|
| 146 | double *const b_l = b+off[lvl];
|
|---|
| 147 | double *const x_l = x+off[lvl], *const x_lp1 = x+off[lvl+1];
|
|---|
| 148 | const double *const d_l = amg->Dff+off[lvl];
|
|---|
| 149 | const uint n = off[lvl+1]-off[lvl]; uint i;
|
|---|
| 150 | const unsigned m = amg->cheb_m[lvl]; unsigned ci;
|
|---|
| 151 | double alpha, beta, gamma;
|
|---|
| 152 | timing = amg->timing + lvl*6;
|
|---|
| 153 | /* buf = Q x_{l+1} */
|
|---|
| 154 | timing[2]+=apply_Q(amg->buf, &amg->Q_AfP[lvl],x_lp1, &amg->comm);
|
|---|
| 155 | /* x_l = W x_{l+1} */
|
|---|
| 156 | timing[3]+=apply_M(x_l, 0,b_l, 1,&amg->W [lvl], amg->buf);
|
|---|
| 157 | /* b_l -= AfP x_{l+1} */
|
|---|
| 158 | timing[3]+=apply_M(b_l, 1,b_l, -1,&amg->AfP[lvl], amg->buf);
|
|---|
| 159 | /* c_1 = Dff b_l */
|
|---|
| 160 | for(i=0;i<n;++i) c[i]=d_l[i]*b_l[i];
|
|---|
| 161 | if(m>1) {
|
|---|
| 162 | alpha = amg->cheb_rho[lvl]/2, alpha*=alpha;
|
|---|
| 163 | gamma = 2*alpha/(1-2*alpha), beta = 1 + gamma;
|
|---|
| 164 | /* r_1 = b_l - Aff c_1 */
|
|---|
| 165 | timing[4]+=apply_Q(amg->buf, &amg->Q_Aff[lvl],c, &amg->comm);
|
|---|
| 166 | timing[5]+=apply_M(r, 1,b_l, -1,&amg->Aff[lvl],amg->buf);
|
|---|
| 167 | /* c_2 = (1+gamma)(c_1 + Dff r_1) */
|
|---|
| 168 | { double *const temp = c; c=c_old,c_old=temp; }
|
|---|
| 169 | for(i=0;i<n;++i) c[i] = beta*(c_old[i]+d_l[i]*r[i]);
|
|---|
| 170 | }
|
|---|
| 171 | for(ci=3;ci<=m;++ci) {
|
|---|
| 172 | gamma = alpha*beta, gamma = gamma/(1-gamma), beta = 1 + gamma;
|
|---|
| 173 | /* r_i = b_l - Aff c_i */
|
|---|
| 174 | timing[4]+=apply_Q(amg->buf, &amg->Q_Aff[lvl],c, &amg->comm);
|
|---|
| 175 | timing[5]+=apply_M(r, 1,b_l, -1,&amg->Aff[lvl],amg->buf);
|
|---|
| 176 | /* c_{i+1} = (1+gamma)*(c_i+D r_i) - gamma c_{i-1} */
|
|---|
| 177 | { double *const temp = c; c=c_old,c_old=temp; }
|
|---|
| 178 | for(i=0;i<n;++i) c[i] = beta*(c_old[i]+d_l[i]*r[i]) - gamma*c[i];
|
|---|
| 179 | }
|
|---|
| 180 | for(i=0;i<n;++i) x_l[i]+=c[i];
|
|---|
| 181 | }
|
|---|
| 182 | amg->timing_n++;
|
|---|
| 183 | }
|
|---|
| 184 |
|
|---|
| 185 | void crs_solve(double *x, struct crs_data *data, double *b)
|
|---|
| 186 | {
|
|---|
| 187 | uint i; const uint un = data->un; const uint *const umap = data->umap;
|
|---|
| 188 | double *const ub = data->b, *const ux = data->x;
|
|---|
| 189 |
|
|---|
| 190 | gs(b, gs_double,gs_add, 1, data->gs_top, 0);
|
|---|
| 191 | for(i=0;i<un;++i) ub[i]=b[umap[i]];
|
|---|
| 192 |
|
|---|
| 193 | amg_exec(data);
|
|---|
| 194 |
|
|---|
| 195 | if(data->null_space) {
|
|---|
| 196 | const double avg = data->tni*comm_reduce_double(&data->comm, gs_add, ux,un);
|
|---|
| 197 | for(i=0;i<un;++i) ux[i] -= avg;
|
|---|
| 198 | }
|
|---|
| 199 |
|
|---|
| 200 | for(i=0;i<un;++i) x[umap[i]]=ux[i];
|
|---|
| 201 | gs(x, gs_double,gs_add, 0, data->gs_top, 0);
|
|---|
| 202 | }
|
|---|
| 203 |
|
|---|
| 204 | void crs_stats(const struct crs_data *const data)
|
|---|
| 205 | {
|
|---|
| 206 | const unsigned lm1 = data->levels-1;
|
|---|
| 207 | double *avg = tmalloc(double, 2*6*lm1);
|
|---|
| 208 | double ni = 1/((double)data->timing_n * data->comm.np);
|
|---|
| 209 | uint i;
|
|---|
| 210 | for(i=0;i<6*lm1;++i) avg[i] = ni*data->timing[i];
|
|---|
| 211 | comm_allreduce(&data->comm,gs_double,gs_add, avg,6*lm1, avg+6*lm1);
|
|---|
| 212 | if(data->comm.id==0) {
|
|---|
| 213 | double *t = avg; unsigned lvl;
|
|---|
| 214 | printf("AMG stats:\n");
|
|---|
| 215 | for(lvl=0;lvl<lm1;++lvl,t+=6)
|
|---|
| 216 | printf(" [lvl %02u] comm: Wt=%0.3e W,AfP=%0.3e Aff=%0.3e\n"
|
|---|
| 217 | " [lvl %02u] work: Wt=%0.3e W,AfP=%0.3e Aff=%0.3e\n",
|
|---|
| 218 | lvl, t[0],t[2],t[4], lvl, t[1],t[3],t[5]);
|
|---|
| 219 | }
|
|---|
| 220 | free(avg);
|
|---|
| 221 | }
|
|---|
| 222 |
|
|---|
| 223 | /*==========================================================================
|
|---|
| 224 |
|
|---|
| 225 | Setup
|
|---|
| 226 |
|
|---|
| 227 | ==========================================================================*/
|
|---|
| 228 |
|
|---|
| 229 |
|
|---|
| 230 | /* remote id - designates the i-th uknown owned by proc p */
|
|---|
| 231 | struct rid { uint p,i; };
|
|---|
| 232 |
|
|---|
| 233 | /* the data for each id to be read from amg.dat */
|
|---|
| 234 | struct id_data {
|
|---|
| 235 | double D;
|
|---|
| 236 | ulong id;
|
|---|
| 237 | unsigned level;
|
|---|
| 238 | };
|
|---|
| 239 |
|
|---|
| 240 | /* global non-zero (matrix entry) */
|
|---|
| 241 | struct gnz { ulong i,j; double a; };
|
|---|
| 242 |
|
|---|
| 243 | /*
|
|---|
| 244 | The user ids id[n] are assigned to unique procs.
|
|---|
| 245 |
|
|---|
| 246 | Output
|
|---|
| 247 | uid --- ulong array; uid[0 ... uid.n-1] are the ids owned by this proc
|
|---|
| 248 | rid_map --- id[i] is uid[ rid_map[i].i ] on proc rid_map[i].p
|
|---|
| 249 | map = assign_dofs(...) --- uid[i] is id[map[i]]
|
|---|
| 250 |
|
|---|
| 251 | map is returned only when rid_map is null, and vice versa
|
|---|
| 252 | */
|
|---|
| 253 | static uint *assign_dofs(struct array *const uid,
|
|---|
| 254 | struct rid *const rid_map,
|
|---|
| 255 | const ulong *const id, const uint n,
|
|---|
| 256 | const uint p,
|
|---|
| 257 | struct gs_data *gs_top, buffer *const buf)
|
|---|
| 258 | {
|
|---|
| 259 | uint i,count, *map=0;
|
|---|
| 260 | struct rid *const rid = rid_map?rid_map:tmalloc(struct rid,n);
|
|---|
| 261 | for(i=0;i<n;++i) rid[i].p=p, rid[i].i=i;
|
|---|
| 262 | gs_vec(n?&rid[0].p:0,2, gs_sint,gs_add,0, gs_top,buf);
|
|---|
| 263 | for(count=0,i=0;i<n;++i) {
|
|---|
| 264 | if(rid[i].i==i && id[i]!=0 && rid[i].p==p) ++count; }
|
|---|
| 265 | array_init(ulong,uid,count); uid->n=count;
|
|---|
| 266 | if(rid_map==0) {
|
|---|
| 267 | map=tmalloc(uint,count);
|
|---|
| 268 | for(count=0,i=0;i<n;++i) {
|
|---|
| 269 | if(rid[i].i==i && id[i]!=0 && rid[i].p==p) map[count++]=i; }
|
|---|
| 270 | }
|
|---|
| 271 | { ulong *const uid_p = uid->ptr;
|
|---|
| 272 | for(count=0,i=0;i<n;++i) if(rid[i].i==i && id[i]!=0 && rid[i].p==p)
|
|---|
| 273 | uid_p[count]=id[i], rid[i].i=count, ++count;
|
|---|
| 274 | }
|
|---|
| 275 | if(rid_map) gs_vec(n?&rid[0].p:0,2, gs_sint,gs_add,0, gs_top,buf);
|
|---|
| 276 | else free(rid);
|
|---|
| 277 | return map;
|
|---|
| 278 | }
|
|---|
| 279 |
|
|---|
| 280 | static void localize_rows(struct gnz *p, uint nz,
|
|---|
| 281 | const ulong *const uid, const uint *const id_perm)
|
|---|
| 282 | {
|
|---|
| 283 | uint i=0; while(nz--) {
|
|---|
| 284 | const ulong id=p->i;
|
|---|
| 285 | while(uid[i]<id) ++i;
|
|---|
| 286 | (p++)->i=id_perm[i];
|
|---|
| 287 | }
|
|---|
| 288 | }
|
|---|
| 289 |
|
|---|
| 290 | static void find_mat_offs(uint *const off, const int levels,
|
|---|
| 291 | const struct gnz *const p, const uint nz,
|
|---|
| 292 | const struct id_data *const id)
|
|---|
| 293 | {
|
|---|
| 294 | int lvl=-1; uint i; for(i=0;i<nz;++i) {
|
|---|
| 295 | const int lvl_i = id[p[i].i].level;
|
|---|
| 296 | while(lvl<lvl_i) off[++lvl]=i;
|
|---|
| 297 | }
|
|---|
| 298 | while(lvl<levels-1) off[++lvl]=i;
|
|---|
| 299 | }
|
|---|
| 300 |
|
|---|
| 301 | static void compress_mat(
|
|---|
| 302 | uint *const row_off, const int rn, uint *const col, double *const a,
|
|---|
| 303 | const struct gnz *const p, const uint nz)
|
|---|
| 304 | {
|
|---|
| 305 | int r = -1; uint k;
|
|---|
| 306 | for(r=-1, k=0;k<nz;++k) {
|
|---|
| 307 | const int row = p[k].i;
|
|---|
| 308 | while(r<row) row_off[++r]=k;
|
|---|
| 309 | col[k] = p[k].j;
|
|---|
| 310 | a[k] = p[k].a;
|
|---|
| 311 | }
|
|---|
| 312 | while(r<rn) row_off[++r]=k;
|
|---|
| 313 | }
|
|---|
| 314 |
|
|---|
| 315 | static void amg_setup_mat(
|
|---|
| 316 | struct array *ids, const uint nloc, struct csr_mat *const mat,
|
|---|
| 317 | const ulong *uid, const uint uid_n, const uint *id_perm,
|
|---|
| 318 | const uint i0, const uint j0, struct gnz *const p, const uint nz,
|
|---|
| 319 | buffer *buf)
|
|---|
| 320 | {
|
|---|
| 321 | slong *id = ids->ptr; ulong last_id = ULONG_MAX;
|
|---|
| 322 | const uint ne = ids->n;
|
|---|
| 323 | uint k,i=0,ie=nloc;
|
|---|
| 324 | sarray_sort(struct gnz,p,nz, j,1, buf); /* sort by col */
|
|---|
| 325 | for(k=0;k<nz;++k) {
|
|---|
| 326 | const ulong j_id = p[k].j;
|
|---|
| 327 | p[k].i = p[k].i - i0;
|
|---|
| 328 | if(j_id == last_id) { p[k].j = p[k-1].j; continue; }
|
|---|
| 329 | last_id = j_id;
|
|---|
| 330 | /* check local ids */
|
|---|
| 331 | while(i<uid_n && uid[i]<j_id) ++i;
|
|---|
| 332 | if(i<uid_n && uid[i]==j_id) { p[k].j=id_perm[i]-j0; continue; }
|
|---|
| 333 | /* check old group of remote ids */
|
|---|
| 334 | while(ie<ne && (ulong)(-id[ie])<j_id) ++ie;
|
|---|
| 335 | if(ie<ne && (ulong)(-id[ie])==j_id) { p[k].j=ie; continue; }
|
|---|
| 336 | /* not found, add to new group of remote ids */
|
|---|
| 337 | id = array_reserve(slong, ids, ids->n+1);
|
|---|
| 338 | id[ids->n] = -(slong)j_id, p[k].j = ids->n++;
|
|---|
| 339 | }
|
|---|
| 340 | sarray_sort_2(struct gnz,p,nz, i,1, j,1, buf);
|
|---|
| 341 |
|
|---|
| 342 | mat->cn = ids->n;
|
|---|
| 343 | compress_mat(mat->row_off,mat->rn, mat->col,mat->a, p,nz);
|
|---|
| 344 | }
|
|---|
| 345 |
|
|---|
| 346 | static uint amg_setup_mats(
|
|---|
| 347 | struct crs_data *const data,
|
|---|
| 348 | const ulong *const uid, const uint uid_n,
|
|---|
| 349 | const uint *const id_perm,
|
|---|
| 350 | const struct id_data *const id,
|
|---|
| 351 | struct array *mat,
|
|---|
| 352 | buffer *buf)
|
|---|
| 353 | {
|
|---|
| 354 | const uint *const off = data->lvl_offset;
|
|---|
| 355 | struct array ide = null_array; uint max_e=0;
|
|---|
| 356 | const unsigned levels=data->levels;
|
|---|
| 357 | unsigned lvl, m;
|
|---|
| 358 | uint *mat_off[3]; struct csr_mat *csr_mat[3];
|
|---|
| 359 | data->Q_W = tmalloc(struct Q, (levels-1)*3);
|
|---|
| 360 | data->Q_AfP = data->Q_W + (levels-1);
|
|---|
| 361 | data->Q_Aff = data->Q_AfP + (levels-1);
|
|---|
| 362 | csr_mat[0] = data->W = tmalloc(struct csr_mat, (levels-1)*3);
|
|---|
| 363 | csr_mat[1] = data->AfP = data->W + (levels-1);
|
|---|
| 364 | csr_mat[2] = data->Aff = data->AfP + (levels-1);
|
|---|
| 365 | mat_off[0] = tmalloc(uint, levels*3);
|
|---|
| 366 | mat_off[1] = mat_off[0]+levels;
|
|---|
| 367 | mat_off[2] = mat_off[1]+levels;
|
|---|
| 368 | for(m=0;m<3;++m) {
|
|---|
| 369 | /* change row from uid to local index */
|
|---|
| 370 | localize_rows(mat[m].ptr,mat[m].n, uid,id_perm);
|
|---|
| 371 | /* sort by row */
|
|---|
| 372 | sarray_sort(struct gnz,mat[m].ptr,mat[m].n, i,1, buf);
|
|---|
| 373 | /* find offsets of each level */
|
|---|
| 374 | find_mat_offs(mat_off[m],levels, mat[m].ptr,mat[m].n, id);
|
|---|
| 375 | }
|
|---|
| 376 | /* allocate CSR arrays */
|
|---|
| 377 | if(levels>1) {
|
|---|
| 378 | uint *ui = tmalloc(uint, 3*((off[levels-1]-off[0])+(levels-1))
|
|---|
| 379 | +mat[0].n+mat[1].n+mat[2].n);
|
|---|
| 380 | double *a = tmalloc(double, mat[0].n+mat[1].n+mat[2].n);
|
|---|
| 381 | for(m=0;m<3;++m) for(lvl=0;lvl<levels-1;++lvl) {
|
|---|
| 382 | const uint rn = off[lvl+1]-off[lvl];
|
|---|
| 383 | const uint nz = mat_off[m][lvl+1]-mat_off[m][lvl];
|
|---|
| 384 | csr_mat[m][lvl].rn = rn;
|
|---|
| 385 | csr_mat[m][lvl].row_off = ui, ui += rn+1;
|
|---|
| 386 | csr_mat[m][lvl].col = ui, ui += nz;
|
|---|
| 387 | csr_mat[m][lvl].a = a, a += nz;
|
|---|
| 388 | }
|
|---|
| 389 | }
|
|---|
| 390 | for(lvl=0;lvl<levels-1;++lvl) {
|
|---|
| 391 | uint i; const uint j0=off[lvl+1], nloc = off[levels]-j0;
|
|---|
| 392 | slong *p = array_reserve(slong, &ide, nloc);
|
|---|
| 393 | for(i=0;i<nloc;++i) p[i] = id[i+j0].id;
|
|---|
| 394 | data->Q_W[lvl].nloc = data->Q_AfP[lvl].nloc = ide.n = nloc;
|
|---|
| 395 | amg_setup_mat(&ide,nloc, &data->W[lvl],
|
|---|
| 396 | uid, uid_n, id_perm, off[lvl],j0,
|
|---|
| 397 | (struct gnz*)mat[0].ptr + mat_off[0][lvl],
|
|---|
| 398 | mat_off[0][lvl+1]-mat_off[0][lvl], buf);
|
|---|
| 399 | data->Q_W[lvl].gsh = gs_setup(ide.ptr,ide.n, &data->comm, 0,gs_auto,1);
|
|---|
| 400 | amg_setup_mat(&ide,nloc, &data->AfP[lvl],
|
|---|
| 401 | uid, uid_n, id_perm, off[lvl],j0,
|
|---|
| 402 | (struct gnz*)mat[1].ptr + mat_off[1][lvl],
|
|---|
| 403 | mat_off[1][lvl+1]-mat_off[1][lvl], buf);
|
|---|
| 404 | data->Q_AfP[lvl].gsh = gs_setup(ide.ptr,ide.n, &data->comm, 0,gs_auto,1);
|
|---|
| 405 | if(ide.n>max_e) max_e=ide.n;
|
|---|
| 406 | }
|
|---|
| 407 | for(lvl=0;lvl<levels-1;++lvl) {
|
|---|
| 408 | uint i; const uint j0=off[lvl], nloc = off[lvl+1]-j0;
|
|---|
| 409 | slong *p = array_reserve(slong, &ide, nloc);
|
|---|
| 410 | for(i=0;i<nloc;++i) p[i] = id[i+j0].id;
|
|---|
| 411 | data->Q_Aff[lvl].nloc = ide.n = nloc;
|
|---|
| 412 | amg_setup_mat(&ide,nloc, &data->Aff[lvl],
|
|---|
| 413 | uid, uid_n, id_perm, j0,j0,
|
|---|
| 414 | (struct gnz*)mat[2].ptr + mat_off[2][lvl],
|
|---|
| 415 | mat_off[2][lvl+1]-mat_off[2][lvl], buf);
|
|---|
| 416 | data->Q_Aff[lvl].gsh = gs_setup(ide.ptr,ide.n, &data->comm, 0,gs_auto,1);
|
|---|
| 417 | if(ide.n>max_e) max_e=ide.n;
|
|---|
| 418 | }
|
|---|
| 419 | free(mat_off[0]);
|
|---|
| 420 | array_free(&ide);
|
|---|
| 421 | return max_e;
|
|---|
| 422 | }
|
|---|
| 423 |
|
|---|
| 424 | static uint *compute_offsets(
|
|---|
| 425 | const struct id_data *const id, const uint n,
|
|---|
| 426 | const int levels)
|
|---|
| 427 | {
|
|---|
| 428 | uint i, *off = tmalloc(uint, levels+1);
|
|---|
| 429 | int lvl = -1;
|
|---|
| 430 | for(i=0;i<n;++i) {
|
|---|
| 431 | const int lvl_i = id[i].level;
|
|---|
| 432 | while(lvl<lvl_i) off[++lvl]=i;
|
|---|
| 433 | }
|
|---|
| 434 | while(lvl<levels) off[++lvl]=i;
|
|---|
| 435 | return off;
|
|---|
| 436 | }
|
|---|
| 437 |
|
|---|
| 438 | static void read_data(
|
|---|
| 439 | struct crs_data *const data,
|
|---|
| 440 | struct array *ids, struct array mats[3],
|
|---|
| 441 | struct crystal *const cr,
|
|---|
| 442 | const ulong *uid, const uint uid_n, const char *datafname);
|
|---|
| 443 |
|
|---|
| 444 | static void read_data_mpiio(
|
|---|
| 445 | struct crs_data *const data,
|
|---|
| 446 | struct array *ids, struct array mats[3],
|
|---|
| 447 | struct crystal *const cr,
|
|---|
| 448 | const ulong *uid, const uint uid_n, const char *datafname);
|
|---|
| 449 |
|
|---|
| 450 | static void amg_setup_aux(struct crs_data *data, uint n, const ulong *id, const char *datafname)
|
|---|
| 451 | {
|
|---|
| 452 | struct crystal cr;
|
|---|
| 453 | struct array uid; uint *id_perm;
|
|---|
| 454 | struct array ids, mat[3];
|
|---|
| 455 | uint max_e; ulong temp_long;
|
|---|
| 456 |
|
|---|
| 457 | crystal_init(&cr, &data->comm);
|
|---|
| 458 | data->umap = assign_dofs(&uid,0, id,n,data->comm.id,data->gs_top,&cr.data);
|
|---|
| 459 | data->un = uid.n;
|
|---|
| 460 |
|
|---|
| 461 | sortp_long(&cr.data,0, uid.ptr,uid.n,sizeof(ulong));
|
|---|
| 462 | sarray_permute(ulong,uid.ptr ,uid.n, cr.data.ptr, &temp_long);
|
|---|
| 463 | sarray_permute(uint ,data->umap,uid.n, cr.data.ptr, &max_e);
|
|---|
| 464 |
|
|---|
| 465 | #ifdef USEMPIIO
|
|---|
| 466 | read_data_mpiio(data, &ids, mat, &cr, uid.ptr,uid.n, datafname);
|
|---|
| 467 | #else
|
|---|
| 468 | read_data(data, &ids, mat, &cr, uid.ptr,uid.n, datafname);
|
|---|
| 469 | #endif
|
|---|
| 470 |
|
|---|
| 471 | /* we should have data for every uid;
|
|---|
| 472 | if not, then the data is for a smaller problem than we were given */
|
|---|
| 473 | { int not_happy = ids.n==uid.n ? 0 : 1;
|
|---|
| 474 | if(comm_reduce_int(&data->comm, gs_max, ¬_happy,1)) {
|
|---|
| 475 | comm_barrier(&data->comm);
|
|---|
| 476 | if(data->comm.id==0)
|
|---|
| 477 | fail(1,__FILE__,__LINE__,"AMG: missing data for some rows");
|
|---|
| 478 | else die(1);
|
|---|
| 479 | }
|
|---|
| 480 | }
|
|---|
| 481 |
|
|---|
| 482 | sarray_sort(struct id_data,ids.ptr,ids.n, id,1, &cr.data);
|
|---|
| 483 | sarray_sort(struct id_data,ids.ptr,ids.n, level,0, &cr.data);
|
|---|
| 484 | sarray_permute(uint,data->umap,ids.n, cr.data.ptr, &max_e);
|
|---|
| 485 | id_perm = tmalloc(uint, uid.n);
|
|---|
| 486 | sarray_perm_invert(id_perm, cr.data.ptr, uid.n);
|
|---|
| 487 | /* the global id uid[i] will have local index id_perm[i]
|
|---|
| 488 | (the local storage is sorted by level) */
|
|---|
| 489 |
|
|---|
| 490 | data->lvl_offset = compute_offsets(ids.ptr,ids.n, data->levels);
|
|---|
| 491 |
|
|---|
| 492 | max_e = amg_setup_mats(data,uid.ptr,uid.n,id_perm,ids.ptr,mat, &cr.data);
|
|---|
| 493 |
|
|---|
| 494 | { const unsigned levels=data->levels;
|
|---|
| 495 | const uint *const off = data->lvl_offset;
|
|---|
| 496 | struct id_data *const id = ids.ptr; const uint n = ids.n;
|
|---|
| 497 | double *d;
|
|---|
| 498 | uint i,max_f=0;
|
|---|
| 499 | for(i=0;i<levels-1;++i) {
|
|---|
| 500 | const uint nf = off[i+1]-off[i];
|
|---|
| 501 | if(nf>max_f) max_f=nf;
|
|---|
| 502 | }
|
|---|
| 503 | d = data->Dff = tmalloc(double, 3*n + 3*max_f + max_e + 6*(levels-1));
|
|---|
| 504 | data->x = data->Dff + n;
|
|---|
| 505 | data->b = data->x + n;
|
|---|
| 506 | data->c = data->b + n;
|
|---|
| 507 | data->c_old = data->c + max_f;
|
|---|
| 508 | data->r = data->c_old + max_f;
|
|---|
| 509 | data->buf = data->r + max_f;
|
|---|
| 510 | data->timing = data->buf + max_e;
|
|---|
| 511 | for(i=0;i<n;++i) d[i]=id[i].D;
|
|---|
| 512 | for(i=0;i<6*(levels-1);++i) data->timing[i]=0;
|
|---|
| 513 | data->timing_n=0;
|
|---|
| 514 | }
|
|---|
| 515 |
|
|---|
| 516 | free(id_perm);
|
|---|
| 517 | array_free(&ids);
|
|---|
| 518 | array_free(&mat[0]);
|
|---|
| 519 | array_free(&mat[1]);
|
|---|
| 520 | array_free(&mat[2]);
|
|---|
| 521 | array_free(&uid);
|
|---|
| 522 | crystal_free(&cr);
|
|---|
| 523 | }
|
|---|
| 524 |
|
|---|
| 525 | static void amg_dump(
|
|---|
| 526 | uint n, const ulong *id,
|
|---|
| 527 | uint nz, const uint *Ai, const uint *Aj, const double *A,
|
|---|
| 528 | struct crs_data *data, const char *datafname);
|
|---|
| 529 |
|
|---|
| 530 | struct crs_data *crs_setup(
|
|---|
| 531 | uint n, const ulong *id,
|
|---|
| 532 | uint nz, const uint *Ai, const uint *Aj, const double *A,
|
|---|
| 533 | uint null_space, const struct comm *comm, const char *datafname, uint *ierr)
|
|---|
| 534 | {
|
|---|
| 535 | struct crs_data *data = tmalloc(struct crs_data,1);
|
|---|
| 536 | struct stat info;
|
|---|
| 537 | int dump;
|
|---|
| 538 |
|
|---|
| 539 | comm_dup(&data->comm,comm);
|
|---|
| 540 |
|
|---|
| 541 | dump = 0;
|
|---|
| 542 | if(data->comm.id==0) {
|
|---|
| 543 | char str1[132];
|
|---|
| 544 | char str2[132];
|
|---|
| 545 | char str3[132];
|
|---|
| 546 | char str4[132];
|
|---|
| 547 | sprintf(str1,"%s.amg.dat",datafname);
|
|---|
| 548 | sprintf(str2,"%s.amg_W.dat",datafname);
|
|---|
| 549 | sprintf(str3,"%s.amg_AfP.dat",datafname);
|
|---|
| 550 | sprintf(str4,"%s.amg_Aff.dat",datafname);
|
|---|
| 551 | if(stat(str1,&info)!=0) dump++;
|
|---|
| 552 | if(stat(str2,&info)!=0) dump++;
|
|---|
| 553 | if(stat(str3,&info)!=0) dump++;
|
|---|
| 554 | if(stat(str4,&info)!=0) dump++;
|
|---|
| 555 | if(dump!=0) printf("AMG files not found, dumping data ...\n"), fflush(stdout);
|
|---|
| 556 | }
|
|---|
| 557 | comm_bcast(&data->comm,&dump,sizeof(int),0);
|
|---|
| 558 |
|
|---|
| 559 | data->gs_top = gs_setup((const slong*)id,n, &data->comm, 1,
|
|---|
| 560 | dump?gs_crystal_router:gs_auto, !dump);
|
|---|
| 561 |
|
|---|
| 562 | if(dump) {
|
|---|
| 563 | amg_dump(n,id,nz,Ai,Aj,A,data,datafname);
|
|---|
| 564 | gs_free(data->gs_top);
|
|---|
| 565 |
|
|---|
| 566 | if(data->comm.id==0) {
|
|---|
| 567 | printf("AMG dump successful. Run AMG setup tool to continue.\n");
|
|---|
| 568 | fflush(stdout);
|
|---|
| 569 | }
|
|---|
| 570 | comm_barrier(&data->comm);
|
|---|
| 571 | comm_free(&data->comm);
|
|---|
| 572 | free(data);
|
|---|
| 573 | *ierr=1;
|
|---|
| 574 | } else {
|
|---|
| 575 | data->null_space = null_space;
|
|---|
| 576 | amg_setup_aux(data, n,id, datafname);
|
|---|
| 577 | *ierr=0;
|
|---|
| 578 | }
|
|---|
| 579 | return data;
|
|---|
| 580 | }
|
|---|
| 581 |
|
|---|
| 582 | void crs_free(struct crs_data *data)
|
|---|
| 583 | {
|
|---|
| 584 | const unsigned levels = data->levels;
|
|---|
| 585 |
|
|---|
| 586 | free(data->Dff);
|
|---|
| 587 |
|
|---|
| 588 | if(levels>1) {
|
|---|
| 589 | unsigned lvl;
|
|---|
| 590 | for(lvl=0;lvl<levels-1;++lvl)
|
|---|
| 591 | gs_free(data->Q_Aff[lvl].gsh),
|
|---|
| 592 | gs_free(data->Q_AfP[lvl].gsh),
|
|---|
| 593 | gs_free(data->Q_W[lvl].gsh);
|
|---|
| 594 | free(data->W[0].a);
|
|---|
| 595 | free(data->W[0].row_off);
|
|---|
| 596 | }
|
|---|
| 597 |
|
|---|
| 598 | free(data->W);
|
|---|
| 599 | free(data->Q_W);
|
|---|
| 600 |
|
|---|
| 601 | free(data->lvl_offset);
|
|---|
| 602 |
|
|---|
| 603 | free(data->cheb_m);
|
|---|
| 604 | free(data->cheb_rho);
|
|---|
| 605 |
|
|---|
| 606 | free(data->umap);
|
|---|
| 607 | gs_free(data->gs_top);
|
|---|
| 608 | comm_free(&data->comm);
|
|---|
| 609 |
|
|---|
| 610 | free(data);
|
|---|
| 611 | }
|
|---|
| 612 |
|
|---|
| 613 | /*==========================================================================
|
|---|
| 614 |
|
|---|
| 615 | Find ID
|
|---|
| 616 |
|
|---|
| 617 | As proc 0 reads in the files, it needs to know where to send the data.
|
|---|
| 618 | The find_id function below takes a sorted list of id's (with no repeats)
|
|---|
| 619 | and outputs the corresponding list of owning procs.
|
|---|
| 620 |
|
|---|
| 621 | ==========================================================================*/
|
|---|
| 622 |
|
|---|
| 623 | struct find_id_map { ulong id; uint p; };
|
|---|
| 624 |
|
|---|
| 625 | struct find_id_data {
|
|---|
| 626 | struct array map;
|
|---|
| 627 | struct array work;
|
|---|
| 628 | struct crystal *cr;
|
|---|
| 629 | };
|
|---|
| 630 |
|
|---|
| 631 | static void find_id_setup(
|
|---|
| 632 | struct find_id_data *const data,
|
|---|
| 633 | const ulong *id, uint n,
|
|---|
| 634 | struct crystal *const cr)
|
|---|
| 635 | {
|
|---|
| 636 | const uint np = cr->comm.np;
|
|---|
| 637 | uint i; struct find_id_map *q;
|
|---|
| 638 |
|
|---|
| 639 | data->cr = cr;
|
|---|
| 640 | data->work.ptr=0, data->work.n=0, data->work.max=0;
|
|---|
| 641 | array_init(struct find_id_map, &data->map, n);
|
|---|
| 642 | data->map.n=n;
|
|---|
| 643 | for(q=data->map.ptr,i=0;i<n;++i)
|
|---|
| 644 | q[i].id = id[i], q[i].p = id[i] % np;
|
|---|
| 645 | sarray_transfer(struct find_id_map,&data->map,p,1,cr);
|
|---|
| 646 | sarray_sort(struct find_id_map,data->map.ptr,data->map.n, id,1, &cr->data);
|
|---|
| 647 | }
|
|---|
| 648 |
|
|---|
| 649 | static void find_id_free(struct find_id_data *const data)
|
|---|
| 650 | {
|
|---|
| 651 | array_free(&data->map);
|
|---|
| 652 | array_free(&data->work);
|
|---|
| 653 | }
|
|---|
| 654 |
|
|---|
| 655 | struct find_id_work { ulong id; uint p; uint wp; };
|
|---|
| 656 |
|
|---|
| 657 | static int find_id(
|
|---|
| 658 | uint *p_out, const unsigned p_stride,
|
|---|
| 659 | struct find_id_data *const data,
|
|---|
| 660 | const ulong *id, const unsigned id_stride, const uint n)
|
|---|
| 661 | {
|
|---|
| 662 | struct find_id_work *p, *p_end;
|
|---|
| 663 | const struct find_id_map *q=data->map.ptr, *const q_end = q+data->map.n;
|
|---|
| 664 | const uint np = data->cr->comm.np;
|
|---|
| 665 | int not_found = 0;
|
|---|
| 666 |
|
|---|
| 667 | uint nn;
|
|---|
| 668 | p = array_reserve(struct find_id_work, &data->work, n);
|
|---|
| 669 | for(nn=n;nn;--nn,id=(const ulong*)((const char*)id+id_stride))
|
|---|
| 670 | p->id=*id, p->p=0, p->wp = (*id)%np, ++p;
|
|---|
| 671 | data->work.n=n;
|
|---|
| 672 |
|
|---|
| 673 | /* send to work proc */
|
|---|
| 674 | sarray_transfer(struct find_id_work,&data->work,wp,1,data->cr);
|
|---|
| 675 |
|
|---|
| 676 | /* match id's with rid's */
|
|---|
| 677 | sarray_sort(struct find_id_work,data->work.ptr,data->work.n, id,1,
|
|---|
| 678 | &data->cr->data);
|
|---|
| 679 | for(p=data->work.ptr,p_end=p+data->work.n;p!=p_end;++p) {
|
|---|
| 680 | while(q!=q_end && q->id<p->id) ++q;
|
|---|
| 681 | if(q==q_end) break;
|
|---|
| 682 | if(q->id!=p->id) not_found=1,p->p=UINT_MAX;
|
|---|
| 683 | else p->p=q->p;
|
|---|
| 684 | }
|
|---|
| 685 | for(;p!=p_end;++p) not_found=1,p->p=UINT_MAX;
|
|---|
| 686 |
|
|---|
| 687 | /* send back */
|
|---|
| 688 | sarray_transfer(struct find_id_work,&data->work,wp,0,data->cr);
|
|---|
| 689 |
|
|---|
| 690 | /* map back to user data */
|
|---|
| 691 | sarray_sort(struct find_id_work,data->work.ptr,data->work.n, id,1,
|
|---|
| 692 | &data->cr->data);
|
|---|
| 693 | p = data->work.ptr;
|
|---|
| 694 | for(nn=n;nn;--nn,p_out=(uint*)((char*)p_out+p_stride))
|
|---|
| 695 | *p_out = p->p, ++p;
|
|---|
| 696 |
|
|---|
| 697 | return comm_reduce_int(&data->cr->comm, gs_max, ¬_found,1);
|
|---|
| 698 | }
|
|---|
| 699 |
|
|---|
| 700 |
|
|---|
| 701 | /*==========================================================================
|
|---|
| 702 |
|
|---|
| 703 | Read amg.dat, amg_*.dat
|
|---|
| 704 |
|
|---|
| 705 | The function read_data is responsible for reading these files,
|
|---|
| 706 | and creating arrays
|
|---|
| 707 | ids of struct id_data
|
|---|
| 708 | mat[3] of struct gnz (W, AfP, Aff)
|
|---|
| 709 | distributed to the appropriate procs
|
|---|
| 710 |
|
|---|
| 711 | ==========================================================================*/
|
|---|
| 712 |
|
|---|
| 713 | static ulong read_level_data(struct crs_data *data, struct file f)
|
|---|
| 714 | {
|
|---|
| 715 | unsigned i,n; ulong tn; double *buf;
|
|---|
| 716 | int updt=1;
|
|---|
| 717 | int code=0;
|
|---|
| 718 | if(data->comm.id==0) {
|
|---|
| 719 | double hdr; dread(&hdr,1,f,&code);
|
|---|
| 720 | if(code==0) {
|
|---|
| 721 | printf("AMG version %3.2f\n",hdr);
|
|---|
| 722 | if(hdr!=2.01) {
|
|---|
| 723 | printf("Outdates AMG dat files\n");
|
|---|
| 724 | updt=0;
|
|---|
| 725 | }
|
|---|
| 726 | double t; dread(&t,1,f,&code);
|
|---|
| 727 | data->levels = t;
|
|---|
| 728 | printf("AMG: %u levels\n", data->levels);
|
|---|
| 729 | }
|
|---|
| 730 | }
|
|---|
| 731 | comm_bcast(&data->comm, &data->levels,sizeof(unsigned), 0);
|
|---|
| 732 | comm_bcast(&data->comm, &updt,sizeof(int), 0);
|
|---|
| 733 | comm_bcast(&data->comm, &code,sizeof(int), 0);
|
|---|
| 734 | if(!updt||code!=0) die(1);
|
|---|
| 735 |
|
|---|
| 736 | n = data->levels-1;
|
|---|
| 737 | data->cheb_m = tmalloc(unsigned,n);
|
|---|
| 738 | data->cheb_rho = tmalloc(double ,n);
|
|---|
| 739 |
|
|---|
| 740 | buf = tmalloc(double, 2*n+1);
|
|---|
| 741 | if(data->comm.id==0) dread(buf,2*n+1,f,&code);
|
|---|
| 742 | comm_bcast(&data->comm, &code,sizeof(int), 0);
|
|---|
| 743 | if(code!=0) die(1);
|
|---|
| 744 |
|
|---|
| 745 | comm_bcast(&data->comm, buf,(2*n+1)*sizeof(double), 0);
|
|---|
| 746 | for(i=0;i<n;++i) data->cheb_m[i] = buf[i];
|
|---|
| 747 | for(i=0;i<n;++i) data->cheb_rho[i] = buf[n+i];
|
|---|
| 748 | tn = buf[2*n];
|
|---|
| 749 | data->tni = 1/(double)tn;
|
|---|
| 750 | free(buf);
|
|---|
| 751 |
|
|---|
| 752 | if(data->comm.id==0) {
|
|---|
| 753 | printf("AMG Chebyshev smoother data:\n");
|
|---|
| 754 | for(i=0;i<n;++i)
|
|---|
| 755 | printf("AMG level %u: %u iterations with rho = %g\n",
|
|---|
| 756 | i+1, data->cheb_m[i], (double)data->cheb_rho[i]);
|
|---|
| 757 | printf("AMG: %lu rows\n", (unsigned long)tn);
|
|---|
| 758 | }
|
|---|
| 759 |
|
|---|
| 760 | return tn;
|
|---|
| 761 | }
|
|---|
| 762 |
|
|---|
| 763 | static void read_data(
|
|---|
| 764 | struct crs_data *const data,
|
|---|
| 765 | struct array *ids, struct array mat[3],
|
|---|
| 766 | struct crystal *const cr,
|
|---|
| 767 | const ulong *uid, const uint uid_n, const char *datafname)
|
|---|
| 768 | {
|
|---|
| 769 | int code;
|
|---|
| 770 | struct find_id_data fid;
|
|---|
| 771 | const uint pid = data->comm.id;
|
|---|
| 772 | ulong r,tn;
|
|---|
| 773 | struct array read_buffer=null_array;
|
|---|
| 774 | struct array id_buffer = null_array, mat_buffer = null_array;
|
|---|
| 775 | uint *row_lens=0, *id_proc=0, *id_perm=0;
|
|---|
| 776 | struct array mat_proc = null_array;
|
|---|
| 777 | struct file f={0,0}, fm[3]={{0,0},{0,0},{0,0}};
|
|---|
| 778 | unsigned m;
|
|---|
| 779 | ids->ptr=0, ids->n=0, ids->max=0;
|
|---|
| 780 | for(m=0;m<3;++m) mat[m].ptr=0,mat[m].n=0,mat[m].max=0;
|
|---|
| 781 | find_id_setup(&fid, uid,uid_n, cr);
|
|---|
| 782 | code=0;
|
|---|
| 783 | if(pid==0) {
|
|---|
| 784 | char str1[132];
|
|---|
| 785 | char str2[132];
|
|---|
| 786 | char str3[132];
|
|---|
| 787 | char str4[132];
|
|---|
| 788 | sprintf(str1,"%s.amg.dat",datafname);
|
|---|
| 789 | sprintf(str2,"%s.amg_W.dat",datafname);
|
|---|
| 790 | sprintf(str3,"%s.amg_AfP.dat",datafname);
|
|---|
| 791 | sprintf(str4,"%s.amg_Aff.dat",datafname);
|
|---|
| 792 | f = dopen(str1,'r',&code);
|
|---|
| 793 | fm[0] = dopen(str2,'r',&code);
|
|---|
| 794 | fm[1] = dopen(str3,'r',&code);
|
|---|
| 795 | fm[2] = dopen(str4,'r',&code);
|
|---|
| 796 | if(code==0) {
|
|---|
| 797 | array_init(double, &read_buffer, 6*AMG_BLOCK_ROWS);
|
|---|
| 798 | row_lens = tmalloc(uint, 5*AMG_BLOCK_ROWS);
|
|---|
| 799 | id_proc = row_lens + 3*AMG_BLOCK_ROWS;
|
|---|
| 800 | id_perm = id_proc + AMG_BLOCK_ROWS;
|
|---|
| 801 | array_reserve(struct id_data, &id_buffer, AMG_BLOCK_ROWS);
|
|---|
| 802 | }
|
|---|
| 803 | }
|
|---|
| 804 | comm_bcast(&data->comm,&code,sizeof(int),0);
|
|---|
| 805 | if(code!=0) die(1);
|
|---|
| 806 |
|
|---|
| 807 | tn = read_level_data(data,f);
|
|---|
| 808 | for(r=0;r<tn;r+=AMG_BLOCK_ROWS) {
|
|---|
| 809 | unsigned nr = tn-r>AMG_BLOCK_ROWS ? AMG_BLOCK_ROWS : (unsigned)(tn-r);
|
|---|
| 810 | uint mat_size[3]={0,0,0};
|
|---|
| 811 |
|
|---|
| 812 | /* read id data */
|
|---|
| 813 | if(pid==0) {
|
|---|
| 814 | unsigned i;
|
|---|
| 815 | struct id_data *idp = id_buffer.ptr;
|
|---|
| 816 | double *b = read_buffer.ptr;
|
|---|
| 817 | dread(b,nr*6, f,&code);
|
|---|
| 818 | if(code==0) {
|
|---|
| 819 | printf("AMG: reading through row %lu, pass %u/%u\n",
|
|---|
| 820 | (unsigned long)b[(nr-1)*6],
|
|---|
| 821 | (unsigned)(r/AMG_BLOCK_ROWS+1),
|
|---|
| 822 | (unsigned)((tn+AMG_BLOCK_ROWS-1)/AMG_BLOCK_ROWS));
|
|---|
| 823 | for(i=0;i<nr;++i) {
|
|---|
| 824 | idp[i].id = *b++;
|
|---|
| 825 | idp[i].level = (uint)(*b++) - 1;
|
|---|
| 826 | mat_size[0] += row_lens[3*i+0] = *b++; /* W row length */
|
|---|
| 827 | mat_size[1] += row_lens[3*i+1] = *b++; /* AfP row length */
|
|---|
| 828 | mat_size[2] += row_lens[3*i+2] = *b++; /* Aff row length */
|
|---|
| 829 | idp[i].D = *b++;
|
|---|
| 830 | }
|
|---|
| 831 | id_buffer.n=nr;
|
|---|
| 832 |
|
|---|
| 833 | /* sort id_buffer, remember how to undo */
|
|---|
| 834 | sarray_sort(struct id_data,idp,nr, id,1, &cr->data);
|
|---|
| 835 | sarray_perm_invert(id_perm, cr->data.ptr, nr);
|
|---|
| 836 | }
|
|---|
| 837 | } else
|
|---|
| 838 | id_buffer.n=0;
|
|---|
| 839 |
|
|---|
| 840 | comm_bcast(&data->comm,&code,sizeof(int),0);
|
|---|
| 841 | if(code!=0)die(1);
|
|---|
| 842 |
|
|---|
| 843 | /* find who owns each row */
|
|---|
| 844 | if(find_id(id_proc,sizeof(uint), &fid,
|
|---|
| 845 | (const ulong*)((const char*)id_buffer.ptr+offsetof(struct id_data,id)),
|
|---|
| 846 | sizeof(struct id_data), id_buffer.n)) {
|
|---|
| 847 | if(pid==0)
|
|---|
| 848 | fail(1,__FILE__,__LINE__,"AMG: data has more rows than given problem");
|
|---|
| 849 | else die(1);
|
|---|
| 850 | }
|
|---|
| 851 | if(pid==0) { /* undo sorting of id_buffer */
|
|---|
| 852 | buffer_reserve(&cr->data,sizeof(struct id_data)+sizeof(uint));
|
|---|
| 853 | sarray_permute(struct id_data,id_buffer.ptr,nr, id_perm, cr->data.ptr);
|
|---|
| 854 | sarray_permute(uint, id_proc, nr, id_perm, cr->data.ptr);
|
|---|
| 855 | }
|
|---|
| 856 | /* read matrix data */
|
|---|
| 857 | for(m=0;m<3;++m) {
|
|---|
| 858 | if(pid==0) {
|
|---|
| 859 | const struct id_data *const idp = id_buffer.ptr;
|
|---|
| 860 | unsigned i; uint j,rl;
|
|---|
| 861 | struct gnz *p = array_reserve(struct gnz, &mat_buffer, mat_size[m]);
|
|---|
| 862 | double *b = array_reserve(double, &read_buffer, 2*mat_size[m]);
|
|---|
| 863 | uint *proc = array_reserve(uint, &mat_proc, mat_size[m]);
|
|---|
| 864 | dread(b,2*mat_size[m], fm[m],&code);
|
|---|
| 865 | if(code==0) {
|
|---|
| 866 | for(i=0;i<nr;++i) {
|
|---|
| 867 | const ulong i_id = idp[i].id;
|
|---|
| 868 | const uint i_p = id_proc[i];
|
|---|
| 869 | for(rl=row_lens[3*i+m],j=0;j<rl;++j)
|
|---|
| 870 | p->i = i_id, p->j = *b++, p->a = *b++, *proc++ = i_p, ++p;
|
|---|
| 871 | }
|
|---|
| 872 | mat_buffer.n = mat_size[m];
|
|---|
| 873 | }
|
|---|
| 874 | } else
|
|---|
| 875 | mat_buffer.n = 0;
|
|---|
| 876 | comm_bcast(&data->comm,&code,sizeof(int),0);
|
|---|
| 877 | if(code!=0)die(1);
|
|---|
| 878 | sarray_transfer_ext(struct gnz,&mat_buffer,mat_proc.ptr,sizeof(uint),cr);
|
|---|
| 879 | array_cat(struct gnz,&mat[m],mat_buffer.ptr,mat_buffer.n);
|
|---|
| 880 | }
|
|---|
| 881 | /* send id_data to owner */
|
|---|
| 882 | sarray_transfer_ext(struct id_data,&id_buffer,id_proc,sizeof(uint),cr);
|
|---|
| 883 | array_cat(struct id_data,ids,id_buffer.ptr,id_buffer.n);
|
|---|
| 884 | }
|
|---|
| 885 | array_free(&id_buffer);
|
|---|
| 886 | array_free(&mat_buffer);
|
|---|
| 887 | if(pid==0) {
|
|---|
| 888 | array_free(&read_buffer);
|
|---|
| 889 | free(row_lens);
|
|---|
| 890 | array_free(&mat_proc);
|
|---|
| 891 | dclose(fm[2]);
|
|---|
| 892 | dclose(fm[1]);
|
|---|
| 893 | dclose(fm[0]);
|
|---|
| 894 | dclose(f);
|
|---|
| 895 | }
|
|---|
| 896 | find_id_free(&fid);
|
|---|
| 897 | }
|
|---|
| 898 |
|
|---|
| 899 | #ifdef USEMPIIO
|
|---|
| 900 | static void read_data_mpiio(struct crs_data *const data,
|
|---|
| 901 | struct array *ids, struct array mat[3],
|
|---|
| 902 | struct crystal *const cr,
|
|---|
| 903 | const ulong *uid, const uint uid_n, const char *datafname)
|
|---|
| 904 | {
|
|---|
| 905 | int code;
|
|---|
| 906 | struct find_id_data fid;
|
|---|
| 907 | const uint pid = data->comm.id;
|
|---|
| 908 | struct array read_buffer=null_array;
|
|---|
| 909 | struct array id_buffer = null_array, mat_buffer = null_array;
|
|---|
| 910 | uint *row_lens=0, *id_proc=0, *id_perm=0;
|
|---|
| 911 | struct array mat_proc = null_array;
|
|---|
| 912 | ulong scan_in, scan_buf[2];
|
|---|
| 913 | uint mat_size[3] = {0,0,0};
|
|---|
| 914 | unsigned i,m;
|
|---|
| 915 |
|
|---|
| 916 | ids->ptr = 0;
|
|---|
| 917 | ids->n = 0;
|
|---|
| 918 | ids->max = 0;
|
|---|
| 919 | for(m = 0; m < 3; ++m) {
|
|---|
| 920 | mat[m].ptr = 0;
|
|---|
| 921 | mat[m].n = 0;
|
|---|
| 922 | mat[m].max = 0;
|
|---|
| 923 | }
|
|---|
| 924 | find_id_setup(&fid, uid,uid_n, cr);
|
|---|
| 925 | code = 0;
|
|---|
| 926 |
|
|---|
| 927 | struct file f = {0,0};
|
|---|
| 928 | f = dopen("amg.dat", 'r', &code);
|
|---|
| 929 | if(code != 0) die(1);
|
|---|
| 930 | ulong tn = read_level_data(data, f);
|
|---|
| 931 | dclose(f);
|
|---|
| 932 |
|
|---|
| 933 | if(pid==0)
|
|---|
| 934 | printf("AMG: preading through rows ... ");
|
|---|
| 935 |
|
|---|
| 936 | struct sfile fs = {0,0};
|
|---|
| 937 | struct sfile fsm[3] = {{0,0},{0,0},{0,0}};
|
|---|
| 938 | char str1[132];
|
|---|
| 939 | char str2[132];
|
|---|
| 940 | char str3[132];
|
|---|
| 941 | char str4[132];
|
|---|
| 942 | sprintf(str1,"%s.amg.dat",datafname);
|
|---|
| 943 | sprintf(str2,"%s.amg_W.dat",datafname);
|
|---|
| 944 | sprintf(str3,"%s.amg_AfP.dat",datafname);
|
|---|
| 945 | sprintf(str4,"%s.amg_Aff.dat",datafname);
|
|---|
| 946 | fs = dopen_mpi(str1,'r', &data->comm, &code);
|
|---|
| 947 | fsm[0] = dopen_mpi(str2,'r', &data->comm, &code);
|
|---|
| 948 | fsm[1] = dopen_mpi(str3,'r', &data->comm, &code);
|
|---|
| 949 | fsm[2] = dopen_mpi(str4,'r', &data->comm, &code);
|
|---|
| 950 | code = comm_reduce_int(&data->comm, gs_max, &code, 1);
|
|---|
| 951 | if(code != 0)
|
|---|
| 952 | die(1);
|
|---|
| 953 |
|
|---|
| 954 | array_init(double, &read_buffer, 6*AMG_MAX_ROWS);
|
|---|
| 955 | row_lens = tmalloc(uint, 5*AMG_MAX_ROWS);
|
|---|
| 956 | id_proc = row_lens + 3*AMG_MAX_ROWS;
|
|---|
| 957 | id_perm = id_proc + AMG_MAX_ROWS;
|
|---|
| 958 | array_reserve(struct id_data, &id_buffer, AMG_MAX_ROWS);
|
|---|
| 959 |
|
|---|
| 960 | /* assign rows to proc */
|
|---|
| 961 | unsigned nr = tn/data->comm.np;
|
|---|
| 962 | for(i = 0; i < tn%data->comm.np;++i)
|
|---|
| 963 | if(i == pid) nr++;
|
|---|
| 964 |
|
|---|
| 965 | if(nr>AMG_MAX_ROWS) {
|
|---|
| 966 | if(pid==0)
|
|---|
| 967 | fail(1,__FILE__,__LINE__,"AMG: AMG_MAX_ROWS too small!");
|
|---|
| 968 | die(1);
|
|---|
| 969 | }
|
|---|
| 970 |
|
|---|
| 971 | /* read id data */
|
|---|
| 972 | ulong nr_off[2];
|
|---|
| 973 | scan_in = (ulong)nr;
|
|---|
| 974 | comm_scan(nr_off, &data->comm, gs_long, gs_add, &scan_in, 1, scan_buf);
|
|---|
| 975 | const ulong off0 = 1+1+1+2*(data->levels-1)+1;
|
|---|
| 976 | dview_mpi(off0+nr_off[0]*6, fs, &code);
|
|---|
| 977 | code = comm_reduce_int(&data->comm, gs_max, &code,1);
|
|---|
| 978 | if(code != 0)
|
|---|
| 979 | die(1);
|
|---|
| 980 |
|
|---|
| 981 | double *b = read_buffer.ptr;
|
|---|
| 982 | dread_mpi(b, nr*6, fs, &code);
|
|---|
| 983 | code = comm_reduce_int(&data->comm, gs_max, &code,1);
|
|---|
| 984 | if (code != 0)
|
|---|
| 985 | die(1);
|
|---|
| 986 |
|
|---|
| 987 | struct id_data *idp = id_buffer.ptr;
|
|---|
| 988 | for (i = 0; i < nr; ++i) {
|
|---|
| 989 | idp[i].id = *b++;
|
|---|
| 990 | idp[i].level = (uint)(*b++) - 1;
|
|---|
| 991 | mat_size[0] += row_lens[3*i+0] = *b++; /* W row length */
|
|---|
| 992 | mat_size[1] += row_lens[3*i+1] = *b++; /* AfP row length */
|
|---|
| 993 | mat_size[2] += row_lens[3*i+2] = *b++; /* Aff row length */
|
|---|
| 994 | idp[i].D = *b++;
|
|---|
| 995 | }
|
|---|
| 996 | id_buffer.n = nr;
|
|---|
| 997 |
|
|---|
| 998 | /* sort id_buffer, remember how to undo */
|
|---|
| 999 | sarray_sort(struct id_data,idp,nr, id,1, &cr->data);
|
|---|
| 1000 | sarray_perm_invert(id_perm, cr->data.ptr, nr);
|
|---|
| 1001 | if (find_id(id_proc,sizeof(uint), &fid,
|
|---|
| 1002 | (const ulong*)((const char*)id_buffer.ptr+offsetof(struct id_data,id)),
|
|---|
| 1003 | sizeof(struct id_data), id_buffer.n))
|
|---|
| 1004 | code = 1;
|
|---|
| 1005 | if (comm_reduce_int(&data->comm, gs_max, &code, 1)) {
|
|---|
| 1006 | if (pid==0)
|
|---|
| 1007 | fail(1,__FILE__,__LINE__,"AMG: data has more rows than given problem");
|
|---|
| 1008 | die(1);
|
|---|
| 1009 | }
|
|---|
| 1010 |
|
|---|
| 1011 | /* undo sorting of id_buffer */
|
|---|
| 1012 | buffer_reserve(&cr->data,sizeof(struct id_data)+sizeof(uint));
|
|---|
| 1013 | sarray_permute(struct id_data,id_buffer.ptr,nr, id_perm, cr->data.ptr);
|
|---|
| 1014 | sarray_permute(uint, id_proc, nr, id_perm, cr->data.ptr);
|
|---|
| 1015 |
|
|---|
| 1016 | /* read matrix data */
|
|---|
| 1017 | for (m = 0; m < 3; ++m) {
|
|---|
| 1018 | ulong mat_size_off[2];
|
|---|
| 1019 | scan_in = (ulong)mat_size[m];
|
|---|
| 1020 | comm_scan(mat_size_off, &data->comm, gs_long, gs_add, &scan_in, 1, scan_buf);
|
|---|
| 1021 | dview_mpi(1+2*mat_size_off[0], fsm[m], &code);
|
|---|
| 1022 | code = comm_reduce_int(&data->comm, gs_max, &code, 1);
|
|---|
| 1023 | if (code != 0)
|
|---|
| 1024 | die(1);
|
|---|
| 1025 |
|
|---|
| 1026 | double *b = array_reserve(double, &read_buffer, 2*mat_size[m]);
|
|---|
| 1027 | dread_mpi(b, 2*mat_size[m], fsm[m], &code);
|
|---|
| 1028 | code = comm_reduce_int(&data->comm, gs_max, &code, 1);
|
|---|
| 1029 | if (code != 0)
|
|---|
| 1030 | die(1);
|
|---|
| 1031 |
|
|---|
| 1032 | const struct id_data *const idp = id_buffer.ptr;
|
|---|
| 1033 | struct gnz *p = array_reserve(struct gnz, &mat_buffer, mat_size[m]);
|
|---|
| 1034 | uint *proc = array_reserve(uint, &mat_proc, mat_size[m]);
|
|---|
| 1035 | unsigned i;
|
|---|
| 1036 | for (i = 0; i < nr; ++i) {
|
|---|
| 1037 | const ulong i_id = idp[i].id;
|
|---|
| 1038 | const uint i_p = id_proc[i];
|
|---|
| 1039 | unsigned j,rl;
|
|---|
| 1040 | for (rl = row_lens[3*i+m], j=0; j < rl; ++j) {
|
|---|
| 1041 | p->i = i_id;
|
|---|
| 1042 | p->j = *b++;
|
|---|
| 1043 | p->a = *b++;
|
|---|
| 1044 | *proc++ = i_p;
|
|---|
| 1045 | ++p;
|
|---|
| 1046 | }
|
|---|
| 1047 | }
|
|---|
| 1048 | mat_buffer.n = mat_size[m];
|
|---|
| 1049 | sarray_transfer_ext(struct gnz, &mat_buffer, mat_proc.ptr, sizeof(uint), cr);
|
|---|
| 1050 | array_cat(struct gnz, &mat[m], mat_buffer.ptr, mat_buffer.n);
|
|---|
| 1051 | sarray_sort(struct gnz, (struct gnz*)mat[m].ptr, mat[m].n, i, 1, &cr->data);
|
|---|
| 1052 | }
|
|---|
| 1053 |
|
|---|
| 1054 | /* send id_data to owner */
|
|---|
| 1055 | sarray_transfer_ext(struct id_data, &id_buffer, id_proc, sizeof(uint), cr);
|
|---|
| 1056 | array_cat(struct id_data,ids, id_buffer.ptr, id_buffer.n);
|
|---|
| 1057 | sarray_sort(struct id_data, ids->ptr, ids->n, id, 1, &cr->data);
|
|---|
| 1058 |
|
|---|
| 1059 | /* clean-up */
|
|---|
| 1060 | array_free(&id_buffer);
|
|---|
| 1061 | array_free(&mat_buffer);
|
|---|
| 1062 | array_free(&read_buffer);
|
|---|
| 1063 | free(row_lens);
|
|---|
| 1064 | array_free(&mat_proc);
|
|---|
| 1065 | dclose_mpi(fsm[2]);
|
|---|
| 1066 | dclose_mpi(fsm[1]);
|
|---|
| 1067 | dclose_mpi(fsm[0]);
|
|---|
| 1068 | dclose_mpi(fs);
|
|---|
| 1069 | find_id_free(&fid);
|
|---|
| 1070 |
|
|---|
| 1071 | if (pid == 0)
|
|---|
| 1072 | printf("done\n");
|
|---|
| 1073 | }
|
|---|
| 1074 | #endif
|
|---|
| 1075 |
|
|---|
| 1076 |
|
|---|
| 1077 |
|
|---|
| 1078 | /*==========================================================================
|
|---|
| 1079 |
|
|---|
| 1080 | Write amgdmp_*.dat
|
|---|
| 1081 |
|
|---|
| 1082 | The user's matrix is first assembled, then written out.
|
|---|
| 1083 |
|
|---|
| 1084 | ==========================================================================*/
|
|---|
| 1085 |
|
|---|
| 1086 |
|
|---|
| 1087 | enum mat_order { row_major, col_major };
|
|---|
| 1088 | enum distr { row_distr, col_distr };
|
|---|
| 1089 |
|
|---|
| 1090 | #define rid_equal(a,b) ((a).p==(b).p && (a).i==(b).i)
|
|---|
| 1091 |
|
|---|
| 1092 | /* rnz is a mnemonic for remote non zero */
|
|---|
| 1093 | struct rnz {
|
|---|
| 1094 | double v; struct rid i,j;
|
|---|
| 1095 | };
|
|---|
| 1096 | #define nz_pos_equal(a,b) \
|
|---|
| 1097 | (rid_equal((a).i,(b).i) && rid_equal((a).j,(b).j))
|
|---|
| 1098 |
|
|---|
| 1099 | static void mat_sort(struct array *const mat,
|
|---|
| 1100 | const enum mat_order order, buffer *const buf)
|
|---|
| 1101 | {
|
|---|
| 1102 | switch(order) {
|
|---|
| 1103 | case col_major: sarray_sort_4(struct rnz,mat->ptr,mat->n,
|
|---|
| 1104 | j.p,0,j.i,0, i.p,0,i.i,0, buf); break;
|
|---|
| 1105 | case row_major: sarray_sort_4(struct rnz,mat->ptr,mat->n,
|
|---|
| 1106 | i.p,0,i.i,0, j.p,0,j.i,0, buf); break;
|
|---|
| 1107 | }
|
|---|
| 1108 | }
|
|---|
| 1109 |
|
|---|
| 1110 | /* assumes matrix is already sorted */
|
|---|
| 1111 | static void mat_condense_sorted(struct array *const mat)
|
|---|
| 1112 | {
|
|---|
| 1113 | struct rnz *dst,*src, *const end=(struct rnz*)mat->ptr+mat->n;
|
|---|
| 1114 | if(mat->n<=1) return;
|
|---|
| 1115 | for(dst=mat->ptr;;++dst) {
|
|---|
| 1116 | if(dst+1==end) return;
|
|---|
| 1117 | if(nz_pos_equal(dst[0],dst[1])) break;
|
|---|
| 1118 | }
|
|---|
| 1119 | for(src=dst+1;src!=end;++src) {
|
|---|
| 1120 | if(nz_pos_equal(*dst,*src))
|
|---|
| 1121 | dst->v += src->v;
|
|---|
| 1122 | else
|
|---|
| 1123 | *(++dst) = *src;
|
|---|
| 1124 | }
|
|---|
| 1125 | mat->n = (dst+1)-(struct rnz*)mat->ptr;
|
|---|
| 1126 | }
|
|---|
| 1127 |
|
|---|
| 1128 | static void mat_condense(
|
|---|
| 1129 | struct array *const mat, const enum mat_order order, buffer *const buf)
|
|---|
| 1130 | {
|
|---|
| 1131 | mat_sort(mat,order,buf); mat_condense_sorted(mat);
|
|---|
| 1132 | }
|
|---|
| 1133 |
|
|---|
| 1134 | static void mat_distribute(
|
|---|
| 1135 | struct array *const mat, const enum distr d, const enum mat_order o,
|
|---|
| 1136 | struct crystal *const cr)
|
|---|
| 1137 | {
|
|---|
| 1138 | switch(d) {
|
|---|
| 1139 | case row_distr: mat_condense(mat,row_major,&cr->data);
|
|---|
| 1140 | sarray_transfer(struct rnz,mat, i.p,0, cr); break;
|
|---|
| 1141 | case col_distr: mat_condense(mat,col_major,&cr->data);
|
|---|
| 1142 | sarray_transfer(struct rnz,mat, j.p,0, cr); break;
|
|---|
| 1143 | }
|
|---|
| 1144 | mat_condense(mat,o,&cr->data);
|
|---|
| 1145 | }
|
|---|
| 1146 |
|
|---|
| 1147 | struct labelled_rid {
|
|---|
| 1148 | struct rid rid; ulong id;
|
|---|
| 1149 | };
|
|---|
| 1150 |
|
|---|
| 1151 | static void mat_list_nonlocal_sorted(
|
|---|
| 1152 | struct array *const nonlocal_id,
|
|---|
| 1153 | const struct array *const mat, const enum distr d,
|
|---|
| 1154 | const ulong *uid, struct crystal *const cr)
|
|---|
| 1155 | {
|
|---|
| 1156 | const uint pid = cr->comm.id;
|
|---|
| 1157 | struct rid last_rid;
|
|---|
| 1158 | const struct rnz *p, *const e=(const struct rnz*)mat->ptr+mat->n;
|
|---|
| 1159 | uint count; struct labelled_rid *out, *end;
|
|---|
| 1160 | #define BUILD_LIST(k) do { \
|
|---|
| 1161 | last_rid.p=UINT_MAX,last_rid.i=UINT_MAX; \
|
|---|
| 1162 | for(count=0,p=mat->ptr;p!=e;++p) { \
|
|---|
| 1163 | if(p->k.p==pid || rid_equal(last_rid,p->k)) continue; \
|
|---|
| 1164 | last_rid=p->k; ++count; \
|
|---|
| 1165 | } \
|
|---|
| 1166 | array_init(struct labelled_rid, nonlocal_id, count); \
|
|---|
| 1167 | nonlocal_id->n=count; out=nonlocal_id->ptr; \
|
|---|
| 1168 | last_rid.p=UINT_MAX,last_rid.i=UINT_MAX; \
|
|---|
| 1169 | for(p=mat->ptr;p!=e;++p) { \
|
|---|
| 1170 | if(p->k.p==pid || rid_equal(last_rid,p->k)) continue; \
|
|---|
| 1171 | (out++)->rid=last_rid=p->k; \
|
|---|
| 1172 | } \
|
|---|
| 1173 | } while(0)
|
|---|
| 1174 | switch(d) {
|
|---|
| 1175 | case row_distr: BUILD_LIST(j); break;
|
|---|
| 1176 | case col_distr: BUILD_LIST(i); break;
|
|---|
| 1177 | }
|
|---|
| 1178 | #undef BUILD_LIST
|
|---|
| 1179 | sarray_transfer(struct labelled_rid,nonlocal_id,rid.p,1,cr);
|
|---|
| 1180 | for(out=nonlocal_id->ptr,end=out+nonlocal_id->n;out!=end;++out)
|
|---|
| 1181 | out->id=uid[out->rid.i];
|
|---|
| 1182 | sarray_transfer(struct labelled_rid,nonlocal_id,rid.p,1,cr);
|
|---|
| 1183 | sarray_sort_2(struct labelled_rid,nonlocal_id->ptr,nonlocal_id->n,
|
|---|
| 1184 | rid.p,0, rid.i,0, &cr->data);
|
|---|
| 1185 | }
|
|---|
| 1186 |
|
|---|
| 1187 | static void mat_list_nonlocal(
|
|---|
| 1188 | struct array *const nonlocal_id,
|
|---|
| 1189 | struct array *const mat, const enum distr d,
|
|---|
| 1190 | const ulong *uid, struct crystal *const cr)
|
|---|
| 1191 | {
|
|---|
| 1192 | switch(d) {
|
|---|
| 1193 | case row_distr: mat_sort(mat,col_major,&cr->data); break;
|
|---|
| 1194 | case col_distr: mat_sort(mat,row_major,&cr->data); break;
|
|---|
| 1195 | }
|
|---|
| 1196 | mat_list_nonlocal_sorted(nonlocal_id,mat,d,uid,cr);
|
|---|
| 1197 | }
|
|---|
| 1198 |
|
|---|
| 1199 | static uint dump_matrix_setdata(
|
|---|
| 1200 | buffer *const buf, /* output; ok if this is one of cr's buffers */
|
|---|
| 1201 | struct array *const mat, const ulong *const uid,
|
|---|
| 1202 | struct crystal *const cr)
|
|---|
| 1203 | {
|
|---|
| 1204 | const uint pid = cr->comm.id;
|
|---|
| 1205 | struct array nonlocal_id;
|
|---|
| 1206 | double *vi, *vj, *va; uint n;
|
|---|
| 1207 | const struct rnz *nz, *enz;
|
|---|
| 1208 | const struct labelled_rid *rlbl;
|
|---|
| 1209 |
|
|---|
| 1210 | mat_distribute(mat,row_distr,col_major,cr);
|
|---|
| 1211 | n = mat->n;
|
|---|
| 1212 |
|
|---|
| 1213 | mat_list_nonlocal_sorted(&nonlocal_id,mat,row_distr,uid,cr);
|
|---|
| 1214 |
|
|---|
| 1215 | buffer_reserve(buf,3*n*sizeof(double));
|
|---|
| 1216 | vi=buf->ptr, vj=vi+n, va=vj+n;
|
|---|
| 1217 | rlbl = nonlocal_id.ptr;
|
|---|
| 1218 | for(nz=mat->ptr,enz=nz+n;nz!=enz;++nz) {
|
|---|
| 1219 | *vi++ = uid[nz->i.i];
|
|---|
| 1220 | *va++ = nz->v;
|
|---|
| 1221 | if(nz->j.p==pid)
|
|---|
| 1222 | *vj++ = uid[nz->j.i];
|
|---|
| 1223 | else {
|
|---|
| 1224 | const uint jp = nz->j.p, ji = nz->j.i;
|
|---|
| 1225 | while(rlbl->rid.p<jp) ++rlbl;
|
|---|
| 1226 | if(rlbl->rid.p!=jp) printf("dump_matrix: FAIL!!!\n");
|
|---|
| 1227 | while(rlbl->rid.i<ji) ++rlbl;
|
|---|
| 1228 | if(rlbl->rid.i!=ji) printf("dump_matrix: FAIL!!!\n");
|
|---|
| 1229 | *vj++ = rlbl->id;
|
|---|
| 1230 | }
|
|---|
| 1231 | }
|
|---|
| 1232 | array_free(&nonlocal_id);
|
|---|
| 1233 | return n;
|
|---|
| 1234 | }
|
|---|
| 1235 |
|
|---|
| 1236 | static void dump_matrix(
|
|---|
| 1237 | struct array *const mat, const ulong *const uid,
|
|---|
| 1238 | struct crystal *const cr, const char *datafname)
|
|---|
| 1239 | {
|
|---|
| 1240 | const struct comm *comm = &cr->comm;
|
|---|
| 1241 | const uint pid = comm->id, np = comm->np;
|
|---|
| 1242 | buffer *const buf = &cr->data;
|
|---|
| 1243 | struct file fi={0,0}, fj={0,0}, fp={0,0};
|
|---|
| 1244 | uint i,n;
|
|---|
| 1245 | int code;
|
|---|
| 1246 |
|
|---|
| 1247 | n = dump_matrix_setdata(buf, mat,uid,cr);
|
|---|
| 1248 |
|
|---|
| 1249 | code = 0;
|
|---|
| 1250 | if(pid==0) {
|
|---|
| 1251 | char str1[132];
|
|---|
| 1252 | char str2[132];
|
|---|
| 1253 | char str3[132];
|
|---|
| 1254 | sprintf(str1,"%s.amgdmp_i.dat",datafname);
|
|---|
| 1255 | sprintf(str2,"%s.amgdmp_j.dat",datafname);
|
|---|
| 1256 | sprintf(str3,"%s.amgdmp_p.dat",datafname);
|
|---|
| 1257 | fi=dopen(str1,'w',&code);
|
|---|
| 1258 | fj=dopen(str2,'w',&code);
|
|---|
| 1259 | fp=dopen(str3,'w',&code);
|
|---|
| 1260 | }
|
|---|
| 1261 | comm_bcast(comm,&code,sizeof(int),0);
|
|---|
| 1262 | if(code!=0) die(1);
|
|---|
| 1263 |
|
|---|
| 1264 | for(i=0;i<np;++i) {
|
|---|
| 1265 | comm_barrier(comm);
|
|---|
| 1266 | if(pid!=0 && i==pid)
|
|---|
| 1267 | comm_send(comm, &n,sizeof(uint), 0,i),
|
|---|
| 1268 | comm_send(comm, buf->ptr,3*n*sizeof(double), 0,np+i);
|
|---|
| 1269 | else if(pid==0) {
|
|---|
| 1270 | double *v;
|
|---|
| 1271 | printf("AMG writing data from proc %u\n",(unsigned)i),fflush(stdout);
|
|---|
| 1272 | if(i!=0) {
|
|---|
| 1273 | comm_recv(comm, &n,sizeof(uint), i,i);
|
|---|
| 1274 | buffer_reserve(buf,3*n*sizeof(double));
|
|---|
| 1275 | comm_recv(comm, buf->ptr,3*n*sizeof(double), i,np+i);
|
|---|
| 1276 | }
|
|---|
| 1277 | v = buf->ptr;
|
|---|
| 1278 | if(code==0) {
|
|---|
| 1279 | dwrite(fi, v , n,&code);
|
|---|
| 1280 | dwrite(fj, v+ n, n,&code);
|
|---|
| 1281 | dwrite(fp, v+2*n, n,&code);
|
|---|
| 1282 | }
|
|---|
| 1283 | }
|
|---|
| 1284 | }
|
|---|
| 1285 | if(pid==0) dclose(fi),dclose(fj),dclose(fp);
|
|---|
| 1286 | comm_bcast(comm,&code,sizeof(int),0);
|
|---|
| 1287 | if(code!=0) die(1);
|
|---|
| 1288 | comm_barrier(comm);
|
|---|
| 1289 | }
|
|---|
| 1290 |
|
|---|
| 1291 | /* assumes data->comm and data->gs_top are set */
|
|---|
| 1292 | static void amg_dump(
|
|---|
| 1293 | uint n, const ulong *id,
|
|---|
| 1294 | uint nz, const uint *Ai, const uint *Aj, const double *A,
|
|---|
| 1295 | struct crs_data *data, const char *datafname)
|
|---|
| 1296 | {
|
|---|
| 1297 | struct crystal cr;
|
|---|
| 1298 | struct array uid; struct rid *rid_map = tmalloc(struct rid,n);
|
|---|
| 1299 |
|
|---|
| 1300 | struct array mat;
|
|---|
| 1301 | uint k; struct rnz *out;
|
|---|
| 1302 |
|
|---|
| 1303 | crystal_init(&cr, &data->comm);
|
|---|
| 1304 | assign_dofs(&uid,rid_map, id,n,data->comm.id,data->gs_top,&cr.data);
|
|---|
| 1305 |
|
|---|
| 1306 | array_init(struct rnz,&mat,nz);
|
|---|
| 1307 | for(out=mat.ptr,k=0;k<nz;++k) {
|
|---|
| 1308 | uint i=Ai[k], j=Aj[k]; double a=A[k];
|
|---|
| 1309 | if(id[i]==0 || id[j]==0 || fabs(a)==0) continue;
|
|---|
| 1310 | out->v = a, out->i=rid_map[i], out->j=rid_map[j];
|
|---|
| 1311 | ++out;
|
|---|
| 1312 | }
|
|---|
| 1313 | mat.n = out-(struct rnz*)mat.ptr;
|
|---|
| 1314 | free(rid_map);
|
|---|
| 1315 |
|
|---|
| 1316 | dump_matrix(&mat,uid.ptr,&cr,datafname);
|
|---|
| 1317 |
|
|---|
| 1318 | array_free(&uid);
|
|---|
| 1319 | crystal_free(&cr);
|
|---|
| 1320 | }
|
|---|