| 1 | #include "gslib.h"
|
|---|
| 2 |
|
|---|
| 3 | #if defined(PARRSB)
|
|---|
| 4 | #include "parRSB.h"
|
|---|
| 5 | #endif
|
|---|
| 6 |
|
|---|
| 7 | #define MAXNV 8 /* maximum number of vertices per element */
|
|---|
| 8 | typedef struct{
|
|---|
| 9 | long long vtx[MAXNV];
|
|---|
| 10 | long long eid;
|
|---|
| 11 | int proc;
|
|---|
| 12 | uint seq;
|
|---|
| 13 | } edata;
|
|---|
| 14 |
|
|---|
| 15 | #ifdef PARMETIS
|
|---|
| 16 |
|
|---|
| 17 | #include "parmetis.h"
|
|---|
| 18 | #include "defs.h"
|
|---|
| 19 |
|
|---|
| 20 | int parMETIS_partMesh(int *part, long long *vl, int nel, int nv, int *opt, comm_ext ce)
|
|---|
| 21 | {
|
|---|
| 22 | int i, j;
|
|---|
| 23 | int ierrm;
|
|---|
| 24 | double time, time0;
|
|---|
| 25 |
|
|---|
| 26 | MPI_Comm comms;
|
|---|
| 27 | struct comm comm;
|
|---|
| 28 | int color;
|
|---|
| 29 | int ibuf;
|
|---|
| 30 |
|
|---|
| 31 | struct crystal cr;
|
|---|
| 32 | struct array A;
|
|---|
| 33 | edata *row;
|
|---|
| 34 |
|
|---|
| 35 | long long nell;
|
|---|
| 36 | long long *nelarray;
|
|---|
| 37 | idx_t *elmdist;
|
|---|
| 38 | idx_t *evlptr;
|
|---|
| 39 | idx_t *part_;
|
|---|
| 40 | real_t *tpwgts;
|
|---|
| 41 | idx_t edgecut;
|
|---|
| 42 | real_t ubvec;
|
|---|
| 43 | idx_t *elmwgt;
|
|---|
| 44 | idx_t wgtflag;
|
|---|
| 45 | idx_t numflag;
|
|---|
| 46 | idx_t ncon;
|
|---|
| 47 | idx_t ncommonnodes;
|
|---|
| 48 | idx_t nparts;
|
|---|
| 49 | idx_t nelsm;
|
|---|
| 50 | idx_t options[10];
|
|---|
| 51 |
|
|---|
| 52 | ierrm = METIS_OK;
|
|---|
| 53 | nell = nel;
|
|---|
| 54 | edgecut = 0;
|
|---|
| 55 | wgtflag = 0;
|
|---|
| 56 | numflag = 0;
|
|---|
| 57 | ncon = 1;
|
|---|
| 58 | ubvec = 1.02;
|
|---|
| 59 | elmwgt = NULL; /* no weights */
|
|---|
| 60 | ncommonnodes = 2;
|
|---|
| 61 |
|
|---|
| 62 | part_ = (idx_t*) malloc(nel*sizeof(idx_t));
|
|---|
| 63 |
|
|---|
| 64 | if (sizeof(idx_t) != sizeof(long long)){
|
|---|
| 65 | printf("ERROR: invalid sizeof(idx_t)!\n");
|
|---|
| 66 | goto err;
|
|---|
| 67 | }
|
|---|
| 68 | if (nv != 4 && nv != 8){
|
|---|
| 69 | printf("ERROR: nv is %d but only 4 and 8 are supported!\n", nv);
|
|---|
| 70 | goto err;
|
|---|
| 71 | }
|
|---|
| 72 |
|
|---|
| 73 | color = MPI_UNDEFINED;
|
|---|
| 74 | if (nel > 0) color = 1;
|
|---|
| 75 | MPI_Comm_split(ce, color, 0, &comms);
|
|---|
| 76 | if (color == MPI_UNDEFINED)
|
|---|
| 77 | goto end;
|
|---|
| 78 |
|
|---|
| 79 | comm_init(&comm,comms);
|
|---|
| 80 | if (comm.id == 0)
|
|---|
| 81 | printf("Running parMETIS ... "), fflush(stdout);
|
|---|
| 82 |
|
|---|
| 83 | nelarray = (long long*) malloc(comm.np*sizeof(long long));
|
|---|
| 84 | MPI_Allgather(&nell, 1, MPI_LONG_LONG_INT, nelarray, 1, MPI_LONG_LONG_INT, comm.c);
|
|---|
| 85 | elmdist = (idx_t*) malloc((comm.np+1)*sizeof(idx_t));
|
|---|
| 86 | elmdist[0] = 0;
|
|---|
| 87 | for (i=0; i<comm.np; ++i)
|
|---|
| 88 | elmdist[i+1] = elmdist[i] + (idx_t)nelarray[i];
|
|---|
| 89 | free(nelarray);
|
|---|
| 90 |
|
|---|
| 91 | evlptr = (idx_t*) malloc((nel+1)*sizeof(idx_t));
|
|---|
| 92 | evlptr[0] = 0;
|
|---|
| 93 | for (i=0; i<nel; ++i)
|
|---|
| 94 | evlptr[i+1] = evlptr[i] + nv;
|
|---|
| 95 | nelsm = elmdist[comm.id+1] - elmdist[comm.id];
|
|---|
| 96 | evlptr[nelsm]--;
|
|---|
| 97 |
|
|---|
| 98 | if (nv == 8) ncommonnodes = 4;
|
|---|
| 99 | nparts = comm.np;
|
|---|
| 100 |
|
|---|
| 101 | options[0] = 1;
|
|---|
| 102 | options[PMV3_OPTION_DBGLVL] = 0;
|
|---|
| 103 | options[PMV3_OPTION_SEED] = 0;
|
|---|
| 104 | if (opt[0] != 0) {
|
|---|
| 105 | options[PMV3_OPTION_DBGLVL] = opt[1];
|
|---|
| 106 | if (opt[2] != 0) {
|
|---|
| 107 | options[3] = PARMETIS_PSR_UNCOUPLED;
|
|---|
| 108 | nparts = opt[2];
|
|---|
| 109 | }
|
|---|
| 110 | }
|
|---|
| 111 |
|
|---|
| 112 | tpwgts = (real_t*) malloc(ncon*nparts*sizeof(real_t));
|
|---|
| 113 | for (i=0; i<ncon*nparts; ++i)
|
|---|
| 114 | tpwgts[i] = 1./(real_t)nparts;
|
|---|
| 115 |
|
|---|
| 116 | if (options[3] == PARMETIS_PSR_UNCOUPLED)
|
|---|
| 117 | for (i=0; i<nel; ++i)
|
|---|
| 118 | part_[i] = comm.id;
|
|---|
| 119 |
|
|---|
| 120 | comm_barrier(&comm);
|
|---|
| 121 | time0 = comm_time();
|
|---|
| 122 | ierrm = ParMETIS_V3_PartMeshKway(elmdist,
|
|---|
| 123 | evlptr,
|
|---|
| 124 | (idx_t*)vl,
|
|---|
| 125 | elmwgt,
|
|---|
| 126 | &wgtflag,
|
|---|
| 127 | &numflag,
|
|---|
| 128 | &ncon,
|
|---|
| 129 | &ncommonnodes,
|
|---|
| 130 | &nparts,
|
|---|
| 131 | tpwgts,
|
|---|
| 132 | &ubvec,
|
|---|
| 133 | options,
|
|---|
| 134 | &edgecut,
|
|---|
| 135 | part_,
|
|---|
| 136 | &comm.c);
|
|---|
| 137 |
|
|---|
| 138 | time = comm_time() - time0;
|
|---|
| 139 | if (comm.id == 0)
|
|---|
| 140 | printf("%lf sec\n", time), fflush(stdout);
|
|---|
| 141 |
|
|---|
| 142 | for (i=0; i<nel; ++i)
|
|---|
| 143 | part[i] = part_[i];
|
|---|
| 144 |
|
|---|
| 145 | free(elmdist);
|
|---|
| 146 | free(evlptr);
|
|---|
| 147 | free(tpwgts);
|
|---|
| 148 | MPI_Comm_free(&comms);
|
|---|
| 149 | comm_free(&comm);
|
|---|
| 150 |
|
|---|
| 151 | end:
|
|---|
| 152 | comm_init(&comm,ce);
|
|---|
| 153 | comm_allreduce(&comm, gs_int, gs_min, &ierrm, 1, &ibuf);
|
|---|
| 154 | if (ierrm != METIS_OK) goto err;
|
|---|
| 155 | return 0;
|
|---|
| 156 |
|
|---|
| 157 | err:
|
|---|
| 158 | return 1;
|
|---|
| 159 | }
|
|---|
| 160 | #endif
|
|---|
| 161 |
|
|---|
| 162 | void printPartStat(long long *vtx, int nel, int nv, comm_ext ce)
|
|---|
| 163 | {
|
|---|
| 164 | int i,j;
|
|---|
| 165 |
|
|---|
| 166 | struct comm comm;
|
|---|
| 167 | int np, id;
|
|---|
| 168 |
|
|---|
| 169 | int Nmsg;
|
|---|
| 170 | int *Ncomm;
|
|---|
| 171 |
|
|---|
| 172 | int nelMin, nelMax;
|
|---|
| 173 | int ncMin, ncMax, ncSum;
|
|---|
| 174 | int nsMin, nsMax, nsSum;
|
|---|
| 175 | int nssMin, nssMax, nssSum;
|
|---|
| 176 |
|
|---|
| 177 | struct gs_data *gsh;
|
|---|
| 178 | int b;
|
|---|
| 179 |
|
|---|
| 180 | int numPoints;
|
|---|
| 181 | long long *data;
|
|---|
| 182 |
|
|---|
| 183 | comm_init(&comm,ce);
|
|---|
| 184 | np = comm.np;
|
|---|
| 185 | id = comm.id;
|
|---|
| 186 |
|
|---|
| 187 | if (np == 1) return;
|
|---|
| 188 |
|
|---|
| 189 | numPoints = nel*nv;
|
|---|
| 190 | data = (long long*) malloc(numPoints*sizeof(long long));
|
|---|
| 191 | for(i = 0; i < numPoints; i++) data[i] = vtx[i];
|
|---|
| 192 |
|
|---|
| 193 | gsh = gs_setup(data, numPoints, &comm, 0, gs_pairwise, 0);
|
|---|
| 194 |
|
|---|
| 195 | pw_data_nmsg(gsh, &Nmsg);
|
|---|
| 196 | Ncomm = (int *) malloc(Nmsg*sizeof(int));
|
|---|
| 197 | pw_data_size(gsh, Ncomm);
|
|---|
| 198 |
|
|---|
| 199 | gs_free(gsh);
|
|---|
| 200 | free(data);
|
|---|
| 201 |
|
|---|
| 202 | ncMax = Nmsg;
|
|---|
| 203 | ncMin = Nmsg;
|
|---|
| 204 | ncSum = Nmsg;
|
|---|
| 205 | comm_allreduce(&comm, gs_int, gs_max, &ncMax , 1, &b);
|
|---|
| 206 | comm_allreduce(&comm, gs_int, gs_min, &ncMin , 1, &b);
|
|---|
| 207 | comm_allreduce(&comm, gs_int, gs_add, &ncSum , 1, &b);
|
|---|
| 208 |
|
|---|
| 209 | nsMax = Ncomm[0];
|
|---|
| 210 | nsMin = Ncomm[0];
|
|---|
| 211 | nsSum = Ncomm[0];
|
|---|
| 212 | for (i=1; i<Nmsg; ++i){
|
|---|
| 213 | nsMax = Ncomm[i] > Ncomm[i-1] ? Ncomm[i] : Ncomm[i-1];
|
|---|
| 214 | nsMin = Ncomm[i] < Ncomm[i-1] ? Ncomm[i] : Ncomm[i-1];
|
|---|
| 215 | nsSum += Ncomm[i];
|
|---|
| 216 | }
|
|---|
| 217 | comm_allreduce(&comm, gs_int, gs_max, &nsMax , 1, &b);
|
|---|
| 218 | comm_allreduce(&comm, gs_int, gs_min, &nsMin , 1, &b);
|
|---|
| 219 |
|
|---|
| 220 | nssMin = nsSum;
|
|---|
| 221 | nssMax = nsSum;
|
|---|
| 222 | nssSum = nsSum;
|
|---|
| 223 | comm_allreduce(&comm, gs_int, gs_max, &nssMax , 1, &b);
|
|---|
| 224 | comm_allreduce(&comm, gs_int, gs_min, &nssMin , 1, &b);
|
|---|
| 225 | comm_allreduce(&comm, gs_int, gs_add, &nssSum , 1, &b);
|
|---|
| 226 |
|
|---|
| 227 | nsSum = nsSum/Nmsg;
|
|---|
| 228 | comm_allreduce(&comm, gs_int, gs_add, &nsSum , 1, &b);
|
|---|
| 229 |
|
|---|
| 230 | nelMax = nel;
|
|---|
| 231 | nelMin = nel;
|
|---|
| 232 | comm_allreduce(&comm, gs_int, gs_max, &nelMax, 1, &b);
|
|---|
| 233 | comm_allreduce(&comm, gs_int, gs_min, &nelMin, 1, &b);
|
|---|
| 234 |
|
|---|
| 235 | if (id == 0) {
|
|---|
| 236 | printf(
|
|---|
| 237 | " nElements max/min/bal: %d %d %.2f\n",
|
|---|
| 238 | nelMax, nelMin, (double)nelMax/nelMin);
|
|---|
| 239 | printf(
|
|---|
| 240 | " nMessages max/min/avg: %d %d %.2f\n",
|
|---|
| 241 | ncMax, ncMin, (double)ncSum/np);
|
|---|
| 242 | printf(
|
|---|
| 243 | " msgSize max/min/avg: %d %d %.2f\n",
|
|---|
| 244 | nsMax, nsMin, (double)nsSum/np);
|
|---|
| 245 | printf(
|
|---|
| 246 | " msgSizeSum max/min/avg: %d %d %.2f\n",
|
|---|
| 247 | nssMax, nssMin, (double)nssSum/np);
|
|---|
| 248 | fflush(stdout);
|
|---|
| 249 | }
|
|---|
| 250 |
|
|---|
| 251 | comm_free(&comm);
|
|---|
| 252 | }
|
|---|
| 253 |
|
|---|
| 254 | int redistributeData(int *nel_,long long *vl,long long *el,
|
|---|
| 255 | int *part,int *seq,int nv,int lelt,struct comm *comm)
|
|---|
| 256 | {
|
|---|
| 257 | int nel=*nel_;
|
|---|
| 258 |
|
|---|
| 259 | struct crystal cr;
|
|---|
| 260 | struct array eList;
|
|---|
| 261 | edata *data;
|
|---|
| 262 |
|
|---|
| 263 | int count,e,n,ibuf;
|
|---|
| 264 |
|
|---|
| 265 | /* redistribute data */
|
|---|
| 266 | array_init(edata, &eList, nel), eList.n = nel;
|
|---|
| 267 | for(data = eList.ptr, e = 0; e < nel; ++e) {
|
|---|
| 268 | data[e].proc= part[e];
|
|---|
| 269 | data[e].eid = el[e];
|
|---|
| 270 | for(n = 0; n < nv; ++n) {
|
|---|
| 271 | data[e].vtx[n] = vl[e*nv + n];
|
|---|
| 272 | }
|
|---|
| 273 | }
|
|---|
| 274 |
|
|---|
| 275 | if(seq!=NULL){
|
|---|
| 276 | for(data=eList.ptr, e=0; e<nel; ++e)
|
|---|
| 277 | data[e].seq=seq[e];
|
|---|
| 278 | }
|
|---|
| 279 |
|
|---|
| 280 | crystal_init(&cr,comm);
|
|---|
| 281 | sarray_transfer(edata, &eList, proc, 0, &cr);
|
|---|
| 282 | crystal_free(&cr);
|
|---|
| 283 |
|
|---|
| 284 | *nel_=nel=eList.n;
|
|---|
| 285 |
|
|---|
| 286 | count = 0;
|
|---|
| 287 | if (nel > lelt) count = 1;
|
|---|
| 288 | comm_allreduce(comm, gs_int, gs_add, &count, 1, &ibuf);
|
|---|
| 289 | if (count > 0) {
|
|---|
| 290 | if (comm->id == 0)
|
|---|
| 291 | printf("ERROR: resulting parition requires lelt=%d!\n", nel);
|
|---|
| 292 | return 1;
|
|---|
| 293 | }
|
|---|
| 294 |
|
|---|
| 295 | //TODO: sort by seq
|
|---|
| 296 | if(seq!=NULL){
|
|---|
| 297 | buffer bfr; buffer_init(&bfr,1024);
|
|---|
| 298 | sarray_sort(edata,eList.ptr,eList.n,seq,0,&bfr);
|
|---|
| 299 | buffer_free(&bfr);
|
|---|
| 300 | }
|
|---|
| 301 |
|
|---|
| 302 | for(data = eList.ptr, e = 0; e < nel; ++e) {
|
|---|
| 303 | el[e] = data[e].eid;
|
|---|
| 304 | for(n = 0; n < nv; ++n) {
|
|---|
| 305 | vl[e*nv + n] = data[e].vtx[n];
|
|---|
| 306 | }
|
|---|
| 307 | }
|
|---|
| 308 |
|
|---|
| 309 | array_free(&eList);
|
|---|
| 310 |
|
|---|
| 311 | return 0;
|
|---|
| 312 | }
|
|---|
| 313 |
|
|---|
| 314 | #define fpartMesh FORTRAN_UNPREFIXED(fpartmesh,FPARTMESH)
|
|---|
| 315 | void fpartMesh(long long *el, long long *vl, double *xyz,
|
|---|
| 316 | const int *lelt, int *nell, const int *nve,
|
|---|
| 317 | int *fcomm, int *fmode,int *rtval)
|
|---|
| 318 | {
|
|---|
| 319 | struct comm comm;
|
|---|
| 320 |
|
|---|
| 321 | int nel, nv, mode;
|
|---|
| 322 | int e, n;
|
|---|
| 323 | int count, ierr, ibuf;
|
|---|
| 324 | int *part,*seq;
|
|---|
| 325 | int opt[3];
|
|---|
| 326 |
|
|---|
| 327 | nel = *nell;
|
|---|
| 328 | nv = *nve;
|
|---|
| 329 | mode = *fmode;
|
|---|
| 330 |
|
|---|
| 331 | #if defined(MPI)
|
|---|
| 332 | comm_ext cext = MPI_Comm_f2c(*fcomm);
|
|---|
| 333 | #else
|
|---|
| 334 | comm_ext cext = 0;
|
|---|
| 335 | #endif
|
|---|
| 336 | comm_init(&comm, cext);
|
|---|
| 337 |
|
|---|
| 338 | part = (int*) malloc(*lelt * sizeof(int));
|
|---|
| 339 |
|
|---|
| 340 | ierr = 1;
|
|---|
| 341 | #if defined(PARRSB)
|
|---|
| 342 | int rsb,rcb;
|
|---|
| 343 | rsb=mode&1;
|
|---|
| 344 | rcb=mode&2;
|
|---|
| 345 |
|
|---|
| 346 | opt[0] = 1;
|
|---|
| 347 | opt[1] = 1; /* verbosity */
|
|---|
| 348 | opt[2] = 0;
|
|---|
| 349 |
|
|---|
| 350 | if(rcb){
|
|---|
| 351 | ierr = parRCB_partMesh(part,NULL,xyz,nel,nv,opt,comm.c);
|
|---|
| 352 | if (ierr != 0) goto err;
|
|---|
| 353 |
|
|---|
| 354 | ierr=redistributeData(&nel,vl,el,part,NULL,nv,*lelt,&comm);
|
|---|
| 355 | if (ierr != 0) goto err;
|
|---|
| 356 | }
|
|---|
| 357 |
|
|---|
| 358 | if(rsb){
|
|---|
| 359 | ierr = parRSB_partMesh(part,vl,nel,nv,opt,comm.c);
|
|---|
| 360 | if (ierr != 0) goto err;
|
|---|
| 361 |
|
|---|
| 362 | ierr=redistributeData(&nel,vl,el,part,NULL,nv,*lelt,&comm);
|
|---|
| 363 | if (ierr != 0) goto err;
|
|---|
| 364 | }
|
|---|
| 365 | #elif defined(PARMETIS)
|
|---|
| 366 | int metis; metis=mode&4;
|
|---|
| 367 |
|
|---|
| 368 | if(metis){
|
|---|
| 369 | opt[0] = 1;
|
|---|
| 370 | opt[1] = 0; /* verbosity */
|
|---|
| 371 | opt[2] = comm.np;
|
|---|
| 372 |
|
|---|
| 373 | ierr = parMETIS_partMesh(part,vl,nel,nv,opt,comm.c);
|
|---|
| 374 |
|
|---|
| 375 | ierr=redistributeData(&nel,vl,el,part,NULL,nv,*lelt,&comm);
|
|---|
| 376 | if (ierr != 0) goto err;
|
|---|
| 377 |
|
|---|
| 378 | /* printPartStat(vl, nel, nv, cext); */
|
|---|
| 379 | }
|
|---|
| 380 | #endif
|
|---|
| 381 |
|
|---|
| 382 | free(part);
|
|---|
| 383 |
|
|---|
| 384 | *nell = nel;
|
|---|
| 385 | *rtval = 0;
|
|---|
| 386 | return;
|
|---|
| 387 |
|
|---|
| 388 | err:
|
|---|
| 389 | fflush(stdout);
|
|---|
| 390 | *rtval = 1;
|
|---|
| 391 | }
|
|---|
| 392 |
|
|---|
| 393 | #define fprintPartStat FORTRAN_UNPREFIXED(printpartstat,PRINTPARTSTAT)
|
|---|
| 394 | void fprintPartStat(long long *vtx, int *nel, int *nv, int *comm)
|
|---|
| 395 | {
|
|---|
| 396 |
|
|---|
| 397 | #if defined(MPI)
|
|---|
| 398 | comm_ext c = MPI_Comm_f2c(*comm);
|
|---|
| 399 | #else
|
|---|
| 400 | comm_ext c = 0;
|
|---|
| 401 | #endif
|
|---|
| 402 |
|
|---|
| 403 | printPartStat(vtx, *nel, *nv, c);
|
|---|
| 404 | }
|
|---|