| 1 | /*
|
|---|
| 2 | A simple 2D hydro code
|
|---|
| 3 | (C) Romain Teyssier : CEA/IRFU -- original F90 code
|
|---|
| 4 | (C) Pierre-Francois Lavallee : IDRIS -- original F90 code
|
|---|
| 5 | (C) Guillaume Colin de Verdiere : CEA/DAM -- for the C version
|
|---|
| 6 | (C) Adele Villiermet : CINES -- for FTI integration
|
|---|
| 7 | */
|
|---|
| 8 | /*
|
|---|
| 9 |
|
|---|
| 10 | This software is governed by the CeCILL license under French law and
|
|---|
| 11 | abiding by the rules of distribution of free software. You can use,
|
|---|
| 12 | modify and/ or redistribute the software under the terms of the CeCILL
|
|---|
| 13 | license as circulated by CEA, CNRS and INRIA at the following URL
|
|---|
| 14 | "http://www.cecill.info".
|
|---|
| 15 |
|
|---|
| 16 | As a counterpart to the access to the source code and rights to copy,
|
|---|
| 17 | modify and redistribute granted by the license, users are provided only
|
|---|
| 18 | with a limited warranty and the software's author, the holder of the
|
|---|
| 19 | economic rights, and the successive licensors have only limited
|
|---|
| 20 | liability.
|
|---|
| 21 |
|
|---|
| 22 | In this respect, the user's attention is drawn to the risks associated
|
|---|
| 23 | with loading, using, modifying and/or developing or reproducing the
|
|---|
| 24 | software by the user in light of its specific status of free software,
|
|---|
| 25 | that may mean that it is complicated to manipulate, and that also
|
|---|
| 26 | therefore means that it is reserved for developers and experienced
|
|---|
| 27 | professionals having in-depth computer knowledge. Users are therefore
|
|---|
| 28 | encouraged to load and test the software's suitability as regards their
|
|---|
| 29 | requirements in conditions enabling the security of their systems and/or
|
|---|
| 30 | data to be ensured and, more generally, to use and operate it in the
|
|---|
| 31 | same conditions as regards security.
|
|---|
| 32 |
|
|---|
| 33 | The fact that you are presently reading this means that you have had
|
|---|
| 34 | knowledge of the CeCILL license and that you accept its terms.
|
|---|
| 35 |
|
|---|
| 36 | */
|
|---|
| 37 | #ifdef MPI
|
|---|
| 38 | #include <mpi.h>
|
|---|
| 39 | #if FTI>0
|
|---|
| 40 | #include <fti.h>
|
|---|
| 41 | #endif
|
|---|
| 42 | #endif
|
|---|
| 43 | #include <unistd.h>
|
|---|
| 44 | #include <stdio.h>
|
|---|
| 45 | #include <time.h>
|
|---|
| 46 | #include <string.h>
|
|---|
| 47 | #ifdef _OPENMP
|
|---|
| 48 | #include <omp.h>
|
|---|
| 49 | #endif
|
|---|
| 50 |
|
|---|
| 51 | #include "parametres.h"
|
|---|
| 52 | #include "hydro_funcs.h"
|
|---|
| 53 | #include "vtkfile.h"
|
|---|
| 54 | #include "compute_deltat.h"
|
|---|
| 55 | #include "hydro_godunov.h"
|
|---|
| 56 | #include "perfcnt.h"
|
|---|
| 57 | #include "cclock.h"
|
|---|
| 58 | #include "utils.h"
|
|---|
| 59 |
|
|---|
| 60 | hydroparam_t H;
|
|---|
| 61 | hydrovar_t Hv; // nvar
|
|---|
| 62 | //for compute_delta
|
|---|
| 63 | hydrovarwork_t Hvw_deltat; // nvar
|
|---|
| 64 | hydrowork_t Hw_deltat;
|
|---|
| 65 | hydrovarwork_t Hvw_godunov; // nvar
|
|---|
| 66 | hydrowork_t Hw_godunov;
|
|---|
| 67 | double functim[TIM_END];
|
|---|
| 68 |
|
|---|
| 69 | int sizeLabel(double *tim, const int N) {
|
|---|
| 70 | double maxi = 0;
|
|---|
| 71 | int i;
|
|---|
| 72 |
|
|---|
| 73 | for (i = 0; i < N; i++)
|
|---|
| 74 | if (maxi < tim[i]) maxi = tim[i];
|
|---|
| 75 |
|
|---|
| 76 | // if (maxi < 100) return 8;
|
|---|
| 77 | // if (maxi < 1000) return 9;
|
|---|
| 78 | // if (maxi < 10000) return 10;
|
|---|
| 79 | return 9;
|
|---|
| 80 | }
|
|---|
| 81 | void percentTimings(double *tim, const int N)
|
|---|
| 82 | {
|
|---|
| 83 | double sum = 0;
|
|---|
| 84 | int i;
|
|---|
| 85 |
|
|---|
| 86 | for (i = 0; i < N; i++)
|
|---|
| 87 | sum += tim[i];
|
|---|
| 88 |
|
|---|
| 89 | for (i = 0; i < N; i++)
|
|---|
| 90 | tim[i] = 100.0 * tim[i] / sum;
|
|---|
| 91 | }
|
|---|
| 92 |
|
|---|
| 93 | void avgTimings(double *tim, const int N, const int nbr)
|
|---|
| 94 | {
|
|---|
| 95 | int i;
|
|---|
| 96 |
|
|---|
| 97 | for (i = 0; i < N; i++)
|
|---|
| 98 | tim[i] = tim[i] / nbr;
|
|---|
| 99 | }
|
|---|
| 100 |
|
|---|
| 101 | void printTimings(double *tim, const int N, const int sizeFmt)
|
|---|
| 102 | {
|
|---|
| 103 | double sum = 0;
|
|---|
| 104 | int i;
|
|---|
| 105 | char fmt[256];
|
|---|
| 106 |
|
|---|
| 107 | sprintf(fmt, "%%-%d.4lf ", sizeFmt);
|
|---|
| 108 |
|
|---|
| 109 | for (i = 0; i < N; i++)
|
|---|
| 110 | fprintf(stdout, fmt, tim[i]);
|
|---|
| 111 | }
|
|---|
| 112 | void printTimingsLabel(const int N, const int fmtSize)
|
|---|
| 113 | {
|
|---|
| 114 | int i;
|
|---|
| 115 | char *txt;
|
|---|
| 116 | char fmt[256];
|
|---|
| 117 |
|
|---|
| 118 | sprintf(fmt, "%%-%ds ", fmtSize);
|
|---|
| 119 | for (i = 0; i < N; i++) {
|
|---|
| 120 | switch(i) {
|
|---|
| 121 | case TIM_COMPDT: txt = "COMPDT"; break;
|
|---|
| 122 | case TIM_MAKBOU: txt = "MAKBOU"; break;
|
|---|
| 123 | case TIM_GATCON: txt = "GATCON"; break;
|
|---|
| 124 | case TIM_CONPRI: txt = "CONPRI"; break;
|
|---|
| 125 | case TIM_EOS: txt = "EOS"; break;
|
|---|
| 126 | case TIM_SLOPE: txt = "SLOPE"; break;
|
|---|
| 127 | case TIM_TRACE: txt = "TRACE"; break;
|
|---|
| 128 | case TIM_QLEFTR: txt = "QLEFTR"; break;
|
|---|
| 129 | case TIM_RIEMAN: txt = "RIEMAN"; break;
|
|---|
| 130 | case TIM_CMPFLX: txt = "CMPFLX"; break;
|
|---|
| 131 | case TIM_UPDCON: txt = "UPDCON"; break;
|
|---|
| 132 | case TIM_ALLRED: txt = "ALLRED"; break;
|
|---|
| 133 | default:;
|
|---|
| 134 | }
|
|---|
| 135 | fprintf(stdout, fmt, txt);
|
|---|
| 136 | }
|
|---|
| 137 | }
|
|---|
| 138 |
|
|---|
| 139 | int
|
|---|
| 140 | main(int argc, char **argv) {
|
|---|
| 141 | char myhost[256];
|
|---|
| 142 | real_t dt = 0;
|
|---|
| 143 | int nvtk = 0;
|
|---|
| 144 | char outnum[80];
|
|---|
| 145 | int time_output = 0;
|
|---|
| 146 | long flops = 0;
|
|---|
| 147 |
|
|---|
| 148 | // real_t output_time = 0.0;
|
|---|
| 149 | real_t next_output_time = 0;
|
|---|
| 150 | double start_time = 0, end_time = 0;
|
|---|
| 151 | double start_iter = 0, end_iter = 0;
|
|---|
| 152 | double elaps = 0;
|
|---|
| 153 | struct timespec start, end;
|
|---|
| 154 | double cellPerCycle = 0;
|
|---|
| 155 | double avgCellPerCycle = 0;
|
|---|
| 156 | long nbCycle = 0;
|
|---|
| 157 |
|
|---|
| 158 | // array of timers to profile the code
|
|---|
| 159 | memset(functim, 0, TIM_END * sizeof(functim[0]));
|
|---|
| 160 |
|
|---|
| 161 | #ifdef MPI
|
|---|
| 162 | MPI_Init(&argc, &argv);
|
|---|
| 163 | #endif
|
|---|
| 164 |
|
|---|
| 165 | process_args(argc, argv, &H);
|
|---|
| 166 | hydro_init(&H, &Hv);
|
|---|
| 167 |
|
|---|
| 168 | if (H.mype == 0)
|
|---|
| 169 | fprintf(stdout, "Hydro starts in %s precision.\n", ((sizeof(real_t) == sizeof(double))? "double": "single"));
|
|---|
| 170 | //gethostname(myhost, 255);
|
|---|
| 171 | if (H.mype == 0) {
|
|---|
| 172 | fprintf(stdout, "Hydro: Main process running on %s\n", myhost);
|
|---|
| 173 | }
|
|---|
| 174 |
|
|---|
| 175 | #ifdef _OPENMP
|
|---|
| 176 | if (H.mype == 0) {
|
|---|
| 177 | fprintf(stdout, "Hydro: OpenMP mode ON\n");
|
|---|
| 178 | fprintf(stdout, "Hydro: OpenMP %d max threads\n", omp_get_max_threads());
|
|---|
| 179 | fprintf(stdout, "Hydro: OpenMP %d num threads\n", omp_get_num_threads());
|
|---|
| 180 | fprintf(stdout, "Hydro: OpenMP %d num procs\n", omp_get_num_procs());
|
|---|
| 181 | }
|
|---|
| 182 | #endif
|
|---|
| 183 | #ifdef MPI
|
|---|
| 184 | if (H.mype == 0) {
|
|---|
| 185 | fprintf(stdout, "Hydro: MPI run with %d procs\n", H.nproc);
|
|---|
| 186 | }
|
|---|
| 187 | #else
|
|---|
| 188 | fprintf(stdout, "Hydro: standard build\n");
|
|---|
| 189 | #endif
|
|---|
| 190 |
|
|---|
| 191 |
|
|---|
| 192 | // PRINTUOLD(H, &Hv);
|
|---|
| 193 | #ifdef MPI
|
|---|
| 194 | if (H.nproc > 1)
|
|---|
| 195 | #if FTI>0
|
|---|
| 196 | MPI_Barrier(FTI_COMM_WORLD);
|
|---|
| 197 | #endif
|
|---|
| 198 | #if FTI==0
|
|---|
| 199 | MPI_Barrier(MPI_COMM_WORLD);
|
|---|
| 200 | #endif
|
|---|
| 201 | #endif
|
|---|
| 202 |
|
|---|
| 203 | if (H.dtoutput > 0) {
|
|---|
| 204 | // outputs are in physical time not in time steps
|
|---|
| 205 | time_output = 1;
|
|---|
| 206 | next_output_time = next_output_time + H.dtoutput;
|
|---|
| 207 | }
|
|---|
| 208 |
|
|---|
| 209 | if (H.dtoutput > 0 || H.noutput > 0)
|
|---|
| 210 | vtkfile(++nvtk, H, &Hv);
|
|---|
| 211 |
|
|---|
| 212 | if (H.mype == 0)
|
|---|
| 213 | fprintf(stdout, "Hydro starts main loop.\n");
|
|---|
| 214 |
|
|---|
| 215 | //pre-allocate memory before entering in loop
|
|---|
| 216 | //For godunov scheme
|
|---|
| 217 | start = cclock();
|
|---|
| 218 | start = cclock();
|
|---|
| 219 | allocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov);
|
|---|
| 220 | compute_deltat_init_mem(H, &Hw_deltat, &Hvw_deltat);
|
|---|
| 221 | end = cclock();
|
|---|
| 222 | #ifdef MPI
|
|---|
| 223 | #if FTI==1
|
|---|
| 224 | FTI_Protect(0,functim, TIM_END,FTI_DBLE);
|
|---|
| 225 | FTI_Protect(1,&nvtk,1,FTI_INTG);
|
|---|
| 226 | FTI_Protect(2,&next_output_time,1,FTI_DBLE);
|
|---|
| 227 | FTI_Protect(3,&dt,1,FTI_DBLE);
|
|---|
| 228 | FTI_Protect(4,&MflopsSUM,1,FTI_DBLE);
|
|---|
| 229 | FTI_Protect(5,&nbFLOPS,1,FTI_LONG);
|
|---|
| 230 | FTI_Protect(6,&(H.nstep),1,FTI_INTG);
|
|---|
| 231 | FTI_Protect(7,&(H.t),1,FTI_DBLE);
|
|---|
| 232 | FTI_Protect(8,Hv.uold,H.nvar * H.nxt * H.nyt,FTI_DBLE);
|
|---|
| 233 | #endif
|
|---|
| 234 | #endif
|
|---|
| 235 | if (H.mype == 0) fprintf(stdout, "Hydro: init mem %lfs\n", ccelaps(start, end));
|
|---|
| 236 | // we start timings here to avoid the cost of initial memory allocation
|
|---|
| 237 | start_time = dcclock();
|
|---|
| 238 |
|
|---|
| 239 | while ((H.t < H.tend) && (H.nstep < H.nstepmax)) {
|
|---|
| 240 | //system("top -b -n1");
|
|---|
| 241 | // reset perf counter for this iteration
|
|---|
| 242 | flopsAri = flopsSqr = flopsMin = flopsTra = 0;
|
|---|
| 243 | start_iter = dcclock();
|
|---|
| 244 | outnum[0] = 0;
|
|---|
| 245 | if ((H.nstep % 2) == 0) {
|
|---|
| 246 | dt = 0;
|
|---|
| 247 | // if (H.mype == 0) fprintf(stdout, "Hydro computes deltat.\n");
|
|---|
| 248 | start = cclock();
|
|---|
| 249 | compute_deltat(&dt, H, &Hw_deltat, &Hv, &Hvw_deltat);
|
|---|
| 250 | end = cclock();
|
|---|
| 251 | functim[TIM_COMPDT] += ccelaps(start, end);
|
|---|
| 252 | if (H.nstep == 0) {
|
|---|
| 253 | dt = dt / 2.0;
|
|---|
| 254 | if (H.mype == 0) fprintf(stdout, "Hydro computes initial deltat: %le\n", dt);
|
|---|
| 255 | }
|
|---|
| 256 | #ifdef MPI
|
|---|
| 257 | if (H.nproc > 1) {
|
|---|
| 258 | real_t dtmin;
|
|---|
| 259 | // printf("pe=%4d\tdt=%lg\n",H.mype, dt);
|
|---|
| 260 | #if FTI==0
|
|---|
| 261 | if (sizeof(real_t) == sizeof(double)) {
|
|---|
| 262 | MPI_Allreduce(&dt, &dtmin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
|
|---|
| 263 | } else {
|
|---|
| 264 | MPI_Allreduce(&dt, &dtmin, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD);
|
|---|
| 265 | }
|
|---|
| 266 | #endif
|
|---|
| 267 | #if FTI>0
|
|---|
| 268 | if (sizeof(real_t) == sizeof(double)) {
|
|---|
| 269 | MPI_Allreduce(&dt, &dtmin, 1, MPI_DOUBLE, MPI_MIN, FTI_COMM_WORLD);
|
|---|
| 270 | } else {
|
|---|
| 271 | MPI_Allreduce(&dt, &dtmin, 1, MPI_FLOAT, MPI_MIN, FTI_COMM_WORLD);
|
|---|
| 272 | }
|
|---|
| 273 | #endif
|
|---|
| 274 | dt = dtmin;
|
|---|
| 275 | }
|
|---|
| 276 | #endif
|
|---|
| 277 | }
|
|---|
| 278 | // dt = 1.e-3;
|
|---|
| 279 | // if (H.mype == 1) fprintf(stdout, "Hydro starts godunov.\n");
|
|---|
| 280 | if ((H.nstep % 2) == 0) {
|
|---|
| 281 | hydro_godunov(1, dt, H, &Hv, &Hw_godunov, &Hvw_godunov);
|
|---|
| 282 | // hydro_godunov(2, dt, H, &Hv, &Hw, &Hvw);
|
|---|
| 283 | } else {
|
|---|
| 284 | hydro_godunov(2, dt, H, &Hv, &Hw_godunov, &Hvw_godunov);
|
|---|
| 285 | // hydro_godunov(1, dt, H, &Hv, &Hw, &Hvw);
|
|---|
| 286 | }
|
|---|
| 287 | end_iter = dcclock();
|
|---|
| 288 | cellPerCycle = (double) (H.globnx * H.globny) / (end_iter - start_iter) / 1000000.0L;
|
|---|
| 289 | avgCellPerCycle += cellPerCycle;
|
|---|
| 290 | nbCycle++;
|
|---|
| 291 |
|
|---|
| 292 | H.nstep++;
|
|---|
| 293 | H.t += dt;
|
|---|
| 294 | {
|
|---|
| 295 | real_t iter_time = (real_t) (end_iter - start_iter);
|
|---|
| 296 | #ifdef MPI
|
|---|
| 297 | long flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t;
|
|---|
| 298 | start = cclock();
|
|---|
| 299 | #if FTI==0
|
|---|
| 300 | MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
|
|---|
| 301 | MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
|
|---|
| 302 | MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
|
|---|
| 303 | MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
|
|---|
| 304 | #endif
|
|---|
| 305 | #if FTI>0
|
|---|
| 306 | MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD);
|
|---|
| 307 | MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD);
|
|---|
| 308 | MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD);
|
|---|
| 309 | MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD);
|
|---|
| 310 | #endif
|
|---|
| 311 | // if (H.mype == 1)
|
|---|
| 312 | // printf("%ld %ld %ld %ld %ld %ld %ld %ld \n", flopsAri, flopsSqr, flopsMin, flopsTra, flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t);
|
|---|
| 313 | flops = flopsAri_t * FLOPSARI + flopsSqr_t * FLOPSSQR + flopsMin_t * FLOPSMIN + flopsTra_t * FLOPSTRA;
|
|---|
| 314 | end = cclock();
|
|---|
| 315 | functim[TIM_ALLRED] += ccelaps(start, end);
|
|---|
| 316 | #else
|
|---|
| 317 | flops = flopsAri * FLOPSARI + flopsSqr * FLOPSSQR + flopsMin * FLOPSMIN + flopsTra * FLOPSTRA;
|
|---|
| 318 | #endif
|
|---|
| 319 | nbFLOPS++;
|
|---|
| 320 |
|
|---|
| 321 | if (flops > 0) {
|
|---|
| 322 | if (iter_time > 1.e-9) {
|
|---|
| 323 | double mflops = (double) flops / (double) 1.e+6 / iter_time;
|
|---|
| 324 | MflopsSUM += mflops;
|
|---|
| 325 | sprintf(outnum, "%s {%.2f Mflops %ld Ops} (%.3fs)", outnum, mflops, flops, iter_time);
|
|---|
| 326 | }
|
|---|
| 327 | } else {
|
|---|
| 328 | sprintf(outnum, "%s (%.3fs)", outnum, iter_time);
|
|---|
| 329 | }
|
|---|
| 330 | }
|
|---|
| 331 | if (time_output == 0 && H.noutput > 0) {
|
|---|
| 332 | if ((H.nstep % H.noutput) == 0) {
|
|---|
| 333 | vtkfile(++nvtk, H, &Hv);
|
|---|
| 334 | sprintf(outnum, "%s [%04d]", outnum, nvtk);
|
|---|
| 335 | }
|
|---|
| 336 | } else {
|
|---|
| 337 | if (time_output == 1 && H.t >= next_output_time) {
|
|---|
| 338 | vtkfile(++nvtk, H, &Hv);
|
|---|
| 339 | next_output_time = next_output_time + H.dtoutput;
|
|---|
| 340 | sprintf(outnum, "%s [%04d]", outnum, nvtk);
|
|---|
| 341 | }
|
|---|
| 342 | }
|
|---|
| 343 | if (H.mype == 0) {
|
|---|
| 344 | fprintf(stdout, "--> step=%4d, %12.5e, %10.5e %.3lf MC/s%s\n", H.nstep, H.t, dt, cellPerCycle, outnum);
|
|---|
| 345 | fflush(stdout);
|
|---|
| 346 | }
|
|---|
| 347 | #ifdef MPI
|
|---|
| 348 | #if FTI==1
|
|---|
| 349 | FTI_Snapshot();
|
|---|
| 350 | #endif
|
|---|
| 351 | #endif
|
|---|
| 352 | } // while
|
|---|
| 353 | end_time = dcclock();
|
|---|
| 354 |
|
|---|
| 355 | // Deallocate work spaces
|
|---|
| 356 | deallocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov);
|
|---|
| 357 | compute_deltat_clean_mem(H, &Hw_deltat, &Hvw_deltat);
|
|---|
| 358 |
|
|---|
| 359 | hydro_finish(H, &Hv);
|
|---|
| 360 | elaps = (double) (end_time - start_time);
|
|---|
| 361 | timeToString(outnum, elaps);
|
|---|
| 362 | if (H.mype == 0) {
|
|---|
| 363 | fprintf(stdout, "Hydro ends in %ss (%.3lf) <%.2lf MFlops>.\n", outnum, elaps, (float) (MflopsSUM / nbFLOPS));
|
|---|
| 364 | fprintf(stdout, " ");
|
|---|
| 365 | }
|
|---|
| 366 | if (H.nproc == 1) {
|
|---|
| 367 | int sizeFmt = sizeLabel(functim, TIM_END);
|
|---|
| 368 | printTimingsLabel(TIM_END, sizeFmt);
|
|---|
| 369 | fprintf(stdout, "\n");
|
|---|
| 370 | if (sizeof(real_t) == sizeof(double)) {
|
|---|
| 371 | fprintf(stdout, "PE0_DP ");
|
|---|
| 372 | } else {
|
|---|
| 373 | fprintf(stdout, "PE0_SP ");
|
|---|
| 374 | }
|
|---|
| 375 | printTimings(functim, TIM_END, sizeFmt);
|
|---|
| 376 | fprintf(stdout, "\n");
|
|---|
| 377 | fprintf(stdout, "%% ");
|
|---|
| 378 | percentTimings(functim, TIM_END);
|
|---|
| 379 | printTimings(functim, TIM_END, sizeFmt);
|
|---|
| 380 | fprintf(stdout, "\n");
|
|---|
| 381 | }
|
|---|
| 382 | #ifdef MPI
|
|---|
| 383 | if (H.nproc > 1) {
|
|---|
| 384 | double timMAX[TIM_END];
|
|---|
| 385 | double timMIN[TIM_END];
|
|---|
| 386 | double timSUM[TIM_END];
|
|---|
| 387 | #if FTI==0
|
|---|
| 388 | MPI_Allreduce(functim, timMAX, TIM_END, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
|
|---|
| 389 | MPI_Allreduce(functim, timMIN, TIM_END, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
|
|---|
| 390 | MPI_Allreduce(functim, timSUM, TIM_END, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
|---|
| 391 | #endif
|
|---|
| 392 | #if FTI>0
|
|---|
| 393 | MPI_Allreduce(functim, timMAX, TIM_END, MPI_DOUBLE, MPI_MAX, FTI_COMM_WORLD);
|
|---|
| 394 | MPI_Allreduce(functim, timMIN, TIM_END, MPI_DOUBLE, MPI_MIN, FTI_COMM_WORLD);
|
|---|
| 395 | MPI_Allreduce(functim, timSUM, TIM_END, MPI_DOUBLE, MPI_SUM, FTI_COMM_WORLD);
|
|---|
| 396 | #endif
|
|---|
| 397 | if (H.mype == 0) {
|
|---|
| 398 | int sizeFmt = sizeLabel(timMAX, TIM_END);
|
|---|
| 399 | printTimingsLabel(TIM_END, sizeFmt);
|
|---|
| 400 | fprintf(stdout, "\n");
|
|---|
| 401 | fprintf(stdout, "MIN ");
|
|---|
| 402 | printTimings(timMIN, TIM_END, sizeFmt);
|
|---|
| 403 | fprintf(stdout, "\n");
|
|---|
| 404 | fprintf(stdout, "MAX ");
|
|---|
| 405 | printTimings(timMAX, TIM_END, sizeFmt);
|
|---|
| 406 | fprintf(stdout, "\n");
|
|---|
| 407 | fprintf(stdout, "AVG ");
|
|---|
| 408 | avgTimings(timSUM, TIM_END, H.nproc);
|
|---|
| 409 | printTimings(timSUM, TIM_END, sizeFmt);
|
|---|
| 410 | fprintf(stdout, "\n");
|
|---|
| 411 | }
|
|---|
| 412 | }
|
|---|
| 413 | #endif
|
|---|
| 414 | if (H.mype == 0) {
|
|---|
| 415 | fprintf(stdout, "Average MC/s: %.3lf\n", (double)(avgCellPerCycle / nbCycle));
|
|---|
| 416 | }
|
|---|
| 417 |
|
|---|
| 418 | #ifdef MPI
|
|---|
| 419 | #if FTI>0
|
|---|
| 420 | FTI_Finalize();
|
|---|
| 421 | #endif
|
|---|
| 422 | MPI_Finalize();
|
|---|
| 423 | #endif
|
|---|
| 424 | return 0;
|
|---|
| 425 | }
|
|---|