| [788bdd2] | 1 | #include "XSbench_header.h"
|
|---|
| 2 |
|
|---|
| 3 | #ifdef MPI
|
|---|
| 4 | #include<mpi.h>
|
|---|
| 5 | #endif
|
|---|
| 6 |
|
|---|
| 7 | int main( int argc, char* argv[] )
|
|---|
| 8 | {
|
|---|
| 9 | // =====================================================================
|
|---|
| 10 | // Initialization & Command Line Read-In
|
|---|
| 11 | // =====================================================================
|
|---|
| 12 | int version = 13;
|
|---|
| 13 | int mype = 0;
|
|---|
| 14 | int max_procs = omp_get_num_procs();
|
|---|
| 15 | int i, thread, mat;
|
|---|
| 16 | unsigned long seed;
|
|---|
| 17 | double omp_start, omp_end, p_energy;
|
|---|
| 18 | unsigned long long vhash = 0;
|
|---|
| 19 | int nprocs;
|
|---|
| 20 |
|
|---|
| 21 | #ifdef MPI
|
|---|
| 22 | MPI_Status stat;
|
|---|
| 23 | MPI_Init(&argc, &argv);
|
|---|
| 24 | MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
|---|
| 25 | MPI_Comm_rank(MPI_COMM_WORLD, &mype);
|
|---|
| 26 | #endif
|
|---|
| 27 |
|
|---|
| 28 | // rand() is only used in the serial initialization stages.
|
|---|
| 29 | // A custom RNG is used in parallel portions.
|
|---|
| 30 | #ifdef VERIFICATION
|
|---|
| 31 | srand(26);
|
|---|
| 32 | #else
|
|---|
| 33 | srand(time(NULL));
|
|---|
| 34 | #endif
|
|---|
| 35 |
|
|---|
| 36 | // Process CLI Fields -- store in "Inputs" structure
|
|---|
| 37 | Inputs in = read_CLI( argc, argv );
|
|---|
| 38 |
|
|---|
| 39 | // Set number of OpenMP Threads
|
|---|
| 40 | omp_set_num_threads(in.nthreads);
|
|---|
| 41 |
|
|---|
| 42 | // Print-out of Input Summary
|
|---|
| 43 | if( mype == 0 )
|
|---|
| 44 | print_inputs( in, nprocs, version );
|
|---|
| 45 |
|
|---|
| 46 | // =====================================================================
|
|---|
| 47 | // Prepare Nuclide Energy Grids, Unionized Energy Grid, & Material Data
|
|---|
| 48 | // =====================================================================
|
|---|
| 49 |
|
|---|
| 50 | // Allocate & fill energy grids
|
|---|
| 51 | #ifndef BINARY_READ
|
|---|
| 52 | if( mype == 0) printf("Generating Nuclide Energy Grids...\n");
|
|---|
| 53 | #endif
|
|---|
| 54 |
|
|---|
| 55 | NuclideGridPoint ** nuclide_grids = gpmatrix(in.n_isotopes,in.n_gridpoints);
|
|---|
| 56 |
|
|---|
| 57 | #ifdef VERIFICATION
|
|---|
| 58 | generate_grids_v( nuclide_grids, in.n_isotopes, in.n_gridpoints );
|
|---|
| 59 | #else
|
|---|
| 60 | generate_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints );
|
|---|
| 61 | #endif
|
|---|
| 62 |
|
|---|
| 63 | // Sort grids by energy
|
|---|
| 64 | #ifndef BINARY_READ
|
|---|
| 65 | if( mype == 0) printf("Sorting Nuclide Energy Grids...\n");
|
|---|
| 66 | sort_nuclide_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints );
|
|---|
| 67 | #endif
|
|---|
| 68 |
|
|---|
| 69 | // Prepare Unionized Energy Grid Framework
|
|---|
| 70 | #ifndef BINARY_READ
|
|---|
| 71 | GridPoint * energy_grid = generate_energy_grid( in.n_isotopes,
|
|---|
| 72 | in.n_gridpoints, nuclide_grids );
|
|---|
| 73 | #else
|
|---|
| 74 | GridPoint * energy_grid = (GridPoint *)malloc( in.n_isotopes *
|
|---|
| 75 | in.n_gridpoints * sizeof( GridPoint ) );
|
|---|
| 76 | int * index_data = (int *) malloc( in.n_isotopes * in.n_gridpoints
|
|---|
| 77 | * in.n_isotopes * sizeof(int));
|
|---|
| 78 | for( i = 0; i < in.n_isotopes*in.n_gridpoints; i++ )
|
|---|
| 79 | energy_grid[i].xs_ptrs = &index_data[i*in.n_isotopes];
|
|---|
| 80 | #endif
|
|---|
| 81 |
|
|---|
| 82 | // Double Indexing. Filling in energy_grid with pointers to the
|
|---|
| 83 | // nuclide_energy_grids.
|
|---|
| 84 | #ifndef BINARY_READ
|
|---|
| 85 | set_grid_ptrs( energy_grid, nuclide_grids, in.n_isotopes, in.n_gridpoints );
|
|---|
| 86 | #endif
|
|---|
| 87 |
|
|---|
| 88 | #ifdef BINARY_READ
|
|---|
| 89 | if( mype == 0 ) printf("Reading data from \"XS_data.dat\" file...\n");
|
|---|
| 90 | binary_read(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid);
|
|---|
| 91 | #endif
|
|---|
| 92 |
|
|---|
| 93 | // Get material data
|
|---|
| 94 | if( mype == 0 )
|
|---|
| 95 | printf("Loading Mats...\n");
|
|---|
| 96 | int *num_nucs = load_num_nucs(in.n_isotopes);
|
|---|
| 97 | int **mats = load_mats(num_nucs, in.n_isotopes);
|
|---|
| 98 |
|
|---|
| 99 | #ifdef VERIFICATION
|
|---|
| 100 | double **concs = load_concs_v(num_nucs);
|
|---|
| 101 | #else
|
|---|
| 102 | double **concs = load_concs(num_nucs);
|
|---|
| 103 | #endif
|
|---|
| 104 |
|
|---|
| 105 | #ifdef BINARY_DUMP
|
|---|
| 106 | if( mype == 0 ) printf("Dumping data to binary file...\n");
|
|---|
| 107 | binary_dump(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid);
|
|---|
| 108 | if( mype == 0 ) printf("Binary file \"XS_data.dat\" written! Exiting...\n");
|
|---|
| 109 | return 0;
|
|---|
| 110 | #endif
|
|---|
| 111 |
|
|---|
| 112 | // =====================================================================
|
|---|
| 113 | // Cross Section (XS) Parallel Lookup Simulation Begins
|
|---|
| 114 | // =====================================================================
|
|---|
| 115 |
|
|---|
| 116 | // Outer benchmark loop can loop through all possible # of threads
|
|---|
| 117 | #ifdef BENCHMARK
|
|---|
| 118 | for( int bench_n = 1; bench_n <=omp_get_num_procs(); bench_n++ )
|
|---|
| 119 | {
|
|---|
| 120 | in.nthreads = bench_n;
|
|---|
| 121 | omp_set_num_threads(in.nthreads);
|
|---|
| 122 | #endif
|
|---|
| 123 |
|
|---|
| 124 | if( mype == 0 )
|
|---|
| 125 | {
|
|---|
| 126 | printf("\n");
|
|---|
| 127 | border_print();
|
|---|
| 128 | center_print("SIMULATION", 79);
|
|---|
| 129 | border_print();
|
|---|
| 130 | }
|
|---|
| 131 |
|
|---|
| 132 | omp_start = omp_get_wtime();
|
|---|
| 133 |
|
|---|
| 134 | //initialize papi with one thread (master) here
|
|---|
| 135 | #ifdef PAPI
|
|---|
| 136 | if ( PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT){
|
|---|
| 137 | fprintf(stderr, "PAPI library init error!\n");
|
|---|
| 138 | exit(1);
|
|---|
| 139 | }
|
|---|
| 140 | #endif
|
|---|
| 141 |
|
|---|
| 142 | // OpenMP compiler directives - declaring variables as shared or private
|
|---|
| 143 | #pragma omp parallel default(none) \
|
|---|
| 144 | private(i, thread, p_energy, mat, seed) \
|
|---|
| 145 | shared( max_procs, in, energy_grid, nuclide_grids, \
|
|---|
| 146 | mats, concs, num_nucs, mype, vhash)
|
|---|
| 147 | {
|
|---|
| 148 | // Initialize parallel PAPI counters
|
|---|
| 149 | #ifdef PAPI
|
|---|
| 150 | int eventset = PAPI_NULL;
|
|---|
| 151 | int num_papi_events;
|
|---|
| 152 | #pragma omp critical
|
|---|
| 153 | {
|
|---|
| 154 | counter_init(&eventset, &num_papi_events);
|
|---|
| 155 | }
|
|---|
| 156 | #endif
|
|---|
| 157 |
|
|---|
| 158 | double macro_xs_vector[5];
|
|---|
| 159 | double * xs = (double *) calloc(5, sizeof(double));
|
|---|
| 160 |
|
|---|
| 161 | // Initialize RNG seeds for threads
|
|---|
| 162 | thread = omp_get_thread_num();
|
|---|
| 163 | seed = (thread+1)*19+17;
|
|---|
| 164 |
|
|---|
| 165 | // XS Lookup Loop
|
|---|
| 166 | #pragma omp for schedule(dynamic)
|
|---|
| 167 | for( i = 0; i < in.lookups; i++ )
|
|---|
| 168 | {
|
|---|
| 169 | // Status text
|
|---|
| 170 | if( INFO && mype == 0 && thread == 0 && i % 1000 == 0 )
|
|---|
| 171 | printf("\rCalculating XS's... (%.0lf%% completed)",
|
|---|
| 172 | (i / ( (double)in.lookups / (double) in.nthreads ))
|
|---|
| 173 | / (double) in.nthreads * 100.0);
|
|---|
| 174 |
|
|---|
| 175 | // Randomly pick an energy and material for the particle
|
|---|
| 176 | #ifdef VERIFICATION
|
|---|
| 177 | #pragma omp critical
|
|---|
| 178 | {
|
|---|
| 179 | p_energy = rn_v();
|
|---|
| 180 | mat = pick_mat(&seed);
|
|---|
| 181 | }
|
|---|
| 182 | #else
|
|---|
| 183 | p_energy = rn(&seed);
|
|---|
| 184 | mat = pick_mat(&seed);
|
|---|
| 185 | #endif
|
|---|
| 186 |
|
|---|
| 187 | // debugging
|
|---|
| 188 | //printf("E = %lf mat = %d\n", p_energy, mat);
|
|---|
| 189 |
|
|---|
| 190 | // This returns the macro_xs_vector, but we're not going
|
|---|
| 191 | // to do anything with it in this program, so return value
|
|---|
| 192 | // is written over.
|
|---|
| 193 | calculate_macro_xs( p_energy, mat, in.n_isotopes,
|
|---|
| 194 | in.n_gridpoints, num_nucs, concs,
|
|---|
| 195 | energy_grid, nuclide_grids, mats,
|
|---|
| 196 | macro_xs_vector );
|
|---|
| 197 |
|
|---|
| 198 | // Copy results from above function call onto heap
|
|---|
| 199 | // so that compiler cannot optimize function out
|
|---|
| 200 | // (only occurs if -flto flag is used)
|
|---|
| 201 | memcpy(xs, macro_xs_vector, 5*sizeof(double));
|
|---|
| 202 |
|
|---|
| 203 | // Verification hash calculation
|
|---|
| 204 | // This method provides a consistent hash accross
|
|---|
| 205 | // architectures and compilers.
|
|---|
| 206 | #ifdef VERIFICATION
|
|---|
| 207 | char line[256];
|
|---|
| 208 | sprintf(line, "%.5lf %d %.5lf %.5lf %.5lf %.5lf %.5lf",
|
|---|
| 209 | p_energy, mat,
|
|---|
| 210 | macro_xs_vector[0],
|
|---|
| 211 | macro_xs_vector[1],
|
|---|
| 212 | macro_xs_vector[2],
|
|---|
| 213 | macro_xs_vector[3],
|
|---|
| 214 | macro_xs_vector[4]);
|
|---|
| 215 | unsigned long long vhash_local = hash(line, 10000);
|
|---|
| 216 | #pragma omp atomic
|
|---|
| 217 | vhash += vhash_local;
|
|---|
| 218 | #endif
|
|---|
| 219 | }
|
|---|
| 220 |
|
|---|
| 221 | // Prints out thread local PAPI counters
|
|---|
| 222 | #ifdef PAPI
|
|---|
| 223 | if( mype == 0 && thread == 0 )
|
|---|
| 224 | {
|
|---|
| 225 | printf("\n");
|
|---|
| 226 | border_print();
|
|---|
| 227 | center_print("PAPI COUNTER RESULTS", 79);
|
|---|
| 228 | border_print();
|
|---|
| 229 | printf("Count \tSmybol \tDescription\n");
|
|---|
| 230 | }
|
|---|
| 231 | {
|
|---|
| 232 | #pragma omp barrier
|
|---|
| 233 | }
|
|---|
| 234 | counter_stop(&eventset, num_papi_events);
|
|---|
| 235 | #endif
|
|---|
| 236 |
|
|---|
| 237 | }
|
|---|
| 238 |
|
|---|
| 239 | #ifndef PAPI
|
|---|
| 240 | if( mype == 0)
|
|---|
| 241 | {
|
|---|
| 242 | printf("\n" );
|
|---|
| 243 | printf("Simulation complete.\n" );
|
|---|
| 244 | }
|
|---|
| 245 | #endif
|
|---|
| 246 |
|
|---|
| 247 | omp_end = omp_get_wtime();
|
|---|
| 248 |
|
|---|
| 249 | // Print / Save Results and Exit
|
|---|
| 250 | print_results( in, mype, omp_end-omp_start, nprocs, vhash );
|
|---|
| 251 |
|
|---|
| 252 | #ifdef BENCHMARK
|
|---|
| 253 | }
|
|---|
| 254 | #endif
|
|---|
| 255 |
|
|---|
| 256 | #ifdef MPI
|
|---|
| 257 | MPI_Finalize();
|
|---|
| 258 | #endif
|
|---|
| 259 |
|
|---|
| 260 | return 0;
|
|---|
| 261 | }
|
|---|