Changes between Version 37 and Version 38 of OpenMPTransformation


Ignore:
Timestamp:
06/10/14 12:15:50 (12 years ago)
Author:
siegel
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • OpenMPTransformation

    v37 v38  
    142142
    143143{{{
    144   #pragma omp parallel ...
    145   S
    146 }}}
    147 
    148 =>
    149 
    150 {{{
     144  float x; // shared
     145  int y; // private
     146  #pragma omp parallel shared(x) private(y)
     147  {
     148    ...
     149    x=5.2;
     150    y=3;
     151   ...
     152  }
     153}}}
     154
     155=>
     156
     157{{{
     158  float x;
     159  int y;
    151160  { // begin parallel construct
    152161    int nthreads = 1+$choose_int(THREAD_MAX);
    153     $omp_gteam = $omp_gteam_create($here, nthreads);
    154     // declare shared variables and create shared objects
     162    $omp_gteam gteam = $omp_gteam_create($here, nthreads);
     163    $omp_gshared x_gshared = $omp_gshared_create(gteam, &x);
    155164
    156165    $parfor (int _tid : 0..nthreads-1) {
    157166      $omp_team team = $omp_team_create($here, gteam, _tid);
    158       // declare local copies of shared variables
    159       // declare private variables
    160 
    161       translate(S) but replace each private variable `x` with `_x`, and
    162         translate access to shared variables using protocols above;
    163       flush any writes to shared variables;
     167      $omp_shared x_shared = $omp_shared_create(team, x_gshared);
     168      int _y; // private variable
     169
     170      ...
     171      { // "x=5.2":
     172        float tmp = 5.2;
     173 
     174        $omp_write($omp_identity_ref(x_shared), &tmp);
     175      }
     176      _y = 3;
     177      ...
     178     $omp_barrier_and_flush(team); // implicit at end of parallel region
     179     $omp_shared_destroy(x_shared);
     180     $omp_team_destroy(team);
    164181    } // end $parfor
     182    $omp_gshared_destroy(x_gshared);
     183    $omp_gteam_destroy(gteam);
    165184  } // end parallel construct
    166185}}}
     
    168187All variables that occur in the parallel construct, i.e., the lexical extent of the parallel construct, must be determined to be either private or shared.   This is determined by the clauses and the default rules as specified in the OpenMP Standard.  Obviously any variable declared within the construct itself must be private.
    169188
    170 For all private variables `x` not declared within the parallel construct, create a new variable of the same type, `_x`.    The new variable is declared within the thread scope.  If `x` is also firstprivate,  then `_x` is initialized with the value of `x`, e.g. `int _x=x;`.  Otherwise, `_x` is uninitialized, so has an undefined value.
     189For all private variables `y` not declared within the parallel construct, create a new variable of the same type, `_y`.    The new variable is declared within the thread scope.  If `y` is also firstprivate,  then `_y` is initialized with the value of `y`, e.g. `int _y=y;`.  Otherwise, `_y` is uninitialized, so has an undefined value.
    171190
    172191=== Translating `for` ===
     
    176195
    177196{{{
    178 // location 23:
    179197#pragma omp parallel for
    180198for (i=0; i<n; i++) 
     
    186204{{{
    187205{
    188   CIVL_omp_loop_info info = $omp_ws_arrive_loop(_ws, 23);
    189 
    190   int numIters = info.numIters;
    191   for (int j=0; j<numIters; j++) {
    192     int i = info.iters[j][0];
    193 
     206  $domain loop_domain = 0..n-1;
     207  $domain my_iters = $omp_arrive_loop(team, loop_domain);
     208
     209  $for (int i : my_iters) {
    194210    translate(S);
    195211  }
    196   barrier_and_flush();
    197 }
    198 }}}
    199 
    200 We can vary the way iterators are chosen to explore different tradeoffs and strategies.  On one extreme, every kind of partition can be explored; on the other, some fixed strategy like round-robin with chunksize 1 can be used.  This only changes the definition of `$omp_ws_arrive_loop`, not the translation above.
     212  $omp_barrier_and_flush(team);
     213}
     214}}}
     215
     216We can vary the way iterations are chosen to explore different tradeoffs and strategies.  On one extreme, every kind of partition can be explored; on the other, some fixed strategy like round-robin with chunksize 1 can be used.  This only changes the definition of `$omp_arrive_loop`, not the translation above.
    201217
    202218{{{
     
    214230{{{
    215231{
    216   CIVL_omp_loop_info info = $omp_ws_arrive_loop(_ws, 78);
    217 
    218   int numIters = info.numIters;
    219   for (int count=0; count<numIters; count++) {
    220     int i = info.iters[count][0];
    221     int j = info.iters[count][1];
    222     int k = info.iters[count][2];
    223 
     232  $domain loop_domain = {0..n-1, 0..m-1, 0..l-1};
     233  $domain my_iters = $omp_arrive_loop(team, loop_domain);
     234
     235  $for (int i, j, k : my_iters) {
    224236    translate(S);
    225237  }
    226   barrier_and_flush();
     238  $omp_barrier_and_flush(team);
    227239}
    228240}}}
     
    241253{{{
    242254{
    243   CIVL_omp_loop_info info = $omp_ws_arrive_loop(_ws, 23);
     255  $domain loop_domain = a..b-1;
     256  $domain my_iters = $omp_arrive_loop(team, loop_domain);
    244257  double _x=0.0, _y=0.0;
    245258
    246   int numIters = info.numIters;
    247   for (int _count=0; _count<numIters; _count++) {
    248     int i = info.iters[_count][0];
    249 
     259  $for (int i : my_iters) {
    250260    translate(S) but replace x with _x and y with _y;
    251261  }
    252   x += _x;
    253   y += _y;
    254   // note: do something with POR so it knows the operations above from
    255   // different threads commute
    256   barrier_and_flush();
     262  $omp_apply_assoc(x_shared, CIVL_SUM, &_x);
     263  $omp_apply_assoc(y_shared, CIVL_SUM, &_y);
     264  $omp_barrier_and_flush(team);
    257265}
    258266}}}
     
    262270=== Translating `sections` ===
    263271
    264 If there are n sections, create n functions: section1, section2, ....  Again the question is how to distribute them among threads and in what order.
    265 As with loops, you really want to check these are independent and only do the interleaving exploration as a last resort.
    266 
    267 {{{
    268 // location 42:
     272Say there are `numSections` sections.  This number is known statically.
     273
     274{{{
    269275#pragma omp sections
    270276#pragma omp section
     
    279285{{{
    280286{
    281   $int_iter iter = $omp_ws_arrive_sections(_ws, 42);
    282 
    283   while ($int_iter_hasNext(iter)) {
    284     int _i = $int_iter_next(iter);
    285 
    286     switch (_i) {
     287  $domain my_secs = $omp_arrive_sections(team, numSections);
     288
     289  $for (int i : my_secs) {
     290    switch (i) {
    287291    case 0: {
    288292      translate(S0);
     
    295299    ...
    296300    } /* end of switch */
    297   } /* end of while loop */
    298   barrier_and_flush();
     301  } /* end of $for loop */
     302  $omp_barrier_and_flush(team);
    299303}
    300304}}}
     
    304308
    305309{{{
    306 // location 33:
    307310#pragma omp single
    308311S
     
    312315
    313316{{{
    314 if ($omp_arrive_single(_ws, 33)) {
     317int owner = $omp_arrive_single(team);
     318
     319if (owner == _tid) {
    315320  translate(S);
    316321}
    317 barrier_and_flush();
     322$omp_barrier_and_flush(team);
    318323}}}
    319324
     
    322327
    323328{{{
    324 // location 58:
    325329#pragma omp barrier
    326330}}}
     
    329333
    330334{{{
    331 $omp_barrier_arrive(_ws, 58);
    332 barrier_and_flush();
     335$omp_barrier_and_flush(team);
    333336}}}
    334337
     
    388391{{{
    389392{
    390   CIVL_omp_loop_info info = $omp_ws_arrive_loop(_ws, 23);
     393  $domain loop_domain = a..b;
     394  $domain my_iters = $omp_arrive_loop(team, loop_domain);
    391395  int order1=a, order2=a;
    392   int numIters = info.numIters;
    393 
    394   for (int _i=0; _i<numIters; _i++) {
    395     int i = info.iters[_i][0];
     396
     397  $for (int i : my_iters) {
    396398     ...
    397399    $when (order1==i) {