| [c0d806d] | 1 | // global variables and procedures shared by all threads.
|
|---|
| 2 |
|
|---|
| [58da37d] | 3 | /********************* Types *************/
|
|---|
| 4 |
|
|---|
/* Handle type for a simple OpenMP lock; represented here as an untyped pointer. */
typedef void *omp_lock_t;

/* Handle type for a nestable OpenMP lock; also represented as an untyped pointer. */
typedef void *omp_nest_lock_t;
|
|---|
| 8 |
|
|---|
/* Loop schedule kinds; the numeric values are fixed explicitly. */
enum omp_sched_t {
  omp_sched_static  = 1,
  omp_sched_dynamic = 2,
  omp_sched_guided  = 3,
  omp_sched_auto    = 4
};
typedef enum omp_sched_t omp_sched_t;
|
|---|
| 15 |
|
|---|
/* Thread affinity (proc_bind) policies; the numeric values are fixed explicitly. */
enum omp_proc_bind_t {
  omp_proc_bind_false  = 0,
  omp_proc_bind_true   = 1,
  omp_proc_bind_master = 2,
  omp_proc_bind_close  = 3,
  omp_proc_bind_spread = 4
};
typedef enum omp_proc_bind_t omp_proc_bind_t;
|
|---|
| 23 |
|
|---|
| 24 | /********************* Internal Control Variables *************/
|
|---|
| 25 |
|
|---|
/*
 * dyn-var: whether dynamic adjustment of the number of threads is enabled for
 * encountered parallel regions. OpenMP specifies one copy per data environment;
 * this file keeps a single global. The specification leaves the initial value
 * implementation defined when dynamic adjustment is supported and false
 * otherwise; it starts at 0 here.
 */
int OMP_DYNAMIC = 0;

/*
 * nest-var: whether nested parallelism is enabled for encountered parallel
 * regions (one copy per data environment in the specification). Starts at 0.
 */
int OMP_NESTED = 0;

/*
 * nthreads-var: the number of threads requested for encountered parallel
 * regions (one copy per data environment in the specification).
 */
int OMP_NUM_THREADS;

/*
 * thread-limit-var: the maximum number of threads participating in the
 * contention group (one copy per data environment in the specification).
 */
int OMP_THREAD_LIMIT;

/*
 * max-active-levels-var: the maximum number of nested active parallel regions
 * (one copy per data environment in the specification). The specification sets
 * its initial value to the number of levels of parallelism the implementation
 * supports; no initializer is given here.
 */
int OMP_MAX_ACTIVE_LEVELS;

/*
 * place-partition-var: the place partition available to the execution
 * environment for encountered parallel regions (one copy per data environment
 * in the specification).
 */
int OMP_PLACES;

/*
 * active-levels-var: the number of nested, active parallel regions enclosing
 * the current task such that all of them are enclosed by the outermost initial
 * task region on the current device. Starts at 0.
 */
int active_levels_var = 0;

/*
 * levels-var: the number of nested parallel regions (active or not) enclosing
 * the current task such that all of them are enclosed by the outermost initial
 * task region on the current device. Starts at 0.
 */
int levels_var = 0;
|
|---|
| 64 |
|
|---|
| 65 | /* bind-var: controls the binding of OpenMP threads to places. When binding is requested, the
|
|---|
| 66 | variable indicates that the execution environment is advised not to move threads between
|
|---|
| 67 | places. The variable can also provide default thread affinity policies.
|
|---|
| 68 | There is one copy of this ICV per data environment. */
|
|---|
| 69 | omp_proc_bind_t OMP_PROC_BIND;
|
|---|
| 70 |
|
|---|
| 71 | /* The following ICVs store values that affect the operation of loop regions. */
|
|---|
| 72 |
|
|---|
| 73 | /* run-sched-var: controls the schedule that the runtime schedule clause uses for loop regions.
|
|---|
| 74 | There is one copy of this ICV per data environment. */
|
|---|
| 75 | omp_sched_t OMP_SCHEDULE;
|
|---|
| 76 |
|
|---|
| 77 | /* the chunk size */
|
|---|
| 78 | int CHUNK_SIZE;
|
|---|
| 79 |
|
|---|
| 80 | /* controls the implementation defined default scheduling of loop regions.
|
|---|
| 81 | There is one copy of this ICV per data environment. */
|
|---|
| 82 | omp_sched_t def_sched_var;
|
|---|
| 83 |
|
|---|
/*
 * stacksize-var: the stack size for threads that the OpenMP implementation
 * creates (one copy per data environment in the specification).
 */
int OMP_STACKSIZE;

/*
 * wait-policy-var: the desired behavior of waiting threads
 * (one copy per data environment in the specification).
 */
int OMP_WAIT_POLICY;

/*
 * cancel-var: the desired behavior of the cancel construct and of
 * cancellation points (one copy per data environment in the specification).
 * Starts at 0.
 */
int OMP_CANCELLATION = 0;

/*
 * default-device-var: the default target device
 * (one copy per data environment in the specification).
 */
int OMP_DEFAULT_DEVICE;
|
|---|
| 99 |
|
|---|
| 100 | /********************* implicit variables *************/
|
|---|
/* Per-thread barrier flag, indexed by thread id: 1 while that thread waits in the barrier. */
int in_barrier[1024];

/* How many threads are currently inside the barrier. */
int num_in_barrier = 0;
|
|---|
| 103 |
|
|---|
| [58da37d] | 104 | /****** routines for modifying and retrieving the values of ICVs ******/
|
|---|
| 105 |
|
|---|
| 106 | /*void omp_set_dynamic(_Bool value) {
|
|---|
| 107 | OMP_DYNAMIC = value;
|
|---|
| 108 | }*/
|
|---|
| 109 |
|
|---|
| 110 | /*_Bool omp_get_dynamic() {
|
|---|
| 111 | return OMP_DYNAMIC;
|
|---|
| 112 | }*/
|
|---|
| 113 |
|
|---|
| 114 | /*void omp_set_nested(_Bool value) {
|
|---|
| 115 | OMP_NESTED = value;
|
|---|
| 116 | }*/
|
|---|
| 117 |
|
|---|
| 118 | /*_Bool omp_get_nested() {
|
|---|
| 119 | return OMP_NESTED;
|
|---|
| 120 | }*/
|
|---|
| 121 |
|
|---|
| 122 | /* affects the number of threads to be used for subsequent parallel regions that do not
|
|---|
| 123 | specify a num_threads clause, by setting the value of the first element of the
|
|---|
| 124 | nthreads-var ICV of the current task. */
|
|---|
| 125 | void omp_set_num_threads(int value) {
|
|---|
| 126 | OMP_NUM_THREADS = value;
|
|---|
| 127 | printf("NUM_THREADS is %d\n", OMP_NUM_THREADS);
|
|---|
| 128 | }
|
|---|
| 129 |
|
|---|
| 130 | /* returns an upper bound on the number of threads that could be used to form
|
|---|
| 131 | a new team if a parallel construct without a num_threads clause were encountered
|
|---|
| 132 | after execution returns from this routine. */
|
|---|
| 133 | int omp_get_max_threads() {
|
|---|
| 134 | return OMP_NUM_THREADS;
|
|---|
| 135 | }
|
|---|
| 136 |
|
|---|
| 137 | /* returns the number of threads in the current team. */
|
|---|
| [c0d806d] | 138 | int omp_get_num_threads() {
|
|---|
| 139 | return OMP_NUM_THREADS;
|
|---|
| 140 | }
|
|---|
| 141 |
|
|---|
/*
 * Number of processors available to the device.
 * TODO: placeholder that always reports a single processor.
 */
int omp_get_num_procs() {
  enum { PLACEHOLDER_PROCS = 1 };
  return PLACEHOLDER_PROCS;
}
|
|---|
| 146 |
|
|---|
| 147 | /* returns true if the active-levels-var ICV is greater than zero;
|
|---|
| 148 | otherwise, it returns false. */
|
|---|
| 149 | int omp_in_parallel() {
|
|---|
| 150 | return active_levels_var;
|
|---|
| 151 | }
|
|---|
| 152 |
|
|---|
/*
 * Would enable or disable dynamic adjustment of the number of threads for
 * subsequent parallel regions by setting the dyn-var ICV.
 *
 * Dynamic adjustment is not supported here, so the request is ignored and no
 * state is modified.
 */
void omp_set_dynamic(int dynamic_threads) {
  (void)dynamic_threads; /* intentionally unused: the call has no effect */
}
|
|---|
| 159 |
|
|---|
| 160 | /* returns the value of the dyn-var ICV, which determines whether dynamic
|
|---|
| 161 | adjustment of the number of threads is enabled or disabled. */
|
|---|
| 162 | int omp_get_dynamic() {
|
|---|
| 163 | return OMP_DYNAMIC;
|
|---|
| 164 | }
|
|---|
| 165 |
|
|---|
| 166 | /* returns the value of the cancel-var ICV, which controls the behavior of
|
|---|
| 167 | the cancel construct and cancellation points. */
|
|---|
| 168 | int omp_get_cancellation() {
|
|---|
| 169 | return OMP_CANCELLATION;
|
|---|
| 170 | }
|
|---|
| 171 |
|
|---|
| 172 | /* enables or disables nested parallelism, by setting the nest-var ICV. */
|
|---|
| 173 | void omp_set_nested(int nested) {
|
|---|
| 174 | OMP_NESTED = nested;
|
|---|
| 175 | }
|
|---|
| 176 |
|
|---|
| 177 | /* returns the value of the nest-var ICV, which determines if nested
|
|---|
| 178 | parallelism is enabled or disabled. */
|
|---|
| 179 | int omp_get_nested(void) {
|
|---|
| 180 | return OMP_NESTED;
|
|---|
| 181 | }
|
|---|
| 182 |
|
|---|
| 183 | /* affects the schedule that is applied when runtime is used as schedule kind,
|
|---|
| 184 | by setting the value of the run-sched-var ICV.
|
|---|
| 185 | For the schedule types static, dynamic, and guided the chunk_size is set to the value
|
|---|
| 186 | of the second argument, or to the default chunk_size if the value of the second
|
|---|
| 187 | argument is less than 1; for the schedule type auto the second argument has no meaning;
|
|---|
| 188 | for implementation specific schedule types, the values and associated meanings of
|
|---|
| 189 | the second argument are implementation defined. */
|
|---|
| 190 | void omp_set_schedule(omp_sched_t kind, int modifier) {
|
|---|
| 191 | OMP_SCHEDULE = kind;
|
|---|
| 192 | /*switch(kind) {
|
|---|
| 193 | case omp_sched_static:
|
|---|
| 194 | case omp_sched_dynamic:
|
|---|
| 195 | case omp_sched_guided:
|
|---|
| 196 | CHUNK_SIZE = modifier;
|
|---|
| 197 | break;
|
|---|
| 198 | default:
|
|---|
| 199 | }*/
|
|---|
| 200 | }
|
|---|
| 201 |
|
|---|
| 202 | /* returns the schedule that is applied when the runtime schedule is used. */
|
|---|
| 203 | void omp_get_schedule(omp_sched_t * kind, int * modifier) {
|
|---|
| 204 | *kind = OMP_SCHEDULE;
|
|---|
| 205 | *modifier = CHUNK_SIZE;
|
|---|
| 206 | }
|
|---|
| 207 |
|
|---|
| 208 | /* returns the maximum number of OpenMP threads available on the device. */
|
|---|
| 209 | int omp_get_thread_limit() {
|
|---|
| 210 | return OMP_THREAD_LIMIT;
|
|---|
| 211 | }
|
|---|
| 212 |
|
|---|
| 213 | /* imits the number of nested active parallel regions on the device, by setting the
|
|---|
| 214 | max-active-levels-var ICV. */
|
|---|
| 215 | void omp_set_max_active_levels (int max_levels) {
|
|---|
| 216 | OMP_MAX_ACTIVE_LEVELS = max_levels;
|
|---|
| 217 | }
|
|---|
| 218 |
|
|---|
| 219 | /* returns the value of the max-active-levels-var ICV, which determines the
|
|---|
| 220 | maximum number of nested active parallel regions on the device. */
|
|---|
| 221 | int omp_get_max_active_levels() {
|
|---|
| 222 | return OMP_MAX_ACTIVE_LEVELS;
|
|---|
| 223 | }
|
|---|
| 224 |
|
|---|
| 225 | /* returns the value of the levels-var ICV. */
|
|---|
| 226 | int omp_get_level() {
|
|---|
| 227 | return levels_var;
|
|---|
| 228 | }
|
|---|
| 229 |
|
|---|
| 230 | /* returns, for a given nested level of the current thread,
|
|---|
| 231 | the thread number of the ancestor of the current thread.
|
|---|
| 232 | returns the thread number of the ancestor at a given nest level of the current thread or
|
|---|
| 233 | the thread number of the current thread. If the requested nest level is outside the range
|
|---|
| 234 | of 0 and the nest level of the current thread, as returned by the omp_get_level routine,
|
|---|
| 235 | the routine returns -1. */
|
|---|
| 236 | int omp_get_ancestor_thread_num(int level) {
|
|---|
| 237 | if(levels_var < level || level < 0)
|
|---|
| 238 | return -1;
|
|---|
| 239 | return 0; //TODO
|
|---|
| 240 | }
|
|---|
| 241 |
|
|---|
| 242 | /* returns, for a given nested level of the current thread, the size of the thread team to
|
|---|
| 243 | which the ancestor or the current thread belongs.
|
|---|
| 244 | returns the size of the thread team to which the ancestor or the current thread belongs.
|
|---|
| 245 | If the requested nested level is outside the range of 0 and the nested level of the current
|
|---|
| 246 | thread, as returned by the omp_get_level routine, the routine returns -1.
|
|---|
| 247 | Inactive parallel regions are regarded like active parallel
|
|---|
| 248 | regions executed with one thread. */
|
|---|
| 249 | int omp_get_team_size(int level) {
|
|---|
| 250 | if(levels_var < level || level < 0)
|
|---|
| 251 | return -1;
|
|---|
| 252 | return OMP_NUM_THREADS; //TODO
|
|---|
| 253 | }
|
|---|
| 254 |
|
|---|
| 255 | /* returns the value of the active-level-vars ICV.
|
|---|
| 256 | return the number of nested, active parallel regions enclosing the current task such
|
|---|
| 257 | that all of the parallel regions are enclosed by the outermost initial task region on
|
|---|
| 258 | the current device. */
|
|---|
| 259 | int omp_get_active_level() {
|
|---|
| 260 | return active_levels_var;
|
|---|
| 261 | }
|
|---|
| 262 |
|
|---|
/*
 * Should report whether the enclosing task region is final.
 * TODO: placeholder that always returns 1.
 */
int omp_in_final() {
  enum { PLACEHOLDER_IN_FINAL = 1 };
  return PLACEHOLDER_IN_FINAL;
}
|
|---|
| 267 |
|
|---|
| 268 | /* returns the thread affinity policy to be used for the subsequent nested parallel
|
|---|
| 269 | regions that do not specify a proc_bind clause. */
|
|---|
| 270 | omp_proc_bind_t omp_get_proc_bind(void) {
|
|---|
| 271 | return OMP_PROC_BIND;
|
|---|
| 272 | }
|
|---|
| 273 |
|
|---|
| 274 | /* controls the default target device by assigning the value of the default-device-var ICV.
|
|---|
| 275 | When called from within a target region the effect of this routine is unspecified. */
|
|---|
| 276 | void omp_set_default_device(int device_num) {
|
|---|
| 277 | OMP_DEFAULT_DEVICE = device_num;//TODO
|
|---|
| 278 | }
|
|---|
| 279 |
|
|---|
| 280 | /* returns the default target device. When called from within a target region the effect of
|
|---|
| 281 | this routine is unspecified. */
|
|---|
| 282 | int omp_get_default_device() {
|
|---|
| 283 | return OMP_DEFAULT_DEVICE;//TODO
|
|---|
| 284 | }
|
|---|
| 285 |
|
|---|
/*
 * Number of target devices. Per OpenMP, the effect is unspecified when called
 * from within a target region.
 * TODO: placeholder that always reports one device.
 */
int omp_get_num_devices() {
  enum { PLACEHOLDER_DEVICES = 1 };
  return PLACEHOLDER_DEVICES;
}
|
|---|
| 291 |
|
|---|
| 292 | /********************* helper functions for loop translation *************/
|
|---|
| 293 |
|
|---|
| 294 | /*
|
|---|
| 295 | For a team of p threads and a loop of n iterations, let   Upper( n⁄p )  be the integer q that
|
|---|
| 296 | satisfies n = p*q - r, with 0 ≤ r < p . One compliant implementation of the static schedule
|
|---|
| 297 | (with no specified chunk_size) would behave as though chunk_size had been specified with
|
|---|
| 298 | value q. Another compliant implementation would assign q iterations to the first p-r threads,
|
|---|
| 299 | and q-1 iterations to the remaining r threads. This illustrates why a conforming program must not rely on the details of a particular implementation.
|
|---|
| 300 | A compliant implementation of the guided schedule with a chunk_size value of k would assign q
|
|---|
| 301 | = Upper(n ⁄ p) iterations to the first available thread and set n to the larger of n-q and p*k.
|
|---|
| 302 | It would then repeat this process until q is greater than or equal to the number of
|
|---|
| 303 | remaining iterations, at which time the remaining iterations form the final chunk.
|
|---|
| 304 | Another compliant implementation could use the same method, except with
|
|---|
| 305 | q = Upper(n⁄(2p)) , and set n to the larger of n-q and 2*p*k.
|
|---|
| 306 | */
|
|---|
| [c0d806d] | 307 |
|
|---|
| 308 | /* Common functions for translating for loops */
|
|---|
| 309 | // computes the start index for a given thread
|
|---|
| 310 | int __for_start(int tid, int total) {
|
|---|
| 311 | return ((total/OMP_NUM_THREADS) * tid);
|
|---|
| 312 | }
|
|---|
| 313 |
|
|---|
| 314 | //computes the end index for a given thread
|
|---|
| 315 | int __for_end(int tid, int total) {
|
|---|
| 316 | return ((total/OMP_NUM_THREADS) * (tid + 1));
|
|---|
| 317 | }
|
|---|
| 318 |
|
|---|
| 319 | //computes the extra index for a given thread
|
|---|
| 320 | int __for_extra(int tid, int total) {
|
|---|
| 321 | int offset = total % OMP_NUM_THREADS;
|
|---|
| 322 |
|
|---|
| 323 | if(tid < offset) {
|
|---|
| 324 | return total - offset + tid;
|
|---|
| 325 | }
|
|---|
| 326 | return 0;
|
|---|
| 327 | }
|
|---|
| 328 |
|
|---|
| [2c44d945] | 329 | /********************* barrier implementation *************/
|
|---|
| [c0d806d] | 330 |
|
|---|
| [2c44d945] | 331 | void __barrier_init() {
|
|---|
| [c0d806d] | 332 | for (int i=0; i<OMP_NUM_THREADS; i++) in_barrier[i] = 0;
|
|---|
| 333 | }
|
|---|
| 334 |
|
|---|
| 335 | // model the synchronization of threads in the same block
|
|---|
| [2c44d945] | 336 | void __barrier(int tid) {
|
|---|
| [c0d806d] | 337 | $atomic {
|
|---|
| 338 | in_barrier[tid] = 1; // I am in the barrier
|
|---|
| 339 | num_in_barrier++; // increment number in barrier
|
|---|
| 340 | if (num_in_barrier == OMP_NUM_THREADS) { // I am last to enter
|
|---|
| 341 | for (int i=0; i<OMP_NUM_THREADS; i++) in_barrier[i] = 0; // release all
|
|---|
| 342 | num_in_barrier = 0; // now none are in barrier
|
|---|
| [58da37d] | 343 | }
|
|---|
| [c0d806d] | 344 | }
|
|---|
| [58da37d] | 345 | $when (in_barrier[tid] == 0); // wait till I am released
|
|---|
| [c0d806d] | 346 | }
|
|---|