source: CIVL/examples/omp/omp.cvh@ bb03188

main test-branch
Last change on this file since bb03188 was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 12.3 KB
Line 
1// global variables and procedures shared by all threads.
2
3/********************* Types *************/
4
/* Simple lock handle; modeled as an untyped pointer. */
typedef void *omp_lock_t;

/* Nestable lock handle; also modeled as an untyped pointer. */
typedef void *omp_nest_lock_t;
8
/* Loop schedule kinds accepted by omp_set_schedule / reported by omp_get_schedule. */
enum omp_sched_t {
  omp_sched_static = 1,
  omp_sched_dynamic = 2,
  omp_sched_guided = 3,
  omp_sched_auto = 4
};
typedef enum omp_sched_t omp_sched_t;
15
/* Thread affinity policies, as stored in OMP_PROC_BIND and returned by omp_get_proc_bind. */
enum omp_proc_bind_t {
  omp_proc_bind_false = 0,
  omp_proc_bind_true = 1,
  omp_proc_bind_master = 2,
  omp_proc_bind_close = 3,
  omp_proc_bind_spread = 4
};
typedef enum omp_proc_bind_t omp_proc_bind_t;
23
24/********************* Internal Control Variables *************/
25
26/* dyn-var: whether dynamic adjustment of the number of threads is enabled for encountered
27parallel regions. There is one copy of this ICV per data environment.
28The initial value of dyn-var is implementation defined if the implementation supports
29dynamic adjustment of the number of threads; otherwise, the initial value is false. */
30int OMP_DYNAMIC = 0; //
31
32/* nest-var: whether nested parallelism is enabled for encountered parallel regions.
33There is one copy of this ICV per data environment. */
34int OMP_NESTED = 0;
35
36/* nthreads-var: controls the number of threads requested for encountered parallel regions.
37There is one copy of this ICV per data environment. */
38int OMP_NUM_THREADS;
39
40/* thread-limit-var: controls the maximum number of threads participating in the contention group.
41There is one copy of this ICV per data environment. */
42int OMP_THREAD_LIMIT;
43
44/* max-active-levels-var: controls the maximum number of nested active parallel regions.
45There is one copy of this ICV per data environment.
46The initial value of max-active-levels-var is the number of levels of parallelism that
47the implementation supports. */
48int OMP_MAX_ACTIVE_LEVELS;
49
50/* place-partition-var : controls the place partition available to the execution
51environment for encountered parallel regions.
52There is one copy of this ICV per data environment. */
53int OMP_PLACES;
54
55/* the number of nested, active parallel regions enclosing the current task such that
56all of the parallel regions are enclosed by the outermost initial task region on
57the current device. There is one copy of this ICV per data environment. */
58int active_levels_var = 0;
59
60/* the number of nested parallel regions enclosing the current task such that all of
61the parallel regions are enclosed by the outermost initial task region on the current
62device. There is one copy of this ICV per data environment. */
63int levels_var = 0;
64
65/* bind-var: controls the binding of OpenMP threads to places. When binding is requested, the
66variable indicates that the execution environment is advised not to move threads between
67places. The variable can also provide default thread affinity policies.
68There is one copy of this ICV per data environment. */
69omp_proc_bind_t OMP_PROC_BIND;
70
71/* The following ICVs store values that affect the operation of loop regions. */
72
73/* run-sched-var: controls the schedule that the runtime schedule clause uses for loop regions.
74There is one copy of this ICV per data environment. */
75omp_sched_t OMP_SCHEDULE;
76
77/* the chunk size */
78int CHUNK_SIZE;
79
80/* controls the implementation defined default scheduling of loop regions.
81There is one copy of this ICV per data environment. */
82omp_sched_t def_sched_var;
83
84/* stacksize-var: controls the stack size for threads that the OpenMP implementation
85creates. There is one copy of this ICV per data environment. */
86int OMP_STACKSIZE;
87
88/* wait-policy-var: controls the desired behavior of waiting threads.
89There is one copy of this ICV per data environment. */
90int OMP_WAIT_POLICY;
91
92/* cancel-var : controls the desired behavior of the cancel construct and cancellation points.
93There is one copy of this ICV per data environment. */
94int OMP_CANCELLATION = 0;
95
96/* default-device-var: controls the default target device.
97There is one copy of this ICV per data environment. */
98int OMP_DEFAULT_DEVICE;
99
100/********************* implicit variables *************/
101int in_barrier[1024];
102int num_in_barrier = 0;
103
104/****** routines for modifying and retrieving the values of ICVs ******/
105
106/*void omp_set_dynamic(_Bool value) {
107 OMP_DYNAMIC = value;
108}*/
109
110/*_Bool omp_get_dynamic() {
111 return OMP_DYNAMIC;
112}*/
113
114/*void omp_set_nested(_Bool value) {
115 OMP_NESTED = value;
116}*/
117
118/*_Bool omp_get_nested() {
119 return OMP_NESTED;
120}*/
121
122/* affects the number of threads to be used for subsequent parallel regions that do not
123specify a num_threads clause, by setting the value of the first element of the
124nthreads-var ICV of the current task. */
125void omp_set_num_threads(int value) {
126 OMP_NUM_THREADS = value;
127 printf("NUM_THREADS is %d\n", OMP_NUM_THREADS);
128}
129
130/* returns an upper bound on the number of threads that could be used to form
131a new team if a parallel construct without a num_threads clause were encountered
132after execution returns from this routine. */
133int omp_get_max_threads() {
134 return OMP_NUM_THREADS;
135}
136
137/* returns the number of threads in the current team. */
138int omp_get_num_threads() {
139 return OMP_NUM_THREADS;
140}
141
/* Number of processors available to the device.
 * TODO: placeholder -- always reports a single processor. */
int omp_get_num_procs() {
  const int num_procs = 1;

  return num_procs;
}
146
147/* returns true if the active-levels-var ICV is greater than zero;
148otherwise, it returns false. */
149int omp_in_parallel() {
150 return active_levels_var;
151}
152
/* In OpenMP this routine enables or disables dynamic adjustment of the number
 * of threads for subsequent parallel regions by setting the dyn-var ICV.
 *
 * Dynamic adjustment is not supported here, so the call is a no-op:
 * dynamic_threads is ignored and OMP_DYNAMIC is left unchanged. */
void omp_set_dynamic(int dynamic_threads) {
  /* intentionally empty: dynamic adjustment is not supported */
}
159
160/* returns the value of the dyn-var ICV, which determines whether dynamic
161adjustment of the number of threads is enabled or disabled. */
162int omp_get_dynamic() {
163 return OMP_DYNAMIC;
164}
165
166/* returns the value of the cancel-var ICV, which controls the behavior of
167the cancel construct and cancellation points. */
168int omp_get_cancellation() {
169 return OMP_CANCELLATION;
170}
171
172/* enables or disables nested parallelism, by setting the nest-var ICV. */
173void omp_set_nested(int nested) {
174 OMP_NESTED = nested;
175}
176
177/* returns the value of the nest-var ICV, which determines if nested
178parallelism is enabled or disabled. */
179int omp_get_nested(void) {
180 return OMP_NESTED;
181}
182
183/* affects the schedule that is applied when runtime is used as schedule kind,
184by setting the value of the run-sched-var ICV.
185For the schedule types static, dynamic, and guided the chunk_size is set to the value
186of the second argument, or to the default chunk_size if the value of the second
187argument is less than 1; for the schedule type auto the second argument has no meaning;
188for implementation specific schedule types, the values and associated meanings of
189the second argument are implementation defined. */
190void omp_set_schedule(omp_sched_t kind, int modifier) {
191 OMP_SCHEDULE = kind;
192 /*switch(kind) {
193 case omp_sched_static:
194 case omp_sched_dynamic:
195 case omp_sched_guided:
196 CHUNK_SIZE = modifier;
197 break;
198 default:
199 }*/
200}
201
202/* returns the schedule that is applied when the runtime schedule is used. */
203void omp_get_schedule(omp_sched_t * kind, int * modifier) {
204 *kind = OMP_SCHEDULE;
205 *modifier = CHUNK_SIZE;
206}
207
208/* returns the maximum number of OpenMP threads available on the device. */
209int omp_get_thread_limit() {
210 return OMP_THREAD_LIMIT;
211}
212
213/* imits the number of nested active parallel regions on the device, by setting the
214max-active-levels-var ICV. */
215void omp_set_max_active_levels (int max_levels) {
216 OMP_MAX_ACTIVE_LEVELS = max_levels;
217}
218
219/* returns the value of the max-active-levels-var ICV, which determines the
220maximum number of nested active parallel regions on the device. */
221int omp_get_max_active_levels() {
222 return OMP_MAX_ACTIVE_LEVELS;
223}
224
225/* returns the value of the levels-var ICV. */
226int omp_get_level() {
227 return levels_var;
228}
229
230/* returns, for a given nested level of the current thread,
231the thread number of the ancestor of the current thread.
232returns the thread number of the ancestor at a given nest level of the current thread or
233the thread number of the current thread. If the requested nest level is outside the range
234of 0 and the nest level of the current thread, as returned by the omp_get_level routine,
235the routine returns -1. */
236int omp_get_ancestor_thread_num(int level) {
237 if(levels_var < level || level < 0)
238 return -1;
239 return 0; //TODO
240}
241
242/* returns, for a given nested level of the current thread, the size of the thread team to
243which the ancestor or the current thread belongs.
244returns the size of the thread team to which the ancestor or the current thread belongs.
245If the requested nested level is outside the range of 0 and the nested level of the current
246thread, as returned by the omp_get_level routine, the routine returns -1.
247Inactive parallel regions are regarded like active parallel
248regions executed with one thread. */
249int omp_get_team_size(int level) {
250 if(levels_var < level || level < 0)
251 return -1;
252 return OMP_NUM_THREADS; //TODO
253}
254
255/* returns the value of the active-level-vars ICV.
256return the number of nested, active parallel regions enclosing the current task such
257that all of the parallel regions are enclosed by the outermost initial task region on
258the current device. */
259int omp_get_active_level() {
260 return active_levels_var;
261}
262
/* OpenMP: returns true if the enclosing task region is final, false otherwise.
 * TODO: placeholder -- always reports true (1). */
int omp_in_final() {
  const int is_final = 1;

  return is_final;
}
267
268/* returns the thread affinity policy to be used for the subsequent nested parallel
269regions that do not specify a proc_bind clause. */
270omp_proc_bind_t omp_get_proc_bind(void) {
271 return OMP_PROC_BIND;
272}
273
274/* controls the default target device by assigning the value of the default-device-var ICV.
275When called from within a target region the effect of this routine is unspecified. */
276void omp_set_default_device(int device_num) {
277 OMP_DEFAULT_DEVICE = device_num;//TODO
278}
279
280/* returns the default target device. When called from within a target region the effect of
281this routine is unspecified. */
282int omp_get_default_device() {
283 return OMP_DEFAULT_DEVICE;//TODO
284}
285
/* Number of target devices. Per OpenMP, the effect is unspecified when called
 * from within a target region.
 * TODO: placeholder -- always reports a single device. */
int omp_get_num_devices() {
  const int num_devices = 1;

  return num_devices;
}
291
292/********************* helper functions for loop translation *************/
293
/*
For a team of p threads and a loop of n iterations, let Upper(n/p) be the integer q that
satisfies n = p*q - r, with 0 <= r < p. One compliant implementation of the static schedule
(with no specified chunk_size) would behave as though chunk_size had been specified with
value q. Another compliant implementation would assign q iterations to the first p-r threads,
and q-1 iterations to the remaining r threads. This illustrates why a conforming program
must not rely on the details of a particular implementation.

A compliant implementation of the guided schedule with a chunk_size value of k would assign
q = Upper(n/p) iterations to the first available thread and set n to the larger of n-q and
p*k. It would then repeat this process until q is greater than or equal to the number of
remaining iterations, at which time the remaining iterations form the final chunk.
Another compliant implementation could use the same method, except with
q = Upper(n/(2p)), and set n to the larger of n-q and 2*p*k.
*/
307
308/* Common functions for translating for loops */
309// computes the start index for a given thread
310int __for_start(int tid, int total) {
311 return ((total/OMP_NUM_THREADS) * tid);
312}
313
314//computes the end index for a given thread
315int __for_end(int tid, int total) {
316 return ((total/OMP_NUM_THREADS) * (tid + 1));
317}
318
319//computes the extra index for a given thread
320int __for_extra(int tid, int total) {
321 int offset = total % OMP_NUM_THREADS;
322
323 if(tid < offset) {
324 return total - offset + tid;
325 }
326 return 0;
327}
328
329/********************* barrier implementation *************/
330
331void __barrier_init() {
332 for (int i=0; i<OMP_NUM_THREADS; i++) in_barrier[i] = 0;
333}
334
335// model the synchronization of threads in the same block
336void __barrier(int tid) {
337 $atomic {
338 in_barrier[tid] = 1; // I am in the barrier
339 num_in_barrier++; // increment number in barrier
340 if (num_in_barrier == OMP_NUM_THREADS) { // I am last to enter
341 for (int i=0; i<OMP_NUM_THREADS; i++) in_barrier[i] = 0; // release all
342 num_in_barrier = 0; // now none are in barrier
343 }
344 }
345 $when (in_barrier[tid] == 0); // wait till I am released
346 }
Note: See TracBrowser for help on using the repository browser.