Context Navigation

omp.cvh@ bb03188

main test-branch

Last change on this file since bb03188 was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 12.3 KB

Line
// Global variables and procedures shared by all threads.

/******************* Types ***********/

/* Handle for a simple OpenMP lock; represented as a pointer. */
typedef void *omp_lock_t;

/* Handle for a nestable OpenMP lock; represented as a pointer. */
typedef void *omp_nest_lock_t;

/* Schedule kinds accepted by omp_set_schedule and reported by
   omp_get_schedule. */
typedef enum omp_sched_t {
  omp_sched_static = 1,
  omp_sched_dynamic = 2,
  omp_sched_guided = 3,
  omp_sched_auto = 4
} omp_sched_t;

/* Thread affinity policies reported by omp_get_proc_bind. */
typedef enum omp_proc_bind_t {
  omp_proc_bind_false = 0,
  omp_proc_bind_true = 1,
  omp_proc_bind_master = 2,
  omp_proc_bind_close = 3,
  omp_proc_bind_spread = 4
} omp_proc_bind_t;

/******************* Internal Control Variables ***********/

/* dyn-var: whether dynamic adjustment of the number of threads is enabled for
   encountered parallel regions. There is one copy of this ICV per data
   environment.
   The initial value of dyn-var is implementation defined if the implementation
   supports dynamic adjustment of the number of threads; otherwise, the initial
   value is false. */
int OMP_DYNAMIC = 0;

/* nest-var: whether nested parallelism is enabled for encountered parallel
   regions. There is one copy of this ICV per data environment. */
int OMP_NESTED = 0;

/* nthreads-var: controls the number of threads requested for encountered
   parallel regions. There is one copy of this ICV per data environment. */
int OMP_NUM_THREADS;

/* thread-limit-var: controls the maximum number of threads participating in
   the contention group. There is one copy of this ICV per data environment. */
int OMP_THREAD_LIMIT;

/* max-active-levels-var: controls the maximum number of nested active parallel
   regions. There is one copy of this ICV per data environment.
   The initial value of max-active-levels-var is the number of levels of
   parallelism that the implementation supports. */
int OMP_MAX_ACTIVE_LEVELS;

/* place-partition-var: controls the place partition available to the execution
   environment for encountered parallel regions.
   There is one copy of this ICV per data environment. */
int OMP_PLACES;

/* active-levels-var: the number of nested, active parallel regions enclosing
   the current task such that all of the parallel regions are enclosed by the
   outermost initial task region on the current device. There is one copy of
   this ICV per data environment. */
int active_levels_var = 0;

/* levels-var: the number of nested parallel regions enclosing the current task
   such that all of the parallel regions are enclosed by the outermost initial
   task region on the current device. There is one copy of this ICV per data
   environment. */
int levels_var = 0;

65	/* bind-var: controls the binding of OpenMP threads to places. When binding is requested, the
66	variable indicates that the execution environment is advised not to move threads between
67	places. The variable can also provide default thread affinity policies.
68	There is one copy of this ICV per data environment. */
69	omp_proc_bind_t OMP_PROC_BIND;
70
71	/* The following ICVs store values that affect the operation of loop regions. */
72
73	/* run-sched-var: controls the schedule that the runtime schedule clause uses for loop regions.
74	There is one copy of this ICV per data environment. */
75	omp_sched_t OMP_SCHEDULE;
76
77	/* the chunk size */
78	int CHUNK_SIZE;
79
80	/* controls the implementation defined default scheduling of loop regions.
81	There is one copy of this ICV per data environment. */
82	omp_sched_t def_sched_var;
83
/* stacksize-var: controls the stack size for threads that the OpenMP
   implementation creates. There is one copy of this ICV per data
   environment. */
int OMP_STACKSIZE;

/* wait-policy-var: controls the desired behavior of waiting threads.
   There is one copy of this ICV per data environment. */
int OMP_WAIT_POLICY;

/* cancel-var: controls the desired behavior of the cancel construct and
   cancellation points. There is one copy of this ICV per data environment. */
int OMP_CANCELLATION = 0;

/* default-device-var: controls the default target device.
   There is one copy of this ICV per data environment. */
int OMP_DEFAULT_DEVICE;

/******************* implicit variables ***********/

/* in_barrier[t] is 1 while thread t is waiting inside __barrier, 0 otherwise.
   The array has room for 1024 thread ids. */
int in_barrier[1024];

/* Number of threads currently waiting inside __barrier. */
int num_in_barrier = 0;

/**** routines for modifying and retrieving the values of ICVs ****/

/*void omp_set_dynamic(_Bool value) {
OMP_DYNAMIC = value;
}*/

/*_Bool omp_get_dynamic() {
return OMP_DYNAMIC;
}*/

/*void omp_set_nested(_Bool value) {
OMP_NESTED = value;
}*/

/*_Bool omp_get_nested() {
return OMP_NESTED;
}*/

122	/* affects the number of threads to be used for subsequent parallel regions that do not
123	specify a num_threads clause, by setting the value of the first element of the
124	nthreads-var ICV of the current task. */
125	void omp_set_num_threads(int value) {
126	OMP_NUM_THREADS = value;
127	printf("NUM_THREADS is %d\n", OMP_NUM_THREADS);
128	}
129
130	/* returns an upper bound on the number of threads that could be used to form
131	a new team if a parallel construct without a num_threads clause were encountered
132	after execution returns from this routine. */
133	int omp_get_max_threads() {
134	return OMP_NUM_THREADS;
135	}
136
137	/* returns the number of threads in the current team. */
138	int omp_get_num_threads() {
139	return OMP_NUM_THREADS;
140	}
141
/* Returns the number of processors available to the device.
   TODO: placeholder; the model always reports a single processor. */
int omp_get_num_procs(void) {
  return 1;
}

147	/* returns true if the active-levels-var ICV is greater than zero;
148	otherwise, it returns false. */
149	int omp_in_parallel() {
150	return active_levels_var;
151	}
152
/* Enables or disables dynamic adjustment of the number of threads available
   for the execution of subsequent parallel regions by setting the value of
   the dyn-var ICV.
   dynamic_threads: requested setting; ignored, because this model does not
   support dynamic adjustment of the number of threads. OMP_DYNAMIC is left
   unchanged. */
void omp_set_dynamic(int dynamic_threads) {
  return;
}

160	/* returns the value of the dyn-var ICV, which determines whether dynamic
161	adjustment of the number of threads is enabled or disabled. */
162	int omp_get_dynamic() {
163	return OMP_DYNAMIC;
164	}
165
166	/* returns the value of the cancel-var ICV, which controls the behavior of
167	the cancel construct and cancellation points. */
168	int omp_get_cancellation() {
169	return OMP_CANCELLATION;
170	}
171
172	/* enables or disables nested parallelism, by setting the nest-var ICV. */
173	void omp_set_nested(int nested) {
174	OMP_NESTED = nested;
175	}
176
177	/* returns the value of the nest-var ICV, which determines if nested
178	parallelism is enabled or disabled. */
179	int omp_get_nested(void) {
180	return OMP_NESTED;
181	}
182
183	/* affects the schedule that is applied when runtime is used as schedule kind,
184	by setting the value of the run-sched-var ICV.
185	For the schedule types static, dynamic, and guided the chunk_size is set to the value
186	of the second argument, or to the default chunk_size if the value of the second
187	argument is less than 1; for the schedule type auto the second argument has no meaning;
188	for implementation specific schedule types, the values and associated meanings of
189	the second argument are implementation defined. */
190	void omp_set_schedule(omp_sched_t kind, int modifier) {
191	OMP_SCHEDULE = kind;
192	/*switch(kind) {
193	case omp_sched_static:
194	case omp_sched_dynamic:
195	case omp_sched_guided:
196	CHUNK_SIZE = modifier;
197	break;
198	default:
199	}*/
200	}
201
202	/* returns the schedule that is applied when the runtime schedule is used. */
203	void omp_get_schedule(omp_sched_t * kind, int * modifier) {
204	*kind = OMP_SCHEDULE;
205	*modifier = CHUNK_SIZE;
206	}
207
208	/* returns the maximum number of OpenMP threads available on the device. */
209	int omp_get_thread_limit() {
210	return OMP_THREAD_LIMIT;
211	}
212
213	/* imits the number of nested active parallel regions on the device, by setting the
214	max-active-levels-var ICV. */
215	void omp_set_max_active_levels (int max_levels) {
216	OMP_MAX_ACTIVE_LEVELS = max_levels;
217	}
218
219	/* returns the value of the max-active-levels-var ICV, which determines the
220	maximum number of nested active parallel regions on the device. */
221	int omp_get_max_active_levels() {
222	return OMP_MAX_ACTIVE_LEVELS;
223	}
224
225	/* returns the value of the levels-var ICV. */
226	int omp_get_level() {
227	return levels_var;
228	}
229
230	/* returns, for a given nested level of the current thread,
231	the thread number of the ancestor of the current thread.
232	returns the thread number of the ancestor at a given nest level of the current thread or
233	the thread number of the current thread. If the requested nest level is outside the range
234	of 0 and the nest level of the current thread, as returned by the omp_get_level routine,
235	the routine returns -1. */
236	int omp_get_ancestor_thread_num(int level) {
237	if(levels_var < level \|\| level < 0)
238	return -1;
239	return 0; //TODO
240	}
241
242	/* returns, for a given nested level of the current thread, the size of the thread team to
243	which the ancestor or the current thread belongs.
244	returns the size of the thread team to which the ancestor or the current thread belongs.
245	If the requested nested level is outside the range of 0 and the nested level of the current
246	thread, as returned by the omp_get_level routine, the routine returns -1.
247	Inactive parallel regions are regarded like active parallel
248	regions executed with one thread. */
249	int omp_get_team_size(int level) {
250	if(levels_var < level \|\| level < 0)
251	return -1;
252	return OMP_NUM_THREADS; //TODO
253	}
254
255	/* returns the value of the active-level-vars ICV.
256	return the number of nested, active parallel regions enclosing the current task such
257	that all of the parallel regions are enclosed by the outermost initial task region on
258	the current device. */
259	int omp_get_active_level() {
260	return active_levels_var;
261	}
262
/* Returns true if the enclosing task region is final. Otherwise, it returns
   false.
   TODO: placeholder; the model currently always returns 1. */
int omp_in_final(void) {
  return 1;
}

268	/* returns the thread affinity policy to be used for the subsequent nested parallel
269	regions that do not specify a proc_bind clause. */
270	omp_proc_bind_t omp_get_proc_bind(void) {
271	return OMP_PROC_BIND;
272	}
273
274	/* controls the default target device by assigning the value of the default-device-var ICV.
275	When called from within a target region the effect of this routine is unspecified. */
276	void omp_set_default_device(int device_num) {
277	OMP_DEFAULT_DEVICE = device_num;//TODO
278	}
279
280	/* returns the default target device. When called from within a target region the effect of
281	this routine is unspecified. */
282	int omp_get_default_device() {
283	return OMP_DEFAULT_DEVICE;//TODO
284	}
285
/* Returns the number of target devices. When called from within a target
   region the effect of this routine is unspecified.
   TODO: placeholder; the model always reports one device. */
int omp_get_num_devices(void) {
  return 1;
}

/******************* helper functions for loop translation ***********/

/*
For a team of p threads and a loop of n iterations, let Upper(n⁄p) be the integer q that
satisfies n = p*q - r, with 0 ≤ r < p. One compliant implementation of the static schedule
(with no specified chunk_size) would behave as though chunk_size had been specified with
value q. Another compliant implementation would assign q iterations to the first p-r threads,
and q-1 iterations to the remaining r threads. This illustrates why a conforming program
must not rely on the details of a particular implementation.
A compliant implementation of the guided schedule with a chunk_size value of k would assign
q = Upper(n⁄p) iterations to the first available thread and set n to the larger of n-q and p*k.
It would then repeat this process until q is greater than or equal to the number of
remaining iterations, at which time the remaining iterations form the final chunk.
Another compliant implementation could use the same method, except with
q = Upper(n⁄(2p)), and set n to the larger of n-q and 2*p*k.
*/

308	/* Common functions for translating for loops */
309	// computes the start index for a given thread
310	int __for_start(int tid, int total) {
311	return ((total/OMP_NUM_THREADS) * tid);
312	}
313
314	//computes the end index for a given thread
315	int __for_end(int tid, int total) {
316	return ((total/OMP_NUM_THREADS) * (tid + 1));
317	}
318
319	//computes the extra index for a given thread
320	int __for_extra(int tid, int total) {
321	int offset = total % OMP_NUM_THREADS;
322
323	if(tid < offset) {
324	return total - offset + tid;
325	}
326	return 0;
327	}
328
329	/******************* barrier implementation ***********/
330
331	void __barrier_init() {
332	for (int i=0; i<OMP_NUM_THREADS; i++) in_barrier[i] = 0;
333	}
334
335	// model the synchronization of threads in the same block
336	void __barrier(int tid) {
337	$atomic {
338	in_barrier[tid] = 1; // I am in the barrier
339	num_in_barrier++; // increment number in barrier
340	if (num_in_barrier == OMP_NUM_THREADS) { // I am last to enter
341	for (int i=0; i<OMP_NUM_THREADS; i++) in_barrier[i] = 0; // release all
342	num_in_barrier = 0; // now none are in barrier
343	}
344	}
345	$when (in_barrier[tid] == 0); // wait till I am released
346	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format