source: CIVL/examples/mpi-omp/AMG2013/struct_mv/box_pthreads.h@ beab7f2

main test-branch
Last change on this file since beab7f2 was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 15.8 KB
Line 
/*BHEADER**********************************************************************
 * Copyright (c) 2008, Lawrence Livermore National Security, LLC.
 * Produced at the Lawrence Livermore National Laboratory.
 * This file is part of HYPRE. See file COPYRIGHT for details.
 *
 * HYPRE is free software; you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License (as published by the Free
 * Software Foundation) version 2.1 dated February 1999.
 *
 * $Revision: 2.4 $
 ***********************************************************************EHEADER*/
12
13
/******************************************************************************
 *
 * Header info for the Box structures
 *
 *****************************************************************************/
19
20#ifdef HYPRE_USE_PTHREADS
21
22#ifndef hypre_BOX_PTHREADS_HEADER
23#define hypre_BOX_PTHREADS_HEADER
24
#include <math.h>
#include <pthread.h>
#include "threading.h"
27
28
/*
 * Counters shared by the box-loop macros in this header; both are only
 * declared here and must be defined in exactly one translation unit.
 *
 * hypre_thread_counter - passed as the arrival counter (tl_count) to
 *                        hypre_ThreadLoop by the hypre_BoxLoop*End macros.
 * iteration_counter    - passed as the shared work index (tl_index) to
 *                        hypre_ThreadLoopBegin / hypre_ThreadLoop.
 */
extern volatile int hypre_thread_counter;
extern int iteration_counter;
31
/*--------------------------------------------------------------------------
 * Threaded Looping macros:
 *--------------------------------------------------------------------------*/
35
/*
 * Tuning knobs for hypre_ChunkLoopExternalSetup.  Each one may be overridden
 * by defining it before this header is included.
 *
 * CHUNK_GOAL - divisor applied to the loop volume (nx*ny*nz) to obtain the
 *              target volume of one chunk.
 * MIN_VOL    - lower clamp applied to that target volume.
 * MAX_VOL    - upper clamp applied to that target volume.
 */
#ifndef CHUNK_GOAL
#define CHUNK_GOAL (hypre_NumThreads*1)
#endif
#ifndef MIN_VOL
#define MIN_VOL 125
#endif
#ifndef MAX_VOL
#define MAX_VOL 64000
#endif
45
/*
 * hypre_BoxLoopDeclare
 *
 * Declares three int locals, named by the iinc/jinc/kinc arguments, in the
 * scope where the macro is expanded:
 *
 *   iinc = X(stride)
 *   jinc = Y(stride)*SizeX(data_box) - X(loop_size)*X(stride)
 *   kinc = Z(stride)*SizeX(data_box)*SizeY(data_box)
 *          - Y(loop_size)*Y(stride)*SizeX(data_box)
 *
 * The expansion has no trailing semicolon; the caller supplies it.  Because
 * the expansion is a sequence of declarations it cannot be wrapped in a
 * do/while block.
 */
#define hypre_BoxLoopDeclare(loop_size, data_box, stride, iinc, jinc, kinc) \
int  iinc = (hypre_IndexX(stride));\
int  jinc = (hypre_IndexY(stride)*hypre_BoxSizeX(data_box) -\
             hypre_IndexX(loop_size)*hypre_IndexX(stride));\
int  kinc = (hypre_IndexZ(stride)*\
             hypre_BoxSizeX(data_box)*hypre_BoxSizeY(data_box) -\
             hypre_IndexY(loop_size)*\
             hypre_IndexY(stride)*hypre_BoxSizeX(data_box))
54
/*
 * vol_cbrt(vol): pow(vol, 1/3) converted to int (truncation toward zero).
 * The whole expansion is parenthesized so it behaves as a single operand in
 * any surrounding expression.
 * NOTE(review): because the result is truncated, floating-point rounding in
 * pow() may yield one less than the exact root for perfect cubes - confirm
 * that callers tolerate this.
 */
#define vol_cbrt(vol) ((int) pow((double)(vol), 1. / 3.))
56
/*
 * hypre_ThreadLoopBegin
 *
 * Opens a for loop in which local_counter takes the values
 * ifetchadd(&tl_index, &tl_mtx) + init_val for as long as the value is
 * below stop_val, and emits tl_body as the first statement of the loop body.
 *
 * The loop body's opening brace is left OPEN: the matching '}' must be
 * supplied by the code that follows (the hypre_BoxLoop*End macros do so).
 *
 *   local_counter - lvalue receiving the claimed iteration number
 *   init_val      - offset added to every value returned by ifetchadd
 *   stop_val      - exclusive upper bound
 *   tl_index      - shared index handed to ifetchadd
 *   tl_mtx        - mutex handed to ifetchadd
 *   tl_body       - statement (without trailing ';') run first in each pass
 */
#define hypre_ThreadLoopBegin(local_counter, init_val, stop_val, tl_index,\
                              tl_mtx, tl_body)\
   for (local_counter = ifetchadd(&(tl_index), &(tl_mtx)) + (init_val);\
        local_counter < (stop_val);\
        local_counter = ifetchadd(&(tl_index), &(tl_mtx)) + (init_val))\
   {\
      tl_body;
64
/*
 * hypre_ThreadLoop
 *
 * Rendezvous executed after a threaded loop.
 *
 * If the calling thread is initial_thread (pthread_equal reports a match)
 * the macro only resets tl_index to 0.
 *
 * Otherwise the caller increments tl_count under tl_mtx and then:
 *   - if tl_count is still below hypre_NumThreads: releases the mutex, spins
 *     until tl_release becomes non-zero, decrements tl_count under the
 *     mutex, and spins until tl_release returns to zero;
 *   - else (it is the arrival that reaches hypre_NumThreads): decrements
 *     tl_count, resets tl_index to 0, releases the mutex, sets tl_release
 *     to 1, spins until tl_count reaches 0, and clears tl_release.
 *
 * The expansion is wrapped in do { } while (0) so it is one statement and
 * its internal if/else cannot pair with an else in the calling code.  As
 * before, the caller supplies the terminating semicolon.
 *
 * NOTE(review): the spin loops read tl_release / tl_count without holding
 * tl_mtx; they rely on those objects being declared so that the reads are
 * not optimized away - confirm against their definitions.
 */
#define hypre_ThreadLoop(tl_index,\
                         tl_count, tl_release, tl_mtx)\
   do\
   {\
      if (pthread_equal(initial_thread, pthread_self()) == 0)\
      {\
         pthread_mutex_lock(&(tl_mtx));\
         (tl_count)++;\
         if ((tl_count) < hypre_NumThreads)\
         {\
            pthread_mutex_unlock(&(tl_mtx));\
            while (!(tl_release));\
            pthread_mutex_lock(&(tl_mtx));\
            (tl_count)--;\
            pthread_mutex_unlock(&(tl_mtx));\
            while (tl_release);\
         }\
         else\
         {\
            (tl_count)--;\
            (tl_index) = 0;\
            pthread_mutex_unlock(&(tl_mtx));\
            (tl_release) = 1;\
            while (tl_count);\
            (tl_release) = 0;\
         }\
      }\
      else\
      {\
         (tl_index) = 0;\
      }\
   } while (0)
92
/*
 * hypre_ThreadLoopOld
 *
 * Self-contained variant that combines the work-claiming loop and the
 * rendezvous in a single brace-delimited block:
 *
 *   1. local_counter repeatedly takes ifetchadd(&tl_index, &tl_mtx) +
 *      init_val while that value is below stop_val, running tl_body each
 *      time;
 *   2. the same rendezvous as hypre_ThreadLoop is then performed: the
 *      initial thread just resets tl_index to 0; every other thread counts
 *      itself in under tl_mtx and either waits on tl_release or, when it
 *      brings tl_count up to hypre_NumThreads, resets tl_index and toggles
 *      tl_release around a wait for tl_count to reach 0.
 *
 * The expansion is a complete compound statement ({ ... }); it is kept in
 * that form so call sites written without a trailing semicolon stay valid.
 */
#define hypre_ThreadLoopOld(local_counter, init_val, stop_val, tl_index,\
                            tl_count, tl_release, tl_mtx, tl_body)\
{\
   for (local_counter = ifetchadd(&(tl_index), &(tl_mtx)) + (init_val);\
        local_counter < (stop_val);\
        local_counter = ifetchadd(&(tl_index), &(tl_mtx)) + (init_val))\
   {\
      tl_body;\
   }\
   if (pthread_equal(initial_thread, pthread_self()) == 0)\
   {\
      pthread_mutex_lock(&(tl_mtx));\
      (tl_count)++;\
      if ((tl_count) < hypre_NumThreads)\
      {\
         pthread_mutex_unlock(&(tl_mtx));\
         while (!(tl_release));\
         pthread_mutex_lock(&(tl_mtx));\
         (tl_count)--;\
         pthread_mutex_unlock(&(tl_mtx));\
         while (tl_release);\
      }\
      else\
      {\
         (tl_count)--;\
         (tl_index) = 0;\
         pthread_mutex_unlock(&(tl_mtx));\
         (tl_release) = 1;\
         while (tl_count);\
         (tl_release) = 0;\
      }\
   }\
   else\
   {\
      (tl_index) = 0;\
   }\
}
128
/*
 * hypre_ChunkLoopExternalSetup
 *
 * Declares, in the enclosing scope, the locals used to split an
 * nx x ny x nz loop into chunks, and computes the chunk geometry.
 *
 * Steps (a / b + !!(a % b) is integer division rounded up):
 *   - target_vol  = (nx*ny*nz) / CHUNK_GOAL, clamped to [MIN_VOL, MAX_VOL]
 *   - hypre__cz   = z edge of a chunk, derived from the truncated cube root
 *                   of target_vol; znumchunk = number of chunks along z
 *   - hypre__cy   = y edge, derived from the truncated square root of
 *                   target_vol / hypre__cz; ynumchunk likewise
 *   - hypre__cx   = x edge, derived from target_area / hypre__cy;
 *                   xnumchunk likewise
 *   - numchunks   = znumchunk * ynumchunk * xnumchunk
 *   - clfreq[d] / clreset[d] = divisor / modulus used by
 *                   hypre_ChunkLoopInternalSetup to turn a chunk number into
 *                   a chunk coordinate along axis d (0 = x, 1 = y, 2 = z)
 *
 * clstart, clfinish and chunkcount are declared here but left unassigned.
 *
 * The expansion begins with declarations, so it must appear where
 * declarations are allowed, and it has no trailing semicolon.
 *
 * NOTE(review): the divisions assume every dimension, CHUNK_GOAL and the
 * derived roots are non-zero; the hypre_BoxLoop*Begin macros guard the
 * dimensions - other call sites should be checked.
 */
#define hypre_ChunkLoopExternalSetup(hypre__nx, hypre__ny, hypre__nz)\
   int target_vol, target_area, target_len;\
   int cbrt_tar_vol, sqrt_tar_area;\
   int edge_divisor;\
   int znumchunk, ynumchunk, xnumchunk;\
   int hypre__cz, hypre__cy, hypre__cx;\
   int numchunks;\
   int clfreq[3], clreset[3];\
   int clstart[3];\
   int clfinish[3];\
   int chunkcount;\
   target_vol    = hypre_min(hypre_max(((hypre__nx) * (hypre__ny) * (hypre__nz)) / CHUNK_GOAL,\
                           MIN_VOL), MAX_VOL);\
   cbrt_tar_vol  = (int) (pow ((double)target_vol, 1./3.));\
   edge_divisor  = (hypre__nz) / cbrt_tar_vol + !!((hypre__nz) % cbrt_tar_vol);\
   hypre__cz     = (hypre__nz) / edge_divisor + !!((hypre__nz) % edge_divisor);\
   znumchunk     = (hypre__nz) / hypre__cz + !!((hypre__nz) % hypre__cz);\
   target_area   = target_vol / hypre__cz;\
   sqrt_tar_area = (int) (sqrt((double)target_area));\
   edge_divisor  = (hypre__ny) / sqrt_tar_area + !!((hypre__ny) % sqrt_tar_area);\
   hypre__cy     = (hypre__ny) / edge_divisor + !!((hypre__ny) % edge_divisor);\
   ynumchunk     = (hypre__ny) / hypre__cy + !!((hypre__ny) % hypre__cy);\
   target_len    = target_area / hypre__cy;\
   edge_divisor  = (hypre__nx) / target_len + !!((hypre__nx) % target_len);\
   hypre__cx     = (hypre__nx) / edge_divisor + !!((hypre__nx) % edge_divisor);\
   xnumchunk     = (hypre__nx) / hypre__cx + !!((hypre__nx) % hypre__cx);\
   numchunks     = znumchunk * ynumchunk * xnumchunk;\
   clfreq[0]     = 1;\
   clreset[0]    = xnumchunk;\
   clfreq[1]     = clreset[0];\
   clreset[1]    = ynumchunk * xnumchunk;\
   clfreq[2]     = clreset[1];\
   clreset[2]    = znumchunk * ynumchunk * xnumchunk
162
/*
 * hypre_ChunkLoopInternalSetup
 *
 * Converts the chunk number chunkcount into the half-open index range
 * [clstart[d], clfinish[d]) of that chunk along each axis d (0 = x, 1 = y,
 * 2 = z):
 *
 *   clstart[d]  = ((chunkcount % clreset[d]) / clfreq[d]) * chunk_edge_d
 *   clfinish[d] = clstart[d] + chunk_edge_d, or the full extent of the axis
 *                 when fewer than chunk_edge_d further indices remain
 *
 * where chunk_edge_d is hypre__cx / hypre__cy / hypre__cz and the axis
 * extent is hypre__nx / hypre__ny / hypre__nz.
 *
 * The expansion is a single do { } while (0) statement without a trailing
 * semicolon, so it can be used directly as the tl_body argument of
 * hypre_ThreadLoopBegin or as an ordinary statement.
 */
#define hypre_ChunkLoopInternalSetup(clstart, clfinish, clreset, clfreq,\
                                     hypre__nx, hypre__ny, hypre__nz,\
                                     hypre__cx, hypre__cy, hypre__cz,\
                                     chunkcount)\
   do\
   {\
      (clstart)[0] = (((chunkcount) % (clreset)[0]) / (clfreq)[0]) * (hypre__cx);\
      if ((clstart)[0] < (hypre__nx) - (hypre__cx))\
      {\
         (clfinish)[0] = (clstart)[0] + (hypre__cx);\
      }\
      else\
      {\
         (clfinish)[0] = (hypre__nx);\
      }\
      (clstart)[1] = (((chunkcount) % (clreset)[1]) / (clfreq)[1]) * (hypre__cy);\
      if ((clstart)[1] < (hypre__ny) - (hypre__cy))\
      {\
         (clfinish)[1] = (clstart)[1] + (hypre__cy);\
      }\
      else\
      {\
         (clfinish)[1] = (hypre__ny);\
      }\
      (clstart)[2] = (((chunkcount) % (clreset)[2]) / (clfreq)[2]) * (hypre__cz);\
      if ((clstart)[2] < (hypre__nz) - (hypre__cz))\
      {\
         (clfinish)[2] = (clstart)[2] + (hypre__cz);\
      }\
      else\
      {\
         (clfinish)[2] = (hypre__nz);\
      }\
   } while (0)
182
/*
 * hypre_BoxLoop0Begin
 *
 * Opens a threaded box loop that carries no data index.  It:
 *   - opens a block and reads the loop extents from loop_size into
 *     hypre__nx / hypre__ny / hypre__nz;
 *   - if all three extents are non-zero, opens an if block, runs
 *     hypre_ChunkLoopExternalSetup, and opens the chunk-claiming loop of
 *     hypre_ThreadLoopBegin with hypre_ChunkLoopInternalSetup as the
 *     per-chunk setup (work index: iteration_counter, mutex:
 *     hypre_mutex_boxloops).
 *
 * Three braces are left open; hypre_BoxLoop0End closes them.  Intended to be
 * followed by hypre_BoxLoop0For, the loop body, and hypre_BoxLoop0End.
 */
#define hypre_BoxLoop0Begin(loop_size)\
{\
   int hypre__nx = hypre_IndexX(loop_size);\
   int hypre__ny = hypre_IndexY(loop_size);\
   int hypre__nz = hypre_IndexZ(loop_size);\
   if (hypre__nx && hypre__ny && hypre__nz )\
   {\
      hypre_ChunkLoopExternalSetup(hypre__nx, hypre__ny, hypre__nz);\
      hypre_ThreadLoopBegin(chunkcount, 0, numchunks, iteration_counter,\
         hypre_mutex_boxloops,\
         hypre_ChunkLoopInternalSetup(clstart, clfinish, clreset, clfreq,\
                                      hypre__nx, hypre__ny, hypre__nz,\
                                      hypre__cx, hypre__cy, hypre__cz,\
                                      chunkcount))
197
/*
 * hypre_BoxLoop0For
 *
 * Opens three nested for loops over the current chunk: k runs over
 * [clstart[2], clfinish[2]), j over [clstart[1], clfinish[1]) and i over
 * [clstart[0], clfinish[0]), with i innermost.  clstart / clfinish are taken
 * from the enclosing scope.  All three loop braces are left open for the
 * loop body; hypre_BoxLoop0End closes them.
 */
#define hypre_BoxLoop0For(i, j, k)\
         for (k = clstart[2]; k < clfinish[2]; k++ )\
         {\
            for (j = clstart[1]; j < clfinish[1]; j++ )\
            {\
               for (i = clstart[0]; i < clfinish[0]; i++ )\
               {
205
/*
 * hypre_BoxLoop0End
 *
 * Closes a loop opened with hypre_BoxLoop0Begin / hypre_BoxLoop0For:
 *   - the first "}}}" closes the three for loops opened by hypre_BoxLoop0For;
 *   - hypre_ThreadLoop then runs the end-of-chunk rendezvous on
 *     iteration_counter / hypre_thread_counter / hypre_thread_release under
 *     hypre_mutex_boxloops (inside the chunk-claiming loop body);
 *   - the final "}}}" closes the chunk-claiming loop, the non-empty-extent
 *     if block and the outer block opened by hypre_BoxLoop0Begin.
 */
#define hypre_BoxLoop0End() }}}hypre_ThreadLoop(iteration_counter,\
                           hypre_thread_counter, hypre_thread_release,\
                           hypre_mutex_boxloops);}}}
209
210
/*
 * hypre_BoxLoop1Begin
 *
 * Opens a threaded box loop that tracks one data index.  It:
 *   - opens a block and declares hypre__iinc1 / hypre__jinc1 / hypre__kinc1
 *     via hypre_BoxLoopDeclare for (data_box1, stride1);
 *   - reads the loop extents from loop_size into hypre__nx / ny / nz;
 *   - stores hypre_BoxIndexRank(data_box1, start1) in orig_i1;
 *   - if all three extents are non-zero, opens an if block, runs
 *     hypre_ChunkLoopExternalSetup and opens the chunk-claiming loop of
 *     hypre_ThreadLoopBegin with hypre_ChunkLoopInternalSetup as the
 *     per-chunk setup.
 *
 * The i1 argument is not referenced in this expansion.  Three braces are left
 * open; hypre_BoxLoop1End closes them.
 */
#define hypre_BoxLoop1Begin(loop_size,\
                            data_box1, start1, stride1, i1)\
{\
   hypre_BoxLoopDeclare(loop_size, data_box1, stride1,\
                        hypre__iinc1, hypre__jinc1, hypre__kinc1);\
   int hypre__nx = hypre_IndexX(loop_size);\
   int hypre__ny = hypre_IndexY(loop_size);\
   int hypre__nz = hypre_IndexZ(loop_size);\
   int orig_i1 = hypre_BoxIndexRank(data_box1, start1);\
   if (hypre__nx && hypre__ny && hypre__nz )\
   {\
      hypre_ChunkLoopExternalSetup(hypre__nx, hypre__ny, hypre__nz);\
      hypre_ThreadLoopBegin(chunkcount, 0, numchunks, iteration_counter,\
         hypre_mutex_boxloops,\
         hypre_ChunkLoopInternalSetup(clstart, clfinish, clreset, clfreq,\
                                      hypre__nx, hypre__ny, hypre__nz,\
                                      hypre__cx, hypre__cy, hypre__cz,\
                                      chunkcount))
229
/*
 * hypre_BoxLoop1For
 *
 * Opens three nested for loops (k outermost, i innermost) over the current
 * chunk [clstart[d], clfinish[d]) and, as the first statement of the
 * innermost body, sets
 *
 *   i1 = orig_i1 + (i + nx*j + nx*ny*k)*hypre__iinc1
 *                + (j + ny*k)*hypre__jinc1
 *                + k*hypre__kinc1
 *
 * i.e. the data index is computed directly from (i, j, k) rather than by
 * incremental updates.  orig_i1, hypre__nx, hypre__ny and the increments
 * come from the enclosing scope.  The three loop braces are left open;
 * hypre_BoxLoop1End closes them.
 */
#define hypre_BoxLoop1For(i, j, k, i1)\
         for (k = clstart[2]; k < clfinish[2]; k++)\
         {\
            for (j = clstart[1]; j < clfinish[1]; j++)\
            {\
               for (i = clstart[0]; i < clfinish[0]; i++)\
               {\
                  i1 = orig_i1 +\
                      (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc1 +\
                      (j + hypre__ny*k)*hypre__jinc1 + k*hypre__kinc1;
240
/*
 * hypre_BoxLoop1End
 *
 * Closes a loop opened with hypre_BoxLoop1Begin / hypre_BoxLoop1For:
 *   - the first "}}}" closes the three for loops opened by hypre_BoxLoop1For;
 *   - hypre_ThreadLoop then runs the end-of-chunk rendezvous on
 *     iteration_counter / hypre_thread_counter / hypre_thread_release under
 *     hypre_mutex_boxloops;
 *   - the final "}}}" closes the chunk-claiming loop, the non-empty-extent
 *     if block and the outer block opened by hypre_BoxLoop1Begin.
 *
 * The i1 argument is not referenced in the expansion.
 */
#define hypre_BoxLoop1End(i1) }}}hypre_ThreadLoop(iteration_counter,\
                           hypre_thread_counter, hypre_thread_release,\
                           hypre_mutex_boxloops);}}}
244
/*
 * hypre_BoxLoop2Begin
 *
 * Opens a threaded box loop that tracks two data indices.  It:
 *   - opens a block and declares hypre__iincN / hypre__jincN / hypre__kincN
 *     (N = 1, 2) via hypre_BoxLoopDeclare for each (data_boxN, strideN);
 *   - reads the loop extents from loop_size into hypre__nx / ny / nz;
 *   - stores hypre_BoxIndexRank(data_boxN, startN) in orig_iN;
 *   - if all three extents are non-zero, opens an if block, runs
 *     hypre_ChunkLoopExternalSetup and opens the chunk-claiming loop of
 *     hypre_ThreadLoopBegin with hypre_ChunkLoopInternalSetup as the
 *     per-chunk setup.
 *
 * The i1 / i2 arguments are not referenced in this expansion.  Three braces
 * are left open; hypre_BoxLoop2End closes them.
 */
#define hypre_BoxLoop2Begin(loop_size,\
                            data_box1, start1, stride1, i1,\
                            data_box2, start2, stride2, i2)\
{\
   hypre_BoxLoopDeclare(loop_size, data_box1, stride1,\
                        hypre__iinc1, hypre__jinc1, hypre__kinc1);\
   hypre_BoxLoopDeclare(loop_size, data_box2, stride2,\
                        hypre__iinc2, hypre__jinc2, hypre__kinc2);\
   int hypre__nx = hypre_IndexX(loop_size);\
   int hypre__ny = hypre_IndexY(loop_size);\
   int hypre__nz = hypre_IndexZ(loop_size);\
   int orig_i1 = hypre_BoxIndexRank(data_box1, start1);\
   int orig_i2 = hypre_BoxIndexRank(data_box2, start2);\
   if (hypre__nx && hypre__ny && hypre__nz )\
   {\
      hypre_ChunkLoopExternalSetup(hypre__nx, hypre__ny, hypre__nz);\
      hypre_ThreadLoopBegin(chunkcount, 0, numchunks, iteration_counter,\
         hypre_mutex_boxloops,\
         hypre_ChunkLoopInternalSetup(clstart, clfinish, clreset, clfreq,\
                                      hypre__nx, hypre__ny, hypre__nz,\
                                      hypre__cx, hypre__cy, hypre__cz,\
                                      chunkcount))
267
/*
 * hypre_BoxLoop2For
 *
 * Opens three nested for loops (k outermost, i innermost) over the current
 * chunk [clstart[d], clfinish[d]) and, at the top of the innermost body,
 * sets for N = 1, 2
 *
 *   iN = orig_iN + (i + nx*j + nx*ny*k)*hypre__iincN
 *                + (j + ny*k)*hypre__jincN
 *                + k*hypre__kincN
 *
 * orig_iN, hypre__nx, hypre__ny and the increments come from the enclosing
 * scope.  The three loop braces are left open; hypre_BoxLoop2End closes them.
 */
#define hypre_BoxLoop2For(i, j, k, i1, i2)\
         for (k = clstart[2]; k < clfinish[2]; k++)\
         {\
            for (j = clstart[1]; j < clfinish[1]; j++)\
            {\
               for (i = clstart[0]; i < clfinish[0]; i++)\
               {\
                  i1 = orig_i1 +\
                      (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc1 +\
                      (j + hypre__ny*k)*hypre__jinc1 + k*hypre__kinc1;\
                  i2 = orig_i2 +\
                      (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc2 +\
                      (j + hypre__ny*k)*hypre__jinc2 + k*hypre__kinc2;
281
/*
 * hypre_BoxLoop2End
 *
 * Closes a loop opened with hypre_BoxLoop2Begin / hypre_BoxLoop2For:
 *   - the first "}}}" closes the three for loops opened by hypre_BoxLoop2For;
 *   - hypre_ThreadLoop then runs the end-of-chunk rendezvous on
 *     iteration_counter / hypre_thread_counter / hypre_thread_release under
 *     hypre_mutex_boxloops;
 *   - the final "}}}" closes the chunk-claiming loop, the non-empty-extent
 *     if block and the outer block opened by hypre_BoxLoop2Begin.
 *
 * The i1 / i2 arguments are not referenced in the expansion.
 */
#define hypre_BoxLoop2End(i1, i2) }}}hypre_ThreadLoop(iteration_counter,\
                           hypre_thread_counter, hypre_thread_release,\
                           hypre_mutex_boxloops);}}}
285
286
287
288
/*
 * hypre_BoxLoop3Begin
 *
 * Opens a threaded box loop that tracks three data indices.  It:
 *   - opens a block and declares hypre__iincN / hypre__jincN / hypre__kincN
 *     (N = 1..3) via hypre_BoxLoopDeclare for each (data_boxN, strideN);
 *   - reads the loop extents from loop_size into hypre__nx / ny / nz;
 *   - stores hypre_BoxIndexRank(data_boxN, startN) in orig_iN;
 *   - if all three extents are non-zero, opens an if block, runs
 *     hypre_ChunkLoopExternalSetup and opens the chunk-claiming loop of
 *     hypre_ThreadLoopBegin with hypre_ChunkLoopInternalSetup as the
 *     per-chunk setup.
 *
 * The i1 / i2 / i3 arguments are not referenced in this expansion.  Three
 * braces are left open; hypre_BoxLoop3End closes them.
 */
#define hypre_BoxLoop3Begin(loop_size,\
                            data_box1, start1, stride1, i1,\
                            data_box2, start2, stride2, i2,\
                            data_box3, start3, stride3, i3)\
{\
   hypre_BoxLoopDeclare(loop_size, data_box1, stride1,\
                        hypre__iinc1, hypre__jinc1, hypre__kinc1);\
   hypre_BoxLoopDeclare(loop_size, data_box2, stride2,\
                        hypre__iinc2, hypre__jinc2, hypre__kinc2);\
   hypre_BoxLoopDeclare(loop_size, data_box3, stride3,\
                        hypre__iinc3, hypre__jinc3, hypre__kinc3);\
   int hypre__nx = hypre_IndexX(loop_size);\
   int hypre__ny = hypre_IndexY(loop_size);\
   int hypre__nz = hypre_IndexZ(loop_size);\
   int orig_i1 = hypre_BoxIndexRank(data_box1, start1);\
   int orig_i2 = hypre_BoxIndexRank(data_box2, start2);\
   int orig_i3 = hypre_BoxIndexRank(data_box3, start3);\
   if (hypre__nx && hypre__ny && hypre__nz )\
   {\
      hypre_ChunkLoopExternalSetup(hypre__nx, hypre__ny, hypre__nz);\
      hypre_ThreadLoopBegin(chunkcount, 0, numchunks, iteration_counter,\
         hypre_mutex_boxloops,\
         hypre_ChunkLoopInternalSetup(clstart, clfinish, clreset, clfreq,\
                                      hypre__nx, hypre__ny, hypre__nz,\
                                      hypre__cx, hypre__cy, hypre__cz,\
                                      chunkcount))
315
/*
 * hypre_BoxLoop3For
 *
 * Opens three nested for loops (k outermost, i innermost) over the current
 * chunk [clstart[d], clfinish[d]) and, at the top of the innermost body,
 * sets for N = 1..3
 *
 *   iN = orig_iN + (i + nx*j + nx*ny*k)*hypre__iincN
 *                + (j + ny*k)*hypre__jincN
 *                + k*hypre__kincN
 *
 * orig_iN, hypre__nx, hypre__ny and the increments come from the enclosing
 * scope.  The three loop braces are left open; hypre_BoxLoop3End closes them.
 *
 * The definition ends on the i3 assignment: there is deliberately no line
 * continuation after it, so the line that follows the macro is never
 * spliced into the expansion.
 */
#define hypre_BoxLoop3For(i, j, k, i1, i2, i3)\
         for (k = clstart[2]; k < clfinish[2]; k++)\
         {\
            for (j = clstart[1]; j < clfinish[1]; j++)\
            {\
               for (i = clstart[0]; i < clfinish[0]; i++)\
               {\
                  i1 = orig_i1 +\
                      (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc1 +\
                      (j + hypre__ny*k)*hypre__jinc1 + k*hypre__kinc1;\
                  i2 = orig_i2 +\
                      (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc2 +\
                      (j + hypre__ny*k)*hypre__jinc2 + k*hypre__kinc2;\
                  i3 = orig_i3 +\
                      (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc3 +\
                      (j + hypre__ny*k)*hypre__jinc3 + k*hypre__kinc3;
332
/*
 * hypre_BoxLoop3End
 *
 * Closes a loop opened with hypre_BoxLoop3Begin / hypre_BoxLoop3For:
 *   - the first "}}}" closes the three for loops opened by hypre_BoxLoop3For;
 *   - hypre_ThreadLoop then runs the end-of-chunk rendezvous on
 *     iteration_counter / hypre_thread_counter / hypre_thread_release under
 *     hypre_mutex_boxloops;
 *   - the final "}}}" closes the chunk-claiming loop, the non-empty-extent
 *     if block and the outer block opened by hypre_BoxLoop3Begin.
 *
 * The i1 / i2 / i3 arguments are not referenced in the expansion.
 */
#define hypre_BoxLoop3End(i1, i2, i3) }}}hypre_ThreadLoop(iteration_counter,\
                           hypre_thread_counter, hypre_thread_release,\
                           hypre_mutex_boxloops);}}}
336
337
/*
 * hypre_BoxLoop4Begin
 *
 * Opens a threaded box loop that tracks four data indices.  It:
 *   - opens a block and declares hypre__iincN / hypre__jincN / hypre__kincN
 *     (N = 1..4) via hypre_BoxLoopDeclare for each (data_boxN, strideN);
 *   - reads the loop extents from loop_size into hypre__nx / ny / nz;
 *   - stores hypre_BoxIndexRank(data_boxN, startN) in orig_iN;
 *   - if all three extents are non-zero, opens an if block, runs
 *     hypre_ChunkLoopExternalSetup and opens the chunk-claiming loop of
 *     hypre_ThreadLoopBegin with hypre_ChunkLoopInternalSetup as the
 *     per-chunk setup.
 *
 * The i1..i4 arguments are not referenced in this expansion.  Three braces
 * are left open; hypre_BoxLoop4End closes them.
 */
#define hypre_BoxLoop4Begin(loop_size,\
                            data_box1, start1, stride1, i1,\
                            data_box2, start2, stride2, i2,\
                            data_box3, start3, stride3, i3,\
                            data_box4, start4, stride4, i4)\
{\
   hypre_BoxLoopDeclare(loop_size, data_box1, stride1,\
                        hypre__iinc1, hypre__jinc1, hypre__kinc1);\
   hypre_BoxLoopDeclare(loop_size, data_box2, stride2,\
                        hypre__iinc2, hypre__jinc2, hypre__kinc2);\
   hypre_BoxLoopDeclare(loop_size, data_box3, stride3,\
                        hypre__iinc3, hypre__jinc3, hypre__kinc3);\
   hypre_BoxLoopDeclare(loop_size, data_box4, stride4,\
                        hypre__iinc4, hypre__jinc4, hypre__kinc4);\
   int hypre__nx = hypre_IndexX(loop_size);\
   int hypre__ny = hypre_IndexY(loop_size);\
   int hypre__nz = hypre_IndexZ(loop_size);\
   int orig_i1 = hypre_BoxIndexRank(data_box1, start1);\
   int orig_i2 = hypre_BoxIndexRank(data_box2, start2);\
   int orig_i3 = hypre_BoxIndexRank(data_box3, start3);\
   int orig_i4 = hypre_BoxIndexRank(data_box4, start4);\
   if (hypre__nx && hypre__ny && hypre__nz )\
   {\
      hypre_ChunkLoopExternalSetup(hypre__nx, hypre__ny, hypre__nz);\
      hypre_ThreadLoopBegin(chunkcount, 0, numchunks, iteration_counter,\
         hypre_mutex_boxloops,\
         hypre_ChunkLoopInternalSetup(clstart, clfinish, clreset, clfreq,\
                                      hypre__nx, hypre__ny, hypre__nz,\
                                      hypre__cx, hypre__cy, hypre__cz,\
                                      chunkcount))
368
/*
 * hypre_BoxLoop4For
 *
 * Opens three nested for loops (k outermost, i innermost) over the current
 * chunk [clstart[d], clfinish[d]) and, at the top of the innermost body,
 * sets for N = 1..4
 *
 *   iN = orig_iN + (i + nx*j + nx*ny*k)*hypre__iincN
 *                + (j + ny*k)*hypre__jincN
 *                + k*hypre__kincN
 *
 * orig_iN, hypre__nx, hypre__ny and the increments come from the enclosing
 * scope.  The three loop braces are left open; hypre_BoxLoop4End closes them.
 *
 * The definition ends on the i4 assignment: there is deliberately no line
 * continuation after it, so the line that follows the macro is never
 * spliced into the expansion.
 */
#define hypre_BoxLoop4For(i, j, k, i1, i2, i3, i4)\
         for (k = clstart[2]; k < clfinish[2]; k++)\
         {\
            for (j = clstart[1]; j < clfinish[1]; j++)\
            {\
               for (i = clstart[0]; i < clfinish[0]; i++)\
               {\
                  i1 = orig_i1 +\
                      (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc1 +\
                      (j + hypre__ny*k)*hypre__jinc1 + k*hypre__kinc1;\
                  i2 = orig_i2 +\
                      (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc2 +\
                      (j + hypre__ny*k)*hypre__jinc2 + k*hypre__kinc2;\
                  i3 = orig_i3 +\
                      (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc3 +\
                      (j + hypre__ny*k)*hypre__jinc3 + k*hypre__kinc3;\
                  i4 = orig_i4 +\
                      (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc4 +\
                      (j + hypre__ny*k)*hypre__jinc4 + k*hypre__kinc4;
388
389
/*
 * hypre_BoxLoop4End
 *
 * Closes a loop opened with hypre_BoxLoop4Begin / hypre_BoxLoop4For:
 *   - the first "}}}" closes the three for loops opened by hypre_BoxLoop4For;
 *   - hypre_ThreadLoop then runs the end-of-chunk rendezvous on
 *     iteration_counter / hypre_thread_counter / hypre_thread_release under
 *     hypre_mutex_boxloops;
 *   - the final "}}}" closes the chunk-claiming loop, the non-empty-extent
 *     if block and the outer block opened by hypre_BoxLoop4Begin.
 *
 * The i1..i4 arguments are not referenced in the expansion.
 */
#define hypre_BoxLoop4End(i1, i2, i3, i4) }}}hypre_ThreadLoop(iteration_counter,\
                           hypre_thread_counter, hypre_thread_release,\
                           hypre_mutex_boxloops);}}}
393
394
395#endif
396
397#endif
398
Note: See TracBrowser for help on using the repository browser.