/*BHEADER********************************************************************** * Copyright (c) 2008, Lawrence Livermore National Security, LLC. * Produced at the Lawrence Livermore National Laboratory. * This file is part of HYPRE. See file COPYRIGHT for details. * * HYPRE is free software; you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License (as published by the Free * Software Foundation) version 2.1 dated February 1999. * * $Revision: 2.4 $ ***********************************************************************EHEADER*/ /****************************************************************************** * * Header info for the Box structures * *****************************************************************************/ #ifdef HYPRE_USE_PTHREADS #ifndef hypre_BOX_PTHREADS_HEADER #define hypre_BOX_PTHREADS_HEADER #include #include "threading.h" extern volatile int hypre_thread_counter; extern int iteration_counter; /*-------------------------------------------------------------------------- * Threaded Looping macros: *--------------------------------------------------------------------------*/ #ifndef CHUNK_GOAL #define CHUNK_GOAL (hypre_NumThreads*1) #endif #ifndef MIN_VOL #define MIN_VOL 125 #endif #ifndef MAX_VOL #define MAX_VOL 64000 #endif #define hypre_BoxLoopDeclare(loop_size, data_box, stride, iinc, jinc, kinc) \ int iinc = (hypre_IndexX(stride));\ int jinc = (hypre_IndexY(stride)*hypre_BoxSizeX(data_box) -\ hypre_IndexX(loop_size)*hypre_IndexX(stride));\ int kinc = (hypre_IndexZ(stride)*\ hypre_BoxSizeX(data_box)*hypre_BoxSizeY(data_box) -\ hypre_IndexY(loop_size)*\ hypre_IndexY(stride)*hypre_BoxSizeX(data_box)) #define vol_cbrt(vol) (int) pow((double)(vol), 1. / 3.) #define hypre_ThreadLoopBegin(local_counter, init_val, stop_val, tl_index,\ tl_mtx, tl_body)\ for (local_counter = ifetchadd(&tl_index, &tl_mtx) + init_val;\ local_counter < stop_val;\ local_counter = ifetchadd(&tl_index, &tl_mtx) + init_val)\ {\ tl_body; #define hypre_ThreadLoop(tl_index,\ tl_count, tl_release, tl_mtx)\ if (pthread_equal(initial_thread, pthread_self()) == 0)\ {\ pthread_mutex_lock(&tl_mtx);\ tl_count++;\ if (tl_count < hypre_NumThreads)\ {\ pthread_mutex_unlock(&tl_mtx);\ while (!tl_release);\ pthread_mutex_lock(&tl_mtx);\ tl_count--;\ pthread_mutex_unlock(&tl_mtx);\ while (tl_release);\ }\ else\ {\ tl_count--;\ tl_index = 0;\ pthread_mutex_unlock(&tl_mtx);\ tl_release = 1;\ while (tl_count);\ tl_release = 0;\ }\ }\ else\ tl_index = 0 #define hypre_ThreadLoopOld(local_counter, init_val, stop_val, tl_index,\ tl_count, tl_release, tl_mtx, tl_body)\ {\ for (local_counter = ifetchadd(&tl_index, &tl_mtx) + init_val;\ local_counter < stop_val;\ local_counter = ifetchadd(&tl_index, &tl_mtx) + init_val)\ {\ tl_body;\ }\ if (pthread_equal(initial_thread, pthread_self()) == 0)\ {\ pthread_mutex_lock(&tl_mtx);\ tl_count++;\ if (tl_count < hypre_NumThreads)\ {\ pthread_mutex_unlock(&tl_mtx);\ while (!tl_release);\ pthread_mutex_lock(&tl_mtx);\ tl_count--;\ pthread_mutex_unlock(&tl_mtx);\ while (tl_release);\ }\ else\ {\ tl_count--;\ tl_index = 0;\ pthread_mutex_unlock(&tl_mtx);\ tl_release = 1;\ while (tl_count);\ tl_release = 0;\ }\ }\ else\ tl_index = 0;\ } #define hypre_ChunkLoopExternalSetup(hypre__nx, hypre__ny, hypre__nz)\ int target_vol, target_area, target_len;\ int cbrt_tar_vol, sqrt_tar_area;\ int edge_divisor;\ int znumchunk, ynumchunk, xnumchunk;\ int hypre__cz, hypre__cy, hypre__cx;\ int numchunks;\ int clfreq[3], clreset[3];\ int clstart[3];\ int clfinish[3];\ int chunkcount;\ target_vol = hypre_min(hypre_max((hypre__nx * hypre__ny * hypre__nz) / CHUNK_GOAL,\ MIN_VOL), MAX_VOL);\ cbrt_tar_vol = (int) (pow ((double)target_vol, 1./3.));\ edge_divisor = hypre__nz / cbrt_tar_vol + !!(hypre__nz % cbrt_tar_vol);\ hypre__cz = hypre__nz / edge_divisor + !!(hypre__nz % edge_divisor);\ znumchunk = hypre__nz / hypre__cz + !!(hypre__nz % hypre__cz);\ target_area = target_vol / hypre__cz;\ sqrt_tar_area = (int) (sqrt((double)target_area));\ edge_divisor = hypre__ny / sqrt_tar_area + !!(hypre__ny % sqrt_tar_area);\ hypre__cy = hypre__ny / edge_divisor + !!(hypre__ny % edge_divisor);\ ynumchunk = hypre__ny / hypre__cy + !!(hypre__ny % hypre__cy);\ target_len = target_area / hypre__cy;\ edge_divisor = hypre__nx / target_len + !!(hypre__nx % target_len);\ hypre__cx = hypre__nx / edge_divisor + !!(hypre__nx % edge_divisor);\ xnumchunk = hypre__nx / hypre__cx + !!(hypre__nx % hypre__cx);\ numchunks = znumchunk * ynumchunk * xnumchunk;\ clfreq[0] = 1;\ clreset[0] = xnumchunk;\ clfreq[1] = clreset[0];\ clreset[1] = ynumchunk * xnumchunk;\ clfreq[2] = clreset[1];\ clreset[2] = znumchunk * ynumchunk * xnumchunk #define hypre_ChunkLoopInternalSetup(clstart, clfinish, clreset, clfreq,\ hypre__nx, hypre__ny, hypre__nz,\ hypre__cx, hypre__cy, hypre__cz,\ chunkcount)\ clstart[0] = ((chunkcount % clreset[0]) / clfreq[0]) * hypre__cx;\ if (clstart[0] < hypre__nx - hypre__cx)\ clfinish[0] = clstart[0] + hypre__cx;\ else\ clfinish[0] = hypre__nx;\ clstart[1] = ((chunkcount % clreset[1]) / clfreq[1]) * hypre__cy;\ if (clstart[1] < hypre__ny - hypre__cy)\ clfinish[1] = clstart[1] + hypre__cy;\ else\ clfinish[1] = hypre__ny;\ clstart[2] = ((chunkcount % clreset[2]) / clfreq[2]) * hypre__cz;\ if (clstart[2] < hypre__nz - hypre__cz)\ clfinish[2] = clstart[2] + hypre__cz;\ else\ clfinish[2] = hypre__nz #define hypre_BoxLoop0Begin(loop_size)\ {\ int hypre__nx = hypre_IndexX(loop_size);\ int hypre__ny = hypre_IndexY(loop_size);\ int hypre__nz = hypre_IndexZ(loop_size);\ if (hypre__nx && hypre__ny && hypre__nz )\ {\ hypre_ChunkLoopExternalSetup(hypre__nx, hypre__ny, hypre__nz);\ hypre_ThreadLoopBegin(chunkcount, 0, numchunks, iteration_counter,\ hypre_mutex_boxloops,\ hypre_ChunkLoopInternalSetup(clstart, clfinish, clreset, clfreq,\ hypre__nx, hypre__ny, hypre__nz,\ hypre__cx, hypre__cy, hypre__cz,\ chunkcount)); #define hypre_BoxLoop0For(i, j, k)\ for (k = clstart[2]; k < clfinish[2]; k++ )\ {\ for (j = clstart[1]; j < clfinish[1]; j++ )\ {\ for (i = clstart[0]; i < clfinish[0]; i++ )\ { #define hypre_BoxLoop0End() }}}hypre_ThreadLoop(iteration_counter,\ hypre_thread_counter, hypre_thread_release,\ hypre_mutex_boxloops);}}} #define hypre_BoxLoop1Begin(loop_size,\ data_box1, start1, stride1, i1)\ {\ hypre_BoxLoopDeclare(loop_size, data_box1, stride1,\ hypre__iinc1, hypre__jinc1, hypre__kinc1);\ int hypre__nx = hypre_IndexX(loop_size);\ int hypre__ny = hypre_IndexY(loop_size);\ int hypre__nz = hypre_IndexZ(loop_size);\ int orig_i1 = hypre_BoxIndexRank(data_box1, start1);\ if (hypre__nx && hypre__ny && hypre__nz )\ {\ hypre_ChunkLoopExternalSetup(hypre__nx, hypre__ny, hypre__nz);\ hypre_ThreadLoopBegin(chunkcount, 0, numchunks, iteration_counter,\ hypre_mutex_boxloops,\ hypre_ChunkLoopInternalSetup(clstart, clfinish, clreset, clfreq,\ hypre__nx, hypre__ny, hypre__nz,\ hypre__cx, hypre__cy, hypre__cz,\ chunkcount)); #define hypre_BoxLoop1For(i, j, k, i1)\ for (k = clstart[2]; k < clfinish[2]; k++)\ {\ for (j = clstart[1]; j < clfinish[1]; j++)\ {\ for (i = clstart[0]; i < clfinish[0]; i++)\ {\ i1 = orig_i1 +\ (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc1 +\ (j + hypre__ny*k)*hypre__jinc1 + k*hypre__kinc1; #define hypre_BoxLoop1End(i1) }}}hypre_ThreadLoop(iteration_counter,\ hypre_thread_counter, hypre_thread_release,\ hypre_mutex_boxloops);}}} #define hypre_BoxLoop2Begin(loop_size,\ data_box1, start1, stride1, i1,\ data_box2, start2, stride2, i2)\ {\ hypre_BoxLoopDeclare(loop_size, data_box1, stride1,\ hypre__iinc1, hypre__jinc1, hypre__kinc1);\ hypre_BoxLoopDeclare(loop_size, data_box2, stride2,\ hypre__iinc2, hypre__jinc2, hypre__kinc2);\ int hypre__nx = hypre_IndexX(loop_size);\ int hypre__ny = hypre_IndexY(loop_size);\ int hypre__nz = hypre_IndexZ(loop_size);\ int orig_i1 = hypre_BoxIndexRank(data_box1, start1);\ int orig_i2 = hypre_BoxIndexRank(data_box2, start2);\ if (hypre__nx && hypre__ny && hypre__nz )\ {\ hypre_ChunkLoopExternalSetup(hypre__nx, hypre__ny, hypre__nz);\ hypre_ThreadLoopBegin(chunkcount, 0, numchunks, iteration_counter,\ hypre_mutex_boxloops,\ hypre_ChunkLoopInternalSetup(clstart, clfinish, clreset, clfreq,\ hypre__nx, hypre__ny, hypre__nz,\ hypre__cx, hypre__cy, hypre__cz,\ chunkcount)) #define hypre_BoxLoop2For(i, j, k, i1, i2)\ for (k = clstart[2]; k < clfinish[2]; k++)\ {\ for (j = clstart[1]; j < clfinish[1]; j++)\ {\ for (i = clstart[0]; i < clfinish[0]; i++)\ {\ i1 = orig_i1 +\ (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc1 +\ (j + hypre__ny*k)*hypre__jinc1 + k*hypre__kinc1;\ i2 = orig_i2 +\ (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc2 +\ (j + hypre__ny*k)*hypre__jinc2 + k*hypre__kinc2; #define hypre_BoxLoop2End(i1, i2) }}}hypre_ThreadLoop(iteration_counter,\ hypre_thread_counter, hypre_thread_release,\ hypre_mutex_boxloops);}}} #define hypre_BoxLoop3Begin(loop_size,\ data_box1, start1, stride1, i1,\ data_box2, start2, stride2, i2,\ data_box3, start3, stride3, i3)\ {\ hypre_BoxLoopDeclare(loop_size, data_box1, stride1,\ hypre__iinc1, hypre__jinc1, hypre__kinc1);\ hypre_BoxLoopDeclare(loop_size, data_box2, stride2,\ hypre__iinc2, hypre__jinc2, hypre__kinc2);\ hypre_BoxLoopDeclare(loop_size, data_box3, stride3,\ hypre__iinc3, hypre__jinc3, hypre__kinc3);\ int hypre__nx = hypre_IndexX(loop_size);\ int hypre__ny = hypre_IndexY(loop_size);\ int hypre__nz = hypre_IndexZ(loop_size);\ int orig_i1 = hypre_BoxIndexRank(data_box1, start1);\ int orig_i2 = hypre_BoxIndexRank(data_box2, start2);\ int orig_i3 = hypre_BoxIndexRank(data_box3, start3);\ if (hypre__nx && hypre__ny && hypre__nz )\ {\ hypre_ChunkLoopExternalSetup(hypre__nx, hypre__ny, hypre__nz);\ hypre_ThreadLoopBegin(chunkcount, 0, numchunks, iteration_counter,\ hypre_mutex_boxloops,\ hypre_ChunkLoopInternalSetup(clstart, clfinish, clreset, clfreq,\ hypre__nx, hypre__ny, hypre__nz,\ hypre__cx, hypre__cy, hypre__cz,\ chunkcount)) #define hypre_BoxLoop3For(i, j, k, i1, i2, i3)\ for (k = clstart[2]; k < clfinish[2]; k++)\ {\ for (j = clstart[1]; j < clfinish[1]; j++)\ {\ for (i = clstart[0]; i < clfinish[0]; i++)\ {\ i1 = orig_i1 +\ (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc1 +\ (j + hypre__ny*k)*hypre__jinc1 + k*hypre__kinc1;\ i2 = orig_i2 +\ (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc2 +\ (j + hypre__ny*k)*hypre__jinc2 + k*hypre__kinc2;\ i3 = orig_i3 +\ (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc3 +\ (j + hypre__ny*k)*hypre__jinc3 + k*hypre__kinc3;\ #define hypre_BoxLoop3End(i1, i2, i3) }}}hypre_ThreadLoop(iteration_counter,\ hypre_thread_counter, hypre_thread_release,\ hypre_mutex_boxloops);}}} #define hypre_BoxLoop4Begin(loop_size,\ data_box1, start1, stride1, i1,\ data_box2, start2, stride2, i2,\ data_box3, start3, stride3, i3,\ data_box4, start4, stride4, i4)\ {\ hypre_BoxLoopDeclare(loop_size, data_box1, stride1,\ hypre__iinc1, hypre__jinc1, hypre__kinc1);\ hypre_BoxLoopDeclare(loop_size, data_box2, stride2,\ hypre__iinc2, hypre__jinc2, hypre__kinc2);\ hypre_BoxLoopDeclare(loop_size, data_box3, stride3,\ hypre__iinc3, hypre__jinc3, hypre__kinc3);\ hypre_BoxLoopDeclare(loop_size, data_box4, stride4,\ hypre__iinc4, hypre__jinc4, hypre__kinc4);\ int hypre__nx = hypre_IndexX(loop_size);\ int hypre__ny = hypre_IndexY(loop_size);\ int hypre__nz = hypre_IndexZ(loop_size);\ int orig_i1 = hypre_BoxIndexRank(data_box1, start1);\ int orig_i2 = hypre_BoxIndexRank(data_box2, start2);\ int orig_i3 = hypre_BoxIndexRank(data_box3, start3);\ int orig_i4 = hypre_BoxIndexRank(data_box4, start4);\ if (hypre__nx && hypre__ny && hypre__nz )\ {\ hypre_ChunkLoopExternalSetup(hypre__nx, hypre__ny, hypre__nz);\ hypre_ThreadLoopBegin(chunkcount, 0, numchunks, iteration_counter,\ hypre_mutex_boxloops,\ hypre_ChunkLoopInternalSetup(clstart, clfinish, clreset, clfreq,\ hypre__nx, hypre__ny, hypre__nz,\ hypre__cx, hypre__cy, hypre__cz,\ chunkcount)) #define hypre_BoxLoop4For(i, j, k, i1, i2, i3, i4)\ for (k = clstart[2]; k < clfinish[2]; k++)\ {\ for (j = clstart[1]; j < clfinish[1]; j++)\ {\ for (i = clstart[0]; i < clfinish[0]; i++)\ {\ i1 = orig_i1 +\ (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc1 +\ (j + hypre__ny*k)*hypre__jinc1 + k*hypre__kinc1;\ i2 = orig_i2 +\ (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc2 +\ (j + hypre__ny*k)*hypre__jinc2 + k*hypre__kinc2;\ i3 = orig_i3 +\ (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc3 +\ (j + hypre__ny*k)*hypre__jinc3 + k*hypre__kinc3;\ i4 = orig_i4 +\ (i + hypre__nx*j + hypre__nx*hypre__ny*k)*hypre__iinc4 +\ (j + hypre__ny*k)*hypre__jinc4 + k*hypre__kinc4;\ #define hypre_BoxLoop4End(i1, i2, i3, i4) }}}hypre_ThreadLoop(iteration_counter,\ hypre_thread_counter, hypre_thread_release,\ hypre_mutex_boxloops);}}} #endif #endif