/*BHEADER********************************************************************** * Copyright (c) 2008, Lawrence Livermore National Security, LLC. * Produced at the Lawrence Livermore National Laboratory. * This file is part of HYPRE. See file COPYRIGHT for details. * * HYPRE is free software; you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License (as published by the Free * Software Foundation) version 2.1 dated February 1999. * * $Revision: 2.4 $ ***********************************************************************EHEADER*/ #include "headers.h" #define DEBUG 0 #if DEBUG char filename[255]; FILE *file; #endif /* this computes a (large enough) size (in doubles) for the message prefix */ #define hypre_CommPrefixSize(ne) \ ( ((ne*sizeof(int) + ne*sizeof(hypre_Box))/sizeof(double)) + 1 ) /*-------------------------------------------------------------------------- * Create a communication package. A grid-based description of a * communication exchange is passed in. This description is then * compiled into an intermediate processor-based description of the * communication. The intermediate processor-based description is * used directly to pack and unpack buffers during the communications. * * Note: The input boxes and processes are destroyed. *--------------------------------------------------------------------------*/ int hypre_CommPkgCreate( hypre_CommInfo *comm_info, hypre_BoxArray *send_data_space, hypre_BoxArray *recv_data_space, int num_values, MPI_Comm comm, hypre_CommPkg **comm_pkg_ptr ) { int ierr = 0; hypre_BoxArrayArray *send_boxes; hypre_BoxArrayArray *recv_boxes; hypre_IndexRef send_stride; hypre_IndexRef recv_stride; int **send_processes; int **recv_processes; int **send_rboxnums; hypre_BoxArrayArray *send_rboxes; hypre_CommPkg *comm_pkg; hypre_CommType *comm_types; hypre_CommType *comm_type; hypre_CommEntryType *ct_entries; int *ct_loc_boxnums; int *ct_rem_boxnums; hypre_Box *ct_loc_boxes; hypre_Box *ct_rem_boxes; int *p_comm_types; int num_comms, num_entries, comm_bufsize; hypre_BoxArray *box_array; hypre_Box *box; hypre_BoxArray *rbox_array; hypre_Box *rbox; hypre_Box *data_box; int *data_offsets; int data_offset; int i, j, k, p, m, size; int num_procs, my_proc; /*------------------------------------------------------ *------------------------------------------------------*/ send_boxes = hypre_CommInfoSendBoxes(comm_info); recv_boxes = hypre_CommInfoRecvBoxes(comm_info); send_stride = hypre_CommInfoSendStride(comm_info); recv_stride = hypre_CommInfoRecvStride(comm_info); send_processes = hypre_CommInfoSendProcesses(comm_info); recv_processes = hypre_CommInfoRecvProcesses(comm_info); send_rboxnums = hypre_CommInfoSendRBoxnums(comm_info); send_rboxes = hypre_CommInfoSendRBoxes(comm_info); MPI_Comm_size(comm, &num_procs ); MPI_Comm_rank(comm, &my_proc ); /*------------------------------------------------------ * Set up various entries in CommPkg *------------------------------------------------------*/ comm_pkg = hypre_CTAlloc(hypre_CommPkg, 1); hypre_CommPkgComm(comm_pkg) = comm; hypre_CommPkgFirstSend(comm_pkg) = 1; hypre_CommPkgFirstRecv(comm_pkg) = 1; hypre_CommPkgNumValues(comm_pkg) = num_values; hypre_CopyIndex(send_stride, hypre_CommPkgSendStride(comm_pkg)); hypre_CopyIndex(recv_stride, hypre_CommPkgRecvStride(comm_pkg)); /*------------------------------------------------------ * Set up send CommType information *------------------------------------------------------*/ p_comm_types = hypre_CTAlloc(int, num_procs); /* set send_data_offsets and send_data_space */ data_offsets = hypre_TAlloc(int, hypre_BoxArraySize(send_data_space)); data_offset = 0; hypre_ForBoxI(i, send_data_space) { data_offsets[i] = data_offset; data_box = hypre_BoxArrayBox(send_data_space, i); data_offset += hypre_BoxVolume(data_box) * num_values; } /* compute num_comms and use -p_comm_types to compute num_entries */ num_comms = 0; num_entries = 0; hypre_ForBoxArrayI(i, send_boxes) { box_array = hypre_BoxArrayArrayBoxArray(send_boxes, i); hypre_ForBoxI(j, box_array) { box = hypre_BoxArrayBox(box_array, j); p = send_processes[i][j]; if (hypre_BoxVolume(box) != 0) { p_comm_types[p]--; num_entries++; if ((p_comm_types[p] == -1) && (p != my_proc)) { num_comms++; } } } } /* compute comm_types */ comm_types = hypre_CTAlloc(hypre_CommType, (num_comms + 1)); ct_entries = hypre_TAlloc(hypre_CommEntryType, num_entries); ct_loc_boxnums = hypre_TAlloc(int, num_entries); ct_rem_boxnums = hypre_TAlloc(int, num_entries); ct_loc_boxes = hypre_TAlloc(hypre_Box, num_entries); ct_rem_boxes = hypre_TAlloc(hypre_Box, num_entries); /* initialize local copy type */ comm_type = &comm_types[0]; k = -p_comm_types[my_proc]; p_comm_types[my_proc] = 0; hypre_CommTypeProc(comm_type) = my_proc; hypre_CommTypeNumEntries(comm_type) = 0; hypre_CommTypeEntries(comm_type) = ct_entries; hypre_CommTypeLocBoxnums(comm_type) = ct_loc_boxnums; hypre_CommTypeRemBoxnums(comm_type) = ct_rem_boxnums; hypre_CommTypeLocBoxes(comm_type) = ct_loc_boxes; hypre_CommTypeRemBoxes(comm_type) = ct_rem_boxes; ct_entries += k; ct_loc_boxnums += k; ct_rem_boxnums += k; ct_loc_boxes += k; ct_rem_boxes += k; m = 1; comm_bufsize = 0; hypre_ForBoxArrayI(i, send_boxes) { box_array = hypre_BoxArrayArrayBoxArray(send_boxes, i); rbox_array = hypre_BoxArrayArrayBoxArray(send_rboxes, i); hypre_ForBoxI(j, box_array) { box = hypre_BoxArrayBox(box_array, j); rbox = hypre_BoxArrayBox(rbox_array, j); p = send_processes[i][j]; if (hypre_BoxVolume(box) != 0) { /* initialize comm type for process p */ if (p_comm_types[p] < 0) { comm_type = &comm_types[m]; k = -p_comm_types[p]; p_comm_types[p] = m; hypre_CommTypeProc(comm_type) = p; size = hypre_CommPrefixSize(k); hypre_CommTypeBufsize(comm_type) = size; comm_bufsize += size; hypre_CommTypeNumEntries(comm_type) = 0; hypre_CommTypeEntries(comm_type) = ct_entries; hypre_CommTypeLocBoxnums(comm_type) = ct_loc_boxnums; hypre_CommTypeRemBoxnums(comm_type) = ct_rem_boxnums; hypre_CommTypeLocBoxes(comm_type) = ct_loc_boxes; hypre_CommTypeRemBoxes(comm_type) = ct_rem_boxes; ct_entries += k; ct_loc_boxnums += k; ct_rem_boxnums += k; ct_loc_boxes += k; ct_rem_boxes += k; m++; } comm_type = &comm_types[p_comm_types[p]]; k = hypre_CommTypeNumEntries(comm_type); hypre_BoxGetStrideVolume(box, send_stride, &size); hypre_CommTypeBufsize(comm_type) += (size*num_values); comm_bufsize += (size*num_values); hypre_CommTypeLocBoxnum(comm_type, k) = i; hypre_CommTypeRemBoxnum(comm_type, k) = send_rboxnums[i][j]; hypre_CopyBox(box, hypre_CommTypeLocBox(comm_type, k)); hypre_CopyBox(rbox, hypre_CommTypeRemBox(comm_type, k)); hypre_CommTypeNumEntries(comm_type) ++; } } } /* Loop over comm_types and set entries */ for (m = 0; m < (num_comms + 1); m++) { comm_type = &comm_types[m]; hypre_CommTypeSetEntries(comm_type, hypre_CommTypeLocBoxnums(comm_type), hypre_CommTypeLocBoxes(comm_type), hypre_CommPkgSendStride(comm_pkg), hypre_CommPkgNumValues(comm_pkg), send_data_space, data_offsets ); if (m > 0) { hypre_CommTypeLocBoxnums(comm_type) = NULL; hypre_CommTypeLocBoxes(comm_type) = NULL; } /* reset p_comm_types to zero */ p_comm_types[hypre_CommTypeProc(comm_type)] = 0; } /* set send info in comm_pkg */ hypre_CommPkgSendBufsize(comm_pkg) = comm_bufsize; hypre_CommPkgNumSends(comm_pkg) = num_comms; hypre_CommPkgSendTypes(comm_pkg) = &comm_types[1]; hypre_CommPkgCopyFromType(comm_pkg) = &comm_types[0]; /* free up data_offsets */ hypre_TFree(data_offsets); /* free up local box info */ comm_type = hypre_CommPkgCopyFromType(comm_pkg); hypre_TFree(hypre_CommTypeLocBoxnums(comm_type)); hypre_TFree(hypre_CommTypeLocBoxes(comm_type)); /*------------------------------------------------------ * Set up recv CommType information *------------------------------------------------------*/ /* set data_offsets and recv_data_space */ data_offsets = hypre_TAlloc(int, hypre_BoxArraySize(recv_data_space)); data_offset = 0; hypre_ForBoxI(i, recv_data_space) { data_offsets[i] = data_offset; data_box = hypre_BoxArrayBox(recv_data_space, i); data_offset += hypre_BoxVolume(data_box) * num_values; } hypre_CommPkgRecvDataOffsets(comm_pkg) = data_offsets; hypre_CommPkgRecvDataSpace(comm_pkg) = hypre_BoxArrayDuplicate(recv_data_space); /* compute num_comms and use -p_comm_types to compute num_entries */ num_comms = 0; num_entries = 0; hypre_ForBoxArrayI(i, recv_boxes) { box_array = hypre_BoxArrayArrayBoxArray(recv_boxes, i); hypre_ForBoxI(j, box_array) { box = hypre_BoxArrayBox(box_array, j); p = recv_processes[i][j]; if (hypre_BoxVolume(box) != 0) { p_comm_types[p]--; num_entries++; if ((p_comm_types[p] == -1) && (p != my_proc)) { num_comms++; } } } } /* compute comm_types */ comm_types = hypre_CTAlloc(hypre_CommType, (num_comms + 1)); ct_entries = hypre_TAlloc(hypre_CommEntryType, num_entries); /* initialize local copy type */ comm_type = &comm_types[0]; k = -p_comm_types[my_proc]; p_comm_types[my_proc] = 0; hypre_CommTypeProc(comm_type) = my_proc; hypre_CommTypeNumEntries(comm_type) = 0; hypre_CommTypeEntries(comm_type) = ct_entries; ct_entries += k; m = 1; comm_bufsize = 0; hypre_ForBoxArrayI(i, recv_boxes) { box_array = hypre_BoxArrayArrayBoxArray(recv_boxes, i); hypre_ForBoxI(j, box_array) { box = hypre_BoxArrayBox(box_array, j); p = recv_processes[i][j]; if (hypre_BoxVolume(box) != 0) { /* initialize comm type for process p */ if (p_comm_types[p] < 0) { comm_type = &comm_types[m]; k = -p_comm_types[p]; p_comm_types[p] = m; hypre_CommTypeProc(comm_type) = p; size = hypre_CommPrefixSize(k); hypre_CommTypeBufsize(comm_type) = size; comm_bufsize += size; hypre_CommTypeNumEntries(comm_type) = 0; hypre_CommTypeEntries(comm_type) = ct_entries; ct_entries += k; m++; } comm_type = &comm_types[p_comm_types[p]]; hypre_BoxGetStrideVolume(box, recv_stride, &size); hypre_CommTypeBufsize(comm_type) += (size*num_values); comm_bufsize += (size*num_values); hypre_CommTypeNumEntries(comm_type) ++; } } } /* set copy type entries */ comm_type = hypre_CommPkgCopyFromType(comm_pkg); hypre_CommTypeSetEntries(&comm_types[0], hypre_CommTypeRemBoxnums(comm_type), hypre_CommTypeRemBoxes(comm_type), hypre_CommPkgRecvStride(comm_pkg), hypre_CommPkgNumValues(comm_pkg), hypre_CommPkgRecvDataSpace(comm_pkg), hypre_CommPkgRecvDataOffsets(comm_pkg) ); /* set recv info in comm_pkg */ hypre_CommPkgRecvBufsize(comm_pkg) = comm_bufsize; hypre_CommPkgNumRecvs(comm_pkg) = num_comms; hypre_CommPkgRecvTypes(comm_pkg) = &comm_types[1]; hypre_CommPkgCopyToType(comm_pkg) = &comm_types[0]; /*------------------------------------------------------ * Debugging stuff *------------------------------------------------------*/ #if DEBUG { MPI_Comm_rank(MPI_COMM_WORLD, &my_proc); sprintf(filename, "zcommboxes.%05d", my_proc); if ((file = fopen(filename, "a")) == NULL) { printf("Error: can't open output file %s\n", filename); exit(1); } fprintf(file, "\n\n============================\n\n"); fprintf(file, "SEND boxes:\n\n"); fprintf(file, "Stride = (%d,%d,%d)\n", hypre_IndexX(send_stride), hypre_IndexY(send_stride), hypre_IndexZ(send_stride)); fprintf(file, "BoxArrayArraySize = %d\n", hypre_BoxArrayArraySize(send_boxes)); hypre_ForBoxArrayI(i, send_boxes) { box_array = hypre_BoxArrayArrayBoxArray(send_boxes, i); fprintf(file, "BoxArraySize = %d\n", hypre_BoxArraySize(box_array)); hypre_ForBoxI(j, box_array) { box = hypre_BoxArrayBox(box_array, j); fprintf(file, "(%d,%d): (%d,%d,%d) x (%d,%d,%d)\n", i, j, hypre_BoxIMinX(box), hypre_BoxIMinY(box), hypre_BoxIMinZ(box), hypre_BoxIMaxX(box), hypre_BoxIMaxY(box), hypre_BoxIMaxZ(box)); fprintf(file, "(%d,%d): %d,%d\n", i, j, send_processes[i][j], send_rboxnums[i][j]); } } fprintf(file, "\n\n============================\n\n"); fprintf(file, "RECV boxes:\n\n"); fprintf(file, "Stride = (%d,%d,%d)\n", hypre_IndexX(recv_stride), hypre_IndexY(recv_stride), hypre_IndexZ(recv_stride)); fprintf(file, "BoxArrayArraySize = %d\n", hypre_BoxArrayArraySize(recv_boxes)); hypre_ForBoxArrayI(i, recv_boxes) { box_array = hypre_BoxArrayArrayBoxArray(recv_boxes, i); fprintf(file, "BoxArraySize = %d\n", hypre_BoxArraySize(box_array)); hypre_ForBoxI(j, box_array) { box = hypre_BoxArrayBox(box_array, j); fprintf(file, "(%d,%d): (%d,%d,%d) x (%d,%d,%d)\n", i, j, hypre_BoxIMinX(box), hypre_BoxIMinY(box), hypre_BoxIMinZ(box), hypre_BoxIMaxX(box), hypre_BoxIMaxY(box), hypre_BoxIMaxZ(box)); fprintf(file, "(%d,%d): %d\n", i, j, recv_processes[i][j]); } } fflush(file); fclose(file); } #endif #if DEBUG { hypre_CommEntryType *comm_entry; int offset, dim; int *length; int *stride; MPI_Comm_rank(MPI_COMM_WORLD, &my_proc); sprintf(filename, "zcommentries.%05d", my_proc); if ((file = fopen(filename, "a")) == NULL) { printf("Error: can't open output file %s\n", filename); exit(1); } fprintf(file, "\n\n============================\n\n"); fprintf(file, "SEND entries:\n\n"); fprintf(file, "num_sends = %d\n", hypre_CommPkgNumSends(comm_pkg)); comm_types = hypre_CommPkgCopyFromType(comm_pkg); for (m = 0; m < (hypre_CommPkgNumSends(comm_pkg) + 1); m++) { comm_type = &comm_types[m]; fprintf(file, "process = %d\n", hypre_CommTypeProc(comm_type)); fprintf(file, "num_entries = %d\n", hypre_CommTypeNumEntries(comm_type)); for (i = 0; i < hypre_CommTypeNumEntries(comm_type); i++) { comm_entry = hypre_CommTypeEntry(comm_type, i); offset = hypre_CommEntryTypeOffset(comm_entry); dim = hypre_CommEntryTypeDim(comm_entry); length = hypre_CommEntryTypeLengthArray(comm_entry); stride = hypre_CommEntryTypeStrideArray(comm_entry); fprintf(file, "%d: %d,%d,(%d,%d,%d,%d),(%d,%d,%d,%d)\n", i, offset, dim, length[0], length[1], length[2], length[3], stride[0], stride[1], stride[2], stride[3]); } } fprintf(file, "\n\n============================\n\n"); fprintf(file, "RECV entries:\n\n"); fprintf(file, "num_recvs = %d\n", hypre_CommPkgNumRecvs(comm_pkg)); comm_types = hypre_CommPkgCopyToType(comm_pkg); comm_type = &comm_types[0]; fprintf(file, "process = %d\n", hypre_CommTypeProc(comm_type)); fprintf(file, "num_entries = %d\n", hypre_CommTypeNumEntries(comm_type)); for (i = 0; i < hypre_CommTypeNumEntries(comm_type); i++) { comm_entry = hypre_CommTypeEntry(comm_type, i); offset = hypre_CommEntryTypeOffset(comm_entry); dim = hypre_CommEntryTypeDim(comm_entry); length = hypre_CommEntryTypeLengthArray(comm_entry); stride = hypre_CommEntryTypeStrideArray(comm_entry); fprintf(file, "%d: %d,%d,(%d,%d,%d,%d),(%d,%d,%d,%d)\n", i, offset, dim, length[0], length[1], length[2], length[3], stride[0], stride[1], stride[2], stride[3]); } for (m = 1; m < (hypre_CommPkgNumRecvs(comm_pkg) + 1); m++) { comm_type = &comm_types[m]; fprintf(file, "process = %d\n", hypre_CommTypeProc(comm_type)); fprintf(file, "num_entries = %d\n", hypre_CommTypeNumEntries(comm_type)); } fflush(file); fclose(file); } #endif /*------------------------------------------------------ * Clean up *------------------------------------------------------*/ hypre_CommInfoDestroy(comm_info); hypre_TFree(p_comm_types); *comm_pkg_ptr = comm_pkg; return ierr; } /*-------------------------------------------------------------------------- *--------------------------------------------------------------------------*/ int hypre_CommTypeSetEntries( hypre_CommType *comm_type, int *boxnums, hypre_Box *boxes, hypre_Index stride, int num_values, hypre_BoxArray *data_space, int *data_offsets ) { int ierr = 0; int num_entries = hypre_CommTypeNumEntries(comm_type); hypre_CommEntryType *entries = hypre_CommTypeEntries(comm_type); hypre_Box *box; hypre_Box *data_box; int i, j; for (j = 0; j < num_entries; j++) { i = boxnums[j]; box = &boxes[j]; data_box = hypre_BoxArrayBox(data_space, i); hypre_CommTypeSetEntry(box, stride, data_box, num_values, data_offsets[i], &entries[j]); } return ierr; } /*-------------------------------------------------------------------------- *--------------------------------------------------------------------------*/ int hypre_CommTypeSetEntry( hypre_Box *box, hypre_Index stride, hypre_Box *data_box, int num_values, int data_box_offset, hypre_CommEntryType *comm_entry ) { int ierr = 0; int *length_array; int *stride_array; hypre_Index size; int i, j, dim; /*------------------------------------------------------ * Set offset *------------------------------------------------------*/ hypre_CommEntryTypeOffset(comm_entry) = data_box_offset + hypre_BoxIndexRank(data_box, hypre_BoxIMin(box)); /*------------------------------------------------------ * Set length_array, stride_array, and dim *------------------------------------------------------*/ length_array = hypre_CommEntryTypeLengthArray(comm_entry); stride_array = hypre_CommEntryTypeStrideArray(comm_entry); /* initialize length_array */ hypre_BoxGetStrideSize(box, stride, size); for (i = 0; i < 3; i++) { length_array[i] = hypre_IndexD(size, i); } length_array[3] = num_values; /* initialize stride_array */ for (i = 0; i < 3; i++) { stride_array[i] = hypre_IndexD(stride, i); for (j = 0; j < i; j++) { stride_array[i] *= hypre_BoxSizeD(data_box, j); } } stride_array[3] = hypre_BoxVolume(data_box); /* eliminate dimensions with length_array = 1 */ dim = 4; i = 0; while (i < dim) { if(length_array[i] == 1) { for(j = i; j < (dim - 1); j++) { length_array[j] = length_array[j+1]; stride_array[j] = stride_array[j+1]; } length_array[dim - 1] = 1; stride_array[dim - 1] = 1; dim--; } else { i++; } } #if 0 /* sort the array according to length_array (largest to smallest) */ for (i = (dim-1); i > 0; i--) { for (j = 0; j < i; j++) { if (length_array[j] < length_array[j+1]) { i_tmp = length_array[j]; length_array[j] = length_array[j+1]; length_array[j+1] = i_tmp; i_tmp = stride_array[j]; stride_array[j] = stride_array[j+1]; stride_array[j+1] = i_tmp; } } } #endif /* if every len was 1 we need to fix to communicate at least one */ if(!dim) { dim = 1; } hypre_CommEntryTypeDim(comm_entry) = dim; return ierr; } /*-------------------------------------------------------------------------- * Initialize a non-blocking communication exchange. * * The communication buffers are created, the send buffer is manually * packed, and the communication requests are posted. *--------------------------------------------------------------------------*/ int hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, double *send_data, double *recv_data, hypre_CommHandle **comm_handle_ptr ) { int ierr = 0; hypre_CommHandle *comm_handle; int num_sends = hypre_CommPkgNumSends(comm_pkg); int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); MPI_Comm comm = hypre_CommPkgComm(comm_pkg); int num_requests; MPI_Request *requests; MPI_Status *status; double **send_buffers; double **recv_buffers; hypre_CommType *comm_type; hypre_CommEntryType *comm_entry; int num_entries; int *length_array, length; int *stride_array, stride; double *dptr, *iptr, *jptr, *kptr, *lptr; int *qptr; int i, j, ii, jj, kk, ll; int size; /*-------------------------------------------------------------------- * allocate requests and status *--------------------------------------------------------------------*/ num_requests = num_sends + num_recvs; requests = hypre_CTAlloc(MPI_Request, num_requests); status = hypre_CTAlloc(MPI_Status, num_requests); /*-------------------------------------------------------------------- * allocate buffers *--------------------------------------------------------------------*/ /* allocate send buffers */ send_buffers = hypre_TAlloc(double *, num_sends); if (num_sends > 0) { size = hypre_CommPkgSendBufsize(comm_pkg); send_buffers[0] = hypre_SharedTAlloc(double, size); for (i = 1; i < num_sends; i++) { comm_type = hypre_CommPkgSendType(comm_pkg, i-1); size = hypre_CommTypeBufsize(comm_type); send_buffers[i] = send_buffers[i-1] + size; } } /* allocate recv buffers */ recv_buffers = hypre_TAlloc(double *, num_recvs); if (num_recvs > 0) { size = hypre_CommPkgRecvBufsize(comm_pkg); recv_buffers[0] = hypre_SharedTAlloc(double, size); for (i = 1; i < num_recvs; i++) { comm_type = hypre_CommPkgRecvType(comm_pkg, i-1); size = hypre_CommTypeBufsize(comm_type); recv_buffers[i] = recv_buffers[i-1] + size; } } /*-------------------------------------------------------------------- * pack send buffers *--------------------------------------------------------------------*/ for (i = 0; i < num_sends; i++) { comm_type = hypre_CommPkgSendType(comm_pkg, i); num_entries = hypre_CommTypeNumEntries(comm_type); dptr = (double *) send_buffers[i]; if ( hypre_CommPkgFirstSend(comm_pkg) ) { qptr = (int *) send_buffers[i]; memcpy(qptr, hypre_CommTypeRemBoxnums(comm_type), num_entries*sizeof(int)); qptr += num_entries; memcpy(qptr, hypre_CommTypeRemBoxes(comm_type), num_entries*sizeof(hypre_Box)); hypre_CommTypeRemBoxnums(comm_type) = NULL; hypre_CommTypeRemBoxes(comm_type) = NULL; dptr += hypre_CommPrefixSize(num_entries); } for (j = 0; j < num_entries; j++) { comm_entry = hypre_CommTypeEntry(comm_type, j); length_array = hypre_CommEntryTypeLengthArray(comm_entry); stride_array = hypre_CommEntryTypeStrideArray(comm_entry); lptr = send_data + hypre_CommEntryTypeOffset(comm_entry); for (ll = 0; ll < length_array[3]; ll++) { kptr = lptr; for (kk = 0; kk < length_array[2]; kk++) { jptr = kptr; for (jj = 0; jj < length_array[1]; jj++) { if (stride_array[0] == 1) { memcpy(dptr, jptr, length_array[0]*sizeof(double)); } else { iptr = jptr; length = length_array[0]; stride = stride_array[0]; for (ii = 0; ii < length; ii++) { dptr[ii] = *iptr; iptr += stride; } } dptr += length_array[0]; jptr += stride_array[1]; } kptr += stride_array[2]; } lptr += stride_array[3]; } } } /*-------------------------------------------------------------------- * post receives and initiate sends *--------------------------------------------------------------------*/ j = 0; for(i = 0; i < num_recvs; i++) { comm_type = hypre_CommPkgRecvType(comm_pkg, i); MPI_Irecv(recv_buffers[i], hypre_CommTypeBufsize(comm_type)*sizeof(double), MPI_BYTE, hypre_CommTypeProc(comm_type), 0, comm, &requests[j++]); if ( hypre_CommPkgFirstRecv(comm_pkg) ) { size = hypre_CommPrefixSize(hypre_CommTypeNumEntries(comm_type)); hypre_CommTypeBufsize(comm_type) -= size; hypre_CommPkgRecvBufsize(comm_pkg) -= size; } } for(i = 0; i < num_sends; i++) { comm_type = hypre_CommPkgSendType(comm_pkg, i); MPI_Isend(send_buffers[i], hypre_CommTypeBufsize(comm_type)*sizeof(double), MPI_BYTE, hypre_CommTypeProc(comm_type), 0, comm, &requests[j++]); if ( hypre_CommPkgFirstSend(comm_pkg) ) { size = hypre_CommPrefixSize(hypre_CommTypeNumEntries(comm_type)); hypre_CommTypeBufsize(comm_type) -= size; hypre_CommPkgSendBufsize(comm_pkg) -= size; } } hypre_ExchangeLocalData(comm_pkg, send_data, recv_data); /*-------------------------------------------------------------------- * free up remote boxnums and turn off first_send indicator *--------------------------------------------------------------------*/ if ( hypre_CommPkgFirstSend(comm_pkg) ) { comm_type = hypre_CommPkgCopyFromType(comm_pkg); hypre_TFree(hypre_CommTypeRemBoxnums(comm_type)); hypre_TFree(hypre_CommTypeRemBoxes(comm_type)); hypre_CommPkgFirstSend(comm_pkg) = 0; } /*-------------------------------------------------------------------- * set up comm_handle and return *--------------------------------------------------------------------*/ comm_handle = hypre_TAlloc(hypre_CommHandle, 1); hypre_CommHandleCommPkg(comm_handle) = comm_pkg; hypre_CommHandleSendData(comm_handle) = send_data; hypre_CommHandleRecvData(comm_handle) = recv_data; hypre_CommHandleNumRequests(comm_handle) = num_requests; hypre_CommHandleRequests(comm_handle) = requests; hypre_CommHandleStatus(comm_handle) = status; hypre_CommHandleSendBuffers(comm_handle) = send_buffers; hypre_CommHandleRecvBuffers(comm_handle) = recv_buffers; *comm_handle_ptr = comm_handle; return ierr; } /*-------------------------------------------------------------------------- * Finalize a communication exchange. This routine blocks until all * of the communication requests are completed. * * The communication requests are completed, and the receive buffer is * manually unpacked. *--------------------------------------------------------------------------*/ int hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) { int ierr = 0; hypre_CommPkg *comm_pkg = hypre_CommHandleCommPkg(comm_handle); double **send_buffers = hypre_CommHandleSendBuffers(comm_handle); double **recv_buffers = hypre_CommHandleRecvBuffers(comm_handle); int num_sends = hypre_CommPkgNumSends(comm_pkg); int num_recvs = hypre_CommPkgNumRecvs(comm_pkg); hypre_CommType *comm_type; hypre_CommEntryType *comm_entry; int num_entries; int *length_array, length; int *stride_array, stride; double *iptr, *jptr, *kptr, *lptr; double *dptr; int *qptr; int *boxnums; hypre_Box *boxes; int i, j, ii, jj, kk, ll; /*-------------------------------------------------------------------- * finish communications *--------------------------------------------------------------------*/ if (hypre_CommHandleNumRequests(comm_handle)) { MPI_Waitall(hypre_CommHandleNumRequests(comm_handle), hypre_CommHandleRequests(comm_handle), hypre_CommHandleStatus(comm_handle)); } /*-------------------------------------------------------------------- * unpack recv buffers *--------------------------------------------------------------------*/ for (i = 0; i < num_recvs; i++) { comm_type = hypre_CommPkgRecvType(comm_pkg, i); num_entries = hypre_CommTypeNumEntries(comm_type); dptr = (double *) recv_buffers[i]; if ( hypre_CommPkgFirstRecv(comm_pkg) ) { qptr = (int *) recv_buffers[i]; boxnums = (int *) qptr; qptr += num_entries; boxes = (hypre_Box *) qptr; hypre_CommTypeSetEntries(comm_type, boxnums, boxes, hypre_CommPkgRecvStride(comm_pkg), hypre_CommPkgNumValues(comm_pkg), hypre_CommPkgRecvDataSpace(comm_pkg), hypre_CommPkgRecvDataOffsets(comm_pkg) ); dptr += hypre_CommPrefixSize(num_entries); } for (j = 0; j < num_entries; j++) { comm_entry = hypre_CommTypeEntry(comm_type, j); length_array = hypre_CommEntryTypeLengthArray(comm_entry); stride_array = hypre_CommEntryTypeStrideArray(comm_entry); lptr = hypre_CommHandleRecvData(comm_handle) + hypre_CommEntryTypeOffset(comm_entry); for (ll = 0; ll < length_array[3]; ll++) { kptr = lptr; for (kk = 0; kk < length_array[2]; kk++) { jptr = kptr; for (jj = 0; jj < length_array[1]; jj++) { if (stride_array[0] == 1) { memcpy(jptr, dptr, length_array[0]*sizeof(double)); } else { iptr = jptr; length = length_array[0]; stride = stride_array[0]; for (ii = 0; ii < length; ii++) { *iptr = dptr[ii]; iptr += stride; } } dptr += length_array[0]; jptr += stride_array[1]; } kptr += stride_array[2]; } lptr += stride_array[3]; } } } /*-------------------------------------------------------------------- * turn off first_recv indicator *--------------------------------------------------------------------*/ hypre_CommPkgFirstRecv(comm_pkg) = 0; /*-------------------------------------------------------------------- * Free up communication handle *--------------------------------------------------------------------*/ hypre_TFree(hypre_CommHandleRequests(comm_handle)); hypre_TFree(hypre_CommHandleStatus(comm_handle)); if (num_sends > 0) { hypre_SharedTFree(send_buffers[0]); } if (num_recvs > 0) { hypre_SharedTFree(recv_buffers[0]); } hypre_TFree(send_buffers); hypre_TFree(recv_buffers); hypre_TFree(comm_handle); return ierr; } /*-------------------------------------------------------------------------- * Execute local data exchanges. *--------------------------------------------------------------------------*/ int hypre_ExchangeLocalData( hypre_CommPkg *comm_pkg, double *send_data, double *recv_data ) { hypre_CommType *copy_from_type; hypre_CommType *copy_to_type; hypre_CommEntryType *copy_from_entry; hypre_CommEntryType *copy_to_entry; double *from_dp; int *from_stride_array; int from_i; double *to_dp; int *to_stride_array; int to_i; int *length_array; int i0, i1, i2, i3; int i; int ierr = 0; /*-------------------------------------------------------------------- * copy local data *--------------------------------------------------------------------*/ copy_from_type = hypre_CommPkgCopyFromType(comm_pkg); copy_to_type = hypre_CommPkgCopyToType(comm_pkg); for (i = 0; i < hypre_CommTypeNumEntries(copy_from_type); i++) { copy_from_entry = hypre_CommTypeEntry(copy_from_type, i); copy_to_entry = hypre_CommTypeEntry(copy_to_type, i); from_dp = send_data + hypre_CommEntryTypeOffset(copy_from_entry); to_dp = recv_data + hypre_CommEntryTypeOffset(copy_to_entry); /* copy data only when necessary */ if (to_dp != from_dp) { length_array = hypre_CommEntryTypeLengthArray(copy_from_entry); from_stride_array = hypre_CommEntryTypeStrideArray(copy_from_entry); to_stride_array = hypre_CommEntryTypeStrideArray(copy_to_entry); for (i3 = 0; i3 < length_array[3]; i3++) { for (i2 = 0; i2 < length_array[2]; i2++) { for (i1 = 0; i1 < length_array[1]; i1++) { from_i = (i3*from_stride_array[3] + i2*from_stride_array[2] + i1*from_stride_array[1] ); to_i = (i3*to_stride_array[3] + i2*to_stride_array[2] + i1*to_stride_array[1] ); for (i0 = 0; i0 < length_array[0]; i0++) { to_dp[to_i] = from_dp[from_i]; from_i += from_stride_array[0]; to_i += to_stride_array[0]; } } } } } } return ( ierr ); } /*-------------------------------------------------------------------------- *--------------------------------------------------------------------------*/ int hypre_CommPkgDestroy( hypre_CommPkg *comm_pkg ) { int ierr = 0; hypre_CommType *comm_types; if (comm_pkg) { comm_types = hypre_CommPkgCopyToType(comm_pkg); hypre_TFree(hypre_CommTypeEntries(comm_types)); hypre_TFree(hypre_CommTypeLocBoxnums(comm_types)); hypre_TFree(hypre_CommTypeRemBoxnums(comm_types)); hypre_TFree(hypre_CommTypeLocBoxes(comm_types)); hypre_TFree(hypre_CommTypeRemBoxes(comm_types)); hypre_TFree(comm_types); comm_types = hypre_CommPkgCopyFromType(comm_pkg); hypre_TFree(hypre_CommTypeEntries(comm_types)); hypre_TFree(hypre_CommTypeLocBoxnums(comm_types)); hypre_TFree(hypre_CommTypeRemBoxnums(comm_types)); hypre_TFree(hypre_CommTypeLocBoxes(comm_types)); hypre_TFree(hypre_CommTypeRemBoxes(comm_types)); hypre_TFree(comm_types); hypre_TFree(hypre_CommPkgRecvDataOffsets(comm_pkg)); hypre_BoxArrayDestroy(hypre_CommPkgRecvDataSpace(comm_pkg)); hypre_TFree(comm_pkg); } return ierr; }