/*BHEADER********************************************************************** * Copyright (c) 2008, Lawrence Livermore National Security, LLC. * Produced at the Lawrence Livermore National Laboratory. * This file is part of HYPRE. See file COPYRIGHT for details. * * HYPRE is free software; you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License (as published by the Free * Software Foundation) version 2.1 dated February 1999. * * $Revision: 2.4 $ ***********************************************************************EHEADER*/ #include "headers.h" /*==========================================================================*/ hypre_ParCSRCommHandle * hypre_ParCSRCommHandleCreate ( int job, hypre_ParCSRCommPkg *comm_pkg, void *send_data, void *recv_data ) { int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); MPI_Comm comm = hypre_ParCSRCommPkgComm(comm_pkg); hypre_ParCSRCommHandle *comm_handle; int num_requests; MPI_Request *requests; int i, j; int my_id, num_procs; int ip, vec_start, vec_len; /*-------------------------------------------------------------------- * hypre_Initialize sets up a communication handle, * posts receives and initiates sends. It always requires num_sends, * num_recvs, recv_procs and send_procs to be set in comm_pkg. * There are different options for job: * job = 1 : is used to initialize communication exchange for the parts * of vector needed to perform a Matvec, it requires send_data * and recv_data to be doubles, recv_vec_starts and * send_map_starts need to be set in comm_pkg. * job = 2 : is used to initialize communication exchange for the parts * of vector needed to perform a MatvecT, it requires send_data * and recv_data to be doubles, recv_vec_starts and * send_map_starts need to be set in comm_pkg. * job = 11: similar to job = 1, but exchanges data of type int (not double), * requires send_data and recv_data to be ints * recv_vec_starts and send_map_starts need to be set in comm_pkg. * job = 12: similar to job = 1, but exchanges data of type int (not double), * requires send_data and recv_data to be ints * recv_vec_starts and send_map_starts need to be set in comm_pkg. * job = 21: similar to job = 1, but exchanges data of type int (not double), * requires send_data and recv_data to be long longs * recv_vec_starts and send_map_starts need to be set in comm_pkg. * job = 22: similar to job = 1, but exchanges data of type int (not double), * requires send_data and recv_data to be long longs * recv_vec_starts and send_map_starts need to be set in comm_pkg. * default: ignores send_data and recv_data, requires send_mpi_types * and recv_mpi_types to be set in comm_pkg. * datatypes need to point to absolute * addresses, e.g. generated using MPI_Address . *--------------------------------------------------------------------*/ num_requests = num_sends + num_recvs; requests = hypre_CTAlloc(MPI_Request, num_requests); MPI_Comm_size(comm,&num_procs); MPI_Comm_rank(comm,&my_id); j = 0; switch (job) { case 1: { double *d_send_data = (double *) send_data; double *d_recv_data = (double *) recv_data; for (i = 0; i < num_recvs; i++) { ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start; MPI_Irecv(&d_recv_data[vec_start], vec_len, MPI_DOUBLE, ip, 0, comm, &requests[j++]); } for (i = 0; i < num_sends; i++) { vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1)-vec_start; ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); MPI_Isend(&d_send_data[vec_start], vec_len, MPI_DOUBLE, ip, 0, comm, &requests[j++]); } break; } case 2: { double *d_send_data = (double *) send_data; double *d_recv_data = (double *) recv_data; for (i = 0; i < num_sends; i++) { vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1) - vec_start; ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); MPI_Irecv(&d_recv_data[vec_start], vec_len, MPI_DOUBLE, ip, 0, comm, &requests[j++]); } for (i = 0; i < num_recvs; i++) { ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start; MPI_Isend(&d_send_data[vec_start], vec_len, MPI_DOUBLE, ip, 0, comm, &requests[j++]); } break; } case 11: { int *i_send_data = (int *) send_data; int *i_recv_data = (int *) recv_data; for (i = 0; i < num_recvs; i++) { ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start; MPI_Irecv(&i_recv_data[vec_start], vec_len, MPI_INT, ip, 0, comm, &requests[j++]); } for (i = 0; i < num_sends; i++) { vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1)-vec_start; ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); MPI_Isend(&i_send_data[vec_start], vec_len, MPI_INT, ip, 0, comm, &requests[j++]); } break; } case 12: { int *i_send_data = (int *) send_data; int *i_recv_data = (int *) recv_data; for (i = 0; i < num_sends; i++) { vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1) - vec_start; ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); MPI_Irecv(&i_recv_data[vec_start], vec_len, MPI_INT, ip, 0, comm, &requests[j++]); } for (i = 0; i < num_recvs; i++) { ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start; MPI_Isend(&i_send_data[vec_start], vec_len, MPI_INT, ip, 0, comm, &requests[j++]); } break; } case 21: { HYPRE_BigInt *i_send_data = (HYPRE_BigInt *) send_data; HYPRE_BigInt *i_recv_data = (HYPRE_BigInt *) recv_data; for (i = 0; i < num_recvs; i++) { ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start; MPI_Irecv(&i_recv_data[vec_start], vec_len, MPI_HYPRE_BIG_INT, ip, 0, comm, &requests[j++]); } for (i = 0; i < num_sends; i++) { vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1)-vec_start; ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); MPI_Isend(&i_send_data[vec_start], vec_len, MPI_HYPRE_BIG_INT, ip, 0, comm, &requests[j++]); } break; } case 22: { HYPRE_BigInt *i_send_data = (HYPRE_BigInt *) send_data; HYPRE_BigInt *i_recv_data = (HYPRE_BigInt *) recv_data; for (i = 0; i < num_sends; i++) { vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1) - vec_start; ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); MPI_Irecv(&i_recv_data[vec_start], vec_len, MPI_HYPRE_BIG_INT, ip, 0, comm, &requests[j++]); } for (i = 0; i < num_recvs; i++) { ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start; MPI_Isend(&i_send_data[vec_start], vec_len, MPI_HYPRE_BIG_INT, ip, 0, comm, &requests[j++]); } break; } /* default : { for (i = 0; i < num_recvs; i++) { ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); MPI_Irecv(MPI_BOTTOM, 1, hypre_ParCSRCommPkgRecvMPIType(comm_pkg, i), ip, 0, comm, &requests[j++]); } for (i = 0; i < num_sends; i++) { ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); MPI_Isend(MPI_BOTTOM, 1, hypre_ParCSRCommPkgSendMPIType(comm_pkg, i), ip, 0, comm, &requests[j++]); } break; } */ } /*-------------------------------------------------------------------- * set up comm_handle and return *--------------------------------------------------------------------*/ comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle, 1); hypre_ParCSRCommHandleCommPkg(comm_handle) = comm_pkg; hypre_ParCSRCommHandleSendData(comm_handle) = send_data; hypre_ParCSRCommHandleRecvData(comm_handle) = recv_data; hypre_ParCSRCommHandleNumRequests(comm_handle) = num_requests; hypre_ParCSRCommHandleRequests(comm_handle) = requests; return ( comm_handle ); } int hypre_ParCSRCommHandleDestroy( hypre_ParCSRCommHandle *comm_handle ) { MPI_Status *status0; int ierr = 0; if ( comm_handle==NULL ) return ierr; if (hypre_ParCSRCommHandleNumRequests(comm_handle)) { status0 = hypre_CTAlloc(MPI_Status, hypre_ParCSRCommHandleNumRequests(comm_handle)); MPI_Waitall(hypre_ParCSRCommHandleNumRequests(comm_handle), hypre_ParCSRCommHandleRequests(comm_handle), status0); hypre_TFree(status0); } hypre_TFree(hypre_ParCSRCommHandleRequests(comm_handle)); hypre_TFree(comm_handle); return ierr; } /* hypre_MatCommPkgCreate_core does all the communications and computations for hypre_MatCommPkgCreate ( hypre_ParCSRMatrix *A) To support both data types, it has hardly any data structures other than int*. */ void hypre_MatvecCommPkgCreate_core ( /* input args: */ MPI_Comm comm, HYPRE_BigInt * col_map_offd, HYPRE_BigInt first_col_diag, HYPRE_BigInt * col_starts, int num_cols_diag, int num_cols_offd, HYPRE_BigInt firstColDiag, HYPRE_BigInt * colMapOffd, int data, /* = 1 for a matrix with floating-point data, =0 for Boolean matrix */ /* pointers to output args: */ int * p_num_recvs, int ** p_recv_procs, int ** p_recv_vec_starts, int * p_num_sends, int ** p_send_procs, int ** p_send_map_starts, int ** p_send_map_elmts ) { int i, j; int num_procs, my_id, proc_num, num_elmts; int local_info; HYPRE_BigInt offd_col; HYPRE_BigInt *big_buf_data = NULL; int *proc_mark, *proc_add, *tmp, *recv_buf, *displs, *info; /* outputs: */ int num_recvs, * recv_procs, * recv_vec_starts; int num_sends, * send_procs, * send_map_starts, * send_map_elmts; int ip, vec_start, vec_len, num_requests; MPI_Request *requests; MPI_Status *status; MPI_Comm_size(comm, &num_procs); MPI_Comm_rank(comm, &my_id); proc_mark = hypre_CTAlloc(int, num_procs); proc_add = hypre_CTAlloc(int, num_procs); info = hypre_CTAlloc(int, num_procs); /* ---------------------------------------------------------------------- * determine which processors to receive from (set proc_mark) and num_recvs, * at the end of the loop proc_mark[i] contains the number of elements to be * received from Proc. i * ---------------------------------------------------------------------*/ for (i=0; i < num_procs; i++) proc_add[i] = 0; proc_num = 0; if (num_cols_offd) offd_col = col_map_offd[0]; num_recvs=0; j = 0; for (i=0; i < num_cols_offd; i++) { if (num_cols_diag) proc_num = hypre_min(num_procs-1,offd_col / num_cols_diag); while (col_starts[proc_num] > offd_col ) proc_num = proc_num-1; while (col_starts[proc_num+1]-1 < offd_col ) proc_num = proc_num+1; proc_mark[num_recvs] = proc_num; j = i; while (col_starts[proc_num+1] > offd_col) { proc_add[num_recvs]++; if (j < num_cols_offd-1) { j++; offd_col = col_map_offd[j]; } else { j++; offd_col = col_starts[num_procs]; } } num_recvs++; if (j < num_cols_offd) i = j-1; else i=j; } local_info = 2*num_recvs; MPI_Allgather(&local_info, 1, MPI_INT, info, 1, MPI_INT, comm); /* ---------------------------------------------------------------------- * generate information to be sent: tmp contains for each recv_proc: * id of recv_procs, number of elements to be received for this processor, * indices of elements (in this order) * ---------------------------------------------------------------------*/ displs = hypre_CTAlloc(int, num_procs+1); displs[0] = 0; for (i=1; i < num_procs+1; i++) displs[i] = displs[i-1]+info[i-1]; recv_buf = hypre_CTAlloc(int, displs[num_procs]); recv_procs = NULL; tmp = NULL; if (num_recvs) { recv_procs = hypre_CTAlloc(int, num_recvs); tmp = hypre_CTAlloc(int, local_info); } recv_vec_starts = hypre_CTAlloc(int, num_recvs+1); j = 0; if (num_recvs) recv_vec_starts[0] = 0; for (i=0; i < num_recvs; i++) { num_elmts = proc_add[i]; recv_procs[i] = proc_mark[i]; recv_vec_starts[i+1] = recv_vec_starts[i]+num_elmts; tmp[j++] = proc_mark[i]; tmp[j++] = num_elmts; } MPI_Allgatherv(tmp,local_info,MPI_INT,recv_buf,info,displs,MPI_INT,comm); /* ---------------------------------------------------------------------- * determine num_sends and number of elements to be sent * ---------------------------------------------------------------------*/ num_sends = 0; num_elmts = 0; proc_add[0] = 0; for (i=0; i < num_procs; i++) { j = displs[i]; while ( j < displs[i+1]) { if (recv_buf[j++] == my_id) { proc_mark[num_sends] = i; num_sends++; proc_add[num_sends] = proc_add[num_sends-1]+recv_buf[j]; break; } j++; } } /* ---------------------------------------------------------------------- * determine send_procs and actual elements to be send (in send_map_elmts) * and send_map_starts whose i-th entry points to the beginning of the * elements to be send to proc. i * ---------------------------------------------------------------------*/ send_procs = NULL; send_map_elmts = NULL; if (num_sends) { send_procs = hypre_CTAlloc(int, num_sends); send_map_elmts = hypre_CTAlloc(int, proc_add[num_sends]); big_buf_data = hypre_CTAlloc(HYPRE_BigInt, proc_add[num_sends]); } send_map_starts = hypre_CTAlloc(int, num_sends+1); num_requests = num_recvs+num_sends; if (num_requests) { requests = hypre_CTAlloc(MPI_Request, num_requests); status = hypre_CTAlloc(MPI_Status, num_requests); } if (num_sends) send_map_starts[0] = 0; for (i=0; i < num_sends; i++) { send_map_starts[i+1] = proc_add[i+1]; send_procs[i] = proc_mark[i]; } j=0; for (i=0; i < num_sends; i++) { vec_start = send_map_starts[i]; vec_len = send_map_starts[i+1] - vec_start; ip = send_procs[i]; MPI_Irecv(&big_buf_data[vec_start], vec_len, MPI_HYPRE_BIG_INT, ip, 0, comm, &requests[j++]); } for (i=0; i < num_recvs; i++) { vec_start = recv_vec_starts[i]; vec_len = recv_vec_starts[i+1] - vec_start; ip = recv_procs[i]; MPI_Isend(&col_map_offd[vec_start], vec_len, MPI_HYPRE_BIG_INT, ip, 0, comm, &requests[j++]); } if (num_requests) { MPI_Waitall(num_requests, requests, status); hypre_TFree(requests); hypre_TFree(status); } if (num_sends) { for (i=0; i