/*****************************************************************************
* FILE: mpithreads_both.c
* DESCRIPTION:
*   This program illustrates the simultaneous use of MPI and Pthreads.
*   It is essentially a simple combination of a code that implements a dot
*   product using threads, and a code that uses MPI for the same purpose.
*   It is the last of four codes used to show the progression from a serial
*   program to a hybrid MPI/Pthreads program. The other relevant codes are:
*     - mpithreads_serial.c  - The serial version
*     - mpithreads_threads.c - A shared memory programming model using
*                              Pthreads
*     - mpithreads_mpi.c     - A distributed memory programming model with MPI
*   All the internode MPI communication is done by the main thread on each
*   node - the other threads within that node need not even be aware that
*   internode communication is being performed. Use of the SPMD model for
*   MPI was chosen for convenience, with replication of the main data on
*   all nodes. A more memory efficient implementation would be advisable
*   for larger data sets. This is the simplest model for mixed MPI/Pthreads
*   programming.
* SOURCE: Vijay Sonnad, IBM
* LAST REVISED: 01/29/09 Blaise Barney
******************************************************************************/
|
|---|
#include "mpi.h"
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
|
|---|
| 28 | /*
|
|---|
| 29 | This structure has been changed slightly from the previous cases
|
|---|
| 30 | to include the number of threads per node.
|
|---|
| 31 | */
|
|---|
| 32 |
|
|---|
| 33 | typedef struct
|
|---|
| 34 | {
|
|---|
| 35 | double *a;
|
|---|
| 36 | double *b;
|
|---|
| 37 | double sum;
|
|---|
| 38 | int veclen;
|
|---|
| 39 | int numthrds;
|
|---|
| 40 | } DOTDATA;
|
|---|
| 41 |
|
|---|
| 42 | /* Define globally accessible variables and a mutex */
|
|---|
| 43 | #ifdef _CIVL
|
|---|
| 44 | $input int MAXTHRDS;
|
|---|
| 45 | $input int VECLEN;
|
|---|
| 46 | $output double _sum;
|
|---|
| 47 | #else
|
|---|
| 48 | #define MAXTHRDS 8
|
|---|
| 49 | #define VECLEN 100
|
|---|
| 50 | #endif
|
|---|
| 51 | DOTDATA dotstr;
|
|---|
| 52 | pthread_t callThd[MAXTHRDS];
|
|---|
| 53 | pthread_mutex_t mutexsum;
|
|---|
| 54 |
|
|---|
/*
   The function dotprod has only minor changes from the code
   that used threads or MPI.
*/
|
|---|
| 59 |
|
|---|
| 60 | void *dotprod(void *arg)
|
|---|
| 61 | {
|
|---|
| 62 |
|
|---|
| 63 | /* Define and use local variables for convenience */
|
|---|
| 64 |
|
|---|
| 65 | int i, start, end, len, numthrds, myid;
|
|---|
| 66 | long mythrd;
|
|---|
| 67 | double mysum, *x, *y;
|
|---|
| 68 |
|
|---|
| 69 | /*
|
|---|
| 70 | The number of threads and nodes defines the beginning
|
|---|
| 71 | and ending for the dot product; each thread does work
|
|---|
| 72 | on a vector of length VECLENGTH.
|
|---|
| 73 | */
|
|---|
| 74 |
|
|---|
| 75 | mythrd = (long)arg;
|
|---|
| 76 | MPI_Comm_rank (MPI_COMM_WORLD, &myid);
|
|---|
| 77 |
|
|---|
| 78 | numthrds = dotstr.numthrds;
|
|---|
| 79 | len = dotstr.veclen;
|
|---|
| 80 | start = myid*numthrds*len + mythrd*len;
|
|---|
| 81 | end = start + len;
|
|---|
| 82 | x = dotstr.a;
|
|---|
| 83 | y = dotstr.b;
|
|---|
| 84 |
|
|---|
| 85 | /*
|
|---|
| 86 | Perform the dot product and assign result
|
|---|
| 87 | to the appropriate variable in the structure.
|
|---|
| 88 | */
|
|---|
| 89 |
|
|---|
| 90 | mysum = 0;
|
|---|
| 91 | for (i=start; i<end ; i++)
|
|---|
| 92 | {
|
|---|
| 93 | mysum += (x[i] * y[i]);
|
|---|
| 94 | }
|
|---|
| 95 |
|
|---|
| 96 | /*
|
|---|
| 97 | Lock a mutex prior to updating the value in the structure, and unlock it
|
|---|
| 98 | upon updating.
|
|---|
| 99 | */
|
|---|
| 100 | pthread_mutex_lock (&mutexsum);
|
|---|
| 101 | printf("Task %d thread %ld adding partial sum of %f to node sum of %f\n",
|
|---|
| 102 | myid, mythrd, mysum, dotstr.sum);
|
|---|
| 103 | dotstr.sum += mysum;
|
|---|
| 104 | pthread_mutex_unlock (&mutexsum);
|
|---|
| 105 |
|
|---|
| 106 | pthread_exit((void*)0);
|
|---|
| 107 | }
|
|---|
| 108 |
|
|---|
/*
   As before, the main program does very little computation. It creates
   threads on each node and the main thread does all the MPI calls.
*/
|
|---|
| 113 |
|
|---|
| 114 | int main(int argc, char* argv[])
|
|---|
| 115 | {
|
|---|
| 116 | int len=VECLEN, myid, numprocs;
|
|---|
| 117 | long i;
|
|---|
| 118 | int nump1, numthrds;
|
|---|
| 119 | double *a, *b;
|
|---|
| 120 | double nodesum, allsum;
|
|---|
| 121 | void *status;
|
|---|
| 122 | pthread_attr_t attr;
|
|---|
| 123 |
|
|---|
| 124 | /* MPI Initialization */
|
|---|
| 125 | MPI_Init (&argc, &argv);
|
|---|
| 126 | MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
|
|---|
| 127 | MPI_Comm_rank (MPI_COMM_WORLD, &myid);
|
|---|
| 128 |
|
|---|
| 129 | /* Assign storage and initialize values */
|
|---|
| 130 | numthrds=MAXTHRDS;
|
|---|
| 131 | a = (double*) malloc (numprocs*numthrds*len*sizeof(double));
|
|---|
| 132 | b = (double*) malloc (numprocs*numthrds*len*sizeof(double));
|
|---|
| 133 |
|
|---|
| 134 | for (i=0; i<len*numprocs*numthrds; i++) {
|
|---|
| 135 | a[i]=1;
|
|---|
| 136 | b[i]=a[i];
|
|---|
| 137 | }
|
|---|
| 138 |
|
|---|
| 139 | dotstr.veclen = len;
|
|---|
| 140 | dotstr.a = a;
|
|---|
| 141 | dotstr.b = b;
|
|---|
| 142 | dotstr.sum=0;
|
|---|
| 143 | dotstr.numthrds=MAXTHRDS;
|
|---|
| 144 |
|
|---|
| 145 | /*
|
|---|
| 146 | Create thread attribute to specify that the main thread needs
|
|---|
| 147 | to join with the threads it creates.
|
|---|
| 148 | */
|
|---|
| 149 | pthread_attr_init(&attr );
|
|---|
| 150 | pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
|
|---|
| 151 |
|
|---|
| 152 | /* Create a mutex */
|
|---|
| 153 | pthread_mutex_init (&mutexsum, NULL);
|
|---|
| 154 |
|
|---|
| 155 | /* Create threads within this node to perform the dotproduct */
|
|---|
| 156 | for(i=0;i<numthrds;i++) {
|
|---|
| 157 | pthread_create( &callThd[i], &attr, dotprod, (void *)i);
|
|---|
| 158 | }
|
|---|
| 159 |
|
|---|
| 160 | /* Release the thread attribute handle as it is no longer needed */
|
|---|
| 161 | pthread_attr_destroy(&attr );
|
|---|
| 162 |
|
|---|
| 163 | /* Wait on the other threads within this node */
|
|---|
| 164 | for(i=0;i<numthrds;i++) {
|
|---|
| 165 | pthread_join( callThd[i], &status);
|
|---|
| 166 | }
|
|---|
| 167 |
|
|---|
| 168 | nodesum = dotstr.sum;
|
|---|
| 169 | printf("Task %d node sum is %f\n",myid, nodesum);
|
|---|
| 170 |
|
|---|
| 171 | /* After the dot product, perform a summation of results on each node */
|
|---|
| 172 | MPI_Reduce (&nodesum, &allsum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
|
|---|
| 173 |
|
|---|
| 174 | if (myid == 0)
|
|---|
| 175 | printf ("Done. MPI with threads version: sum = %f \n", allsum);
|
|---|
| 176 | #ifdef _CIVL
|
|---|
| 177 | if(myid == 0)
|
|---|
| 178 | _sum=allsum;
|
|---|
| 179 | #endif
|
|---|
| 180 |
|
|---|
| 181 | MPI_Finalize();
|
|---|
| 182 | free (a);
|
|---|
| 183 | free (b);
|
|---|
| 184 | pthread_mutex_destroy(&mutexsum);
|
|---|
| 185 | //exit (0);
|
|---|
| 186 | }
|
|---|