/*****************************************************************************
* FILE: mpithreads_both.c
* DESCRIPTION:
*   This program illustrates the simultaneous use of MPI and Pthreads.
*   It is essentially a simple combination of a code that implements a dot
*   product using threads, and a code that uses MPI for the same purpose.
*   It is the last of four codes used to show the progression from a serial
*   program to a hybrid MPI/Pthreads program. The other relevant codes are:
*     - mpithreads_serial.c  - The serial version
*     - mpithreads_threads.c - A shared memory programming model using Pthreads
*     - mpithreads_mpi.c     - A distributed memory programming model with MPI
*   All the internode MPI communication is done by the main thread on each
*   node - the other threads within that node need not even be aware that
*   internode communication is being performed. Use of the SPMD model for
*   MPI was chosen for convenience, with replication of the main data on
*   all nodes. A more memory efficient implementation would be advisable
*   for larger data sets. This is the simplest model for mixed MPI/Pthreads
*   programming.
* SOURCE: Vijay Sonnad, IBM
* LAST REVISED: 01/29/09 Blaise Barney
******************************************************************************/
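
/*
Build/run sketch (the exact commands depend on the local MPI installation),
for example:
   mpicc -pthread mpithreads_both.c -o mpithreads_both
   mpiexec -n 2 ./mpithreads_both
*/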

#include "mpi.h"
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/*
This structure has been changed slightly from the previous cases
to include the number of threads per node.
*/

typedef struct
{
   double *a;
   double *b;
   double sum;
   int veclen;
   int numthrds;
} DOTDATA;

/* Define globally accessible variables and a mutex */
#ifdef _CIVL
$input int MAXTHRDS;
$input int VECLEN;
$output double _sum;
#else
#define MAXTHRDS 8
#define VECLEN 100
#endif
DOTDATA dotstr;
pthread_t callThd[MAXTHRDS];
pthread_mutex_t mutexsum;
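
/*
These globals are shared by all the threads within one MPI task: dotstr.sum
accumulates that task's partial result under the protection of mutexsum.
Each MPI task holds its own private copy of them in its own address space.
*/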

/*
The function dotprod has only minor changes from the code
that used threads or MPI.
*/

void *dotprod(void *arg)
{

   /* Define and use local variables for convenience */

   int i, start, end, len, numthrds, myid, sigSize1, sigSize2, max;
   long mythrd;
   double mysum, *x, *y;

   /*
   The number of threads and nodes defines the beginning and ending of
   the dot product; each thread works on a segment of at most sigSize2
   elements of the vectors.
   */

   mythrd = ((int*)arg)[0];      /* unpack the argument block built in main() */
   sigSize1 = ((int*)arg)[1];
   sigSize2 = ((int*)arg)[2];
   free(arg);
   MPI_Comm_rank (MPI_COMM_WORLD, &myid);

   numthrds = dotstr.numthrds;
   len = dotstr.veclen;
   max = (myid+1)*sigSize1;
   max = max > len ? len : max;
   start = myid*sigSize1 + mythrd*sigSize2;
   start = start > max ? max : start;
   end = start + sigSize2;
   end = end > max ? max : end;
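
   /*
   Worked example (assuming the default VECLEN=100 and MAXTHRDS=8, run on,
   say, 2 MPI tasks): main() computes sigSize1=50 and sigSize2=7, so task 1,
   thread 3 gets start = 1*50 + 3*7 = 71 and end = 78, while task 0,
   thread 7 gets start = 49 and end clamped from 56 down to max = 50.
   */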
   x = dotstr.a;
   y = dotstr.b;

   /*
   Perform the dot product and assign result
   to the appropriate variable in the structure.
   */

   mysum = 0;
   for (i=start; i<end; i++)
   {
      mysum += (x[i] * y[i]);
   }

   /*
   Lock a mutex prior to updating the value in the structure, and unlock it
   upon updating.
   */
   pthread_mutex_lock (&mutexsum);
   printf("Task %d thread %ld adding partial sum of %f to node sum of %f\n",
          myid, mythrd, mysum, dotstr.sum);
   dotstr.sum += mysum;
   pthread_mutex_unlock (&mutexsum);

   pthread_exit((void*)0);
}

/*
As before, the main program does very little computation. It creates
threads on each node and the main thread does all the MPI calls.
*/

int main(int argc, char* argv[])
{
   int len=VECLEN, myid, numprocs;
   long i;
   int numthrds, segSize1, segSize2;
   double *a, *b;
   double nodesum, allsum;
   void *status;
   pthread_attr_t attr;

   /* MPI Initialization */
   MPI_Init (&argc, &argv);
   MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
   MPI_Comm_rank (MPI_COMM_WORLD, &myid);
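
   /*
   Note: MPI_Init requests only MPI_THREAD_SINGLE support.  Since the worker
   threads created below also call MPI_Comm_rank, a strictly conforming
   hybrid code would use MPI_Init_thread and ask for MPI_THREAD_MULTIPLE,
   or simply pass myid to the threads through their argument blocks.
   */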

   /* Assign storage and initialize values */
   numthrds = MAXTHRDS;
   a = (double*) malloc (len*sizeof(double));
   b = (double*) malloc (len*sizeof(double));
   if (len % numprocs == 0)
      segSize1 = len / numprocs;
   else
      segSize1 = len / numprocs + 1;
   if (segSize1 % numthrds == 0)
      segSize2 = segSize1 / numthrds;
   else
      segSize2 = segSize1 / numthrds + 1;
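
   /*
   segSize1 is the (rounded-up) share of the vector owned by each MPI task
   and segSize2 the share of that slice owned by each thread; the clamping
   against max in dotprod() absorbs any leftover when the divisions are not
   exact.
   */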

   for (i=0; i<len; i++) {
      a[i]=1;
      b[i]=a[i];
   }

   dotstr.veclen = len;
   dotstr.a = a;
   dotstr.b = b;
   dotstr.sum = 0;
   dotstr.numthrds = MAXTHRDS;

   /*
   Create thread attribute to specify that the main thread needs
   to join with the threads it creates.
   */
   pthread_attr_init(&attr);
   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

   /* Create a mutex */
   pthread_mutex_init (&mutexsum, NULL);

   /* Create threads within this node to perform the dotproduct */
   for (i=0; i<numthrds; i++) {
      int *args = (int*) malloc(3 * sizeof(int));
      args[0] = i;
      args[1] = segSize1;
      args[2] = segSize2;
      pthread_create(&callThd[i], &attr, dotprod, (void*)args);
   }
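
   /*
   Each thread gets its own malloc'ed argument block (freed inside dotprod)
   instead of a pointer to the loop variable, so a newly created thread never
   races with the main thread updating i on the next iteration.
   */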

   /* Release the thread attribute handle as it is no longer needed */
   pthread_attr_destroy(&attr);

   /* Wait on the other threads within this node */
   for (i=0; i<numthrds; i++) {
      pthread_join(callThd[i], &status);
   }

   nodesum = dotstr.sum;
   printf("Task %d node sum is %f\n", myid, nodesum);

   /* After the local dot products, combine the per-task sums onto task 0 */
   MPI_Reduce (&nodesum, &allsum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

   if (myid == 0)
      printf ("Done. MPI with threads version: sum = %f \n", allsum);
#ifdef _CIVL
   if (myid == 0)
      _sum = allsum;
#endif

   MPI_Finalize();
   free (a);
   free (b);
   pthread_mutex_destroy(&mutexsum);
   return 0;
}