/*****************************************************************************
* FILE: mpithreads_both.c
* DESCRIPTION:
*   This program illustrates the simultaneous use of MPI and Pthreads.
*   It is essentially a simple combination of a code that implements a dot
*   product using threads, and a code that uses MPI for the same purpose.
*   It is the last of four codes used to show the progression from a serial
*   program to a hybrid MPI/Pthreads program. The other relevant codes are:
*      - mpithreads_serial.c  - The serial version
*      - mpithreads_threads.c - A shared memory programming model using
*                               Pthreads
*      - mpithreads_mpi.c     - A distributed memory programming model with MPI
*   All the internode MPI communication is done by the main thread on each
*   node - the other threads within that node need not even be aware that
*   internode communication is being performed. Use of the SPMD model for
*   MPI was chosen for convenience, with replication of the main data on
*   all nodes. A more memory efficient implementation would be advisable
*   for larger data sets. This is the simplest model for mixed MPI/Pthreads
*   programming.
* SOURCE: Vijay Sonnad, IBM
* LAST REVISED: 01/29/09 Blaise Barney
******************************************************************************/
|
|---|
| 23 | #include "mpi.h"
|
|---|
| 24 | #include <pthread.h>
|
|---|
| 25 | #include <stdio.h>
|
|---|
| 26 | #include <stdlib.h>
|
|---|
| 27 |
|
|---|
| 28 | /*
|
|---|
| 29 | This structure has been changed slightly from the previous cases
|
|---|
| 30 | to include the number of threads per node.
|
|---|
| 31 | */
|
|---|
| 32 |
|
|---|
| 33 | typedef struct
|
|---|
| 34 | {
|
|---|
| 35 | double *a;
|
|---|
| 36 | double *b;
|
|---|
| 37 | double sum;
|
|---|
| 38 | int veclen;
|
|---|
| 39 | int numthrds;
|
|---|
| 40 | } DOTDATA;
|
|---|
| 41 |
|
|---|
| 42 | /* Define globally accessible variables and a mutex */
|
|---|
| 43 |
|
|---|
| 44 | #define MAXTHRDS 2
|
|---|
| 45 | #define VECLEN 5
|
|---|
| 46 | DOTDATA dotstr;
|
|---|
| 47 | pthread_t callThd[MAXTHRDS];
|
|---|
| 48 | pthread_mutex_t mutexsum;
|
|---|
| 49 |
|
|---|
| 50 | /*
|
|---|
| 51 | The function dotprod has only minor changes from the code
|
|---|
| 52 | that used threads or MPI.
|
|---|
| 53 | */
|
|---|
| 54 |
|
|---|
| 55 | void *dotprod(void *arg)
|
|---|
| 56 | {
|
|---|
| 57 |
|
|---|
| 58 | /* Define and use local variables for convenience */
|
|---|
| 59 |
|
|---|
| 60 | int i, start, end, len, numthrds, myid;
|
|---|
| 61 | long mythrd;
|
|---|
| 62 | double mysum, *x, *y;
|
|---|
| 63 |
|
|---|
| 64 | /*
|
|---|
| 65 | The number of threads and nodes defines the beginning
|
|---|
| 66 | and ending for the dot product; each thread does work
|
|---|
| 67 | on a vector of length VECLENGTH.
|
|---|
| 68 | */
|
|---|
| 69 |
|
|---|
| 70 | mythrd = (long)arg;
|
|---|
| 71 | MPI_Comm_rank (MPI_COMM_WORLD, &myid);
|
|---|
| 72 |
|
|---|
| 73 | numthrds = dotstr.numthrds;
|
|---|
| 74 | len = dotstr.veclen;
|
|---|
| 75 | start = myid*numthrds*len + mythrd*len;
|
|---|
| 76 | end = start + len;
|
|---|
| 77 | x = dotstr.a;
|
|---|
| 78 | y = dotstr.b;
|
|---|
| 79 |
|
|---|
| 80 | /*
|
|---|
| 81 | Perform the dot product and assign result
|
|---|
| 82 | to the appropriate variable in the structure.
|
|---|
| 83 | */
|
|---|
| 84 |
|
|---|
| 85 | mysum = 0;
|
|---|
| 86 | for (i=start; i<end ; i++)
|
|---|
| 87 | {
|
|---|
| 88 | mysum += (x[i] * y[i]);
|
|---|
| 89 | }
|
|---|
| 90 |
|
|---|
| 91 | /*
|
|---|
| 92 | Lock a mutex prior to updating the value in the structure, and unlock it
|
|---|
| 93 | upon updating.
|
|---|
| 94 | */
|
|---|
| 95 | pthread_mutex_lock (&mutexsum);
|
|---|
| 96 | printf("Task %d thread %ld adding partial sum of %f to node sum of %f\n",
|
|---|
| 97 | myid, mythrd, mysum, dotstr.sum);
|
|---|
| 98 | dotstr.sum += mysum;
|
|---|
| 99 | pthread_mutex_unlock (&mutexsum);
|
|---|
| 100 |
|
|---|
| 101 | pthread_exit((void*)0);
|
|---|
| 102 | }
|
|---|
| 103 |
|
|---|
| 104 | /*
|
|---|
| 105 | As before,the main program does very little computation. It creates
|
|---|
| 106 | threads on each node and the main thread does all the MPI calls.
|
|---|
| 107 | */
|
|---|
| 108 |
|
|---|
| 109 | int main(int argc, char* argv[])
|
|---|
| 110 | {
|
|---|
| 111 | int len=VECLEN, myid, numprocs;
|
|---|
| 112 | long i;
|
|---|
| 113 | int nump1, numthrds;
|
|---|
| 114 | double *a, *b;
|
|---|
| 115 | double nodesum, allsum;
|
|---|
| 116 | void *status;
|
|---|
| 117 | pthread_attr_t attr;
|
|---|
| 118 |
|
|---|
| 119 | /* MPI Initialization */
|
|---|
| 120 | MPI_Init (&argc, &argv);
|
|---|
| 121 | MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
|
|---|
| 122 | MPI_Comm_rank (MPI_COMM_WORLD, &myid);
|
|---|
| 123 |
|
|---|
| 124 | /* Assign storage and initialize values */
|
|---|
| 125 | numthrds=MAXTHRDS;
|
|---|
| 126 | a = (double*) malloc (numprocs*numthrds*len*sizeof(double));
|
|---|
| 127 | b = (double*) malloc (numprocs*numthrds*len*sizeof(double));
|
|---|
| 128 |
|
|---|
| 129 | for (i=0; i<len*numprocs*numthrds; i++) {
|
|---|
| 130 | a[i]=1;
|
|---|
| 131 | b[i]=a[i];
|
|---|
| 132 | }
|
|---|
| 133 |
|
|---|
| 134 | dotstr.veclen = len;
|
|---|
| 135 | dotstr.a = a;
|
|---|
| 136 | dotstr.b = b;
|
|---|
| 137 | dotstr.sum=0;
|
|---|
| 138 | dotstr.numthrds=MAXTHRDS;
|
|---|
| 139 |
|
|---|
| 140 | /*
|
|---|
| 141 | Create thread attribute to specify that the main thread needs
|
|---|
| 142 | to join with the threads it creates.
|
|---|
| 143 | */
|
|---|
| 144 | pthread_attr_init(&attr );
|
|---|
| 145 | pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
|
|---|
| 146 |
|
|---|
| 147 | /* Create a mutex */
|
|---|
| 148 | pthread_mutex_init (&mutexsum, NULL);
|
|---|
| 149 |
|
|---|
| 150 | /* Create threads within this node to perform the dotproduct */
|
|---|
| 151 | for(i=0;i<numthrds;i++) {
|
|---|
| 152 | pthread_create( &callThd[i], &attr, dotprod, (void *)i);
|
|---|
| 153 | }
|
|---|
| 154 |
|
|---|
| 155 | /* Release the thread attribute handle as it is no longer needed */
|
|---|
| 156 | pthread_attr_destroy(&attr );
|
|---|
| 157 |
|
|---|
| 158 | /* Wait on the other threads within this node */
|
|---|
| 159 | for(i=0;i<numthrds;i++) {
|
|---|
| 160 | pthread_join( callThd[i], &status);
|
|---|
| 161 | }
|
|---|
| 162 |
|
|---|
| 163 | nodesum = dotstr.sum;
|
|---|
| 164 | printf("Task %d node sum is %f\n",myid, nodesum);
|
|---|
| 165 |
|
|---|
| 166 | /* After the dot product, perform a summation of results on each node */
|
|---|
| 167 | MPI_Reduce (&nodesum, &allsum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
|
|---|
| 168 |
|
|---|
| 169 | if (myid == 0)
|
|---|
| 170 | printf ("Done. MPI with threads version: sum = %f \n", allsum);
|
|---|
| 171 | MPI_Finalize();
|
|---|
| 172 | free (a);
|
|---|
| 173 | free (b);
|
|---|
| 174 | pthread_mutex_destroy(&mutexsum);
|
|---|
| 175 | //exit (0);
|
|---|
| 176 | return 0;
|
|---|
| 177 | }
|
|---|