/*****************************************************************************
* FILE: mpithreads_threads.c
* DESCRIPTION:
*   This simple program illustrates the use of Pthreads in a program obtained
*   by modifying a serial code that performs a dot product. It is the second
*   of four codes used to show the progression from a serial program to a
*   hybrid MPI/Pthreads program. The other relevant codes are:
*      - mpithreads_serial.c - The serial version
*      - mpithreads_mpi.c - A distributed memory programming model with MPI
*      - mpithreads_both.c - A hybrid model that utilizes both MPI and
*        Pthreads to execute on systems that are composed of clusters
*        of SMPs.
*   The main data is made available to all threads through a globally
*   accessible structure. Each thread works on a different part of the
*   data. The main thread waits for all the threads to complete their
*   computations, and then it prints the resulting sum.
* SOURCE: Vijay Sonnad, IBM
* LAST REVISED: 01/29/09 Blaise Barney
******************************************************************************/
|
|---|
| 20 | #include <pthread.h>
|
|---|
| 21 | #include <stdio.h>
|
|---|
| 22 | #include <stdlib.h>
|
|---|
| 23 |
|
|---|
/*
 * DOTDATA bundles everything the function "dotprod" reads and writes: the
 * two input vectors, the number of elements handled per call, and the
 * accumulated result. The structure is unchanged from the sequential version.
 */
typedef struct {
   double *a;       /* first input vector */
   double *b;       /* second input vector */
   double  sum;     /* accumulated dot product */
   int     veclen;  /* number of elements processed by one dotprod call */
} DOTDATA;
|
|---|
| 37 |
|
|---|
| 38 | /* Define globally accessible variables and a mutex */
|
|---|
| 39 |
|
|---|
| 40 | #ifdef _CIVL
|
|---|
| 41 | $input int MAXTHRDS;
|
|---|
| 42 | $input int VECLEN;
|
|---|
| 43 | $output double _sum;
|
|---|
| 44 | #else
|
|---|
| 45 | #define MAXTHRDS 8
|
|---|
| 46 | #define VECLEN 100
|
|---|
| 47 | #endif
|
|---|
| 48 | DOTDATA dotstr;
|
|---|
| 49 | pthread_t callThd[MAXTHRDS];
|
|---|
| 50 | pthread_mutex_t mutexsum;
|
|---|
| 51 |
|
|---|
| 52 | /*
|
|---|
| 53 | The function dotprod is activated when the thread is created. As before,
|
|---|
| 54 | all input to this routine is obtained from a structure of type DOTDATA and
|
|---|
| 55 | all output from this function is written into this structure. The benefit
|
|---|
| 56 | of this approach is apparent for the multi-threaded program: when a thread
|
|---|
| 57 | is created we pass a single argument to the activated function - typically
|
|---|
| 58 | this argument is a thread number. All the other information required by the
|
|---|
| 59 | function is accessed from the globally accessible structure.
|
|---|
| 60 | */
|
|---|
| 61 |
|
|---|
| 62 | void *dotprod(void *arg)
|
|---|
| 63 | {
|
|---|
| 64 |
|
|---|
| 65 | /* Define and use local variables for convenience */
|
|---|
| 66 |
|
|---|
| 67 | int i, start, end, len ;
|
|---|
| 68 | long offset;
|
|---|
| 69 | double mysum, *x, *y;
|
|---|
| 70 | offset = (long)arg;
|
|---|
| 71 |
|
|---|
| 72 | len = dotstr.veclen;
|
|---|
| 73 | start = offset*len;
|
|---|
| 74 | end = start + len;
|
|---|
| 75 | x = dotstr.a;
|
|---|
| 76 | y = dotstr.b;
|
|---|
| 77 |
|
|---|
| 78 | /*
|
|---|
| 79 | Perform the dot product and assign result to the appropriate variable in
|
|---|
| 80 | the structure.
|
|---|
| 81 | */
|
|---|
| 82 |
|
|---|
| 83 | mysum = 0;
|
|---|
| 84 | for (i=start; i<end ; i++)
|
|---|
| 85 | {
|
|---|
| 86 | mysum += (x[i] * y[i]);
|
|---|
| 87 | }
|
|---|
| 88 |
|
|---|
| 89 | /*
|
|---|
| 90 | Lock a mutex prior to updating the value in the shared structure, and
|
|---|
| 91 | unlock it upon updating.
|
|---|
| 92 | */
|
|---|
| 93 | pthread_mutex_lock (&mutexsum);
|
|---|
| 94 | printf("Thread %ld adding partial sum of %f to global sum of %f\n",
|
|---|
| 95 | arg, mysum, dotstr.sum);
|
|---|
| 96 | dotstr.sum += mysum;
|
|---|
| 97 | pthread_mutex_unlock (&mutexsum);
|
|---|
| 98 |
|
|---|
| 99 | pthread_exit((void*) 0);
|
|---|
| 100 | }
|
|---|
| 101 |
|
|---|
| 102 | /*
|
|---|
| 103 | The main program creates threads which do all the work and then print out
|
|---|
| 104 | result upon completion. Before creating the threads, the input data is
|
|---|
| 105 | created. Since all threads update a shared structure, we need a mutex for
|
|---|
| 106 | mutual exclusion. The main thread needs to wait for all threads to complete,
|
|---|
| 107 | it waits for each one of the threads. We specify a thread attribute value
|
|---|
| 108 | that allow the main thread to join with the threads it creates. Note also
|
|---|
| 109 | that we free up handles when they are no longer needed.
|
|---|
| 110 | */
|
|---|
| 111 |
|
|---|
| 112 | int main (int argc, char *argv[])
|
|---|
| 113 | {
|
|---|
| 114 | long i;
|
|---|
| 115 | double *a, *b;
|
|---|
| 116 | void *status;
|
|---|
| 117 | pthread_attr_t attr;
|
|---|
| 118 |
|
|---|
| 119 | /* Assign storage and initialize values */
|
|---|
| 120 | a = (double*) malloc (MAXTHRDS*VECLEN*sizeof(double));
|
|---|
| 121 | b = (double*) malloc (MAXTHRDS*VECLEN*sizeof(double));
|
|---|
| 122 |
|
|---|
| 123 | for (i=0; i<VECLEN*MAXTHRDS; i++) {
|
|---|
| 124 | a[i]=1;
|
|---|
| 125 | b[i]=a[i];
|
|---|
| 126 | }
|
|---|
| 127 |
|
|---|
| 128 | dotstr.veclen = VECLEN;
|
|---|
| 129 | dotstr.a = a;
|
|---|
| 130 | dotstr.b = b;
|
|---|
| 131 | dotstr.sum=0;
|
|---|
| 132 |
|
|---|
| 133 | pthread_mutex_init(&mutexsum, NULL);
|
|---|
| 134 |
|
|---|
| 135 | /* Create threads to perform the dotproduct */
|
|---|
| 136 | pthread_attr_init(&attr);
|
|---|
| 137 | pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
|
|---|
| 138 |
|
|---|
| 139 | for(i=0;i<MAXTHRDS;i++) {
|
|---|
| 140 | /* Each thread works on a different set of data.
|
|---|
| 141 | The offset is specified by 'i'. The size of
|
|---|
| 142 | the data for each thread is indicated by VECLEN.
|
|---|
| 143 | */
|
|---|
| 144 | pthread_create( &callThd[i], &attr, dotprod, (void *)i);
|
|---|
| 145 | }
|
|---|
| 146 |
|
|---|
| 147 | pthread_attr_destroy(&attr);
|
|---|
| 148 |
|
|---|
| 149 | /* Wait on the other threads */
|
|---|
| 150 | for(i=0;i<MAXTHRDS;i++) {
|
|---|
| 151 | pthread_join( callThd[i], &status);
|
|---|
| 152 | }
|
|---|
| 153 |
|
|---|
| 154 | /* After joining, print out the results and cleanup */
|
|---|
| 155 | printf ("Done. Threaded version: sum = %f \n", dotstr.sum);
|
|---|
| 156 | #ifdef _CIVL
|
|---|
| 157 | _sum=dotstr.sum;
|
|---|
| 158 | #endif
|
|---|
| 159 | free (a);
|
|---|
| 160 | free (b);
|
|---|
| 161 | pthread_mutex_destroy(&mutexsum);
|
|---|
| 162 | pthread_exit(NULL);
|
|---|
| 163 | }
|
|---|
| 164 |
|
|---|