| 1 | /*****************************************************************************
|
|---|
| 2 | * SOURCE: This is a translation of a Pthread program from the Lawrence Livermore
|
|---|
| 3 | * Computing Center POSIX Threads Programming Exercise at:
|
|---|
| 4 | * https://computing.llnl.gov/tutorials/pthreads/exercise.html
|
|---|
| 5 | * FILE: mpithreads_threads.cvl
|
|---|
| 6 | * DESCRIPTION:
|
|---|
| 7 | * This simple program illustrates the use of Pthreads in a program obtained
|
|---|
| 8 | * by modifying a serial code that performs a dot product. It is the second
|
|---|
| 9 | * of four codes used to show the progression from a serial program to a
|
|---|
| 10 | * hybrid MPI/Pthreads program. The other relevant codes are:
|
|---|
| 11 | * - mpithreads_serial.c - The serial version
|
|---|
| 12 | * - mpithreads_mpi.c - A distributed memory programming model with MPI
|
|---|
| 13 | * - mpithreads_both.c - A hybrid model that utilizes both MPI and
|
|---|
| 14 | * Pthreads to execute on systems that are comprised of clusters
|
|---|
| 15 | * of SMP's.
|
|---|
| 16 | * The main data is made available to all threads through a globally
|
|---|
| 17 | * accessible structure. Each thread works on a different part of the
|
|---|
| 18 | * data. The main thread waits for all the threads to complete their
|
|---|
| 19 | * computations, and then it prints the resulting sum.
|
|---|
| 20 | * Command line execution:
|
|---|
| 21 | * civl verify -inputMAXTHRDS=8 -inputVECLEN=100 mpithreads_threads.cvl
|
|---|
| 22 | ******************************************************************************/
|
|---|
| 23 | #include "pthread.cvh"
|
|---|
| 24 | #include <civlc.h>
|
|---|
| 25 | #include <stdio.h>
|
|---|
| 26 | #include <stdlib.h>
|
|---|
| 27 |
|
|---|
| 28 | /*
|
|---|
| 29 | The following structure contains the necessary information to allow the
|
|---|
| 30 | function "dotprod" to access its input data and place its output into
|
|---|
| 31 | the structure. This structure is unchanged from the sequential version.
|
|---|
| 32 | */
|
|---|
| 33 |
|
|---|
/* Shared work description for the dot product: two input vectors, the
   running result, and the number of elements each thread processes. */
typedef struct {
  double *a;      /* first input vector */
  double *b;      /* second input vector */
  double  sum;    /* accumulated dot product; threads add their partial sums here */
  int     veclen; /* length of the block handled by a single thread */
} DOTDATA;
|
|---|
| 41 |
|
|---|
| 42 | /* Define globally accessible variables and a mutex */
|
|---|
| 43 |
|
|---|
| 44 | $input int MAXTHRDS;
|
|---|
| 45 | $input int VECLEN;
|
|---|
| 46 | DOTDATA dotstr;
|
|---|
| 47 | pthread_t callThd[MAXTHRDS];
|
|---|
| 48 | pthread_mutex_t mutexsum;
|
|---|
| 49 |
|
|---|
| 50 | /*
|
|---|
| 51 | The function dotprod is activated when the thread is created. As before,
|
|---|
| 52 | all input to this routine is obtained from a structure of type DOTDATA and
|
|---|
| 53 | all output from this function is written into this structure. The benefit
|
|---|
| 54 | of this approach is apparent for the multi-threaded program: when a thread
|
|---|
| 55 | is created we pass a single argument to the activated function - typically
|
|---|
| 56 | this argument is a thread number. All the other information required by the
|
|---|
| 57 | function is accessed from the globally accessible structure.
|
|---|
| 58 | */
|
|---|
| 59 |
|
|---|
| 60 | void *dotprod(void *arg)
|
|---|
| 61 | {
|
|---|
| 62 |
|
|---|
| 63 | /* Define and use local variables for convenience */
|
|---|
| 64 |
|
|---|
| 65 | int i, start, end, len ;
|
|---|
| 66 | long offset;
|
|---|
| 67 | double mysum, *x, *y;
|
|---|
| 68 | offset = (long)*arg;
|
|---|
| 69 |
|
|---|
| 70 | len = dotstr.veclen;
|
|---|
| 71 | start = offset*len;
|
|---|
| 72 | end = start + len;
|
|---|
| 73 | x = dotstr.a;
|
|---|
| 74 | y = dotstr.b;
|
|---|
| 75 |
|
|---|
| 76 | /*
|
|---|
| 77 | Perform the dot product and assign result to the appropriate variable in
|
|---|
| 78 | the structure.
|
|---|
| 79 | */
|
|---|
| 80 |
|
|---|
| 81 | mysum = 0;
|
|---|
| 82 | for (i=start; i<end ; i++)
|
|---|
| 83 | {
|
|---|
| 84 | mysum += (x[i] * y[i]);
|
|---|
| 85 | }
|
|---|
| 86 |
|
|---|
| 87 | /*
|
|---|
| 88 | Lock a mutex prior to updating the value in the shared structure, and
|
|---|
| 89 | unlock it upon updating.
|
|---|
| 90 | */
|
|---|
| 91 | pthread_mutex_lock (&mutexsum);
|
|---|
| 92 | printf("Thread %d adding partial sum of %f to global sum of %f\n",
|
|---|
| 93 | arg, mysum, dotstr.sum); // Removed l from %ld
|
|---|
| 94 | dotstr.sum += mysum;
|
|---|
| 95 | pthread_mutex_unlock (&mutexsum);
|
|---|
| 96 |
|
|---|
| 97 | pthread_exit(NULL, false, NULL, 0);
|
|---|
| 98 | }
|
|---|
| 99 |
|
|---|
| 100 | /*
|
|---|
| 101 | The main program creates threads which do all the work and then prints out
|
|---|
| 102 | the result upon completion. Before creating the threads, the input data is
|
|---|
| 103 | created. Since all threads update a shared structure, we need a mutex for
|
|---|
| 104 | mutual exclusion. The main thread needs to wait for all threads to complete,
|
|---|
| 105 | it waits for each one of the threads. We specify a thread attribute value
|
|---|
| 106 | that allows the main thread to join with the threads it creates. Note also
|
|---|
| 107 | that we free up handles when they are no longer needed.
|
|---|
| 108 | */
|
|---|
| 109 |
|
|---|
| 110 | int main (void)
|
|---|
| 111 | {
|
|---|
| 112 | long t[MAXTHRDS];
|
|---|
| 113 | long i;
|
|---|
| 114 | double *a, *b;
|
|---|
| 115 | void *status;
|
|---|
| 116 | pthread_attr_t attr;
|
|---|
| 117 |
|
|---|
| 118 | /* Assign storage and initialize values */
|
|---|
| 119 | a = (double*) malloc (MAXTHRDS*VECLEN*sizeof(double));
|
|---|
| 120 | b = (double*) malloc (MAXTHRDS*VECLEN*sizeof(double));
|
|---|
| 121 |
|
|---|
| 122 | for (i=0; i<VECLEN*MAXTHRDS; i++) {
|
|---|
| 123 | a[i]=1;
|
|---|
| 124 | b[i]=a[i];
|
|---|
| 125 | }
|
|---|
| 126 |
|
|---|
| 127 | dotstr.veclen = VECLEN;
|
|---|
| 128 | dotstr.a = a;
|
|---|
| 129 | dotstr.b = b;
|
|---|
| 130 | dotstr.sum=0;
|
|---|
| 131 |
|
|---|
| 132 | pthread_mutex_init(&mutexsum, NULL);
|
|---|
| 133 |
|
|---|
| 134 | /* Create threads to perform the dotproduct */
|
|---|
| 135 | pthread_attr_init(&attr);
|
|---|
| 136 | pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
|
|---|
| 137 |
|
|---|
| 138 | for(i=0;i<MAXTHRDS;i++) {
|
|---|
| 139 | /* Each thread works on a different set of data.
|
|---|
| 140 | The offset is specified by 'i'. The size of
|
|---|
| 141 | the data for each thread is indicated by VECLEN.
|
|---|
| 142 | */
|
|---|
| 143 | t[i] = i;
|
|---|
| 144 | pthread_create( &callThd[i], &attr, dotprod, (void *)&t[i]);
|
|---|
| 145 | }
|
|---|
| 146 |
|
|---|
| 147 | pthread_attr_destroy(&attr);
|
|---|
| 148 |
|
|---|
| 149 | /* Wait on the other threads */
|
|---|
| 150 | for(i=0;i<MAXTHRDS;i++) {
|
|---|
| 151 | pthread_join( callThd[i], &status);
|
|---|
| 152 | }
|
|---|
| 153 |
|
|---|
| 154 | /* After joining, print out the results and cleanup */
|
|---|
| 155 | printf ("Done. Threaded version: sum = %f \n", dotstr.sum);
|
|---|
| 156 | free (a);
|
|---|
| 157 | free (b);
|
|---|
| 158 | pthread_mutex_destroy(&mutexsum);
|
|---|
| 159 | pthread_exit(NULL, true, NULL, 0);
|
|---|
| 160 | return 0;
|
|---|
| 161 | }
|
|---|
| 162 |
|
|---|
| 163 |
|
|---|