| [f2097b0] | 1 | /*
|
|---|
| 2 | OpenMP implementation of matrix multiplication. Each thread takes care
|
|---|
| 3 | a chunk of rows.
|
|---|
| 4 |
|
|---|
| 5 | Compile with gcc -O3 -fopenmp omp_matrixmult.c -o omp_matrixmult
|
|---|
| 6 | */
|
|---|
| 7 | // Online source: http://users.abo.fi/mats/PP2012/examples/OpenMP/omp_critical.c
|
|---|
| 8 | // permission obtained
|
|---|
| 9 |
|
|---|
| 10 | #include <omp.h>
|
|---|
| 11 | #include <stdio.h>
|
|---|
| 12 | #include <stdlib.h>
|
|---|
| 13 |
|
|---|
| 14 | /* Number of threads used */
|
|---|
| 15 | #define NR_THREADS 4
|
|---|
| 16 |
|
|---|
| [20ac35f] | 17 | #ifdef _CIVL
|
|---|
| 18 | #define DEBUG 1
|
|---|
| 19 | $input int NRA=10; // number of rows in matrix A
|
|---|
| 20 | $input int NCA=10; // number of columns in matrix A
|
|---|
| 21 | $input int NCB=10; // number of columns in matrix B
|
|---|
| 22 | #else
|
|---|
| [f2097b0] | 23 | #define DEBUG 0
|
|---|
| 24 | #define NRA 1400 // number of rows in matrix A
|
|---|
| 25 | #define NCA 1400 // number of columns in matrix A
|
|---|
| 26 | #define NCB 1400 // number of columns in matrix B
|
|---|
| [20ac35f] | 27 | #endif
|
|---|
| [f2097b0] | 28 |
|
|---|
| 29 | int main (int argc, char *argv[]) {
|
|---|
| 30 | int tid, nthreads, i, j, k;
|
|---|
| 31 | double **a, **b, **c;
|
|---|
| 32 | double *a_block, *b_block, *c_block;
|
|---|
| 33 | double **res;
|
|---|
| 34 | double *res_block;
|
|---|
| 35 | double starttime, stoptime;
|
|---|
| 36 |
|
|---|
| 37 | a = (double **) malloc(NRA*sizeof(double *)); /* matrix a to be multiplied */
|
|---|
| 38 | b = (double **) malloc(NCA*sizeof(double *)); /* matrix b to be multiplied */
|
|---|
| 39 | c = (double **) malloc(NRA*sizeof(double *)); /* result matrix c */
|
|---|
| 40 |
|
|---|
| 41 | a_block = (double *) malloc(NRA*NCA*sizeof(double)); /* Storage for matrices */
|
|---|
| 42 | b_block = (double *) malloc(NCA*NCB*sizeof(double));
|
|---|
| 43 | c_block = (double *) malloc(NRA*NCB*sizeof(double));
|
|---|
| 44 |
|
|---|
| 45 | /* Result matrix for the sequential algorithm */
|
|---|
| 46 | res = (double **) malloc(NRA*sizeof(double *));
|
|---|
| 47 | res_block = (double *) malloc(NRA*NCB*sizeof(double));
|
|---|
| 48 |
|
|---|
| 49 | for (i=0; i<NRA; i++) /* Initialize pointers to a */
|
|---|
| 50 | a[i] = a_block+i*NRA;
|
|---|
| 51 |
|
|---|
| 52 | for (i=0; i<NCA; i++) /* Initialize pointers to b */
|
|---|
| 53 | b[i] = b_block+i*NCA;
|
|---|
| 54 |
|
|---|
| 55 | for (i=0; i<NRA; i++) /* Initialize pointers to c */
|
|---|
| 56 | c[i] = c_block+i*NRA;
|
|---|
| 57 |
|
|---|
| 58 | for (i=0; i<NRA; i++) /* Initialize pointers to res */
|
|---|
| 59 | res[i] = res_block+i*NRA;
|
|---|
| 60 |
|
|---|
| 61 | /* A static allocation of the matrices would be done like this */
|
|---|
| 62 | /* double a[NRA][NCA], b[NCA][NCB], c[NRA][NCB]; */
|
|---|
| 63 |
|
|---|
| 64 | /*** Spawn a parallel region explicitly scoping all variables ***/
|
|---|
| 65 | #pragma omp parallel shared(a,b,c,nthreads) private(tid,i,j,k) num_threads(NR_THREADS)
|
|---|
| 66 | {
|
|---|
| 67 | tid = omp_get_thread_num();
|
|---|
| 68 | if (tid == 0) { /* Only thread 0 prints */
|
|---|
| 69 | nthreads = omp_get_num_threads();
|
|---|
| 70 | printf("Starting matrix multiplication with %d threads\n",nthreads);
|
|---|
| 71 | printf("Initializing matrices...\n");
|
|---|
| 72 | }
|
|---|
| 73 | /*** Initialize matrices ***/
|
|---|
| 74 | #pragma omp for nowait /* No need to synchronize the threads before the */
|
|---|
| 75 | for (i=0; i<NRA; i++) /* last matrix has been initialized */
|
|---|
| 76 | for (j=0; j<NCA; j++)
|
|---|
| 77 | a[i][j]= (double) (i+j);
|
|---|
| 78 | #pragma omp for nowait
|
|---|
| 79 | for (i=0; i<NCA; i++)
|
|---|
| 80 | for (j=0; j<NCB; j++)
|
|---|
| 81 | b[i][j]= (double) (i*j);
|
|---|
| 82 | #pragma omp for /* We synchronize the threads after this */
|
|---|
| 83 | for (i=0; i<NRA; i++)
|
|---|
| 84 | for (j=0; j<NCB; j++)
|
|---|
| 85 | c[i][j]= 0.0;
|
|---|
| 86 |
|
|---|
| 87 | if (tid == 0) /* Thread zero measures time */
|
|---|
| 88 | starttime = omp_get_wtime(); /* Master thread measures the execution time */
|
|---|
| 89 |
|
|---|
| 90 | /* Do matrix multiply sharing iterations on outer loop */
|
|---|
| 91 | /* If DEBUG is TRUE display who does which iterations */
|
|---|
| 92 | /* printf("Thread %d starting matrix multiply...\n",tid); */
|
|---|
| 93 | #pragma omp for
|
|---|
| 94 | for (i=0; i<NRA; i++) {
|
|---|
| 95 | if (DEBUG) printf("Thread=%d did row=%d\n",tid,i);
|
|---|
| 96 | for(j=0; j<NCB; j++) {
|
|---|
| 97 | for (k=0; k<NCA; k++) {
|
|---|
| 98 | c[i][j] += a[i][k] * b[k][j];
|
|---|
| 99 | }
|
|---|
| 100 | }
|
|---|
| 101 | }
|
|---|
| 102 |
|
|---|
| 103 | if (tid == 0) {
|
|---|
| 104 | stoptime = omp_get_wtime();
|
|---|
| 105 | printf("Time for parallel matrix multiplication: %3.2f s\n",
|
|---|
| 106 | stoptime-starttime);
|
|---|
| 107 | }
|
|---|
| 108 | } /*** End of parallel region ***/
|
|---|
| 109 |
|
|---|
| 110 | starttime = omp_get_wtime();
|
|---|
| 111 | /* Do a sequential matrix multiplication and compare the results */
|
|---|
| 112 | for (i=0; i<NRA; i++) {
|
|---|
| 113 | for (j=0; j<NCB; j++) {
|
|---|
| 114 | res[i][j] = 0.0;
|
|---|
| 115 | for (k=0; k<NCA; k++)
|
|---|
| 116 | res[i][j] += a[i][k]*b[k][j];
|
|---|
| 117 | }
|
|---|
| 118 | }
|
|---|
| 119 | stoptime = omp_get_wtime();
|
|---|
| 120 | printf("Time for sequential matrix multiplication: %3.2f s\n", stoptime-starttime);
|
|---|
| 121 |
|
|---|
| 122 | /* Check that the results are the same as in the parallel solution.
|
|---|
| 123 | Actually, you should not compare floating point values for equality like this
|
|---|
| 124 | but instead compute the difference between the two values and check that it
|
|---|
| 125 | is smaller than a very small value epsilon. However, since all values in the
|
|---|
| 126 | matrices here are integer values, this will work.
|
|---|
| 127 | */
|
|---|
| 128 | for (i=0; i<NRA; i++) {
|
|---|
| 129 | for (j=0; j<NCB; j++) {
|
|---|
| 130 | if (res[i][j] == c[i][j]) {
|
|---|
| 131 | /* Everything is OK if they are equal */
|
|---|
| 132 | }
|
|---|
| 133 | else {
|
|---|
| 134 | printf("Different result %5.1f != %5.1f in %d %d\n ", res[i][j], c[i][j], i, j);
|
|---|
| 135 | }
|
|---|
| 136 | }
|
|---|
| 137 | }
|
|---|
| 138 |
|
|---|
| 139 | /* If DEBUG is true, print the results. Usa smaller matrices for this */
|
|---|
| 140 | if (DEBUG) {
|
|---|
| 141 | printf("Result Matrix:\n");
|
|---|
| 142 | for (i=0; i<NRA; i++) {
|
|---|
| 143 | for (j=0; j<NCB; j++)
|
|---|
| 144 | printf("%6.1f ", c[i][j]);
|
|---|
| 145 | printf("\n");
|
|---|
| 146 | }
|
|---|
| 147 | }
|
|---|
| 148 |
|
|---|
| 149 | printf ("Done.\n");
|
|---|
| 150 | exit(0);
|
|---|
| 151 | }
|
|---|