source: CIVL/examples/omp/matProduct2.c

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 4.8 KB
RevLine 
[f2097b0]1/*
2 OpenMP implementation of matrix multiplication. Each thread takes care
3 a chunk of rows.
4
5 Compile with gcc -O3 -fopenmp omp_matrixmult.c -o omp_matrixmult
6*/
7// Online source: http://users.abo.fi/mats/PP2012/examples/OpenMP/omp_critical.c
8// permission obtained
9
10#include <omp.h>
11#include <stdio.h>
12#include <stdlib.h>
13
14/* Number of threads used */
15#define NR_THREADS 4
16
[20ac35f]17#ifdef _CIVL
18#define DEBUG 1
19$input int NRA=10; // number of rows in matrix A
20$input int NCA=10; // number of columns in matrix A
21$input int NCB=10; // number of columns in matrix B
22#else
[f2097b0]23#define DEBUG 0
24#define NRA 1400 // number of rows in matrix A
25#define NCA 1400 // number of columns in matrix A
26#define NCB 1400 // number of columns in matrix B
[20ac35f]27#endif
[f2097b0]28
29int main (int argc, char *argv[]) {
30 int tid, nthreads, i, j, k;
31 double **a, **b, **c;
32 double *a_block, *b_block, *c_block;
33 double **res;
34 double *res_block;
35 double starttime, stoptime;
36
37 a = (double **) malloc(NRA*sizeof(double *)); /* matrix a to be multiplied */
38 b = (double **) malloc(NCA*sizeof(double *)); /* matrix b to be multiplied */
39 c = (double **) malloc(NRA*sizeof(double *)); /* result matrix c */
40
41 a_block = (double *) malloc(NRA*NCA*sizeof(double)); /* Storage for matrices */
42 b_block = (double *) malloc(NCA*NCB*sizeof(double));
43 c_block = (double *) malloc(NRA*NCB*sizeof(double));
44
45 /* Result matrix for the sequential algorithm */
46 res = (double **) malloc(NRA*sizeof(double *));
47 res_block = (double *) malloc(NRA*NCB*sizeof(double));
48
49 for (i=0; i<NRA; i++) /* Initialize pointers to a */
50 a[i] = a_block+i*NRA;
51
52 for (i=0; i<NCA; i++) /* Initialize pointers to b */
53 b[i] = b_block+i*NCA;
54
55 for (i=0; i<NRA; i++) /* Initialize pointers to c */
56 c[i] = c_block+i*NRA;
57
58 for (i=0; i<NRA; i++) /* Initialize pointers to res */
59 res[i] = res_block+i*NRA;
60
61 /* A static allocation of the matrices would be done like this */
62 /* double a[NRA][NCA], b[NCA][NCB], c[NRA][NCB]; */
63
64 /*** Spawn a parallel region explicitly scoping all variables ***/
65#pragma omp parallel shared(a,b,c,nthreads) private(tid,i,j,k) num_threads(NR_THREADS)
66 {
67 tid = omp_get_thread_num();
68 if (tid == 0) { /* Only thread 0 prints */
69 nthreads = omp_get_num_threads();
70 printf("Starting matrix multiplication with %d threads\n",nthreads);
71 printf("Initializing matrices...\n");
72 }
73 /*** Initialize matrices ***/
74#pragma omp for nowait /* No need to synchronize the threads before the */
75 for (i=0; i<NRA; i++) /* last matrix has been initialized */
76 for (j=0; j<NCA; j++)
77 a[i][j]= (double) (i+j);
78#pragma omp for nowait
79 for (i=0; i<NCA; i++)
80 for (j=0; j<NCB; j++)
81 b[i][j]= (double) (i*j);
82#pragma omp for /* We synchronize the threads after this */
83 for (i=0; i<NRA; i++)
84 for (j=0; j<NCB; j++)
85 c[i][j]= 0.0;
86
87 if (tid == 0) /* Thread zero measures time */
88 starttime = omp_get_wtime(); /* Master thread measures the execution time */
89
90 /* Do matrix multiply sharing iterations on outer loop */
91 /* If DEBUG is TRUE display who does which iterations */
92 /* printf("Thread %d starting matrix multiply...\n",tid); */
93#pragma omp for
94 for (i=0; i<NRA; i++) {
95 if (DEBUG) printf("Thread=%d did row=%d\n",tid,i);
96 for(j=0; j<NCB; j++) {
97 for (k=0; k<NCA; k++) {
98 c[i][j] += a[i][k] * b[k][j];
99 }
100 }
101 }
102
103 if (tid == 0) {
104 stoptime = omp_get_wtime();
105 printf("Time for parallel matrix multiplication: %3.2f s\n",
106 stoptime-starttime);
107 }
108 } /*** End of parallel region ***/
109
110 starttime = omp_get_wtime();
111 /* Do a sequential matrix multiplication and compare the results */
112 for (i=0; i<NRA; i++) {
113 for (j=0; j<NCB; j++) {
114 res[i][j] = 0.0;
115 for (k=0; k<NCA; k++)
116 res[i][j] += a[i][k]*b[k][j];
117 }
118 }
119 stoptime = omp_get_wtime();
120 printf("Time for sequential matrix multiplication: %3.2f s\n", stoptime-starttime);
121
122 /* Check that the results are the same as in the parallel solution.
123 Actually, you should not compare floating point values for equality like this
124 but instead compute the difference between the two values and check that it
125 is smaller than a very small value epsilon. However, since all values in the
126 matrices here are integer values, this will work.
127 */
128 for (i=0; i<NRA; i++) {
129 for (j=0; j<NCB; j++) {
130 if (res[i][j] == c[i][j]) {
131 /* Everything is OK if they are equal */
132 }
133 else {
134 printf("Different result %5.1f != %5.1f in %d %d\n ", res[i][j], c[i][j], i, j);
135 }
136 }
137 }
138
139 /* If DEBUG is true, print the results. Usa smaller matrices for this */
140 if (DEBUG) {
141 printf("Result Matrix:\n");
142 for (i=0; i<NRA; i++) {
143 for (j=0; j<NCB; j++)
144 printf("%6.1f ", c[i][j]);
145 printf("\n");
146 }
147 }
148
149 printf ("Done.\n");
150 exit(0);
151}
Note: See TracBrowser for help on using the repository browser.