source: CIVL/examples/openacc/acc_c3/acc_c3.c

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 4.8 KB
RevLine 
[33534bb]1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * NVIDIA CORPORATION and its licensors retain all intellectual property
5 * and proprietary rights in and to this software, related documentation
6 * and any modifications thereto. Any use, reproduction, disclosure or
7 * distribution of this software and related documentation without an express
8 * license agreement from NVIDIA CORPORATION is strictly prohibited.
9 *
10 */
11
12
13/*
14 * Jacobi iteration example using OpenACC in C
15 * Build with
16 * pgcc -acc -Minfo=accel -fast c3.c
17 */
18
19#include <stdio.h>
20#include <stdlib.h>
21#include <assert.h>
22#include <openacc.h>
23#include <math.h>
24
25#if defined(_WIN32) || defined(_WIN64)
26#include <sys/timeb.h>
27#define gettime(a) _ftime(a)
28#define usec(t1,t2) ((((t2).time-(t1).time)*1000+((t2).millitm-(t1).millitm))*100)
29typedef struct _timeb timestruct;
30#else
31#include <sys/time.h>
32#define gettime(a) gettimeofday(a,NULL)
33#define usec(t1,t2) (((t2).tv_sec-(t1).tv_sec)*1000000+((t2).tv_usec-(t1).tv_usec))
34typedef struct timeval timestruct;
35#endif
36
37void
38smooth( float*restrict a, float*restrict b, float w0, float w1, float w2, int n, int m, int niters )
39{
40 int i, j, iter;
41 float* tmp;
42 for( iter = 1; iter <= niters; ++iter ){
43 #pragma acc kernels loop copyin(b[0:n*m]) copy(a[0:n*m]) independent
44 for( i = 1; i < n-1; ++i )
45 for( j = 1; j < m-1; ++j )
46 a[i*m+j] = w0 * b[i*m+j] +
47 w1*(b[(i-1)*m+j] + b[(i+1)*m+j] + b[i*m+j-1] + b[i*m+j+1]) +
48 w2*(b[(i-1)*m+j-1] + b[(i-1)*m+j+1] + b[(i+1)*m+j-1] + b[(i+1)*m+j+1]);
49 tmp = a; a = b; b = tmp;
50 }
51}
52
53void
54smoothhost( float*restrict a, float*restrict b, float w0, float w1, float w2, int n, int m, int niters )
55{
56 int i, j, iter;
57 float* tmp;
58 for( iter = 1; iter <= niters; ++iter ){
59 for( i = 1; i < n-1; ++i ){
60 for( j = 1; j < m-1; ++j ){
61 a[i*m+j] = w0 * b[i*m+j] +
62 w1*(b[(i-1)*m+j] + b[(i+1)*m+j] + b[i*m+j-1] + b[i*m+j+1]) +
63 w2*(b[(i-1)*m+j-1] + b[(i-1)*m+j+1] + b[(i+1)*m+j-1] + b[(i+1)*m+j+1]);
64 }
65 }
66 tmp = a; a = b; b = tmp;
67 }
68}
69
70void
71doprt( char* s, float*restrict a, float*restrict ah, int i, int j, int n, int m )
72{
73 printf( "%s[%d][%d] = %g = %g\n", s, i, j, a[i*m+j], ah[i*m+j] );
74}
75
76int
77main( int argc, char* argv[] )
78{
79 float *aa, *bb, *aahost, *bbhost;
80 int i,j;
81 float w0, w1, w2;
82 int n, m, aerrs, berrs, iters;
83 float dif, rdif, tol;
84 timestruct t1, t2, t3;
85 long long cgpu, chost;
86
87 n = 0;
88 m = 0;
89 iters = 0;
90
91 if( argc > 1 ){
92 n = atoi( argv[1] );
93 if( argc > 2 ){
94 m = atoi( argv[2] );
95 if( argc > 3 ){
96 iters = atoi( argv[3] );
97 if( argc > 4 ){
98 if( !strcmp( argv[4], "host" ) ||
99 !strcmp( argv[4], "HOST" ) ){
100 acc_set_device( acc_device_host );
101 printf( "using host\n" );
102 }else
103 if( !strcmp( argv[4], "nvidia" ) ||
104 !strcmp( argv[4], "NVIDIA" ) ){
105 acc_set_device( acc_device_nvidia );
106 acc_init( acc_device_nvidia );
107 printf( "using nvidia\n" );
108 }else{
109 printf( "unknown device: %s\nUsing default\n", argv[4] );
110 }
111 }
112 }
113 }
114 }
115
116 if( n <= 0 ) n = 1000;
117 if( m <= 0 ) m = n;
118 if( iters <= 0 ) iters = 10;
119
120 aa = (float*) malloc( sizeof(float) * n * m );
121 aahost = (float*) malloc( sizeof(float) * n * m );
122 bb = (float*)malloc( sizeof(float) * n * m );
123 bbhost = (float*)malloc( sizeof(float) * n * m );
124 for( i = 0; i < n; ++i ){
125 for( j = 0; j < m; ++j ){
126 aa[i*m+j] = 0;
127 aahost[i*m+j] = 0;
128 bb[i*m+j] = i*1000 + j;
129 bbhost[i*m+j] = i*1000 + j;
130 }
131 }
132 w0 = 0.5;
133 w1 = 0.3;
134 w2 = 0.2;
135 gettime( &t1 );
136 smooth( aa, bb, w0, w1, w2, n, m, iters );
137 gettime( &t2 );
138 smoothhost( aahost, bbhost, w0, w1, w2, n, m, iters );
139 gettime( &t3 );
140
141 cgpu = usec(t1,t2);
142 chost = usec(t2,t3);
143
144 printf( "matrix %d x %d, %d iterations\n", n, m, iters );
145 printf( "%13ld microseconds optimized\n", cgpu );
146 printf( "%13ld microseconds on host\n", chost );
147
148 aerrs = berrs = 0;
149 tol = 0.000005;
150 for( i = 0; i < n; ++i ){
151 for( j = 0; j < m; ++j ){
152 rdif = dif = fabsf(aa[i*m+j] - aahost[i*m+j]);
153 if( aahost[i*m+j] ) rdif = fabsf(dif / aahost[i*m+j]);
154 if( rdif > tol ){
155 ++aerrs;
156 if( aerrs < 10 ){
157 printf( "aa[%d][%d] = %12.7e != %12.7e, dif=%12.7e\n", i, j, (double)aa[i*m+j], (double)aahost[i*m+j], (double)dif );
158 }
159 }
160 rdif = dif = fabsf(bb[i*m+j] - bbhost[i*m+j]);
161 if( bbhost[i*m+j] ) rdif = fabsf(dif / bbhost[i*m+j]);
162 if( rdif > tol ){
163 ++berrs;
164 if( berrs < 10 ){
165 printf( "bb[%d][%d] = %12.7e != %12.7e, dif=%12.7e\n", i, j, (double)bb[i*m+j], (double)bbhost[i*m+j], (double)dif );
166 }
167 }
168 }
169 }
170 if( aerrs == 0 && berrs == 0 ){
171 printf( "Test PASSED\n" );
172 return 0;
173 }else{
174 printf( "Test FAILED\n" );
175 printf( "%d ERRORS found\n", aerrs + berrs );
176 return 1;
177 }
178}
Note: See TracBrowser for help on using the repository browser.