source: CIVL/examples/openacc/acc_c3a/acc_c3a.c

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 4.8 KB
RevLine 
/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * NVIDIA CORPORATION and its licensors retain all intellectual property
 * and proprietary rights in and to this software, related documentation
 * and any modifications thereto. Any use, reproduction, disclosure or
 * distribution of this software and related documentation without an express
 * license agreement from NVIDIA CORPORATION is strictly prohibited.
 *
 */
11
12
/*
 * Jacobi iteration example using OpenACC in C
 * Build with
 * pgcc -acc -Minfo=accel -fast c3.c
 */
18
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <openacc.h>
24
/*
 * Portable wall-clock helpers.
 *
 *   timestruct   - platform type that holds one time sample
 *   gettime(a)   - store the current time in *a
 *   usec(t1,t2)  - elapsed time from t1 to t2 in microseconds (long long)
 *
 * The arithmetic is done in long long so the seconds-to-microseconds
 * product cannot overflow where time_t/long is only 32 bits wide.
 */
#if defined(_WIN32) || defined(_WIN64)
#include <sys/timeb.h>
#define gettime(a) _ftime(a)
/* (seconds*1000 + millitm) is milliseconds; scale by 1000 for microseconds. */
#define usec(t1,t2) (((long long)((t2).time-(t1).time)*1000LL+((t2).millitm-(t1).millitm))*1000LL)
typedef struct _timeb timestruct;
#else
#include <sys/time.h>
#define gettime(a) gettimeofday(a,NULL)
#define usec(t1,t2) ((long long)((t2).tv_sec-(t1).tv_sec)*1000000LL+((t2).tv_usec-(t1).tv_usec))
typedef struct timeval timestruct;
#endif
36
/*
 * Run niters passes of a 9-point weighted smoothing stencil, offloading
 * each pass with an OpenACC "kernels loop" directive.
 *
 * a, b   : n-by-m grids stored row-major (element (i,j) is at i*m+j).
 *          Pass 1 reads b and writes a; the roles alternate on every
 *          following pass, so the newest result is in a after an odd
 *          number of passes and in b after an even number.
 * w0     : weight of the centre point
 * w1     : weight of the four edge-adjacent neighbours
 * w2     : weight of the four diagonal neighbours
 * n, m   : number of rows and columns
 * niters : number of passes; no work is done when niters <= 0
 *
 * Only interior points (1..n-2, 1..m-2) are written; boundary elements
 * keep their values.  The directive uses a present clause, so both grids
 * must already be in an enclosing OpenACC data region.
 */
void
smooth( float*restrict a, float*restrict b, float w0, float w1, float w2, int n, int m, int niters )
{
    /*
     * Ping-pong through plain pointers.  Swapping a and b directly would
     * assign one restrict-qualified pointer to another declared in the
     * same block, which C defines as undefined behaviour.
     */
    float *dst = a;
    float *src = b;
    int iter;

    for( iter = 1; iter <= niters; ++iter ){
        /* Per-pass restrict views: within one pass the written grid and
           the read grid are distinct objects. */
        float *restrict out = dst;
        const float *restrict in = src;
        float *tmp;
        int i, j;

        #pragma acc kernels loop present(in[0:n*m],out[0:n*m]) independent
        for( i = 1; i < n-1; ++i ){
            for( j = 1; j < m-1; ++j ){
                out[i*m+j] = w0 * in[i*m+j] +
                    w1*(in[(i-1)*m+j] + in[(i+1)*m+j] + in[i*m+j-1] + in[i*m+j+1]) +
                    w2*(in[(i-1)*m+j-1] + in[(i-1)*m+j+1] + in[(i+1)*m+j-1] + in[(i+1)*m+j+1]);
            }
        }

        /* The grid just written becomes the input of the next pass. */
        tmp = dst; dst = src; src = tmp;
    }
}
54
/*
 * Host (plain C, no directives) version of the 9-point weighted smoothing
 * stencil.
 *
 * a, b   : n-by-m grids stored row-major (element (i,j) is at i*m+j).
 *          Pass 1 reads b and writes a; the roles alternate on every
 *          following pass, so the newest result is in a after an odd
 *          number of passes and in b after an even number.
 * w0     : weight of the centre point
 * w1     : weight of the four edge-adjacent neighbours
 * w2     : weight of the four diagonal neighbours
 * n, m   : number of rows and columns
 * niters : number of passes; no work is done when niters <= 0
 *
 * Only interior points (1..n-2, 1..m-2) are written; boundary elements
 * keep their values.
 */
void
smoothhost( float*restrict a, float*restrict b, float w0, float w1, float w2, int n, int m, int niters )
{
    /*
     * Ping-pong through plain pointers.  Swapping a and b directly would
     * assign one restrict-qualified pointer to another declared in the
     * same block, which C defines as undefined behaviour.
     */
    float *dst = a;
    float *src = b;
    int iter;

    for( iter = 1; iter <= niters; ++iter ){
        /* Per-pass restrict views: within one pass the written grid and
           the read grid are distinct objects. */
        float *restrict out = dst;
        const float *restrict in = src;
        float *tmp;
        int i, j;

        for( i = 1; i < n-1; ++i ){
            for( j = 1; j < m-1; ++j ){
                out[i*m+j] = w0 * in[i*m+j] +
                    w1*(in[(i-1)*m+j] + in[(i+1)*m+j] + in[i*m+j-1] + in[i*m+j+1]) +
                    w2*(in[(i-1)*m+j-1] + in[(i-1)*m+j+1] + in[(i+1)*m+j-1] + in[(i+1)*m+j+1]);
            }
        }

        /* The grid just written becomes the input of the next pass. */
        tmp = dst; dst = src; src = tmp;
    }
}
71
/*
 * Print element (i,j) of two row-major grids side by side on stdout.
 *
 * s     : label printed in front of the indices
 * a, ah : the two grids to read from
 * i, j  : row and column of the element
 * n     : row count (not used by this function)
 * m     : column count, i.e. the row stride
 */
void
doprt( char* s, float*restrict a, float*restrict ah, int i, int j, int n, int m )
{
    const int idx = i*m + j;   /* flat offset of element (i,j) */

    (void)n;
    printf( "%s[%d][%d] = %g = %g\n", s, i, j, a[idx], ah[idx] );
}
77
78int
79main( int argc, char* argv[] )
80{
81 float *aa, *bb, *aahost, *bbhost;
82 int i,j;
83 float w0, w1, w2;
84 int n, m, aerrs, berrs, iters;
85 float dif, rdif, tol;
86 timestruct t1, t2, t3;
87 long long cgpu, chost;
88
89 n = 0;
90 m = 0;
91 iters = 0;
92
93 if( argc > 1 ){
94 n = atoi( argv[1] );
95 if( argc > 2 ){
96 m = atoi( argv[2] );
97 if( argc > 3 ){
98 iters = atoi( argv[3] );
99 if( argc > 4 ){
100 if( !strcmp( argv[4], "host" ) ||
101 !strcmp( argv[4], "HOST" ) ){
102 acc_set_device( acc_device_host );
103 printf( "using host\n" );
104 }else
105 if( !strcmp( argv[4], "nvidia" ) ||
106 !strcmp( argv[4], "NVIDIA" ) ){
107 acc_set_device( acc_device_nvidia );
108 acc_init( acc_device_nvidia );
109 printf( "using nvidia\n" );
110 }else{
111 printf( "unknown device: %s\nUsing default\n", argv[4] );
112 }
113 }
114 }
115 }
116 }
117
118 if( n <= 0 ) n = 100;
119 if( m <= 0 ) m = n;
120 if( iters <= 0 ) iters = 10;
121
122 aa = (float*) malloc( sizeof(float) * n * m );
123 aahost = (float*) malloc( sizeof(float) * n * m );
124 bb = (float*)malloc( sizeof(float) * n * m );
125 bbhost = (float*)malloc( sizeof(float) * n * m );
126 for( i = 0; i < n; ++i ){
127 for( j = 0; j < m; ++j ){
128 aa[i*m+j] = 0;
129 aahost[i*m+j] = 0;
130 bb[i*m+j] = i*1000 + j;
131 bbhost[i*m+j] = i*1000 + j;
132 }
133 }
134 w0 = 0.5;
135 w1 = 0.3;
136 w2 = 0.2;
137 gettime( &t1 );
138 #pragma acc data copy(bb[0:n*m],aa[0:n*m])
139 {
140 smooth( aa, bb, w0, w1, w2, n, m, iters );
141 }
142 gettime( &t2 );
143 smoothhost( aahost, bbhost, w0, w1, w2, n, m, iters );
144 gettime( &t3 );
145
146 cgpu = usec(t1,t2);
147 chost = usec(t2,t3);
148
149 printf( "matrix %d x %d, %d iterations\n", n, m, iters );
150 printf( "%13ld microseconds optimized\n", cgpu );
151 printf( "%13ld microseconds on host\n", chost );
152
153 aerrs = berrs = 0;
154 tol = 0.000005;
155 for( i = 0; i < n; ++i ){
156 for( j = 0; j < m; ++j ){
157 rdif = dif = fabsf(aa[i*m+j] - aahost[i*m+j]);
158 if( aahost[i*m+j] ) rdif = fabsf(dif / aahost[i*m+j]);
159 if( rdif > tol ){
160 ++aerrs;
161 if( aerrs < 10 ){
162 printf( "aa[%d][%d] = %12.7e != %12.7e, dif=%12.7e\n", i, j, (double)aa[i*m+j], (double)aahost[i*m+j], (double)dif );
163 }
164 }
165 rdif = dif = fabsf(bb[i*m+j] - bbhost[i*m+j]);
166 if( bbhost[i*m+j] ) rdif = fabsf(dif / bbhost[i*m+j]);
167 if( rdif > tol ){
168 ++berrs;
169 if( berrs < 10 ){
170 printf( "bb[%d][%d] = %12.7e != %12.7e, dif=%12.7e\n", i, j, (double)bb[i*m+j], (double)bbhost[i*m+j], (double)dif );
171 }
172 }
173 }
174 }
175 if( aerrs == 0 && berrs == 0 ){
176 printf( "Test PASSED\n" );
177 return 0;
178 }else{
179 printf( "Test FAILED\n" );
180 printf( "%d ERRORS found\n", aerrs + berrs );
181 return 1;
182 }
183}
Note: See TracBrowser for help on using the repository browser.