| [33534bb] | 1 | /*
|
|---|
| 2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
|---|
| 3 | *
|
|---|
| 4 | * NVIDIA CORPORATION and its licensors retain all intellectual property
|
|---|
| 5 | * and proprietary rights in and to this software, related documentation
|
|---|
| 6 | * and any modifications thereto. Any use, reproduction, disclosure or
|
|---|
| 7 | * distribution of this software and related documentation without an express
|
|---|
| 8 | * license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|---|
| 9 | *
|
|---|
| 10 | */
|
|---|
| 11 |
|
|---|
| 12 | #include <stdio.h>
|
|---|
| 13 | #include <stdlib.h>
|
|---|
| 14 | #include <assert.h>
|
|---|
| 15 | #include <math.h>
|
|---|
| 16 | #include <openacc.h>
|
|---|
| 17 | #include <accelmath.h>
|
|---|
| 18 |
|
|---|
/*
 * Portable wall-clock timing helpers.
 *
 *   timestruct   - the platform's timestamp structure
 *   gettime(a)   - store the current time into *a
 *   usec(t1,t2)  - elapsed time from t1 to t2 in microseconds, as long long
 *
 * All arithmetic is widened to long long so the seconds term cannot overflow
 * on targets where time_t / long is only 32 bits wide.
 */
#if defined(_WIN32) || defined(_WIN64)
#include <sys/timeb.h>
#define gettime(a) _ftime(a)
/* The inner sum is a millisecond count (seconds*1000 + millitm), so it is
   scaled by 1000, not 100, to yield microseconds. */
#define usec(t1,t2) ((((long long)(t2).time-(long long)(t1).time)*1000LL+((long long)(t2).millitm-(long long)(t1).millitm))*1000LL)
typedef struct _timeb timestruct;
#else
#include <sys/time.h>
#define gettime(a) gettimeofday(a,NULL)
#define usec(t1,t2) (((long long)(t2).tv_sec-(long long)(t1).tv_sec)*1000000LL+((long long)(t2).tv_usec-(long long)(t1).tv_usec))
typedef struct timeval timestruct;
#endif
|
|---|
| 30 |
|
|---|
| 31 | int main( int argc, char* argv[] )
|
|---|
| 32 | {
|
|---|
| 33 | int n; /* size of the vector */
|
|---|
| 34 | float *a; /* the vector */
|
|---|
| 35 | float *restrict r; /* the results */
|
|---|
| 36 | float *e; /* expected results */
|
|---|
| 37 | float s, c;
|
|---|
| 38 | timestruct t1, t2, t3;
|
|---|
| 39 | long long cgpu, chost;
|
|---|
| 40 | int i, j, nerrors;
|
|---|
| 41 | nerrors = 0;
|
|---|
| 42 | if( argc > 1 )
|
|---|
| 43 | n = atoi( argv[1] );
|
|---|
| 44 | else
|
|---|
| 45 | n = 1000000;
|
|---|
| 46 | if( n <= 0 ) n = 1000000;
|
|---|
| 47 |
|
|---|
| 48 | a = (float*)malloc(n*sizeof(float));
|
|---|
| 49 | r = (float*)malloc(n*sizeof(float));
|
|---|
| 50 | e = (float*)malloc(n*sizeof(float));
|
|---|
| 51 | for( i = 0; i < n; ++i ) a[i] = (float)(i+1) * 2.0f;
|
|---|
| 52 | /*acc_init( acc_device_nvidia );*/
|
|---|
| 53 |
|
|---|
| 54 | for(j=0;j<3;++j){
|
|---|
| 55 | gettime( &t1 );
|
|---|
| 56 | #pragma acc kernels loop
|
|---|
| 57 | for( i = 0; i < n; ++i ){
|
|---|
| 58 | s = sinf(a[i]);
|
|---|
| 59 | c = cosf(a[i]);
|
|---|
| 60 | r[i] = s*s + c*c;
|
|---|
| 61 | }
|
|---|
| 62 | gettime( &t2 );
|
|---|
| 63 | cgpu = usec(t1,t2);
|
|---|
| 64 | for( i = 0; i < n; ++i ){
|
|---|
| 65 | s = sinf(a[i]);
|
|---|
| 66 | c = cosf(a[i]);
|
|---|
| 67 | e[i] = s*s + c*c;
|
|---|
| 68 | }
|
|---|
| 69 | gettime( &t3 );
|
|---|
| 70 | chost = usec(t2,t3);
|
|---|
| 71 | /* check the results */
|
|---|
| 72 | for( i = 0; i < n; ++i ) {
|
|---|
| 73 | if ( fabsf(r[i] - e[i]) >= 0.000001f ) {
|
|---|
| 74 | nerrors++;
|
|---|
| 75 | }
|
|---|
| 76 | }
|
|---|
| 77 |
|
|---|
| 78 | printf( "%13d iterations completed\n", n );
|
|---|
| 79 | printf( "%13ld microseconds on GPU\n", cgpu );
|
|---|
| 80 | printf( "%13ld microseconds on host\n", chost );
|
|---|
| 81 | if ( nerrors != 0 ) {
|
|---|
| 82 | printf( "Test FAILED\n");
|
|---|
| 83 | } else {
|
|---|
| 84 | printf( "Test PASSED\n");
|
|---|
| 85 | }
|
|---|
| 86 | }
|
|---|
| 87 | return 0;
|
|---|
| 88 | }
|
|---|