source: CIVL/examples/omp/fft_openmp.c@ 83af34d

1.23 2.0 main test-branch
Last change on this file since 83af34d was 9a94b18, checked in by Matthew B. Dwyer <matthewbdwyer@…>, 11 years ago

Making examples parameterizable through #define

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@1901 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 12.4 KB
Line 
1# include <stdlib.h>
2# include <stdio.h>
3# include <math.h>
4# include <time.h>
5# include <omp.h>
6
7#define NITS 4 // originally 10000
8#define LN2MAX 1 //originally 25
9
10int main ( void );
11void ccopy ( int n, double x[], double y[] );
12void cfft2 ( int n, double x[], double y[], double w[], double sgn );
13void cffti ( int n, double w[] );
14double ggl ( double *ds );
15void step ( int n, int mj, double a[], double b[], double c[], double d[],
16 double w[], double sgn );
17void timestamp ( void );
18
19/******************************************************************************/
20
21int main ( void )
22
23/******************************************************************************/
24/*
25 Purpose:
26
27 MAIN is the main program for FFT_OPENMP.
28
29 Discussion:
30
31 The "complex" vector A is actually stored as a double vector B.
32
33 The "complex" vector entry A[I] is stored as:
34
35 B[I*2+0], the real part,
36 B[I*2+1], the imaginary part.
37
38 Modified:
39
40 20 March 2009
41
42 Author:
43
44 Original C version by Wesley Petersen.
45 This C version by John Burkardt.
46
47 Reference:
48
49 Wesley Petersen, Peter Arbenz,
50 Introduction to Parallel Computing - A practical guide with examples in C,
51 Oxford University Press,
52 ISBN: 0-19-851576-6,
53 LC: QA76.58.P47.
54*/
55{
56 double error;
57 int first;
58 double flops;
59 double fnm1;
60 int i;
61 int icase;
62 int it;
63 int ln2;
64 int ln2_max = LN2MAX;
65 double mflops;
66 int n;
67 int nits = NITS;
68 int proc_num;
69 static double seed;
70 double sgn;
71 int thread_num;
72 double *w;
73 double wtime;
74 double *x;
75 double *y;
76 double *z;
77 double z0;
78 double z1;
79
80 timestamp ( );
81 printf ( "\n" );
82 printf ( "FFT_OPENMP\n" );
83 printf ( " C/OpenMP version\n" );
84 printf ( "\n" );
85 printf ( " Demonstrate an implementation of the Fast Fourier Transform\n" );
86 printf ( " of a complex data vector, using OpenMP for parallel execution.\n" );
87
88 printf ( "\n" );
89 printf ( " Number of processors available = %d\n", omp_get_num_procs ( ) );
90 printf ( " Number of threads = %d\n", omp_get_max_threads ( ) );
91/*
92 Prepare for tests.
93*/
94 printf ( "\n" );
95 printf ( " Accuracy check:\n" );
96 printf ( "\n" );
97 printf ( " FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n" );
98 printf ( "\n" );
99 printf ( " N NITS Error Time Time/Call MFLOPS\n" );
100 printf ( "\n" );
101
102 seed = 331.0;
103 n = 1;
104/*
105 LN2 is the log base 2 of N. Each increase of LN2 doubles N.
106*/
107 for ( ln2 = 1; ln2 <= ln2_max; ln2++ )
108 {
109 n = 2 * n;
110/*
111 Allocate storage for the complex arrays W, X, Y, Z.
112
113 We handle the complex arithmetic,
114 and store a complex number as a pair of doubles, a complex vector as a doubly
115 dimensioned array whose second dimension is 2.
116*/
117 w = ( double * ) malloc ( n * sizeof ( double ) );
118 x = ( double * ) malloc ( 2 * n * sizeof ( double ) );
119 y = ( double * ) malloc ( 2 * n * sizeof ( double ) );
120 z = ( double * ) malloc ( 2 * n * sizeof ( double ) );
121
122 first = 1;
123
124 for ( icase = 0; icase < 2; icase++ )
125 {
126 if ( first )
127 {
128 for ( i = 0; i < 2 * n; i = i + 2 )
129 {
130 z0 = ggl ( &seed );
131 z1 = ggl ( &seed );
132 x[i] = z0;
133 z[i] = z0;
134 x[i+1] = z1;
135 z[i+1] = z1;
136 }
137 }
138 else
139 {
140# pragma omp parallel \
141 shared ( n, x, z ) \
142 private ( i, z0, z1 )
143
144# pragma omp for nowait
145
146 for ( i = 0; i < 2 * n; i = i + 2 )
147 {
148 z0 = 0.0; /* real part of array */
149 z1 = 0.0; /* imaginary part of array */
150 x[i] = z0;
151 z[i] = z0; /* copy of initial real data */
152 x[i+1] = z1;
153 z[i+1] = z1; /* copy of initial imag. data */
154 }
155 }
156/*
157 Initialize the sine and cosine tables.
158*/
159 cffti ( n, w );
160/*
161 Transform forward, back
162*/
163 if ( first )
164 {
165 sgn = + 1.0;
166 cfft2 ( n, x, y, w, sgn );
167 sgn = - 1.0;
168 cfft2 ( n, y, x, w, sgn );
169/*
170 Results should be same as the initial data multiplied by N.
171*/
172 fnm1 = 1.0 / ( double ) n;
173 error = 0.0;
174 for ( i = 0; i < 2 * n; i = i + 2 )
175 {
176 error = error
177 + pow ( z[i] - fnm1 * x[i], 2 )
178 + pow ( z[i+1] - fnm1 * x[i+1], 2 );
179 }
180 error = sqrt ( fnm1 * error );
181 printf ( " %12d %8d %12e", n, nits, error );
182 first = 0;
183 }
184 else
185 {
186 wtime = omp_get_wtime ( );
187 for ( it = 0; it < nits; it++ )
188 {
189 sgn = + 1.0;
190 cfft2 ( n, x, y, w, sgn );
191 sgn = - 1.0;
192 cfft2 ( n, y, x, w, sgn );
193 }
194 wtime = omp_get_wtime ( ) - wtime;
195
196 flops = 2.0 * ( double ) nits
197 * ( 5.0 * ( double ) n * ( double ) ln2 );
198
199 mflops = flops / 1.0E+06; // / wtime;
200
201 printf ( " %12e %12e %12f\n", wtime, wtime / ( double ) ( 2 * nits ), mflops );
202 }
203 }
204 if ( ( ln2 % 4 ) == 0 )
205 {
206 nits = nits / 10;
207 }
208 if ( nits < 1 )
209 {
210 nits = 1;
211 }
212 free ( w );
213 free ( x );
214 free ( y );
215 free ( z );
216 }
217/*
218 Terminate.
219*/
220 printf ( "\n" );
221 printf ( "FFT_OPENMP:\n" );
222 printf ( " Normal end of execution.\n" );
223 printf ( "\n" );
224 timestamp ( );
225
226 return 0;
227}
228/******************************************************************************/
229
230void ccopy ( int n, double x[], double y[] )
231
232/******************************************************************************/
233/*
234 Purpose:
235
236 CCOPY copies a complex vector.
237
238 Discussion:
239
240 The "complex" vector A[N] is actually stored as a double vector B[2*N].
241
242 The "complex" vector entry A[I] is stored as:
243
244 B[I*2+0], the real part,
245 B[I*2+1], the imaginary part.
246
247 Modified:
248
249 20 March 2009
250
251 Author:
252
253 Original C version by Wesley Petersen.
254 This C version by John Burkardt.
255
256 Reference:
257
258 Wesley Petersen, Peter Arbenz,
259 Introduction to Parallel Computing - A practical guide with examples in C,
260 Oxford University Press,
261 ISBN: 0-19-851576-6,
262 LC: QA76.58.P47.
263
264 Parameters:
265
266 Input, int N, the length of the vector.
267
268 Input, double X[2*N], the vector to be copied.
269
270 Output, double Y[2*N], a copy of X.
271*/
272{
273 int i;
274
275 for ( i = 0; i < n; i++ )
276 {
277 y[i*2+0] = x[i*2+0];
278 y[i*2+1] = x[i*2+1];
279 }
280 return;
281}
282/******************************************************************************/
283
284void cfft2 ( int n, double x[], double y[], double w[], double sgn )
285
286/******************************************************************************/
287/*
288 Purpose:
289
290 CFFT2 performs a complex Fast Fourier Transform.
291
292 Modified:
293
294 20 March 2009
295
296 Author:
297
298 Original C version by Wesley Petersen.
299 This C version by John Burkardt.
300
301 Reference:
302
303 Wesley Petersen, Peter Arbenz,
304 Introduction to Parallel Computing - A practical guide with examples in C,
305 Oxford University Press,
306 ISBN: 0-19-851576-6,
307 LC: QA76.58.P47.
308
309 Parameters:
310
311 Input, int N, the size of the array to be transformed.
312
313 Input/output, double X[2*N], the data to be transformed.
314 On output, the contents of X have been overwritten by work information.
315
316 Output, double Y[2*N], the forward or backward FFT of X.
317
318 Input, double W[N], a table of sines and cosines.
319
320 Input, double SGN, is +1 for a "forward" FFT and -1 for a "backward" FFT.
321*/
322{
323 int j;
324 int m;
325 int mj;
326 int tgle;
327
328 m = ( int ) ( log ( ( double ) n ) / log ( 1.99 ) );
329 mj = 1;
330/*
331 Toggling switch for work array.
332*/
333 tgle = 1;
334 step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn );
335
336 if ( n == 2 )
337 {
338 return;
339 }
340
341 for ( j = 0; j < m - 2; j++ )
342 {
343 mj = mj * 2;
344 if ( tgle )
345 {
346 step ( n, mj, &y[0*2+0], &y[(n/2)*2+0], &x[0*2+0], &x[mj*2+0], w, sgn );
347 tgle = 0;
348 }
349 else
350 {
351 step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn );
352 tgle = 1;
353 }
354 }
355/*
356 Last pass through data: move Y to X if needed.
357*/
358 if ( tgle )
359 {
360 ccopy ( n, y, x );
361 }
362
363 mj = n / 2;
364 step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn );
365
366 return;
367}
368/******************************************************************************/
369
370void cffti ( int n, double w[] )
371
372/******************************************************************************/
373/*
374 Purpose:
375
376 CFFTI sets up sine and cosine tables needed for the FFT calculation.
377
378 Modified:
379
380 20 March 2009
381
382 Author:
383
384 Original C version by Wesley Petersen.
385 This C version by John Burkardt.
386
387 Reference:
388
389 Wesley Petersen, Peter Arbenz,
390 Introduction to Parallel Computing - A practical guide with examples in C,
391 Oxford University Press,
392 ISBN: 0-19-851576-6,
393 LC: QA76.58.P47.
394
395 Parameters:
396
397 Input, int N, the size of the array to be transformed.
398
399 Output, double W[N], a table of sines and cosines.
400*/
401{
402 double arg;
403 double aw;
404 int i;
405 int n2;
406 const double pi = 3.141592653589793;
407
408 n2 = n / 2;
409 aw = 2.0 * pi / ( ( double ) n );
410
411# pragma omp parallel \
412 shared ( aw, n, w ) \
413 private ( arg, i )
414
415# pragma omp for nowait
416
417 for ( i = 0; i < n2; i++ )
418 {
419 arg = aw * ( ( double ) i );
420 w[i*2+0] = cos ( arg );
421 w[i*2+1] = sin ( arg );
422 }
423 return;
424}
425/******************************************************************************/
426
427double ggl ( double *seed )
428
429/******************************************************************************/
430/*
431 Purpose:
432
433 GGL generates uniformly distributed pseudorandom real numbers in [0,1].
434
435 Modified:
436
437 20 March 2009
438
439 Author:
440
441 Original C version by Wesley Petersen, M Troyer, I Vattulainen.
442 This C version by John Burkardt.
443
444 Reference:
445
446 Wesley Petersen, Peter Arbenz,
447 Introduction to Parallel Computing - A practical guide with examples in C,
448 Oxford University Press,
449 ISBN: 0-19-851576-6,
450 LC: QA76.58.P47.
451
452 Parameters:
453
454 Input/output, double *SEED, used as a seed for the sequence.
455
456 Output, double GGL, the next pseudorandom value.
457*/
458{
459 double d2 = 0.2147483647e10;
460 double t;
461 double value;
462
463 t = ( double ) *seed;
464 t = fmod ( 16807.0 * t, d2 );
465 *seed = ( double ) t;
466 value = ( double ) ( ( t - 1.0 ) / ( d2 - 1.0 ) );
467
468 return value;
469}
470/******************************************************************************/
471
472void step ( int n, int mj, double a[], double b[], double c[],
473 double d[], double w[], double sgn )
474
475/******************************************************************************/
476/*
477 Purpose:
478
479 STEP carries out one step of the workspace version of CFFT2.
480
481 Modified:
482
483 20 March 2009
484
485 Author:
486
487 Original C version by Wesley Petersen.
488 This C version by John Burkardt.
489
490 Reference:
491
492 Wesley Petersen, Peter Arbenz,
493 Introduction to Parallel Computing - A practical guide with examples in C,
494 Oxford University Press,
495 ISBN: 0-19-851576-6,
496 LC: QA76.58.P47.
497
498 Parameters:
499
500*/
501{
502 double ambr;
503 double ambu;
504 int j;
505 int ja;
506 int jb;
507 int jc;
508 int jd;
509 int jw;
510 int k;
511 int lj;
512 int mj2;
513 double wjw[2];
514
515 mj2 = 2 * mj;
516 lj = n / mj2;
517
518# pragma omp parallel \
519 shared ( a, b, c, d, lj, mj, mj2, sgn, w ) \
520 private ( ambr, ambu, j, ja, jb, jc, jd, jw, k, wjw )
521
522# pragma omp for nowait
523
524 for ( j = 0; j < lj; j++ )
525 {
526 jw = j * mj;
527 ja = jw;
528 jb = ja;
529 jc = j * mj2;
530 jd = jc;
531
532 wjw[0] = w[jw*2+0];
533 wjw[1] = w[jw*2+1];
534
535 if ( sgn < 0.0 )
536 {
537 wjw[1] = - wjw[1];
538 }
539
540 for ( k = 0; k < mj; k++ )
541 {
542 c[(jc+k)*2+0] = a[(ja+k)*2+0] + b[(jb+k)*2+0];
543 c[(jc+k)*2+1] = a[(ja+k)*2+1] + b[(jb+k)*2+1];
544
545 ambr = a[(ja+k)*2+0] - b[(jb+k)*2+0];
546 ambu = a[(ja+k)*2+1] - b[(jb+k)*2+1];
547
548 d[(jd+k)*2+0] = wjw[0] * ambr - wjw[1] * ambu;
549 d[(jd+k)*2+1] = wjw[1] * ambr + wjw[0] * ambu;
550 }
551 }
552 return;
553}
554/******************************************************************************/
555
556void timestamp ( void )
557
558/******************************************************************************/
559/*
560 Purpose:
561
562 TIMESTAMP prints the current YMDHMS date as a time stamp.
563
564 Example:
565
566 31 May 2001 09:45:54 AM
567
568 Licensing:
569
570 This code is distributed under the GNU LGPL license.
571
572 Modified:
573
574 24 September 2003
575
576 Author:
577
578 John Burkardt
579
580 Parameters:
581
582 None
583*/
584{
585# define TIME_SIZE 40
586
587 static char time_buffer[TIME_SIZE];
588 const struct tm *tm;
589 size_t len;
590 time_t now;
591
592 now = time ( NULL );
593 tm = localtime ( &now );
594
595 len = strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm );
596
597 printf ( "%s\n", time_buffer );
598
599 return;
600# undef TIME_SIZE
601}
Note: See TracBrowser for help on using the repository browser.