source: CIVL/examples/omp/fft_openmp.c@ 8190175

main test-branch
Last change on this file since 8190175 was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 12.5 KB
RevLine 
[71264c4]1# include <stdlib.h>
2# include <stdio.h>
3# include <math.h>
4# include <time.h>
5# include <omp.h>
6
[20ac35f]7#ifdef _CIVL
8$input int NITS=4; // originally 10000
9$input int LN2MAX=1; //originally 25
10#else
11#define NITS 10000
12#define LN2MAX 25
13#endif
[9a94b18]14
[71264c4]15int main ( void );
16void ccopy ( int n, double x[], double y[] );
17void cfft2 ( int n, double x[], double y[], double w[], double sgn );
18void cffti ( int n, double w[] );
19double ggl ( double *ds );
20void step ( int n, int mj, double a[], double b[], double c[], double d[],
21 double w[], double sgn );
22void timestamp ( void );
23
24/******************************************************************************/
25
26int main ( void )
27
28/******************************************************************************/
29/*
30 Purpose:
31
32 MAIN is the main program for FFT_OPENMP.
33
34 Discussion:
35
36 The "complex" vector A is actually stored as a double vector B.
37
38 The "complex" vector entry A[I] is stored as:
39
40 B[I*2+0], the real part,
41 B[I*2+1], the imaginary part.
42
43 Modified:
44
45 20 March 2009
46
47 Author:
48
49 Original C version by Wesley Petersen.
50 This C version by John Burkardt.
51
52 Reference:
53
54 Wesley Petersen, Peter Arbenz,
55 Introduction to Parallel Computing - A practical guide with examples in C,
56 Oxford University Press,
57 ISBN: 0-19-851576-6,
58 LC: QA76.58.P47.
59*/
60{
61 double error;
62 int first;
63 double flops;
64 double fnm1;
65 int i;
66 int icase;
67 int it;
68 int ln2;
[9a94b18]69 int ln2_max = LN2MAX;
[71264c4]70 double mflops;
71 int n;
[9a94b18]72 int nits = NITS;
[71264c4]73 int proc_num;
74 static double seed;
75 double sgn;
76 int thread_num;
77 double *w;
78 double wtime;
79 double *x;
80 double *y;
81 double *z;
82 double z0;
83 double z1;
84
85 timestamp ( );
86 printf ( "\n" );
87 printf ( "FFT_OPENMP\n" );
88 printf ( " C/OpenMP version\n" );
89 printf ( "\n" );
90 printf ( " Demonstrate an implementation of the Fast Fourier Transform\n" );
91 printf ( " of a complex data vector, using OpenMP for parallel execution.\n" );
92
93 printf ( "\n" );
94 printf ( " Number of processors available = %d\n", omp_get_num_procs ( ) );
95 printf ( " Number of threads = %d\n", omp_get_max_threads ( ) );
96/*
97 Prepare for tests.
98*/
99 printf ( "\n" );
100 printf ( " Accuracy check:\n" );
101 printf ( "\n" );
102 printf ( " FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n" );
103 printf ( "\n" );
104 printf ( " N NITS Error Time Time/Call MFLOPS\n" );
105 printf ( "\n" );
106
107 seed = 331.0;
108 n = 1;
109/*
110 LN2 is the log base 2 of N. Each increase of LN2 doubles N.
111*/
112 for ( ln2 = 1; ln2 <= ln2_max; ln2++ )
113 {
114 n = 2 * n;
115/*
116 Allocate storage for the complex arrays W, X, Y, Z.
117
118 We handle the complex arithmetic,
119 and store a complex number as a pair of doubles, a complex vector as a doubly
120 dimensioned array whose second dimension is 2.
121*/
122 w = ( double * ) malloc ( n * sizeof ( double ) );
123 x = ( double * ) malloc ( 2 * n * sizeof ( double ) );
124 y = ( double * ) malloc ( 2 * n * sizeof ( double ) );
125 z = ( double * ) malloc ( 2 * n * sizeof ( double ) );
126
127 first = 1;
128
129 for ( icase = 0; icase < 2; icase++ )
130 {
131 if ( first )
132 {
133 for ( i = 0; i < 2 * n; i = i + 2 )
134 {
135 z0 = ggl ( &seed );
136 z1 = ggl ( &seed );
137 x[i] = z0;
138 z[i] = z0;
139 x[i+1] = z1;
140 z[i+1] = z1;
141 }
142 }
143 else
144 {
145# pragma omp parallel \
146 shared ( n, x, z ) \
147 private ( i, z0, z1 )
148
[20ac35f]149# pragma omp for
[71264c4]150
151 for ( i = 0; i < 2 * n; i = i + 2 )
152 {
153 z0 = 0.0; /* real part of array */
154 z1 = 0.0; /* imaginary part of array */
155 x[i] = z0;
156 z[i] = z0; /* copy of initial real data */
157 x[i+1] = z1;
158 z[i+1] = z1; /* copy of initial imag. data */
159 }
160 }
161/*
162 Initialize the sine and cosine tables.
163*/
164 cffti ( n, w );
165/*
166 Transform forward, back
167*/
168 if ( first )
169 {
170 sgn = + 1.0;
171 cfft2 ( n, x, y, w, sgn );
172 sgn = - 1.0;
173 cfft2 ( n, y, x, w, sgn );
174/*
175 Results should be same as the initial data multiplied by N.
176*/
177 fnm1 = 1.0 / ( double ) n;
178 error = 0.0;
179 for ( i = 0; i < 2 * n; i = i + 2 )
180 {
181 error = error
182 + pow ( z[i] - fnm1 * x[i], 2 )
183 + pow ( z[i+1] - fnm1 * x[i+1], 2 );
184 }
185 error = sqrt ( fnm1 * error );
186 printf ( " %12d %8d %12e", n, nits, error );
187 first = 0;
188 }
189 else
190 {
191 wtime = omp_get_wtime ( );
192 for ( it = 0; it < nits; it++ )
193 {
194 sgn = + 1.0;
195 cfft2 ( n, x, y, w, sgn );
196 sgn = - 1.0;
197 cfft2 ( n, y, x, w, sgn );
198 }
199 wtime = omp_get_wtime ( ) - wtime;
200
201 flops = 2.0 * ( double ) nits
202 * ( 5.0 * ( double ) n * ( double ) ln2 );
203
[9a94b18]204 mflops = flops / 1.0E+06; // / wtime;
[71264c4]205
206 printf ( " %12e %12e %12f\n", wtime, wtime / ( double ) ( 2 * nits ), mflops );
207 }
208 }
209 if ( ( ln2 % 4 ) == 0 )
210 {
211 nits = nits / 10;
212 }
213 if ( nits < 1 )
214 {
215 nits = 1;
216 }
217 free ( w );
218 free ( x );
219 free ( y );
220 free ( z );
221 }
222/*
223 Terminate.
224*/
225 printf ( "\n" );
226 printf ( "FFT_OPENMP:\n" );
227 printf ( " Normal end of execution.\n" );
228 printf ( "\n" );
229 timestamp ( );
230
231 return 0;
232}
233/******************************************************************************/
234
235void ccopy ( int n, double x[], double y[] )
236
237/******************************************************************************/
238/*
239 Purpose:
240
241 CCOPY copies a complex vector.
242
243 Discussion:
244
245 The "complex" vector A[N] is actually stored as a double vector B[2*N].
246
247 The "complex" vector entry A[I] is stored as:
248
249 B[I*2+0], the real part,
250 B[I*2+1], the imaginary part.
251
252 Modified:
253
254 20 March 2009
255
256 Author:
257
258 Original C version by Wesley Petersen.
259 This C version by John Burkardt.
260
261 Reference:
262
263 Wesley Petersen, Peter Arbenz,
264 Introduction to Parallel Computing - A practical guide with examples in C,
265 Oxford University Press,
266 ISBN: 0-19-851576-6,
267 LC: QA76.58.P47.
268
269 Parameters:
270
271 Input, int N, the length of the vector.
272
273 Input, double X[2*N], the vector to be copied.
274
275 Output, double Y[2*N], a copy of X.
276*/
277{
278 int i;
279
280 for ( i = 0; i < n; i++ )
281 {
282 y[i*2+0] = x[i*2+0];
283 y[i*2+1] = x[i*2+1];
284 }
285 return;
286}
287/******************************************************************************/
288
289void cfft2 ( int n, double x[], double y[], double w[], double sgn )
290
291/******************************************************************************/
292/*
293 Purpose:
294
295 CFFT2 performs a complex Fast Fourier Transform.
296
297 Modified:
298
299 20 March 2009
300
301 Author:
302
303 Original C version by Wesley Petersen.
304 This C version by John Burkardt.
305
306 Reference:
307
308 Wesley Petersen, Peter Arbenz,
309 Introduction to Parallel Computing - A practical guide with examples in C,
310 Oxford University Press,
311 ISBN: 0-19-851576-6,
312 LC: QA76.58.P47.
313
314 Parameters:
315
316 Input, int N, the size of the array to be transformed.
317
318 Input/output, double X[2*N], the data to be transformed.
319 On output, the contents of X have been overwritten by work information.
320
321 Output, double Y[2*N], the forward or backward FFT of X.
322
323 Input, double W[N], a table of sines and cosines.
324
325 Input, double SGN, is +1 for a "forward" FFT and -1 for a "backward" FFT.
326*/
327{
328 int j;
329 int m;
330 int mj;
331 int tgle;
332
333 m = ( int ) ( log ( ( double ) n ) / log ( 1.99 ) );
334 mj = 1;
335/*
336 Toggling switch for work array.
337*/
338 tgle = 1;
339 step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn );
340
341 if ( n == 2 )
342 {
343 return;
344 }
345
346 for ( j = 0; j < m - 2; j++ )
347 {
348 mj = mj * 2;
349 if ( tgle )
350 {
351 step ( n, mj, &y[0*2+0], &y[(n/2)*2+0], &x[0*2+0], &x[mj*2+0], w, sgn );
352 tgle = 0;
353 }
354 else
355 {
356 step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn );
357 tgle = 1;
358 }
359 }
360/*
361 Last pass through data: move Y to X if needed.
362*/
363 if ( tgle )
364 {
365 ccopy ( n, y, x );
366 }
367
368 mj = n / 2;
369 step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn );
370
371 return;
372}
373/******************************************************************************/
374
375void cffti ( int n, double w[] )
376
377/******************************************************************************/
378/*
379 Purpose:
380
381 CFFTI sets up sine and cosine tables needed for the FFT calculation.
382
383 Modified:
384
385 20 March 2009
386
387 Author:
388
389 Original C version by Wesley Petersen.
390 This C version by John Burkardt.
391
392 Reference:
393
394 Wesley Petersen, Peter Arbenz,
395 Introduction to Parallel Computing - A practical guide with examples in C,
396 Oxford University Press,
397 ISBN: 0-19-851576-6,
398 LC: QA76.58.P47.
399
400 Parameters:
401
402 Input, int N, the size of the array to be transformed.
403
404 Output, double W[N], a table of sines and cosines.
405*/
406{
407 double arg;
408 double aw;
409 int i;
410 int n2;
411 const double pi = 3.141592653589793;
412
413 n2 = n / 2;
414 aw = 2.0 * pi / ( ( double ) n );
415
416# pragma omp parallel \
417 shared ( aw, n, w ) \
418 private ( arg, i )
419
420# pragma omp for nowait
421
422 for ( i = 0; i < n2; i++ )
423 {
424 arg = aw * ( ( double ) i );
425 w[i*2+0] = cos ( arg );
426 w[i*2+1] = sin ( arg );
427 }
428 return;
429}
430/******************************************************************************/
431
432double ggl ( double *seed )
433
434/******************************************************************************/
435/*
436 Purpose:
437
438 GGL generates uniformly distributed pseudorandom real numbers in [0,1].
439
440 Modified:
441
442 20 March 2009
443
444 Author:
445
446 Original C version by Wesley Petersen, M Troyer, I Vattulainen.
447 This C version by John Burkardt.
448
449 Reference:
450
451 Wesley Petersen, Peter Arbenz,
452 Introduction to Parallel Computing - A practical guide with examples in C,
453 Oxford University Press,
454 ISBN: 0-19-851576-6,
455 LC: QA76.58.P47.
456
457 Parameters:
458
459 Input/output, double *SEED, used as a seed for the sequence.
460
461 Output, double GGL, the next pseudorandom value.
462*/
463{
464 double d2 = 0.2147483647e10;
465 double t;
466 double value;
467
468 t = ( double ) *seed;
469 t = fmod ( 16807.0 * t, d2 );
470 *seed = ( double ) t;
471 value = ( double ) ( ( t - 1.0 ) / ( d2 - 1.0 ) );
472
473 return value;
474}
475/******************************************************************************/
476
477void step ( int n, int mj, double a[], double b[], double c[],
478 double d[], double w[], double sgn )
479
480/******************************************************************************/
481/*
482 Purpose:
483
484 STEP carries out one step of the workspace version of CFFT2.
485
486 Modified:
487
488 20 March 2009
489
490 Author:
491
492 Original C version by Wesley Petersen.
493 This C version by John Burkardt.
494
495 Reference:
496
497 Wesley Petersen, Peter Arbenz,
498 Introduction to Parallel Computing - A practical guide with examples in C,
499 Oxford University Press,
500 ISBN: 0-19-851576-6,
501 LC: QA76.58.P47.
502
503 Parameters:
504
505*/
506{
507 double ambr;
508 double ambu;
509 int j;
510 int ja;
511 int jb;
512 int jc;
513 int jd;
514 int jw;
515 int k;
516 int lj;
517 int mj2;
518 double wjw[2];
519
520 mj2 = 2 * mj;
521 lj = n / mj2;
522
523# pragma omp parallel \
524 shared ( a, b, c, d, lj, mj, mj2, sgn, w ) \
525 private ( ambr, ambu, j, ja, jb, jc, jd, jw, k, wjw )
526
527# pragma omp for nowait
528
529 for ( j = 0; j < lj; j++ )
530 {
531 jw = j * mj;
532 ja = jw;
533 jb = ja;
534 jc = j * mj2;
535 jd = jc;
536
537 wjw[0] = w[jw*2+0];
538 wjw[1] = w[jw*2+1];
539
540 if ( sgn < 0.0 )
541 {
542 wjw[1] = - wjw[1];
543 }
544
545 for ( k = 0; k < mj; k++ )
546 {
547 c[(jc+k)*2+0] = a[(ja+k)*2+0] + b[(jb+k)*2+0];
548 c[(jc+k)*2+1] = a[(ja+k)*2+1] + b[(jb+k)*2+1];
549
550 ambr = a[(ja+k)*2+0] - b[(jb+k)*2+0];
551 ambu = a[(ja+k)*2+1] - b[(jb+k)*2+1];
552
553 d[(jd+k)*2+0] = wjw[0] * ambr - wjw[1] * ambu;
554 d[(jd+k)*2+1] = wjw[1] * ambr + wjw[0] * ambu;
555 }
556 }
557 return;
558}
559/******************************************************************************/
560
561void timestamp ( void )
562
563/******************************************************************************/
564/*
565 Purpose:
566
567 TIMESTAMP prints the current YMDHMS date as a time stamp.
568
569 Example:
570
571 31 May 2001 09:45:54 AM
572
573 Licensing:
574
575 This code is distributed under the GNU LGPL license.
576
577 Modified:
578
579 24 September 2003
580
581 Author:
582
583 John Burkardt
584
585 Parameters:
586
587 None
588*/
589{
590# define TIME_SIZE 40
591
592 static char time_buffer[TIME_SIZE];
593 const struct tm *tm;
594 size_t len;
595 time_t now;
596
597 now = time ( NULL );
598 tm = localtime ( &now );
599
600 len = strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm );
601
602 printf ( "%s\n", time_buffer );
603
604 return;
605# undef TIME_SIZE
606}
Note: See TracBrowser for help on using the repository browser.