source: CIVL/examples/omp/simple/fft_openmp.c.s

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 12.3 KB
Line 
1# include <stdlib.h>
2# include <stdio.h>
3# include <math.h>
4# include <time.h>
5# include <omp.h>
6
7int main ( void );
8void ccopy ( int n, double x[], double y[] );
9void cfft2 ( int n, double x[], double y[], double w[], double sgn );
10void cffti ( int n, double w[] );
11double ggl ( double *ds );
12void step ( int n, int mj, double a[], double b[], double c[], double d[],
13 double w[], double sgn );
14void timestamp ( void );
15
16/******************************************************************************/
17
18int main ( void )
19
20/******************************************************************************/
21/*
22 Purpose:
23
24 MAIN is the main program for FFT_OPENMP.
25
26 Discussion:
27
28 The "complex" vector A is actually stored as a double vector B.
29
30 The "complex" vector entry A[I] is stored as:
31
32 B[I*2+0], the real part,
33 B[I*2+1], the imaginary part.
34
35 Modified:
36
37 20 March 2009
38
39 Author:
40
41 Original C version by Wesley Petersen.
42 This C version by John Burkardt.
43
44 Reference:
45
46 Wesley Petersen, Peter Arbenz,
47 Introduction to Parallel Computing - A practical guide with examples in C,
48 Oxford University Press,
49 ISBN: 0-19-851576-6,
50 LC: QA76.58.P47.
51*/
52{
53 double error;
54 int first;
55 double flops;
56 double fnm1;
57 int i;
58 int icase;
59 int it;
60 int ln2;
61 int ln2_max = 25;
62 double mflops;
63 int n;
64 int nits = 10000;
65 int proc_num;
66 static double seed;
67 double sgn;
68 int thread_num;
69 double *w;
70 double wtime;
71 double *x;
72 double *y;
73 double *z;
74 double z0;
75 double z1;
76
77 timestamp ( );
78 printf ( "\n" );
79 printf ( "FFT_OPENMP\n" );
80 printf ( " C/OpenMP version\n" );
81 printf ( "\n" );
82 printf ( " Demonstrate an implementation of the Fast Fourier Transform\n" );
83 printf ( " of a complex data vector, using OpenMP for parallel execution.\n" );
84
85 printf ( "\n" );
86 printf ( " Number of processors available = %d\n", omp_get_num_procs ( ) );
87 printf ( " Number of threads = %d\n", omp_get_max_threads ( ) );
88/*
89 Prepare for tests.
90*/
91 printf ( "\n" );
92 printf ( " Accuracy check:\n" );
93 printf ( "\n" );
94 printf ( " FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n" );
95 printf ( "\n" );
96 printf ( " N NITS Error Time Time/Call MFLOPS\n" );
97 printf ( "\n" );
98
99 seed = 331.0;
100 n = 1;
101/*
102 LN2 is the log base 2 of N. Each increase of LN2 doubles N.
103*/
104 for ( ln2 = 1; ln2 <= ln2_max; ln2++ )
105 {
106 n = 2 * n;
107/*
108 Allocate storage for the complex arrays W, X, Y, Z.
109
110 We handle the complex arithmetic,
111 and store a complex number as a pair of doubles, a complex vector as a doubly
112 dimensioned array whose second dimension is 2.
113*/
114 w = ( double * ) malloc ( n * sizeof ( double ) );
115 x = ( double * ) malloc ( 2 * n * sizeof ( double ) );
116 y = ( double * ) malloc ( 2 * n * sizeof ( double ) );
117 z = ( double * ) malloc ( 2 * n * sizeof ( double ) );
118
119 first = 1;
120
121 for ( icase = 0; icase < 2; icase++ )
122 {
123 if ( first )
124 {
125 for ( i = 0; i < 2 * n; i = i + 2 )
126 {
127 z0 = ggl ( &seed );
128 z1 = ggl ( &seed );
129 x[i] = z0;
130 z[i] = z0;
131 x[i+1] = z1;
132 z[i+1] = z1;
133 }
134 }
135 else
136 {
137# pragma omp parallel \
138 shared ( n, x, z ) \
139 private ( i, z0, z1 )
140
141# pragma omp for nowait
142
143 for ( i = 0; i < 2 * n; i = i + 2 )
144 {
145 z0 = 0.0; /* real part of array */
146 z1 = 0.0; /* imaginary part of array */
147 x[i] = z0;
148 z[i] = z0; /* copy of initial real data */
149 x[i+1] = z1;
150 z[i+1] = z1; /* copy of initial imag. data */
151 }
152 }
153/*
154 Initialize the sine and cosine tables.
155*/
156 cffti ( n, w );
157/*
158 Transform forward, back
159*/
160 if ( first )
161 {
162 sgn = + 1.0;
163 cfft2 ( n, x, y, w, sgn );
164 sgn = - 1.0;
165 cfft2 ( n, y, x, w, sgn );
166/*
167 Results should be same as the initial data multiplied by N.
168*/
169 fnm1 = 1.0 / ( double ) n;
170 error = 0.0;
171 for ( i = 0; i < 2 * n; i = i + 2 )
172 {
173 error = error
174 + pow ( z[i] - fnm1 * x[i], 2 )
175 + pow ( z[i+1] - fnm1 * x[i+1], 2 );
176 }
177 error = sqrt ( fnm1 * error );
178 printf ( " %12d %8d %12e", n, nits, error );
179 first = 0;
180 }
181 else
182 {
183 wtime = omp_get_wtime ( );
184 for ( it = 0; it < nits; it++ )
185 {
186 sgn = + 1.0;
187 cfft2 ( n, x, y, w, sgn );
188 sgn = - 1.0;
189 cfft2 ( n, y, x, w, sgn );
190 }
191 wtime = omp_get_wtime ( ) - wtime;
192
193 flops = 2.0 * ( double ) nits
194 * ( 5.0 * ( double ) n * ( double ) ln2 );
195
196 mflops = flops / 1.0E+06 / wtime;
197
198 printf ( " %12e %12e %12f\n", wtime, wtime / ( double ) ( 2 * nits ), mflops );
199 }
200 }
201 if ( ( ln2 % 4 ) == 0 )
202 {
203 nits = nits / 10;
204 }
205 if ( nits < 1 )
206 {
207 nits = 1;
208 }
209 free ( w );
210 free ( x );
211 free ( y );
212 free ( z );
213 }
214/*
215 Terminate.
216*/
217 printf ( "\n" );
218 printf ( "FFT_OPENMP:\n" );
219 printf ( " Normal end of execution.\n" );
220 printf ( "\n" );
221 timestamp ( );
222
223 return 0;
224}
225/******************************************************************************/
226
227void ccopy ( int n, double x[], double y[] )
228
229/******************************************************************************/
230/*
231 Purpose:
232
233 CCOPY copies a complex vector.
234
235 Discussion:
236
237 The "complex" vector A[N] is actually stored as a double vector B[2*N].
238
239 The "complex" vector entry A[I] is stored as:
240
241 B[I*2+0], the real part,
242 B[I*2+1], the imaginary part.
243
244 Modified:
245
246 20 March 2009
247
248 Author:
249
250 Original C version by Wesley Petersen.
251 This C version by John Burkardt.
252
253 Reference:
254
255 Wesley Petersen, Peter Arbenz,
256 Introduction to Parallel Computing - A practical guide with examples in C,
257 Oxford University Press,
258 ISBN: 0-19-851576-6,
259 LC: QA76.58.P47.
260
261 Parameters:
262
263 Input, int N, the length of the vector.
264
265 Input, double X[2*N], the vector to be copied.
266
267 Output, double Y[2*N], a copy of X.
268*/
269{
270 int i;
271
272 for ( i = 0; i < n; i++ )
273 {
274 y[i*2+0] = x[i*2+0];
275 y[i*2+1] = x[i*2+1];
276 }
277 return;
278}
279/******************************************************************************/
280
281void cfft2 ( int n, double x[], double y[], double w[], double sgn )
282
283/******************************************************************************/
284/*
285 Purpose:
286
287 CFFT2 performs a complex Fast Fourier Transform.
288
289 Modified:
290
291 20 March 2009
292
293 Author:
294
295 Original C version by Wesley Petersen.
296 This C version by John Burkardt.
297
298 Reference:
299
300 Wesley Petersen, Peter Arbenz,
301 Introduction to Parallel Computing - A practical guide with examples in C,
302 Oxford University Press,
303 ISBN: 0-19-851576-6,
304 LC: QA76.58.P47.
305
306 Parameters:
307
308 Input, int N, the size of the array to be transformed.
309
310 Input/output, double X[2*N], the data to be transformed.
311 On output, the contents of X have been overwritten by work information.
312
313 Output, double Y[2*N], the forward or backward FFT of X.
314
315 Input, double W[N], a table of sines and cosines.
316
317 Input, double SGN, is +1 for a "forward" FFT and -1 for a "backward" FFT.
318*/
319{
320 int j;
321 int m;
322 int mj;
323 int tgle;
324
325 m = ( int ) ( log ( ( double ) n ) / log ( 1.99 ) );
326 mj = 1;
327/*
328 Toggling switch for work array.
329*/
330 tgle = 1;
331 step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn );
332
333 if ( n == 2 )
334 {
335 return;
336 }
337
338 for ( j = 0; j < m - 2; j++ )
339 {
340 mj = mj * 2;
341 if ( tgle )
342 {
343 step ( n, mj, &y[0*2+0], &y[(n/2)*2+0], &x[0*2+0], &x[mj*2+0], w, sgn );
344 tgle = 0;
345 }
346 else
347 {
348 step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn );
349 tgle = 1;
350 }
351 }
352/*
353 Last pass through data: move Y to X if needed.
354*/
355 if ( tgle )
356 {
357 ccopy ( n, y, x );
358 }
359
360 mj = n / 2;
361 step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn );
362
363 return;
364}
365/******************************************************************************/
366
367void cffti ( int n, double w[] )
368
369/******************************************************************************/
370/*
371 Purpose:
372
373 CFFTI sets up sine and cosine tables needed for the FFT calculation.
374
375 Modified:
376
377 20 March 2009
378
379 Author:
380
381 Original C version by Wesley Petersen.
382 This C version by John Burkardt.
383
384 Reference:
385
386 Wesley Petersen, Peter Arbenz,
387 Introduction to Parallel Computing - A practical guide with examples in C,
388 Oxford University Press,
389 ISBN: 0-19-851576-6,
390 LC: QA76.58.P47.
391
392 Parameters:
393
394 Input, int N, the size of the array to be transformed.
395
396 Output, double W[N], a table of sines and cosines.
397*/
398{
399 double arg;
400 double aw;
401 int i;
402 int n2;
403 const double pi = 3.141592653589793;
404
405 n2 = n / 2;
406 aw = 2.0 * pi / ( ( double ) n );
407
408# pragma omp parallel \
409 shared ( aw, n, w ) \
410 private ( arg, i )
411
412# pragma omp for nowait
413
414 for ( i = 0; i < n2; i++ )
415 {
416 arg = aw * ( ( double ) i );
417 w[i*2+0] = cos ( arg );
418 w[i*2+1] = sin ( arg );
419 }
420 return;
421}
422/******************************************************************************/
423
424double ggl ( double *seed )
425
426/******************************************************************************/
427/*
428 Purpose:
429
430 GGL generates uniformly distributed pseudorandom real numbers in [0,1].
431
432 Modified:
433
434 20 March 2009
435
436 Author:
437
438 Original C version by Wesley Petersen, M Troyer, I Vattulainen.
439 This C version by John Burkardt.
440
441 Reference:
442
443 Wesley Petersen, Peter Arbenz,
444 Introduction to Parallel Computing - A practical guide with examples in C,
445 Oxford University Press,
446 ISBN: 0-19-851576-6,
447 LC: QA76.58.P47.
448
449 Parameters:
450
451 Input/output, double *SEED, used as a seed for the sequence.
452
453 Output, double GGL, the next pseudorandom value.
454*/
455{
456 double d2 = 0.2147483647e10;
457 double t;
458 double value;
459
460 t = ( double ) *seed;
461 t = fmod ( 16807.0 * t, d2 );
462 *seed = ( double ) t;
463 value = ( double ) ( ( t - 1.0 ) / ( d2 - 1.0 ) );
464
465 return value;
466}
467/******************************************************************************/
468
469void step ( int n, int mj, double a[], double b[], double c[],
470 double d[], double w[], double sgn )
471
472/******************************************************************************/
473/*
474 Purpose:
475
476 STEP carries out one step of the workspace version of CFFT2.
477
478 Modified:
479
480 20 March 2009
481
482 Author:
483
484 Original C version by Wesley Petersen.
485 This C version by John Burkardt.
486
487 Reference:
488
489 Wesley Petersen, Peter Arbenz,
490 Introduction to Parallel Computing - A practical guide with examples in C,
491 Oxford University Press,
492 ISBN: 0-19-851576-6,
493 LC: QA76.58.P47.
494
495 Parameters:
496
497*/
498{
499 double ambr;
500 double ambu;
501 int j;
502 int ja;
503 int jb;
504 int jc;
505 int jd;
506 int jw;
507 int k;
508 int lj;
509 int mj2;
510 double wjw[2];
511
512 mj2 = 2 * mj;
513 lj = n / mj2;
514
515# pragma omp parallel \
516 shared ( a, b, c, d, lj, mj, mj2, sgn, w ) \
517 private ( ambr, ambu, j, ja, jb, jc, jd, jw, k, wjw )
518
519# pragma omp for nowait
520
521 for ( j = 0; j < lj; j++ )
522 {
523 jw = j * mj;
524 ja = jw;
525 jb = ja;
526 jc = j * mj2;
527 jd = jc;
528
529 wjw[0] = w[jw*2+0];
530 wjw[1] = w[jw*2+1];
531
532 if ( sgn < 0.0 )
533 {
534 wjw[1] = - wjw[1];
535 }
536
537 for ( k = 0; k < mj; k++ )
538 {
539 c[(jc+k)*2+0] = a[(ja+k)*2+0] + b[(jb+k)*2+0];
540 c[(jc+k)*2+1] = a[(ja+k)*2+1] + b[(jb+k)*2+1];
541
542 ambr = a[(ja+k)*2+0] - b[(jb+k)*2+0];
543 ambu = a[(ja+k)*2+1] - b[(jb+k)*2+1];
544
545 d[(jd+k)*2+0] = wjw[0] * ambr - wjw[1] * ambu;
546 d[(jd+k)*2+1] = wjw[1] * ambr + wjw[0] * ambu;
547 }
548 }
549 return;
550}
551/******************************************************************************/
552
553void timestamp ( void )
554
555/******************************************************************************/
556/*
557 Purpose:
558
559 TIMESTAMP prints the current YMDHMS date as a time stamp.
560
561 Example:
562
563 31 May 2001 09:45:54 AM
564
565 Licensing:
566
567 This code is distributed under the GNU LGPL license.
568
569 Modified:
570
571 24 September 2003
572
573 Author:
574
575 John Burkardt
576
577 Parameters:
578
579 None
580*/
581{
582# define TIME_SIZE 40
583
584 static char time_buffer[TIME_SIZE];
585 const struct tm *tm;
586 size_t len;
587 time_t now;
588
589 now = time ( NULL );
590 tm = localtime ( &now );
591
592 len = strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm );
593
594 printf ( "%s\n", time_buffer );
595
596 return;
597# undef TIME_SIZE
598}
Note: See TracBrowser for help on using the repository browser.