Context Navigation

fft_openmp.c@ ea8d8ce

1.23 2.0 main test-branch

Last change on this file since ea8d8ce was 9a94b18, checked in by Matthew B. Dwyer <matthewbdwyer@…>, 11 years ago

Making examples parameterizable through #define

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@1901 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 12.4 KB

Line
1	# include <stdlib.h>
2	# include <stdio.h>
3	# include <math.h>
4	# include <time.h>
5	# include <omp.h>
6
7	#define NITS 4 // originally 10000
8	#define LN2MAX 1 //originally 25
9
10	int main ( void );
11	void ccopy ( int n, double x[], double y[] );
12	void cfft2 ( int n, double x[], double y[], double w[], double sgn );
13	void cffti ( int n, double w[] );
14	double ggl ( double *ds );
15	void step ( int n, int mj, double a[], double b[], double c[], double d[],
16	double w[], double sgn );
17	void timestamp ( void );
18
19	/******************************************************************************/
20
21	int main ( void )
22
23	/******************************************************************************/
24	/*
25	Purpose:
26
27	MAIN is the main program for FFT_OPENMP.
28
29	Discussion:
30
31	The "complex" vector A is actually stored as a double vector B.
32
33	The "complex" vector entry A[I] is stored as:
34
35	B[I*2+0], the real part,
36	B[I*2+1], the imaginary part.
37
38	Modified:
39
40	20 March 2009
41
42	Author:
43
44	Original C version by Wesley Petersen.
45	This C version by John Burkardt.
46
47	Reference:
48
49	Wesley Petersen, Peter Arbenz,
50	Introduction to Parallel Computing - A practical guide with examples in C,
51	Oxford University Press,
52	ISBN: 0-19-851576-6,
53	LC: QA76.58.P47.
54	*/
55	{
56	double error;
57	int first;
58	double flops;
59	double fnm1;
60	int i;
61	int icase;
62	int it;
63	int ln2;
64	int ln2_max = LN2MAX;
65	double mflops;
66	int n;
67	int nits = NITS;
68	int proc_num;
69	static double seed;
70	double sgn;
71	int thread_num;
72	double *w;
73	double wtime;
74	double *x;
75	double *y;
76	double *z;
77	double z0;
78	double z1;
79
80	timestamp ( );
81	printf ( "\n" );
82	printf ( "FFT_OPENMP\n" );
83	printf ( " C/OpenMP version\n" );
84	printf ( "\n" );
85	printf ( " Demonstrate an implementation of the Fast Fourier Transform\n" );
86	printf ( " of a complex data vector, using OpenMP for parallel execution.\n" );
87
88	printf ( "\n" );
89	printf ( " Number of processors available = %d\n", omp_get_num_procs ( ) );
90	printf ( " Number of threads = %d\n", omp_get_max_threads ( ) );
91	/*
92	Prepare for tests.
93	*/
94	printf ( "\n" );
95	printf ( " Accuracy check:\n" );
96	printf ( "\n" );
97	printf ( " FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n" );
98	printf ( "\n" );
99	printf ( " N NITS Error Time Time/Call MFLOPS\n" );
100	printf ( "\n" );
101
102	seed = 331.0;
103	n = 1;
104	/*
105	LN2 is the log base 2 of N. Each increase of LN2 doubles N.
106	*/
107	for ( ln2 = 1; ln2 <= ln2_max; ln2++ )
108	{
109	n = 2 * n;
110	/*
111	Allocate storage for the complex arrays W, X, Y, Z.
112
113	We handle the complex arithmetic,
114	and store a complex number as a pair of doubles, a complex vector as a doubly
115	dimensioned array whose second dimension is 2.
116	*/
117	w = ( double * ) malloc ( n * sizeof ( double ) );
118	x = ( double * ) malloc ( 2 * n * sizeof ( double ) );
119	y = ( double * ) malloc ( 2 * n * sizeof ( double ) );
120	z = ( double * ) malloc ( 2 * n * sizeof ( double ) );
121
122	first = 1;
123
124	for ( icase = 0; icase < 2; icase++ )
125	{
126	if ( first )
127	{
128	for ( i = 0; i < 2 * n; i = i + 2 )
129	{
130	z0 = ggl ( &seed );
131	z1 = ggl ( &seed );
132	x[i] = z0;
133	z[i] = z0;
134	x[i+1] = z1;
135	z[i+1] = z1;
136	}
137	}
138	else
139	{
140	# pragma omp parallel \
141	shared ( n, x, z ) \
142	private ( i, z0, z1 )
143
144	# pragma omp for nowait
145
146	for ( i = 0; i < 2 * n; i = i + 2 )
147	{
148	z0 = 0.0; /* real part of array */
149	z1 = 0.0; /* imaginary part of array */
150	x[i] = z0;
151	z[i] = z0; /* copy of initial real data */
152	x[i+1] = z1;
153	z[i+1] = z1; /* copy of initial imag. data */
154	}
155	}
156	/*
157	Initialize the sine and cosine tables.
158	*/
159	cffti ( n, w );
160	/*
161	Transform forward, back
162	*/
163	if ( first )
164	{
165	sgn = + 1.0;
166	cfft2 ( n, x, y, w, sgn );
167	sgn = - 1.0;
168	cfft2 ( n, y, x, w, sgn );
169	/*
170	Results should be same as the initial data multiplied by N.
171	*/
172	fnm1 = 1.0 / ( double ) n;
173	error = 0.0;
174	for ( i = 0; i < 2 * n; i = i + 2 )
175	{
176	error = error
177	+ pow ( z[i] - fnm1 * x[i], 2 )
178	+ pow ( z[i+1] - fnm1 * x[i+1], 2 );
179	}
180	error = sqrt ( fnm1 * error );
181	printf ( " %12d %8d %12e", n, nits, error );
182	first = 0;
183	}
184	else
185	{
186	wtime = omp_get_wtime ( );
187	for ( it = 0; it < nits; it++ )
188	{
189	sgn = + 1.0;
190	cfft2 ( n, x, y, w, sgn );
191	sgn = - 1.0;
192	cfft2 ( n, y, x, w, sgn );
193	}
194	wtime = omp_get_wtime ( ) - wtime;
195
196	flops = 2.0 * ( double ) nits
197	* ( 5.0 * ( double ) n * ( double ) ln2 );
198
199	mflops = flops / 1.0E+06; // / wtime;
200
201	printf ( " %12e %12e %12f\n", wtime, wtime / ( double ) ( 2 * nits ), mflops );
202	}
203	}
204	if ( ( ln2 % 4 ) == 0 )
205	{
206	nits = nits / 10;
207	}
208	if ( nits < 1 )
209	{
210	nits = 1;
211	}
212	free ( w );
213	free ( x );
214	free ( y );
215	free ( z );
216	}
217	/*
218	Terminate.
219	*/
220	printf ( "\n" );
221	printf ( "FFT_OPENMP:\n" );
222	printf ( " Normal end of execution.\n" );
223	printf ( "\n" );
224	timestamp ( );
225
226	return 0;
227	}
228	/******************************************************************************/
229
230	void ccopy ( int n, double x[], double y[] )
231
232	/******************************************************************************/
233	/*
234	Purpose:
235
236	CCOPY copies a complex vector.
237
238	Discussion:
239
240	The "complex" vector A[N] is actually stored as a double vector B[2*N].
241
242	The "complex" vector entry A[I] is stored as:
243
244	B[I*2+0], the real part,
245	B[I*2+1], the imaginary part.
246
247	Modified:
248
249	20 March 2009
250
251	Author:
252
253	Original C version by Wesley Petersen.
254	This C version by John Burkardt.
255
256	Reference:
257
258	Wesley Petersen, Peter Arbenz,
259	Introduction to Parallel Computing - A practical guide with examples in C,
260	Oxford University Press,
261	ISBN: 0-19-851576-6,
262	LC: QA76.58.P47.
263
264	Parameters:
265
266	Input, int N, the length of the vector.
267
268	Input, double X[2*N], the vector to be copied.
269
270	Output, double Y[2*N], a copy of X.
271	*/
272	{
273	int i;
274
275	for ( i = 0; i < n; i++ )
276	{
277	y[i2+0] = x[i2+0];
278	y[i2+1] = x[i2+1];
279	}
280	return;
281	}
282	/******************************************************************************/
283
284	void cfft2 ( int n, double x[], double y[], double w[], double sgn )
285
286	/******************************************************************************/
287	/*
288	Purpose:
289
290	CFFT2 performs a complex Fast Fourier Transform.
291
292	Modified:
293
294	20 March 2009
295
296	Author:
297
298	Original C version by Wesley Petersen.
299	This C version by John Burkardt.
300
301	Reference:
302
303	Wesley Petersen, Peter Arbenz,
304	Introduction to Parallel Computing - A practical guide with examples in C,
305	Oxford University Press,
306	ISBN: 0-19-851576-6,
307	LC: QA76.58.P47.
308
309	Parameters:
310
311	Input, int N, the size of the array to be transformed.
312
313	Input/output, double X[2*N], the data to be transformed.
314	On output, the contents of X have been overwritten by work information.
315
316	Output, double Y[2*N], the forward or backward FFT of X.
317
318	Input, double W[N], a table of sines and cosines.
319
320	Input, double SGN, is +1 for a "forward" FFT and -1 for a "backward" FFT.
321	*/
322	{
323	int j;
324	int m;
325	int mj;
326	int tgle;
327
328	m = ( int ) ( log ( ( double ) n ) / log ( 1.99 ) );
329	mj = 1;
330	/*
331	Toggling switch for work array.
332	*/
333	tgle = 1;
334	step ( n, mj, &x[02+0], &x[(n/2)2+0], &y[02+0], &y[mj2+0], w, sgn );
335
336	if ( n == 2 )
337	{
338	return;
339	}
340
341	for ( j = 0; j < m - 2; j++ )
342	{
343	mj = mj * 2;
344	if ( tgle )
345	{
346	step ( n, mj, &y[02+0], &y[(n/2)2+0], &x[02+0], &x[mj2+0], w, sgn );
347	tgle = 0;
348	}
349	else
350	{
351	step ( n, mj, &x[02+0], &x[(n/2)2+0], &y[02+0], &y[mj2+0], w, sgn );
352	tgle = 1;
353	}
354	}
355	/*
356	Last pass through data: move Y to X if needed.
357	*/
358	if ( tgle )
359	{
360	ccopy ( n, y, x );
361	}
362
363	mj = n / 2;
364	step ( n, mj, &x[02+0], &x[(n/2)2+0], &y[02+0], &y[mj2+0], w, sgn );
365
366	return;
367	}
368	/******************************************************************************/
369
370	void cffti ( int n, double w[] )
371
372	/******************************************************************************/
373	/*
374	Purpose:
375
376	CFFTI sets up sine and cosine tables needed for the FFT calculation.
377
378	Modified:
379
380	20 March 2009
381
382	Author:
383
384	Original C version by Wesley Petersen.
385	This C version by John Burkardt.
386
387	Reference:
388
389	Wesley Petersen, Peter Arbenz,
390	Introduction to Parallel Computing - A practical guide with examples in C,
391	Oxford University Press,
392	ISBN: 0-19-851576-6,
393	LC: QA76.58.P47.
394
395	Parameters:
396
397	Input, int N, the size of the array to be transformed.
398
399	Output, double W[N], a table of sines and cosines.
400	*/
401	{
402	double arg;
403	double aw;
404	int i;
405	int n2;
406	const double pi = 3.141592653589793;
407
408	n2 = n / 2;
409	aw = 2.0 * pi / ( ( double ) n );
410
411	# pragma omp parallel \
412	shared ( aw, n, w ) \
413	private ( arg, i )
414
415	# pragma omp for nowait
416
417	for ( i = 0; i < n2; i++ )
418	{
419	arg = aw * ( ( double ) i );
420	w[i*2+0] = cos ( arg );
421	w[i*2+1] = sin ( arg );
422	}
423	return;
424	}
425	/******************************************************************************/
426
427	double ggl ( double *seed )
428
429	/******************************************************************************/
430	/*
431	Purpose:
432
433	GGL generates uniformly distributed pseudorandom real numbers in [0,1].
434
435	Modified:
436
437	20 March 2009
438
439	Author:
440
441	Original C version by Wesley Petersen, M Troyer, I Vattulainen.
442	This C version by John Burkardt.
443
444	Reference:
445
446	Wesley Petersen, Peter Arbenz,
447	Introduction to Parallel Computing - A practical guide with examples in C,
448	Oxford University Press,
449	ISBN: 0-19-851576-6,
450	LC: QA76.58.P47.
451
452	Parameters:
453
454	Input/output, double *SEED, used as a seed for the sequence.
455
456	Output, double GGL, the next pseudorandom value.
457	*/
458	{
459	double d2 = 0.2147483647e10;
460	double t;
461	double value;
462
463	t = ( double ) *seed;
464	t = fmod ( 16807.0 * t, d2 );
465	*seed = ( double ) t;
466	value = ( double ) ( ( t - 1.0 ) / ( d2 - 1.0 ) );
467
468	return value;
469	}
470	/******************************************************************************/
471
472	void step ( int n, int mj, double a[], double b[], double c[],
473	double d[], double w[], double sgn )
474
475	/******************************************************************************/
476	/*
477	Purpose:
478
479	STEP carries out one step of the workspace version of CFFT2.
480
481	Modified:
482
483	20 March 2009
484
485	Author:
486
487	Original C version by Wesley Petersen.
488	This C version by John Burkardt.
489
490	Reference:
491
492	Wesley Petersen, Peter Arbenz,
493	Introduction to Parallel Computing - A practical guide with examples in C,
494	Oxford University Press,
495	ISBN: 0-19-851576-6,
496	LC: QA76.58.P47.
497
498	Parameters:
499
500	*/
501	{
502	double ambr;
503	double ambu;
504	int j;
505	int ja;
506	int jb;
507	int jc;
508	int jd;
509	int jw;
510	int k;
511	int lj;
512	int mj2;
513	double wjw[2];
514
515	mj2 = 2 * mj;
516	lj = n / mj2;
517
518	# pragma omp parallel \
519	shared ( a, b, c, d, lj, mj, mj2, sgn, w ) \
520	private ( ambr, ambu, j, ja, jb, jc, jd, jw, k, wjw )
521
522	# pragma omp for nowait
523
524	for ( j = 0; j < lj; j++ )
525	{
526	jw = j * mj;
527	ja = jw;
528	jb = ja;
529	jc = j * mj2;
530	jd = jc;
531
532	wjw[0] = w[jw*2+0];
533	wjw[1] = w[jw*2+1];
534
535	if ( sgn < 0.0 )
536	{
537	wjw[1] = - wjw[1];
538	}
539
540	for ( k = 0; k < mj; k++ )
541	{
542	c[(jc+k)2+0] = a[(ja+k)2+0] + b[(jb+k)*2+0];
543	c[(jc+k)2+1] = a[(ja+k)2+1] + b[(jb+k)*2+1];
544
545	ambr = a[(ja+k)2+0] - b[(jb+k)2+0];
546	ambu = a[(ja+k)2+1] - b[(jb+k)2+1];
547
548	d[(jd+k)2+0] = wjw[0] ambr - wjw[1] * ambu;
549	d[(jd+k)2+1] = wjw[1] ambr + wjw[0] * ambu;
550	}
551	}
552	return;
553	}
554	/******************************************************************************/
555
556	void timestamp ( void )
557
558	/******************************************************************************/
559	/*
560	Purpose:
561
562	TIMESTAMP prints the current YMDHMS date as a time stamp.
563
564	Example:
565
566	31 May 2001 09:45:54 AM
567
568	Licensing:
569
570	This code is distributed under the GNU LGPL license.
571
572	Modified:
573
574	24 September 2003
575
576	Author:
577
578	John Burkardt
579
580	Parameters:
581
582	None
583	*/
584	{
585	# define TIME_SIZE 40
586
587	static char time_buffer[TIME_SIZE];
588	const struct tm *tm;
589	size_t len;
590	time_t now;
591
592	now = time ( NULL );
593	tm = localtime ( &now );
594
595	len = strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm );
596
597	printf ( "%s\n", time_buffer );
598
599	return;
600	# undef TIME_SIZE
601	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: CIVL/examples/omp/fft_openmp.c@ ea8d8ce

Download in other formats: