Context Navigation

fft_openmp.c

main

Last change on this file was aad342c, checked in by Stephen Siegel <siegel@…>, 3 years ago

Performing huge refactor to incorporate ABC, GMC, and SARL into CIVL repo and use Java modules.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5664 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 12.3 KB

Rev	Line
[004075f]	1	# include <stdlib.h>
	2	# include <stdio.h>
	3	# include <math.h>
	4	# include <time.h>
	5	# include <omp.h>
	6
	7	int main ( void );
	8	void ccopy ( int n, double x[], double y[] );
	9	void cfft2 ( int n, double x[], double y[], double w[], double sgn );
	10	void cffti ( int n, double w[] );
	11	double ggl ( double *ds );
	12	void step ( int n, int mj, double a[], double b[], double c[], double d[],
	13	double w[], double sgn );
	14	void timestamp ( void );
	15
	16	/******************************************************************************/
	17
	18	int main ( void )
	19
	20	/******************************************************************************/
	21	/*
	22	Purpose:
	23
	24	MAIN is the main program for FFT_OPENMP.
	25
	26	Discussion:
	27
	28	The "complex" vector A is actually stored as a double vector B.
	29
	30	The "complex" vector entry A[I] is stored as:
	31
	32	B[I*2+0], the real part,
	33	B[I*2+1], the imaginary part.
	34
	35	Modified:
	36
	37	20 March 2009
	38
	39	Author:
	40
	41	Original C version by Wesley Petersen.
	42	This C version by John Burkardt.
	43
	44	Reference:
	45
	46	Wesley Petersen, Peter Arbenz,
	47	Introduction to Parallel Computing - A practical guide with examples in C,
	48	Oxford University Press,
	49	ISBN: 0-19-851576-6,
	50	LC: QA76.58.P47.
	51	*/
	52	{
	53	double error;
	54	int first;
	55	double flops;
	56	double fnm1;
	57	int i;
	58	int icase;
	59	int it;
	60	int ln2;
	61	int ln2_max = 25;
	62	double mflops;
	63	int n;
	64	int nits = 10000;
	65	int proc_num;
	66	static double seed;
	67	double sgn;
	68	int thread_num;
	69	double *w;
	70	double wtime;
	71	double *x;
	72	double *y;
	73	double *z;
	74	double z0;
	75	double z1;
	76
	77	timestamp ( );
	78	printf ( "\n" );
	79	printf ( "FFT_OPENMP\n" );
	80	printf ( " C/OpenMP version\n" );
	81	printf ( "\n" );
	82	printf ( " Demonstrate an implementation of the Fast Fourier Transform\n" );
	83	printf ( " of a complex data vector, using OpenMP for parallel execution.\n" );
	84
	85	printf ( "\n" );
	86	printf ( " Number of processors available = %d\n", omp_get_num_procs ( ) );
	87	printf ( " Number of threads = %d\n", omp_get_max_threads ( ) );
	88	/*
	89	Prepare for tests.
	90	*/
	91	printf ( "\n" );
	92	printf ( " Accuracy check:\n" );
	93	printf ( "\n" );
	94	printf ( " FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n" );
	95	printf ( "\n" );
	96	printf ( " N NITS Error Time Time/Call MFLOPS\n" );
	97	printf ( "\n" );
	98
	99	seed = 331.0;
	100	n = 1;
	101	/*
	102	LN2 is the log base 2 of N. Each increase of LN2 doubles N.
	103	*/
	104	for ( ln2 = 1; ln2 <= ln2_max; ln2++ )
	105	{
	106	n = 2 * n;
	107	/*
	108	Allocate storage for the complex arrays W, X, Y, Z.
	109
	110	We handle the complex arithmetic,
	111	and store a complex number as a pair of doubles, a complex vector as a doubly
	112	dimensioned array whose second dimension is 2.
	113	*/
	114	w = ( double * ) malloc ( n * sizeof ( double ) );
	115	x = ( double * ) malloc ( 2 * n * sizeof ( double ) );
	116	y = ( double * ) malloc ( 2 * n * sizeof ( double ) );
	117	z = ( double * ) malloc ( 2 * n * sizeof ( double ) );
	118
	119	first = 1;
	120
	121	for ( icase = 0; icase < 2; icase++ )
	122	{
	123	if ( first )
	124	{
	125	for ( i = 0; i < 2 * n; i = i + 2 )
	126	{
	127	z0 = ggl ( &seed );
	128	z1 = ggl ( &seed );
	129	x[i] = z0;
	130	z[i] = z0;
	131	x[i+1] = z1;
	132	z[i+1] = z1;
	133	}
	134	}
	135	else
	136	{
	137	# pragma omp parallel \
	138	shared ( n, x, z ) \
	139	private ( i, z0, z1 )
	140
	141	# pragma omp for nowait
	142
	143	for ( i = 0; i < 2 * n; i = i + 2 )
	144	{
	145	z0 = 0.0; /* real part of array */
	146	z1 = 0.0; /* imaginary part of array */
	147	x[i] = z0;
	148	z[i] = z0; /* copy of initial real data */
	149	x[i+1] = z1;
	150	z[i+1] = z1; /* copy of initial imag. data */
	151	}
	152	}
	153	/*
	154	Initialize the sine and cosine tables.
	155	*/
	156	cffti ( n, w );
	157	/*
	158	Transform forward, back
	159	*/
	160	if ( first )
	161	{
	162	sgn = + 1.0;
	163	cfft2 ( n, x, y, w, sgn );
	164	sgn = - 1.0;
	165	cfft2 ( n, y, x, w, sgn );
	166	/*
	167	Results should be same as the initial data multiplied by N.
	168	*/
	169	fnm1 = 1.0 / ( double ) n;
	170	error = 0.0;
	171	for ( i = 0; i < 2 * n; i = i + 2 )
	172	{
	173	error = error
	174	+ pow ( z[i] - fnm1 * x[i], 2 )
	175	+ pow ( z[i+1] - fnm1 * x[i+1], 2 );
	176	}
	177	error = sqrt ( fnm1 * error );
	178	printf ( " %12d %8d %12e", n, nits, error );
	179	first = 0;
	180	}
	181	else
	182	{
	183	wtime = omp_get_wtime ( );
	184	for ( it = 0; it < nits; it++ )
	185	{
	186	sgn = + 1.0;
	187	cfft2 ( n, x, y, w, sgn );
	188	sgn = - 1.0;
	189	cfft2 ( n, y, x, w, sgn );
	190	}
	191	wtime = omp_get_wtime ( ) - wtime;
	192
	193	flops = 2.0 * ( double ) nits
	194	* ( 5.0 * ( double ) n * ( double ) ln2 );
	195
	196	mflops = flops / 1.0E+06 / wtime;
	197
	198	printf ( " %12e %12e %12f\n", wtime, wtime / ( double ) ( 2 * nits ), mflops );
	199	}
	200	}
	201	if ( ( ln2 % 4 ) == 0 )
	202	{
	203	nits = nits / 10;
	204	}
	205	if ( nits < 1 )
	206	{
	207	nits = 1;
	208	}
	209	free ( w );
	210	free ( x );
	211	free ( y );
	212	free ( z );
	213	}
	214	/*
	215	Terminate.
	216	*/
	217	printf ( "\n" );
	218	printf ( "FFT_OPENMP:\n" );
	219	printf ( " Normal end of execution.\n" );
	220	printf ( "\n" );
	221	timestamp ( );
	222
	223	return 0;
	224	}
	225	/******************************************************************************/
	226
	227	void ccopy ( int n, double x[], double y[] )
	228
	229	/******************************************************************************/
	230	/*
	231	Purpose:
	232
	233	CCOPY copies a complex vector.
	234
	235	Discussion:
	236
	237	The "complex" vector A[N] is actually stored as a double vector B[2*N].
	238
	239	The "complex" vector entry A[I] is stored as:
	240
	241	B[I*2+0], the real part,
	242	B[I*2+1], the imaginary part.
	243
	244	Modified:
	245
	246	20 March 2009
	247
	248	Author:
	249
	250	Original C version by Wesley Petersen.
	251	This C version by John Burkardt.
	252
	253	Reference:
	254
	255	Wesley Petersen, Peter Arbenz,
	256	Introduction to Parallel Computing - A practical guide with examples in C,
	257	Oxford University Press,
	258	ISBN: 0-19-851576-6,
	259	LC: QA76.58.P47.
	260
	261	Parameters:
	262
	263	Input, int N, the length of the vector.
	264
	265	Input, double X[2*N], the vector to be copied.
	266
	267	Output, double Y[2*N], a copy of X.
	268	*/
	269	{
	270	int i;
	271
	272	for ( i = 0; i < n; i++ )
	273	{
	274	y[i2+0] = x[i2+0];
	275	y[i2+1] = x[i2+1];
	276	}
	277	return;
	278	}
	279	/******************************************************************************/
	280
	281	void cfft2 ( int n, double x[], double y[], double w[], double sgn )
	282
	283	/******************************************************************************/
	284	/*
	285	Purpose:
	286
	287	CFFT2 performs a complex Fast Fourier Transform.
	288
	289	Modified:
	290
	291	20 March 2009
	292
	293	Author:
	294
	295	Original C version by Wesley Petersen.
	296	This C version by John Burkardt.
	297
	298	Reference:
	299
	300	Wesley Petersen, Peter Arbenz,
	301	Introduction to Parallel Computing - A practical guide with examples in C,
	302	Oxford University Press,
	303	ISBN: 0-19-851576-6,
	304	LC: QA76.58.P47.
	305
	306	Parameters:
	307
	308	Input, int N, the size of the array to be transformed.
	309
	310	Input/output, double X[2*N], the data to be transformed.
	311	On output, the contents of X have been overwritten by work information.
	312
	313	Output, double Y[2*N], the forward or backward FFT of X.
	314
	315	Input, double W[N], a table of sines and cosines.
	316
	317	Input, double SGN, is +1 for a "forward" FFT and -1 for a "backward" FFT.
	318	*/
	319	{
	320	int j;
	321	int m;
	322	int mj;
	323	int tgle;
	324
	325	m = ( int ) ( log ( ( double ) n ) / log ( 1.99 ) );
	326	mj = 1;
	327	/*
	328	Toggling switch for work array.
	329	*/
	330	tgle = 1;
	331	step ( n, mj, &x[02+0], &x[(n/2)2+0], &y[02+0], &y[mj2+0], w, sgn );
	332
	333	if ( n == 2 )
	334	{
	335	return;
	336	}
	337
	338	for ( j = 0; j < m - 2; j++ )
	339	{
	340	mj = mj * 2;
	341	if ( tgle )
	342	{
	343	step ( n, mj, &y[02+0], &y[(n/2)2+0], &x[02+0], &x[mj2+0], w, sgn );
	344	tgle = 0;
	345	}
	346	else
	347	{
	348	step ( n, mj, &x[02+0], &x[(n/2)2+0], &y[02+0], &y[mj2+0], w, sgn );
	349	tgle = 1;
	350	}
	351	}
	352	/*
	353	Last pass through data: move Y to X if needed.
	354	*/
	355	if ( tgle )
	356	{
	357	ccopy ( n, y, x );
	358	}
	359
	360	mj = n / 2;
	361	step ( n, mj, &x[02+0], &x[(n/2)2+0], &y[02+0], &y[mj2+0], w, sgn );
	362
	363	return;
	364	}
	365	/******************************************************************************/
	366
	367	void cffti ( int n, double w[] )
	368
	369	/******************************************************************************/
	370	/*
	371	Purpose:
	372
	373	CFFTI sets up sine and cosine tables needed for the FFT calculation.
	374
	375	Modified:
	376
	377	20 March 2009
	378
	379	Author:
	380
	381	Original C version by Wesley Petersen.
	382	This C version by John Burkardt.
	383
	384	Reference:
	385
	386	Wesley Petersen, Peter Arbenz,
	387	Introduction to Parallel Computing - A practical guide with examples in C,
	388	Oxford University Press,
	389	ISBN: 0-19-851576-6,
	390	LC: QA76.58.P47.
	391
	392	Parameters:
	393
	394	Input, int N, the size of the array to be transformed.
	395
	396	Output, double W[N], a table of sines and cosines.
	397	*/
	398	{
	399	double arg;
	400	double aw;
	401	int i;
	402	int n2;
	403	const double pi = 3.141592653589793;
	404
	405	n2 = n / 2;
	406	aw = 2.0 * pi / ( ( double ) n );
	407
	408	# pragma omp parallel \
	409	shared ( aw, n, w ) \
	410	private ( arg, i )
	411
	412	# pragma omp for nowait
	413
	414	for ( i = 0; i < n2; i++ )
	415	{
	416	arg = aw * ( ( double ) i );
	417	w[i*2+0] = cos ( arg );
	418	w[i*2+1] = sin ( arg );
	419	}
	420	return;
	421	}
	422	/******************************************************************************/
	423
	424	double ggl ( double *seed )
	425
	426	/******************************************************************************/
	427	/*
	428	Purpose:
	429
	430	GGL generates uniformly distributed pseudorandom real numbers in [0,1].
	431
	432	Modified:
	433
	434	20 March 2009
	435
	436	Author:
	437
	438	Original C version by Wesley Petersen, M Troyer, I Vattulainen.
	439	This C version by John Burkardt.
	440
	441	Reference:
	442
	443	Wesley Petersen, Peter Arbenz,
	444	Introduction to Parallel Computing - A practical guide with examples in C,
	445	Oxford University Press,
	446	ISBN: 0-19-851576-6,
	447	LC: QA76.58.P47.
	448
	449	Parameters:
	450
	451	Input/output, double *SEED, used as a seed for the sequence.
	452
	453	Output, double GGL, the next pseudorandom value.
	454	*/
	455	{
	456	double d2 = 0.2147483647e10;
	457	double t;
	458	double value;
	459
	460	t = ( double ) *seed;
	461	t = fmod ( 16807.0 * t, d2 );
	462	*seed = ( double ) t;
	463	value = ( double ) ( ( t - 1.0 ) / ( d2 - 1.0 ) );
	464
	465	return value;
	466	}
	467	/******************************************************************************/
	468
	469	void step ( int n, int mj, double a[], double b[], double c[],
	470	double d[], double w[], double sgn )
	471
	472	/******************************************************************************/
	473	/*
	474	Purpose:
	475
	476	STEP carries out one step of the workspace version of CFFT2.
	477
	478	Modified:
	479
	480	20 March 2009
	481
	482	Author:
	483
	484	Original C version by Wesley Petersen.
	485	This C version by John Burkardt.
	486
	487	Reference:
	488
	489	Wesley Petersen, Peter Arbenz,
	490	Introduction to Parallel Computing - A practical guide with examples in C,
	491	Oxford University Press,
	492	ISBN: 0-19-851576-6,
	493	LC: QA76.58.P47.
	494
	495	Parameters:
	496
	497	*/
	498	{
	499	double ambr;
	500	double ambu;
	501	int j;
	502	int ja;
	503	int jb;
	504	int jc;
	505	int jd;
	506	int jw;
	507	int k;
	508	int lj;
	509	int mj2;
	510	double wjw[2];
	511
	512	mj2 = 2 * mj;
	513	lj = n / mj2;
	514
	515	# pragma omp parallel \
	516	shared ( a, b, c, d, lj, mj, mj2, sgn, w ) \
	517	private ( ambr, ambu, j, ja, jb, jc, jd, jw, k, wjw )
	518
	519	# pragma omp for nowait
	520
	521	for ( j = 0; j < lj; j++ )
	522	{
	523	jw = j * mj;
	524	ja = jw;
	525	jb = ja;
	526	jc = j * mj2;
	527	jd = jc;
	528
	529	wjw[0] = w[jw*2+0];
	530	wjw[1] = w[jw*2+1];
	531
	532	if ( sgn < 0.0 )
	533	{
	534	wjw[1] = - wjw[1];
	535	}
	536
	537	for ( k = 0; k < mj; k++ )
	538	{
	539	c[(jc+k)2+0] = a[(ja+k)2+0] + b[(jb+k)*2+0];
	540	c[(jc+k)2+1] = a[(ja+k)2+1] + b[(jb+k)*2+1];
	541
	542	ambr = a[(ja+k)2+0] - b[(jb+k)2+0];
	543	ambu = a[(ja+k)2+1] - b[(jb+k)2+1];
	544
	545	d[(jd+k)2+0] = wjw[0] ambr - wjw[1] * ambu;
	546	d[(jd+k)2+1] = wjw[1] ambr + wjw[0] * ambu;
	547	}
	548	}
	549	return;
	550	}
	551	/******************************************************************************/
	552
	553	void timestamp ( void )
	554
	555	/******************************************************************************/
	556	/*
	557	Purpose:
	558
	559	TIMESTAMP prints the current YMDHMS date as a time stamp.
	560
	561	Example:
	562
	563	31 May 2001 09:45:54 AM
	564
	565	Licensing:
	566
	567	This code is distributed under the GNU LGPL license.
	568
	569	Modified:
	570
	571	24 September 2003
	572
	573	Author:
	574
	575	John Burkardt
	576
	577	Parameters:
	578
	579	None
	580	*/
	581	{
	582	# define TIME_SIZE 40
	583
	584	static char time_buffer[TIME_SIZE];
	585	const struct tm *tm;
	586	size_t len;
	587	time_t now;
	588
	589	now = time ( NULL );
	590	tm = localtime ( &now );
	591
	592	len = strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm );
	593
	594	printf ( "%s\n", time_buffer );
	595
	596	return;
	597	# undef TIME_SIZE
	598	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: CIVL/mods/dev.civl.abc/examples/omp/fft_openmp.c

Download in other formats: