Context Navigation

fft_openmp.c@ 8190175

main test-branch

Last change on this file since 8190175 was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 12.5 KB

Rev	Line
[71264c4]	1	# include <stdlib.h>
	2	# include <stdio.h>
	3	# include <math.h>
	4	# include <time.h>
	5	# include <omp.h>
	6
[20ac35f]	7	#ifdef _CIVL
	8	$input int NITS=4; // originally 10000
	9	$input int LN2MAX=1; //originally 25
	10	#else
	11	#define NITS 10000
	12	#define LN2MAX 25
	13	#endif
[9a94b18]	14
[71264c4]	15	int main ( void );
	16	void ccopy ( int n, double x[], double y[] );
	17	void cfft2 ( int n, double x[], double y[], double w[], double sgn );
	18	void cffti ( int n, double w[] );
	19	double ggl ( double *ds );
	20	void step ( int n, int mj, double a[], double b[], double c[], double d[],
	21	double w[], double sgn );
	22	void timestamp ( void );
	23
	24	/******************************************************************************/
	25
	26	int main ( void )
	27
	28	/******************************************************************************/
	29	/*
	30	Purpose:
	31
	32	MAIN is the main program for FFT_OPENMP.
	33
	34	Discussion:
	35
	36	The "complex" vector A is actually stored as a double vector B.
	37
	38	The "complex" vector entry A[I] is stored as:
	39
	40	B[I*2+0], the real part,
	41	B[I*2+1], the imaginary part.
	42
	43	Modified:
	44
	45	20 March 2009
	46
	47	Author:
	48
	49	Original C version by Wesley Petersen.
	50	This C version by John Burkardt.
	51
	52	Reference:
	53
	54	Wesley Petersen, Peter Arbenz,
	55	Introduction to Parallel Computing - A practical guide with examples in C,
	56	Oxford University Press,
	57	ISBN: 0-19-851576-6,
	58	LC: QA76.58.P47.
	59	*/
	60	{
	61	double error;
	62	int first;
	63	double flops;
	64	double fnm1;
	65	int i;
	66	int icase;
	67	int it;
	68	int ln2;
[9a94b18]	69	int ln2_max = LN2MAX;
[71264c4]	70	double mflops;
	71	int n;
[9a94b18]	72	int nits = NITS;
[71264c4]	73	int proc_num;
	74	static double seed;
	75	double sgn;
	76	int thread_num;
	77	double *w;
	78	double wtime;
	79	double *x;
	80	double *y;
	81	double *z;
	82	double z0;
	83	double z1;
	84
	85	timestamp ( );
	86	printf ( "\n" );
	87	printf ( "FFT_OPENMP\n" );
	88	printf ( " C/OpenMP version\n" );
	89	printf ( "\n" );
	90	printf ( " Demonstrate an implementation of the Fast Fourier Transform\n" );
	91	printf ( " of a complex data vector, using OpenMP for parallel execution.\n" );
	92
	93	printf ( "\n" );
	94	printf ( " Number of processors available = %d\n", omp_get_num_procs ( ) );
	95	printf ( " Number of threads = %d\n", omp_get_max_threads ( ) );
	96	/*
	97	Prepare for tests.
	98	*/
	99	printf ( "\n" );
	100	printf ( " Accuracy check:\n" );
	101	printf ( "\n" );
	102	printf ( " FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n" );
	103	printf ( "\n" );
	104	printf ( " N NITS Error Time Time/Call MFLOPS\n" );
	105	printf ( "\n" );
	106
	107	seed = 331.0;
	108	n = 1;
	109	/*
	110	LN2 is the log base 2 of N. Each increase of LN2 doubles N.
	111	*/
	112	for ( ln2 = 1; ln2 <= ln2_max; ln2++ )
	113	{
	114	n = 2 * n;
	115	/*
	116	Allocate storage for the complex arrays W, X, Y, Z.
	117
	118	We handle the complex arithmetic,
	119	and store a complex number as a pair of doubles, a complex vector as a doubly
	120	dimensioned array whose second dimension is 2.
	121	*/
	122	w = ( double * ) malloc ( n * sizeof ( double ) );
	123	x = ( double * ) malloc ( 2 * n * sizeof ( double ) );
	124	y = ( double * ) malloc ( 2 * n * sizeof ( double ) );
	125	z = ( double * ) malloc ( 2 * n * sizeof ( double ) );
	126
	127	first = 1;
	128
	129	for ( icase = 0; icase < 2; icase++ )
	130	{
	131	if ( first )
	132	{
	133	for ( i = 0; i < 2 * n; i = i + 2 )
	134	{
	135	z0 = ggl ( &seed );
	136	z1 = ggl ( &seed );
	137	x[i] = z0;
	138	z[i] = z0;
	139	x[i+1] = z1;
	140	z[i+1] = z1;
	141	}
	142	}
	143	else
	144	{
	145	# pragma omp parallel \
	146	shared ( n, x, z ) \
	147	private ( i, z0, z1 )
	148
[20ac35f]	149	# pragma omp for
[71264c4]	150
	151	for ( i = 0; i < 2 * n; i = i + 2 )
	152	{
	153	z0 = 0.0; /* real part of array */
	154	z1 = 0.0; /* imaginary part of array */
	155	x[i] = z0;
	156	z[i] = z0; /* copy of initial real data */
	157	x[i+1] = z1;
	158	z[i+1] = z1; /* copy of initial imag. data */
	159	}
	160	}
	161	/*
	162	Initialize the sine and cosine tables.
	163	*/
	164	cffti ( n, w );
	165	/*
	166	Transform forward, back
	167	*/
	168	if ( first )
	169	{
	170	sgn = + 1.0;
	171	cfft2 ( n, x, y, w, sgn );
	172	sgn = - 1.0;
	173	cfft2 ( n, y, x, w, sgn );
	174	/*
	175	Results should be same as the initial data multiplied by N.
	176	*/
	177	fnm1 = 1.0 / ( double ) n;
	178	error = 0.0;
	179	for ( i = 0; i < 2 * n; i = i + 2 )
	180	{
	181	error = error
	182	+ pow ( z[i] - fnm1 * x[i], 2 )
	183	+ pow ( z[i+1] - fnm1 * x[i+1], 2 );
	184	}
	185	error = sqrt ( fnm1 * error );
	186	printf ( " %12d %8d %12e", n, nits, error );
	187	first = 0;
	188	}
	189	else
	190	{
	191	wtime = omp_get_wtime ( );
	192	for ( it = 0; it < nits; it++ )
	193	{
	194	sgn = + 1.0;
	195	cfft2 ( n, x, y, w, sgn );
	196	sgn = - 1.0;
	197	cfft2 ( n, y, x, w, sgn );
	198	}
	199	wtime = omp_get_wtime ( ) - wtime;
	200
	201	flops = 2.0 * ( double ) nits
	202	* ( 5.0 * ( double ) n * ( double ) ln2 );
	203
[9a94b18]	204	mflops = flops / 1.0E+06; // / wtime;
[71264c4]	205
	206	printf ( " %12e %12e %12f\n", wtime, wtime / ( double ) ( 2 * nits ), mflops );
	207	}
	208	}
	209	if ( ( ln2 % 4 ) == 0 )
	210	{
	211	nits = nits / 10;
	212	}
	213	if ( nits < 1 )
	214	{
	215	nits = 1;
	216	}
	217	free ( w );
	218	free ( x );
	219	free ( y );
	220	free ( z );
	221	}
	222	/*
	223	Terminate.
	224	*/
	225	printf ( "\n" );
	226	printf ( "FFT_OPENMP:\n" );
	227	printf ( " Normal end of execution.\n" );
	228	printf ( "\n" );
	229	timestamp ( );
	230
	231	return 0;
	232	}
	233	/******************************************************************************/
	234
	235	void ccopy ( int n, double x[], double y[] )
	236
	237	/******************************************************************************/
	238	/*
	239	Purpose:
	240
	241	CCOPY copies a complex vector.
	242
	243	Discussion:
	244
	245	The "complex" vector A[N] is actually stored as a double vector B[2*N].
	246
	247	The "complex" vector entry A[I] is stored as:
	248
	249	B[I*2+0], the real part,
	250	B[I*2+1], the imaginary part.
	251
	252	Modified:
	253
	254	20 March 2009
	255
	256	Author:
	257
	258	Original C version by Wesley Petersen.
	259	This C version by John Burkardt.
	260
	261	Reference:
	262
	263	Wesley Petersen, Peter Arbenz,
	264	Introduction to Parallel Computing - A practical guide with examples in C,
	265	Oxford University Press,
	266	ISBN: 0-19-851576-6,
	267	LC: QA76.58.P47.
	268
	269	Parameters:
	270
	271	Input, int N, the length of the vector.
	272
	273	Input, double X[2*N], the vector to be copied.
	274
	275	Output, double Y[2*N], a copy of X.
	276	*/
	277	{
	278	int i;
	279
	280	for ( i = 0; i < n; i++ )
	281	{
	282	y[i2+0] = x[i2+0];
	283	y[i2+1] = x[i2+1];
	284	}
	285	return;
	286	}
	287	/******************************************************************************/
	288
	289	void cfft2 ( int n, double x[], double y[], double w[], double sgn )
	290
	291	/******************************************************************************/
	292	/*
	293	Purpose:
	294
	295	CFFT2 performs a complex Fast Fourier Transform.
	296
	297	Modified:
	298
	299	20 March 2009
	300
	301	Author:
	302
	303	Original C version by Wesley Petersen.
	304	This C version by John Burkardt.
	305
	306	Reference:
	307
	308	Wesley Petersen, Peter Arbenz,
	309	Introduction to Parallel Computing - A practical guide with examples in C,
	310	Oxford University Press,
	311	ISBN: 0-19-851576-6,
	312	LC: QA76.58.P47.
	313
	314	Parameters:
	315
	316	Input, int N, the size of the array to be transformed.
	317
	318	Input/output, double X[2*N], the data to be transformed.
	319	On output, the contents of X have been overwritten by work information.
	320
	321	Output, double Y[2*N], the forward or backward FFT of X.
	322
	323	Input, double W[N], a table of sines and cosines.
	324
	325	Input, double SGN, is +1 for a "forward" FFT and -1 for a "backward" FFT.
	326	*/
	327	{
	328	int j;
	329	int m;
	330	int mj;
	331	int tgle;
	332
	333	m = ( int ) ( log ( ( double ) n ) / log ( 1.99 ) );
	334	mj = 1;
	335	/*
	336	Toggling switch for work array.
	337	*/
	338	tgle = 1;
	339	step ( n, mj, &x[02+0], &x[(n/2)2+0], &y[02+0], &y[mj2+0], w, sgn );
	340
	341	if ( n == 2 )
	342	{
	343	return;
	344	}
	345
	346	for ( j = 0; j < m - 2; j++ )
	347	{
	348	mj = mj * 2;
	349	if ( tgle )
	350	{
	351	step ( n, mj, &y[02+0], &y[(n/2)2+0], &x[02+0], &x[mj2+0], w, sgn );
	352	tgle = 0;
	353	}
	354	else
	355	{
	356	step ( n, mj, &x[02+0], &x[(n/2)2+0], &y[02+0], &y[mj2+0], w, sgn );
	357	tgle = 1;
	358	}
	359	}
	360	/*
	361	Last pass through data: move Y to X if needed.
	362	*/
	363	if ( tgle )
	364	{
	365	ccopy ( n, y, x );
	366	}
	367
	368	mj = n / 2;
	369	step ( n, mj, &x[02+0], &x[(n/2)2+0], &y[02+0], &y[mj2+0], w, sgn );
	370
	371	return;
	372	}
	373	/******************************************************************************/
	374
	375	void cffti ( int n, double w[] )
	376
	377	/******************************************************************************/
	378	/*
	379	Purpose:
	380
	381	CFFTI sets up sine and cosine tables needed for the FFT calculation.
	382
	383	Modified:
	384
	385	20 March 2009
	386
	387	Author:
	388
	389	Original C version by Wesley Petersen.
	390	This C version by John Burkardt.
	391
	392	Reference:
	393
	394	Wesley Petersen, Peter Arbenz,
	395	Introduction to Parallel Computing - A practical guide with examples in C,
	396	Oxford University Press,
	397	ISBN: 0-19-851576-6,
	398	LC: QA76.58.P47.
	399
	400	Parameters:
	401
	402	Input, int N, the size of the array to be transformed.
	403
	404	Output, double W[N], a table of sines and cosines.
	405	*/
	406	{
	407	double arg;
	408	double aw;
	409	int i;
	410	int n2;
	411	const double pi = 3.141592653589793;
	412
	413	n2 = n / 2;
	414	aw = 2.0 * pi / ( ( double ) n );
	415
	416	# pragma omp parallel \
	417	shared ( aw, n, w ) \
	418	private ( arg, i )
	419
	420	# pragma omp for nowait
	421
	422	for ( i = 0; i < n2; i++ )
	423	{
	424	arg = aw * ( ( double ) i );
	425	w[i*2+0] = cos ( arg );
	426	w[i*2+1] = sin ( arg );
	427	}
	428	return;
	429	}
	430	/******************************************************************************/
	431
	432	double ggl ( double *seed )
	433
	434	/******************************************************************************/
	435	/*
	436	Purpose:
	437
	438	GGL generates uniformly distributed pseudorandom real numbers in [0,1].
	439
	440	Modified:
	441
	442	20 March 2009
	443
	444	Author:
	445
	446	Original C version by Wesley Petersen, M Troyer, I Vattulainen.
	447	This C version by John Burkardt.
	448
	449	Reference:
	450
	451	Wesley Petersen, Peter Arbenz,
	452	Introduction to Parallel Computing - A practical guide with examples in C,
	453	Oxford University Press,
	454	ISBN: 0-19-851576-6,
	455	LC: QA76.58.P47.
	456
	457	Parameters:
	458
	459	Input/output, double *SEED, used as a seed for the sequence.
	460
	461	Output, double GGL, the next pseudorandom value.
	462	*/
	463	{
	464	double d2 = 0.2147483647e10;
	465	double t;
	466	double value;
	467
	468	t = ( double ) *seed;
	469	t = fmod ( 16807.0 * t, d2 );
	470	*seed = ( double ) t;
	471	value = ( double ) ( ( t - 1.0 ) / ( d2 - 1.0 ) );
	472
	473	return value;
	474	}
	475	/******************************************************************************/
	476
	477	void step ( int n, int mj, double a[], double b[], double c[],
	478	double d[], double w[], double sgn )
	479
	480	/******************************************************************************/
	481	/*
	482	Purpose:
	483
	484	STEP carries out one step of the workspace version of CFFT2.
	485
	486	Modified:
	487
	488	20 March 2009
	489
	490	Author:
	491
	492	Original C version by Wesley Petersen.
	493	This C version by John Burkardt.
	494
	495	Reference:
	496
	497	Wesley Petersen, Peter Arbenz,
	498	Introduction to Parallel Computing - A practical guide with examples in C,
	499	Oxford University Press,
	500	ISBN: 0-19-851576-6,
	501	LC: QA76.58.P47.
	502
	503	Parameters:
	504
	505	*/
	506	{
	507	double ambr;
	508	double ambu;
	509	int j;
	510	int ja;
	511	int jb;
	512	int jc;
	513	int jd;
	514	int jw;
	515	int k;
	516	int lj;
	517	int mj2;
	518	double wjw[2];
	519
	520	mj2 = 2 * mj;
	521	lj = n / mj2;
	522
	523	# pragma omp parallel \
	524	shared ( a, b, c, d, lj, mj, mj2, sgn, w ) \
	525	private ( ambr, ambu, j, ja, jb, jc, jd, jw, k, wjw )
	526
	527	# pragma omp for nowait
	528
	529	for ( j = 0; j < lj; j++ )
	530	{
	531	jw = j * mj;
	532	ja = jw;
	533	jb = ja;
	534	jc = j * mj2;
	535	jd = jc;
	536
	537	wjw[0] = w[jw*2+0];
	538	wjw[1] = w[jw*2+1];
	539
	540	if ( sgn < 0.0 )
	541	{
	542	wjw[1] = - wjw[1];
	543	}
	544
	545	for ( k = 0; k < mj; k++ )
	546	{
	547	c[(jc+k)2+0] = a[(ja+k)2+0] + b[(jb+k)*2+0];
	548	c[(jc+k)2+1] = a[(ja+k)2+1] + b[(jb+k)*2+1];
	549
	550	ambr = a[(ja+k)2+0] - b[(jb+k)2+0];
	551	ambu = a[(ja+k)2+1] - b[(jb+k)2+1];
	552
	553	d[(jd+k)2+0] = wjw[0] ambr - wjw[1] * ambu;
	554	d[(jd+k)2+1] = wjw[1] ambr + wjw[0] * ambu;
	555	}
	556	}
	557	return;
	558	}
	559	/******************************************************************************/
	560
	561	void timestamp ( void )
	562
	563	/******************************************************************************/
	564	/*
	565	Purpose:
	566
	567	TIMESTAMP prints the current YMDHMS date as a time stamp.
	568
	569	Example:
	570
	571	31 May 2001 09:45:54 AM
	572
	573	Licensing:
	574
	575	This code is distributed under the GNU LGPL license.
	576
	577	Modified:
	578
	579	24 September 2003
	580
	581	Author:
	582
	583	John Burkardt
	584
	585	Parameters:
	586
	587	None
	588	*/
	589	{
	590	# define TIME_SIZE 40
	591
	592	static char time_buffer[TIME_SIZE];
	593	const struct tm *tm;
	594	size_t len;
	595	time_t now;
	596
	597	now = time ( NULL );
	598	tm = localtime ( &now );
	599
	600	len = strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm );
	601
	602	printf ( "%s\n", time_buffer );
	603
	604	return;
	605	# undef TIME_SIZE
	606	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: CIVL/examples/omp/fft_openmp.c@ 8190175

Download in other formats: