Context Navigation

c_fft6.c@ 1aaefd4

main test-branch

Last change on this file since 1aaefd4 was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 17.1 KB

Rev	Line
[feadd65]	1	/* ***********************************************************************
	2	This program is part of the
	3	OpenMP Source Code Repository
	4
	5	http://www.pcg.ull.es/ompscr/
	6	e-mail: ompscr@etsii.ull.es
	7
	8	This program is free software; you can redistribute it and/or modify
	9	it under the terms of the GNU General Public License as published by
	10	the Free Software Foundation; either version 2 of the License, or
	11	(at your option) any later version.
	12
	13	This program is distributed in the hope that it will be useful,
	14	but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	GNU General Public License for more details.
	17
	18	You should have received a copy of the GNU General Public License
	19	(LICENSE file) along with this program; if not, write to
	20	the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
	21	Boston, MA 02111-1307 USA
	22
	23	FILE: c_fft6.c
	24	VERSION: 1.0
	25	DATE: May 2004
	26	AUTHOR: F. de Sande
	27	COMMENTS TO: sande@csi.ull.es
	28	DESCRIPTION: Bailey's 6-step 1D Fast Fourier Transform
	29	Computes the 1D FFT of an input signal (complex array)
	30	COMMENTS: The code performs a number (ITERS) of iterations of the
	31	Bailey's 6-step FFT algorithm (following the ideas in the
	32	CMU Task parallel suite).
	33	1.- Generates an input signal vector (dgen) with size
	34	n=n1xn2 stored in row major order
	35	In this code the size of the input signal
	36	is NN=NxN (n=NN, n1=n2=N)
	37	2.- Transpose (tpose) A to have it stored in column
	38
	39	major order
	40	3.- Perform independent FFTs on the rows (cffts)
	41	4.- Scale each element of the resulting array by a
	42	factor of w[n]*(pq)
	43	5.- Transpose (tpose) to prepair it for the next step
	44	6.- Perform independent FFTs on the rows (cffts)
	45	7.- Transpose the resulting matrix
	46	The code requires nested Parallelism.
	47	REFERENCES: David H. Bailey
	48	FFTs in External or Hierarchical Memory,
	49	The Journal of Supercomputing, vol. 4, no. 1, pg. 23-35. Mar 1990,
	50	vol. 4, no. 7, pg. 321. Jul 1961
	51	http://www-2.cs.cmu.edu/~fx/tpsuite.html
	52	http://en.wikipedia.org/wiki/Cooley-Tukey_FFT_algorithm
	53	BASIC PRAGMAS: parallel for
	54	USAGE: ./c_fft6.par 1024 10
	55	INPUT: The size of the input signal.
	56	The number of iterations to perform
	57	OUTPUT: The code tests the correctness of the output signal
	58	FILE FORMATS: -
	59	RESTRICTIONS: The size of the input signal MUST BE a power of 2
	60	REVISION HISTORY:
	61	**************************************************************************/
	62	//#include "OmpSCR.h"
	63	#include <omp.h>
	64	#include <stdlib.h>
	65	#include <math.h>
	66	#include <stdio.h>
	67
	68	#define PI (3.141592653589793f)
	69	#define NUM_ARGS 2
	70	#define NUM_TIMERS 4
	71	typedef double real_type;
	72
	73	#define NINIT 2
	74	#define ITERSINIT 2
	75
	76	/* Input values (CONSTANTS) */
	77	unsigned NN; /* input vector is NN complex values */
	78	unsigned LOGNN; /* log base 2 of NN */
	79	unsigned N; /* square root of NN */
	80	unsigned LOGN; /* log base 2 of N */
	81	int ITERS; /* number of input vectors */
	82
	83	typedef struct { real_type re, im; } complex;
	84	/* -----------------------------------------------------------------------
	85	PROTOTYPES
	86	* ----------------------------------------------------------------------- */
	87	int prperf(double tpose_time, double scale_time, double ffts_time, int nn, int lognn, int iters);
	88	int gen_bit_reverse_table(int *brt, int n);
	89	int gen_w_table(complex *w, int n, int logn, int ndv2);
	90	int gen_v_table(complex *v, int n);
	91	int dgen(complex *xin, int n);
	92	int tpose(complex a, complex b, int n);
	93	int tpose_seq(complex a, complex b, const int n);
	94	int cffts(complex a, int brt, complex *w, int n, int logn, int ndv2);
	95	int cffts_seq(complex a, int brt, complex *w, const int n, int logn, int ndv2);
	96	int fft(complex a, int brt, complex *w, int n, int logn, int ndv2);
	97	int scale(complex a, complex v, int n);
	98	int scale_seq(complex a, complex v, const int n);
	99	int chkmat(complex *a, int n);
	100	complex complex_pow(complex z, int n);
	101	int log2_int(int n);
	102	int print_mat(complex *a, int n);
	103	int print_vec(complex *a, int n);
	104	/* -----------------------------------------------------------------
	105	IMPLEMENTATION
	106	* ----------------------------------------------------------------- */
	107	/* -----------------------------------------------------------------
	108	gen_bit_reverse_table - initialize bit reverse table
	109	Postcondition: br(i) = bit-reverse(i-1) + 1
	110	----------------------------------------------------------------- */
	111	int gen_bit_reverse_table(int *brt, int n) {
	112	register int i, j, k, nv2;
	113
	114	nv2 = n / 2;
	115	j = 1;
	116	brt[0] = j;
	117	for (i = 1; i < n; ++i) {
	118	k = nv2;
	119	while (k < j) {
	120	j -= k;
	121	k /= 2;
	122	}
	123	j += k;
	124	brt[i] = j;
	125	}
	126	return 0;
	127	}
	128	/* -----------------------------------------------------------------------
	129	gen_w_table: generate powers of w.
	130	postcondition: w(i) = w**(i-1)
	131	* ----------------------------------------------------------------------- */
	132	int gen_w_table(complex *w, int n, int logn, int ndv2) {
	133	register int i;
	134	complex ww, pt;
	135
	136	ww.re = cos(PI / ndv2);
	137	ww.im = -sin(PI / ndv2);
	138	w[0].re = 1.f;
	139	w[0].im = 0.f;
	140	pt.re = 1.f;
	141	pt.im = 0.f;
	142	for (i = 1; i < ndv2; ++i) {
	143	w[i].re = pt.re * ww.re - pt.im * ww.im;
	144	w[i].im = pt.re * ww.im + pt.im * ww.re;
	145	pt = w[i];
	146	}
	147	return 0;
	148	}
	149	/* -----------------------------------------------------------------------
	150	gen_v_table - gen 2d twiddle factors
	151	* ----------------------------------------------------------------------- */
	152	int gen_v_table(complex *v, int n) {
	153	register int j, k;
	154	complex wn;
	155
	156	wn.re = cos(PI * 2.f / (n * n));
	157	wn.im = -sin(PI * 2.f / (n * n));
	158	for (j = 0; j < n; ++j) {
	159	for (k = 0; k < n; ++k) {
	160	v[j * n + k] = complex_pow(wn, j * k);
	161	}
	162	}
	163	return 0;
	164	}
	165	/* -----------------------------------------------------------------------
	166	z^n = modulo^n (cos(n * alfa) + i sin(n * alfa)
	167	* ----------------------------------------------------------------------- */
	168	complex complex_pow(complex z, int n) {
	169	complex temp;
	170	real_type pot, nangulo, modulo, angulo;
	171
	172	modulo = sqrt(z.re * z.re + z.im * z.im);
	173	angulo = atan(z.im / z.re);
	174	pot = pow(modulo, n);
	175	nangulo = n * angulo;
	176
	177	temp.re = pot * cos(nangulo);
	178	temp.im = pot * sin(nangulo);
	179	return(temp);
	180	}
	181	/* -----------------------------------------------------------------------
	182	A: Para una señal de entrada de tamaño N y de forma
	183	(1,0), (1,0), ..., (1,0)
	184	la salida debe ser de la forma
	185	(N,0), (0,0), ..., (0,0)
	186
	187	B: Para una señal de entrada de tamaño N y de forma
	188	(0,0), (0,0), ..., (N*N, 0), ..., (0,0)
	189	la salida debe ser de la forma (N*N, 0), (0,0), ..., (0,0)
	190
	191	Propiedad útil: C: FFT(s1 + s2) = FFT(s1) + FFT(s2)
	192
	193	* ----------------------------------------------------------------------- */
	194	int dgen(complex *xin, int n) {
	195	register int i;
	196	int nn = n * n;
	197	/* Señal de forma B */
	198	for (i = 0; i < nn; ++i) {
	199	xin[i].re = 0.f;
	200	xin[i].im = 0.f;
	201	}
	202	xin[nn / 2].re = (real_type)nn;
	203
	204	/* Señal de forma A */
	205	/*
	206	for (i = 0; i < nn; ++i) {
	207	xin[i].re = 1.0;
	208	xin[i].im = 0.f;
	209	}
	210	*/
	211	return 0;
	212	}
	213	/* ----------------------------------------------------------------- */
	214	int tpose(complex a, complex b, const int n) {
	215	register int i, j;
	216
	217	#pragma omp parallel for private(i, j)
	218	for (i = 0; i < n; ++i) {
	219	for (j = i; j < n; ++j) {
	220	b[i * n + j] = a[j * n + i];
	221	b[j * n + i] = a[i * n + j];
	222	}
	223	}
	224	return 0;
	225	}
	226
	227	/* ----------------------------------------------------------------- */
	228	int tpose_seq(complex a, complex b, const int n) {
	229	register int i, j;
	230
	231	for (i = 0; i < n; ++i) {
	232	for (j = i; j < n; ++j) {
	233	b[i * n + j] = a[j * n + i];
	234	b[j * n + i] = a[i * n + j];
	235	}
	236	}
	237	return 0;
	238	}
	239	/* ----------------------------------------------------------------- */
	240	int cffts(complex a, int brt, complex *w, const int n, int logn, int ndv2) {
	241	register int i;
	242
	243	#pragma omp parallel for private(i)
	244	for (i = 0; i < n; ++i)
	245	fft(&a[i * n], brt, w, n, logn, ndv2);
	246	/* fft(a + i * n, brt, w, n, logn, ndv2); */
	247	return 0;
	248	}
	249	/* ----------------------------------------------------------------- */
	250	int cffts_seq(complex a, int brt, complex *w, const int n, int logn, int ndv2) {
	251	register int i;
	252
	253	for (i = 0; i < n; ++i)
	254	fft(&a[i * n], brt, w, n, logn, ndv2);
	255	return 0;
	256	}
	257	/* -----------------------------------------------------------------------
	258	Fast Fourier Transform
	259	1D in-place complex-complex decimation-in-time Cooley-Tukey
	260	* ----------------------------------------------------------------------- */
	261	int fft(complex a, int brt, complex *w, int n, int logn, int ndv2) {
	262	register int i, j;
	263	int powerOfW, stage, first, spowerOfW;
	264	int ijDiff;
	265	int stride;
	266	complex ii, jj, temp, pw;
	267
	268	/* bit reverse step */
	269	for (i = 0; i < n; ++i) {
	270	j = brt[i];
	271	if (i < (j-1)) {
	272	temp = a[j - 1];
	273	a[j - 1] = a[i];
	274	a[i] = temp;
	275	}
	276	}
	277
	278	/* butterfly computations */
	279	ijDiff = 1;
	280	stride = 2;
	281	spowerOfW = ndv2;
	282	for (stage = 0; stage < logn; ++stage) {
	283	/* Invariant: stride = 2 ** stage
	284	Invariant: ijDiff = 2 ** (stage - 1) */
	285	first = 0;
	286	for (powerOfW = 0; powerOfW < ndv2; powerOfW += spowerOfW) {
	287	pw = w[powerOfW];
	288	/* Invariant: pwr + sqrt(-1)pwi = W(powerOfW - 1) /
	289	for (i = first; i < n; i += stride) {
	290	j = i + ijDiff;
	291	jj = a[j];
	292	ii = a[i];
	293	temp.re = jj.re * pw.re - jj.im * pw.im;
	294	temp.im = jj.re * pw.im + jj.im * pw.re;
	295	a[j].re = ii.re - temp.re;
	296	a[j].im = ii.im - temp.im;
	297	a[i].re = ii.re + temp.re;
	298	a[i].im = ii.im + temp.im;
	299	}
	300	++first;
	301	}
	302	ijDiff = stride;
	303	stride <<= 1;
	304	spowerOfW /= 2;
	305	}
	306	return 0;
	307	} /* fft */
	308	/* ----------------------------------------------------------------- */
	309	int scale(complex a, complex v, const int n) {
	310	register int i, j, index;
	311	complex aa, vv;
	312
	313	#pragma omp parallel for private(i, j, index, aa, vv)
	314	for (i = 0; i < n; ++i) {
	315	for (j = 0; j < n; ++j) {
	316	index = i * n + j;
	317	aa = a[index];
	318	vv = v[index];
	319	a[index].re = aa.re * vv.re - aa.im * vv.im;
	320	a[index].im = aa.re * vv.im + aa.im * vv.re;
	321	}
	322	}
	323	return 0;
	324	}
	325	/* ----------------------------------------------------------------- */
	326	int scale_seq(complex a, complex v, const int n) {
	327	register int i, j, index;
	328	complex aa, vv;
	329
	330	for (i = 0; i < n; ++i) {
	331	for (j = 0; j < n; ++j) {
	332	index = i * n + j;
	333	aa = a[index];
	334	vv = v[index];
	335	a[index].re = aa.re * vv.re - aa.im * vv.im;
	336	a[index].im = aa.re * vv.im + aa.im * vv.re;
	337	}
	338	}
	339	return 0;
	340	}
	341	/* -----------------------------------------------------------------------
	342	chkmat - check the output matrix for correctness
	343	* ----------------------------------------------------------------------- */
	344	int chkmat(complex *a, int n) {
	345	int sign, i, j, nn, errors;
	346	real_type EPSILON = 1e-4f;
	347
	348	errors = 0;
	349	nn = n * n;
	350	for (i = 0; i < n; ++i) {
	351	sign = 1;
	352	for (j = 0; j < n; ++j) {
	353	if (a[i * n + j].re > nn * sign + EPSILON)
	354	++errors;
	355	if (a[i * n + j].re < nn * sign - EPSILON)
	356	++errors;
	357	sign = -sign;
	358	}
	359	}
	360	if (errors > 0) {
	361	printf("Errors = %d\n", errors);
	362	exit(-1);
	363	}
	364	return 0;
	365	}
	366	/* ----------------------------------------------------------------- */
	367	int prperf(double tpose_time, double scale_time, double ffts_time, int nn, int lognn, int iters) {
	368	double secs;
	369	double fpercent, spercent, tpercent;
	370	double flops, mflops;
	371
	372	tpose_time /= iters;
	373	scale_time /= iters;
	374	ffts_time /= iters;
	375	secs = tpose_time + scale_time + ffts_time;
	376	tpercent = tpose_time / secs * 100;
	377	spercent = scale_time / secs * 100;
	378	fpercent = ffts_time / secs * 100;
	379	flops = (real_type) (nn * 5 * lognn);
	380	mflops = flops / 1e6f / secs;
	381	printf("***********************\n");
	382	printf("1D FFT %d points\n" ,nn);
	383	printf("***********************\n");
	384	printf("Time per input vector:\n");
	385	printf("tpose : %lf %lf percent\n", tpose_time, tpercent);
	386	printf("scale : %lf %lf percent\n", scale_time, spercent);
	387	printf("ffts : %lf %lf percent\n", ffts_time, fpercent);
	388	printf("total(s) : %lf\n", secs);
	389	printf("mflop/s : %lf\n", mflops);
	390	return 0;
	391	} /* prperf */
	392	/* -----------------------------------------------------------------------
	393	Base 2 logarithm
	394	* ----------------------------------------------------------------------- */
	395	int log2_int(int n) {
	396	register int i, aux;
	397
	398	aux = 1;
	399	for (i = 0; i <= 128; i++) {
	400	if (aux > n)
	401	return (i - 1);
	402	aux <<= 1;
	403	}
	404	return -1;
	405	}
	406	/* ----------------------------------------------------------------- */
	407	int print_mat(complex *a, int n) {
	408	register int i, j;
	409
	410	for (i = 0; i < n; ++i) {
	411	for (j = 0; j < n; ++j) {
	412	printf(" (%.0f, %.0f)", a[i * n + j].re, a[i * n + j].im);
	413	}
	414	printf("\n");
	415	}
	416	printf("\n");
	417	return 0;
	418	}
	419	/* ----------------------------------------------------------------- */
	420	int print_vec(complex *a, int n) {
	421	register int i;
	422
	423	for (i = 0; i < n*n; ++i)
	424	printf(" (%.0f, %.0f)", a[i].re, a[i].im);
	425	printf("\n\n");
	426	return 0;
	427	}
	428	/* ----------------------------------------------------------------- */
	429	int main(int argc, char *argv[]) {
	430	complex *xin; / input vector */
	431	complex *aux; / intermediate array (same size) */
	432	/* precomputed vectors */
	433	int brt; / bit reverse table for 1d fft's */
	434	complex w; / twiddles for 1d fft's */
	435	complex v; / twiddles for 2d fft */
	436	double total_time; /* timing */
	437	int k;
	438	int NUMTHREADS, MAX_THREADS;
	439	#define ID omp_get_thread_num()
	440	float MEMORY; /* number of bytes required */
	441	// char *PARAM_NAMES[NUM_ARGS] = {"Size (a power of 2).", "No. of iterations"};
	442	// char *TIMERS_NAMES[NUM_TIMERS] = {"Total_time", "Tpose time", "Scale time", "Column FFTs time" };
	443	// char *DEFAULT_VALUES[NUM_ARGS] = {"64", "10"};
	444
	445
[ee9e9c3]	446	NUMTHREADS = 1; //omp_get_num_threads();
[feadd65]	447	//OSCR_init (NUMTHREADS, "Bailey's '6-step' Fast Fourier Transform", "Use 'fft6' <size (in K)> <iters>", NUM_ARGS,
	448	// PARAM_NAMES, DEFAULT_VALUES , NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES,
	449	// argc, argv);
	450
	451
	452	/* Command line arguments processing */
	453	N = NINIT; // OSCR_getarg_int(1);
	454	ITERS = ITERSINIT; // OSCR_getarg_int(2);
	455	/*
	456	if(argc == 3) {
	457	N = atoi(argv[1]);
	458	ITERS = atoi(argv[2]);
	459	}
	460	else {
	461	printf("Usage: %s N ITERS\n", argv[0]);
	462	printf("N is the size of the input signal. Should be a power of two.\n");
	463	N = 64;
	464	ITERS = 10;
	465	}
	466	*/
	467
[ee9e9c3]	468	MAX_THREADS = 1; //omp_get_num_threads();
[feadd65]	469	NN = N * N;
	470	LOGN = log2_int(N);
	471	LOGNN = log2_int(NN);
	472	MEMORY = N * sizeof(int) +
	473	2 * (NN) * sizeof(complex) * MAX_THREADS + (NN) * sizeof(complex) +
	474	(N / 2) * sizeof(complex);
	475
	476	/* Memory allocation */
	477	xin = calloc(MAX_THREADS, sizeof(*xin));
	478	aux = calloc(MAX_THREADS, sizeof(*aux));
	479	brt = (int *)calloc(N, sizeof(int));
	480	v = (complex )calloc(N N, sizeof(complex)); /* twiddles for 2d fft */
	481	w = (complex )calloc((N / 2), sizeof(complex)); / twiddles for 1d fft's */
	482
	483	printf("Input values:\n");
	484	printf("=============\n");
	485	printf("N : %u\n", N);
	486	printf("ITERS : %u\n", ITERS);
	487	printf("NN : %u\n", NN);
	488	printf("LOGN : %u\n", LOGN);
	489	printf("LOGNN : %u\n", LOGNN);
	490	printf("Memory : %.2f Kbytes\n", MEMORY / 1024);
	491
	492	/* precompute the input array and fft constants */
	493	gen_bit_reverse_table(brt, N);
	494	gen_w_table(w, N, LOGN, N / 2);
	495	gen_v_table(v, N);
	496
	497	for (k = 0; k < MAX_THREADS; k++) {
	498	xin[k] = (complex )calloc(N N, sizeof(complex));
	499	aux[k] = (complex )calloc(N N, sizeof(complex));
	500	}
	501	#define TOTAL 0
	502
	503	//OSCR_timer_start(TOTAL);
	504	#pragma omp parallel for private(k)
	505	for(k = 0; k < ITERS; k++) {
	506	dgen(xin[ID], N);
	507
	508	tpose(xin[ID], aux[ID], N);
	509
	510	cffts(aux[ID], brt, w, N, LOGN, N / 2);
	511
	512	scale(aux[ID], v, N);
	513
	514	tpose(aux[ID], xin[ID], N);
	515
	516	cffts(xin[ID], brt, w, N, LOGN, N / 2);
	517
	518	tpose(xin[ID], aux[ID], N);
	519
	520	chkmat(aux[ID], N);
	521	}
	522	//OSCR_timer_stop(TOTAL);
	523	total_time = 1; //OSCR_timer_read(TOTAL);
	524
	525	/* Display results and time */
	526	//OSCR_report(1, TIMERS_NAMES);
	527
	528	printf("============================");
	529	printf("\n# THREADS : %d\n", NUMTHREADS);
	530	printf("N : %d\n", N);
	531	printf("ITERS : %d\n", ITERS);
	532	printf("total TIME: %.6lf secs.\n", total_time);
	533
	534	#undef ID
	535	#undef MAX_THREADS
	536	return 0;
	537	}
	538	/* ----------------------------------------------------------------- */
	539
	540	/*
	541	* vim:ts=2:sw=2:set nonu:
	542	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: CIVL/examples/omp/c_fft6.c@ 1aaefd4

Download in other formats: