| 1 | /* ***********************************************************************
|
|---|
| 2 | This program is part of the
|
|---|
| 3 | OpenMP Source Code Repository
|
|---|
| 4 |
|
|---|
| 5 | http://www.pcg.ull.es/ompscr/
|
|---|
| 6 | e-mail: ompscr@etsii.ull.es
|
|---|
| 7 |
|
|---|
| 8 | This program is free software; you can redistribute it and/or modify
|
|---|
| 9 | it under the terms of the GNU General Public License as published by
|
|---|
| 10 | the Free Software Foundation; either version 2 of the License, or
|
|---|
| 11 | (at your option) any later version.
|
|---|
| 12 |
|
|---|
| 13 | This program is distributed in the hope that it will be useful,
|
|---|
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 16 | GNU General Public License for more details.
|
|---|
| 17 |
|
|---|
| 18 | You should have received a copy of the GNU General Public License
|
|---|
| 19 | (LICENSE file) along with this program; if not, write to
|
|---|
| 20 | the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
|
|---|
| 21 | Boston, MA 02111-1307 USA
|
|---|
| 22 |
|
|---|
| 23 | FILE: c_fft.c
|
|---|
| 24 | VERSION: 1.0
|
|---|
| 25 | DATE: May 2004
|
|---|
| 26 | AUTHOR: F. de Sande
|
|---|
| 27 | COMMENTS TO: sande@csi.ull.es
|
|---|
| 28 | DESCRIPTION: This program computes the Fast Fourier Transform
|
|---|
| 29 | on an input signal
|
|---|
| 30 | COMMENTS: The algorithm uses a divide and conquer strategy
|
|---|
| 31 | and the transform is computed as a combination of the
|
|---|
| 32 | transforms of the even and odd terms of the original signal.
|
|---|
| 33 | The code requires nested Parallelism.
|
|---|
| 34 | Function write_array() is provided only for debuging purposes.
|
|---|
| 35 | (use a small size signal if you want to write it).
|
|---|
| 36 | REFERENCES: James W. Cooley and John W. Tukey,
|
|---|
| 37 | An Algorithm for the Machine Calculation of Complex Fourier Series,
|
|---|
| 38 | Mathematics of Computation, 1965, vol. 19, no. 90, pg 297-301
|
|---|
| 39 | http://en.wikipedia.org/wiki/Cooley-Tukey_FFT_algorithm
|
|---|
| 40 | BASIC PRAGMAS: parallel for
|
|---|
| 41 | USAGE: ./c_fft.par 8192
|
|---|
| 42 | INPUT: The size of the input signal
|
|---|
| 43 | OUTPUT: The code tests the correctness of the result for the input
|
|---|
| 44 | FILE FORMATS: -
|
|---|
| 45 | RESTRICTIONS: The size of the input signal MUST be a power of 2
|
|---|
| 46 | REVISION HISTORY:
|
|---|
| 47 | **************************************************************************/
|
|---|
| 48 | //#include "OmpSCR.h"
|
|---|
| 49 | #include <omp.h>
|
|---|
| 50 | #include <math.h>
|
|---|
| 51 | #include <stdio.h>
|
|---|
| 52 |
|
|---|
| 53 | #define KILO (1024)
|
|---|
| 54 | #define DEFAULT_SIZE_IN_KB (64)
|
|---|
| 55 | #define NUM_ARGS 1
|
|---|
| 56 | #define NUM_TIMERS 1
|
|---|
| 57 | #define ARG 1
|
|---|
| 58 |
|
|---|
| 59 | typedef double doubleType;
|
|---|
| 60 | typedef struct {
|
|---|
| 61 | doubleType re;
|
|---|
| 62 | doubleType im;
|
|---|
| 63 | } Complex;
|
|---|
| 64 |
|
|---|
| 65 | /* -----------------------------------------------------------------------
|
|---|
| 66 | PROTOTYPES
|
|---|
| 67 | * ----------------------------------------------------------------------- */
|
|---|
| 68 | void initialize(unsigned Size, Complex *a);
|
|---|
| 69 | void write_array(unsigned Size, Complex *a);
|
|---|
| 70 | int test_array(unsigned Size, Complex *a);
|
|---|
| 71 | void FFT(Complex *A, Complex *a, Complex *W, unsigned N, unsigned stride, Complex *D);
|
|---|
| 72 | void Roots(unsigned Size, Complex *W);
|
|---|
| 73 | unsigned get_params(int argc, char *argv[]);
|
|---|
| 74 |
|
|---|
| 75 | /* -----------------------------------------------------------------------
|
|---|
| 76 | IMPLEMENTATION
|
|---|
| 77 | * ----------------------------------------------------------------------- */
|
|---|
| 78 | /* -----------------------------------------------------------------------
|
|---|
| 79 | Routine: initialize
|
|---|
| 80 | Description: Initialise a vector of complex numbers
|
|---|
| 81 | Comment: all numbers have real part 1.0 and imaginary part 0.0
|
|---|
| 82 | * ----------------------------------------------------------------------- */
|
|---|
| 83 | void initialize(unsigned Size, Complex *a) {
|
|---|
| 84 | unsigned i;
|
|---|
| 85 |
|
|---|
| 86 | for(i = 0; i < Size; i++) {
|
|---|
| 87 | a[i].re = 1.0;
|
|---|
| 88 | a[i].im = 0.0;
|
|---|
| 89 | }
|
|---|
| 90 | }
|
|---|
| 91 | /* -----------------------------------------------------------------------
|
|---|
| 92 | Routine: write_array
|
|---|
| 93 | Description: Display a vector of complex numbers
|
|---|
| 94 | * ----------------------------------------------------------------------- */
|
|---|
| 95 | void write_array(unsigned Size, Complex *a) {
|
|---|
| 96 | unsigned i;
|
|---|
| 97 |
|
|---|
| 98 | for(i = 0; i < Size; i++)
|
|---|
| 99 | printf("a[%2u] = [%.8lf,%.8lf]\n", i, a[i].re, a[i].im);
|
|---|
| 100 | }
|
|---|
| 101 | /* -----------------------------------------------------------------------
|
|---|
| 102 | Routine: test_array
|
|---|
| 103 | Description: Test is true if the complex vector is of the form
|
|---|
| 104 | [(Size,0),(0,0),...,(0,0)]
|
|---|
| 105 | * ----------------------------------------------------------------------- */
|
|---|
| 106 | int test_array(unsigned Size, Complex *a) {
|
|---|
| 107 | register unsigned i;
|
|---|
| 108 | unsigned OK = 1;
|
|---|
| 109 |
|
|---|
| 110 | if((a[0].re == Size) && (a[0].im == 0)) {
|
|---|
| 111 | for(i = 1; i < Size; i++)
|
|---|
| 112 | if (a[i].re != 0.0 || a[i].im != 0.0) {
|
|---|
| 113 | OK = 0;
|
|---|
| 114 | break;
|
|---|
| 115 | }
|
|---|
| 116 | }
|
|---|
| 117 | else OK = 0;
|
|---|
| 118 | return OK;
|
|---|
| 119 | }
|
|---|
| 120 | /* -----------------------------------------------------------------------
|
|---|
| 121 | Procedure: Roots
|
|---|
| 122 | Description: Computes roots of the Unary
|
|---|
| 123 | Parameters:
|
|---|
| 124 | unsigned Size, number of roots to compute
|
|---|
| 125 | Complex *W, vector containing the roots
|
|---|
| 126 | * ----------------------------------------------------------------------- */
|
|---|
| 127 | void Roots(unsigned Size, Complex *W) {
|
|---|
| 128 | register unsigned i;
|
|---|
| 129 | double phi;
|
|---|
| 130 | Complex Omega;
|
|---|
| 131 |
|
|---|
| 132 | phi = 4 * atan(1.0) / (double)Size; /* PI/Size */
|
|---|
| 133 | Omega.re = cos(phi);
|
|---|
| 134 | Omega.im = sin(phi);
|
|---|
| 135 | W[0].re = 1.0;
|
|---|
| 136 | W[0].im = 0.0;
|
|---|
| 137 | for(i = 1; i < Size; i++) {
|
|---|
| 138 | W[i].re = W[i-1].re * Omega.re - W[i-1].im * Omega.im;
|
|---|
| 139 | W[i].im = W[i-1].re * Omega.im + W[i-1].im * Omega.re;
|
|---|
| 140 | }
|
|---|
| 141 | }
|
|---|
| 142 | /* -----------------------------------------------------------------------
|
|---|
| 143 | Procedure: FFT
|
|---|
| 144 | Description: Recursive (divide and conquer) Fast Fourier Transform
|
|---|
| 145 | Parameters:
|
|---|
| 146 | Complex *A, transformed output signal
|
|---|
| 147 | Complex *a, input signal
|
|---|
| 148 | Complex *W, vector containing the roots
|
|---|
| 149 | unsigned N, number of elements in a
|
|---|
| 150 | unsigned stride, between consecutive elements in a to be considered
|
|---|
| 151 | Complex *D, auxiliar vector to do combination
|
|---|
| 152 | * ----------------------------------------------------------------------- */
|
|---|
| 153 | void FFT(Complex *A, Complex *a, Complex *W, unsigned N,
|
|---|
| 154 | unsigned stride, Complex *D) {
|
|---|
| 155 | Complex *B, *C;
|
|---|
| 156 | Complex Aux, *pW;
|
|---|
| 157 | unsigned n;
|
|---|
| 158 | int i;
|
|---|
| 159 |
|
|---|
| 160 | if (N == 1) {
|
|---|
| 161 | A[0].re = a[0].re;
|
|---|
| 162 | A[0].im = a[0].im;
|
|---|
| 163 | }
|
|---|
| 164 | else {
|
|---|
| 165 | /* Division stage without copying input data */
|
|---|
| 166 | n = (N >> 1); /* N = N div 2 */
|
|---|
| 167 |
|
|---|
| 168 | /* Subproblems resolution stage */
|
|---|
| 169 | #pragma omp parallel for
|
|---|
| 170 | for(i = 0; i <= 1; i++) {
|
|---|
| 171 | FFT(D + i * n, a + i * stride, W, n, stride << 1, A + i * n);
|
|---|
| 172 | }
|
|---|
| 173 | /* Combination stage */
|
|---|
| 174 | B = D;
|
|---|
| 175 | C = D + n;
|
|---|
| 176 | #pragma omp parallel for default(none) private(i, Aux, pW) shared(stride, n, A, B, C, W)
|
|---|
| 177 | for(i = 0; i <= n - 1; i++) {
|
|---|
| 178 | pW = W + i * stride;
|
|---|
| 179 | Aux.re = pW->re * C[i].re - pW->im * C[i].im;
|
|---|
| 180 | Aux.im = pW->re * C[i].im + pW->im * C[i].re;
|
|---|
| 181 |
|
|---|
| 182 | A[i].re = B[i].re + Aux.re;
|
|---|
| 183 | A[i].im = B[i].im + Aux.im;
|
|---|
| 184 | A[i+n].re = B[i].re - Aux.re;
|
|---|
| 185 | A[i+n].im = B[i].im - Aux.im;
|
|---|
| 186 | }
|
|---|
| 187 | }
|
|---|
| 188 | }
|
|---|
| 189 | /* ----------------------------------------------------------------------- */
|
|---|
| 190 | unsigned get_params(int argc, char *argv[]) {
|
|---|
| 191 | char usage_str[] = "<size_in_Kb>";
|
|---|
| 192 | unsigned sizeInKb;
|
|---|
| 193 |
|
|---|
| 194 | if (argc == 2)
|
|---|
| 195 | sizeInKb = atoi(argv[1]);
|
|---|
| 196 | else
|
|---|
| 197 | if (argc == 1)
|
|---|
| 198 | sizeInKb = DEFAULT_SIZE_IN_KB;
|
|---|
| 199 | else {
|
|---|
| 200 | printf("\nUse: %s %s\n", argv[0], usage_str);
|
|---|
| 201 | exit(-1);
|
|---|
| 202 | }
|
|---|
| 203 | printf("\nUse: %s %s\n", argv[0], usage_str);
|
|---|
| 204 | printf("Running with Size: %d K\n", sizeInKb);
|
|---|
| 205 | return sizeInKb;
|
|---|
| 206 | }
|
|---|
| 207 | /* ----------------------------------------------------------------------- */
|
|---|
| 208 | int main(int argc, char *argv[]) {
|
|---|
| 209 | unsigned N;
|
|---|
| 210 | Complex *a, *A, *W, *D;
|
|---|
| 211 | int NUMTHREADS;
|
|---|
| 212 | char *PARAM_NAMES[NUM_ARGS] = {"Size of the input signal (in Kb)"};
|
|---|
| 213 | char *TIMERS_NAMES[NUM_TIMERS] = {"Total_time" };
|
|---|
| 214 | char *DEFAULT_VALUES[NUM_ARGS] = {"64"};
|
|---|
| 215 |
|
|---|
| 216 |
|
|---|
| 217 | NUMTHREADS = 1; //omp_get_num_threads();
|
|---|
| 218 | //OSCR_init (NUMTHREADS, "Divide and Conquer Fast Fourier Transform.", "Use 'fft' <size (in K)>", NUM_ARGS,
|
|---|
| 219 | // PARAM_NAMES, DEFAULT_VALUES , NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES,
|
|---|
| 220 | // argc, argv);
|
|---|
| 221 |
|
|---|
| 222 | N = KILO * ARG; // OSCR_getarg_int(1);
|
|---|
| 223 |
|
|---|
| 224 | /* N = KILO * get_params(argc, argv); */
|
|---|
| 225 |
|
|---|
| 226 | /* Memory allocation */
|
|---|
| 227 | a = (Complex*)calloc(N, sizeof(Complex));
|
|---|
| 228 | A = (Complex*)calloc(N, sizeof(Complex));
|
|---|
| 229 | D = (Complex*)calloc(N, sizeof(Complex));
|
|---|
| 230 | W = (Complex*)calloc(N>>1, sizeof(Complex));
|
|---|
| 231 | if((a==NULL) || (A==NULL) || (D==NULL) || (W==NULL)) {
|
|---|
| 232 | printf("Not enough memory initializing arrays\n");
|
|---|
| 233 | exit(1);
|
|---|
| 234 | }
|
|---|
| 235 | initialize(N, a); /* Generate test input signal */
|
|---|
| 236 | /* write_array(N, a); */
|
|---|
| 237 | Roots(N >> 1, W); /* Initialise the vector of imaginary roots */
|
|---|
| 238 | //OSCR_timer_start(0);
|
|---|
| 239 | FFT(A, a, W, N, 1, D);
|
|---|
| 240 | //OSCR_timer_stop(0);
|
|---|
| 241 | /* write_array(N, A); */
|
|---|
| 242 |
|
|---|
| 243 | /* Display results and time */
|
|---|
| 244 | printf("Test array: ");
|
|---|
| 245 | if (test_array(N, A))
|
|---|
| 246 | printf("Ok\n");
|
|---|
| 247 | else
|
|---|
| 248 | printf("Fails\n");
|
|---|
| 249 | //OSCR_report(1, TIMERS_NAMES);
|
|---|
| 250 | free(W);
|
|---|
| 251 | free(D);
|
|---|
| 252 | free(A);
|
|---|
| 253 | free(a);
|
|---|
| 254 |
|
|---|
| 255 | return 0;
|
|---|
| 256 | }
|
|---|
| 257 |
|
|---|
| 258 | /*
|
|---|
| 259 | * vim:ts=2:sw=2:
|
|---|
| 260 | */
|
|---|
| 261 |
|
|---|