source: CIVL/examples/omp/shtns/time_SHT.c

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 23.9 KB
RevLine 
[2131108]1/*
2 * Copyright (c) 2010-2015 Centre National de la Recherche Scientifique.
3 * written by Nathanael Schaeffer (CNRS, ISTerre, Grenoble, France).
4 *
5 * nathanael.schaeffer@ujf-grenoble.fr
6 *
7 * This software is governed by the CeCILL license under French law and
8 * abiding by the rules of distribution of free software. You can use,
9 * modify and/or redistribute the software under the terms of the CeCILL
10 * license as circulated by CEA, CNRS and INRIA at the following URL
11 * "http://www.cecill.info".
12 *
13 * The fact that you are presently reading this means that you have had
14 * knowledge of the CeCILL license and that you accept its terms.
15 *
16 */
17
18/// \file time_SHT.c This program performs some spherical harmonic transforms, and displays timings and accuracy.
19/// \c make \c time_SHT to compile, and then run.
20
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24#include <complex.h>
25#include <math.h>
26#include <time.h>
27#include "fftw3/fftw3.h"
28
29// cycle counter from FFTW
30#include "fftw3/cycle.h"
31
32#include <shtns.h>
33
34shtns_cfg shtns;
35
36complex double *Slm, *Slm0, *Tlm, *Tlm0, *Qlm; // spherical harmonics l,m space
37complex double *ShF, *ThF, *NLF; // Fourier space : theta,m
38double *Sh, *Th, *NL; // real space : theta,phi (alias of ShF)
39
40int LMAX,MMAX,MRES,NLM;
41int NLAT = 0;
42int NPHI = 0;
43
44// number of SH iterations
45int SHT_ITER = 50; // do 50 iterations by default
46
47
48#ifdef __MACH__ // Mac OSX : clock_gettime is not implemented
49#include <sys/time.h>
50#ifndef CLOCK_MONOTONIC
51 #define CLOCK_MONOTONIC 0
52#endif
53#ifndef CLOCK_REALTIME
54 #define CLOCK_REALTIME 0
55#endif
56int clock_gettime(int ignored, struct timespec* t) {
57 struct timeval now;
58 int rv = gettimeofday(&now, NULL);
59 if (rv) return rv;
60 t->tv_sec = now.tv_sec;
61 t->tv_nsec = now.tv_usec * 1000;
62 return 0;
63}
64#endif
65
66
67void runerr(const char * error_text)
68{
69 printf("%s\n",error_text);
70 exit(1);
71}
72
73void write_vect(char *fn, double *vec, int N)
74{
75 FILE *fp;
76 int i;
77
78 fp = fopen(fn,"w");
79 for (i=0;i<N;i++) {
80 fprintf(fp,"%.6g ",vec[i]);
81 }
82 fclose(fp);
83}
84
85void write_mx(char *fn, double *mx, int N1, int N2)
86{
87 FILE *fp;
88 int i,j;
89
90 fp = fopen(fn,"w");
91 for (i=0;i<N1;i++) {
92 for(j=0;j<N2;j++) {
93 fprintf(fp,"%.6g ",mx[i*N2+j]);
94 }
95 fprintf(fp,"\n");
96 }
97 fclose(fp);
98}
99
100double tdiff(struct timespec *start, struct timespec *end)
101{
102 double ns = 1.e9 * ((long) end->tv_sec - (long) start->tv_sec);
103 ns += ((long) end->tv_nsec - (long) start->tv_nsec);
104 return ns * (1.e-6/SHT_ITER); // time in ms.
105}
106
107// isNaN testing that works with -ffast-math
108double isNaN_0=0, isNaN_1=1;
109inline int isNaN(double x) {
110 return ((x == isNaN_0)&&(x == isNaN_1));
111}
112
113double scal_error(complex double *Slm, complex double *Slm0, int ltr)
114{
115 long int jj,i, nlm_cplx;
116 double tmax,t,n2;
117
118 nlm_cplx = (MMAX*2 == NPHI) ? LiM(shtns, MRES*MMAX,MMAX) : NLM;
119// compute error :
120 tmax = 0; n2 = 0; jj=0;
121 for (i=0;i<NLM;i++) {
122 //if ((isNaN(creal(Slm[i]))) || (isNaN(cimag(Slm[i])))) printf("NaN @ lm=%ld (l=%d)\n",i,shtns->li[i]);
123 if ((i <= LMAX)||(i >= nlm_cplx)) { // m=0, and 2*m=nphi is real
124 if (shtns->li[i] <= ltr) Slm[i] = creal(Slm[i]-Slm0[i]);
125 t = fabs(creal(Slm[i]));
126 } else {
127 if (shtns->li[i] <= ltr) Slm[i] -= Slm0[i];
128 t = cabs(Slm[i]);
129 }
130 n2 += t*t;
131 if (t>tmax) { tmax = t; jj = i; }
132 }
133 printf(" => max error = %g (l=%d,lm=%ld) rms error = %g",tmax,shtns->li[jj],jj,sqrt(n2/NLM));
134 if (tmax > 1e-3) {
135 if (NLM < 15) {
136 printf("\n orig:");
137 for (i=0; i<NLM;i++)
138 if ((i <= LMAX)||(i >= nlm_cplx)) { // m=0, and 2*m=nphi is real
139 printf(" %g",creal(Slm0[i]));
140 } else {
141 printf(" %g,%g",creal(Slm0[i]),cimag(Slm0[i]));
142 }
143 printf("\n diff:");
144 for (i=0; i<NLM;i++)
145 if ((i <= LMAX)||(i >= nlm_cplx)) { // m=0, and 2*m=nphi is real
146 printf(" %g",creal(Slm[i]));
147 } else {
148 printf(" %g,%g",creal(Slm[i]),cimag(Slm[i]));
149 }
150 }
151 printf(" **** ERROR ****\n");
152 }
153 else printf("\n");
154 return(tmax);
155}
156
157double vect_error(complex double *Slm, complex double *Tlm, complex double *Slm0, complex double *Tlm0, int ltr)
158{
159 long int jj,i;
160 double tmax0, tmax,t,n2;
161
162// compute error :
163 tmax = 0; n2 = 0; jj=0;
164 for (i=0;i<NLM;i++) {
165 if ((i <= LMAX)||(i >= LiM(shtns, MRES*(NPHI+1)/2,(NPHI+1)/2))) {
166 if (shtns->li[i] <= ltr) Slm[i] = creal(Slm[i]-Slm0[i]);
167 t = fabs(creal(Slm[i]));
168 } else {
169 if (shtns->li[i] <= ltr) Slm[i] -= Slm0[i];
170 t = cabs(Slm[i]);
171 }
172 n2 += t*t;
173 if (t>tmax) { tmax = t; jj = i; }
174 }
175 printf(" Spheroidal => max error = %g (l=%d,lm=%ld) rms error = %g",tmax,shtns->li[jj],jj,sqrt(n2/NLM));
176 if (tmax > 1e-3) { printf(" **** ERROR ****\n"); }
177 else printf("\n");
178// write_vect("Slm",Slm,NLM*2);
179 tmax0 = tmax;
180
181// compute error :
182 tmax = 0; n2 = 0; jj=0;
183 for (i=0;i<NLM;i++) {
184 if ((i <= LMAX)||(i >= LiM(shtns, MRES*(NPHI+1)/2,(NPHI+1)/2))) {
185 if (shtns->li[i] <= ltr) Tlm[i] = creal(Tlm[i]- Tlm0[i]);
186 t = fabs(creal(Tlm[i]));
187 } else {
188 if (shtns->li[i] <= ltr) Tlm[i] -= Tlm0[i];
189 t = cabs(Tlm[i]);
190 }
191 n2 += t*t;
192 if (t>tmax) { tmax = t; jj = i; }
193 }
194 printf(" Toroidal => max error = %g (l=%d,lm=%ld) rms error = %g",tmax,shtns->li[jj],jj,sqrt(n2/NLM));
195 if (tmax > 1e-3) { printf(" **** ERROR ****\n"); }
196 else printf("\n");
197// write_vect("Tlm",Tlm,NLM*2);
198 return(tmax > tmax0 ? tmax : tmax0);
199}
200
201void test_SH_point()
202{
203 long int jj,i;
204 double ts2, ta2;
205 struct timespec t1, t2;
206
207 for (i=0;i<NLM;i++) Slm[i] = Slm0[i]; // restore test case...
208
209 clock_gettime(CLOCK_MONOTONIC, &t1);
210 for (jj=0; jj< SHT_ITER; jj++) {
211 ta2 = SH_to_point(shtns, Slm, 0.8, 0.76);
212 }
213 clock_gettime(CLOCK_MONOTONIC, &t2);
214 ts2 = tdiff(&t1, &t2);
215
216 clock_gettime(CLOCK_MONOTONIC, &t1);
217 for (jj=1; jj< SHT_ITER; jj++) {
218 double vr, vt, vp;
219 SHqst_to_point(shtns, Slm, Slm0, Tlm0, 0.8, 0.76, &vr, &vt, &vp);
220 }
221 clock_gettime(CLOCK_MONOTONIC, &t2);
222 ta2 = tdiff(&t1, &t2);
223
224 printf(" SHT_to_point time = %f ms [scalar], %f ms [3D vector]\n", ts2, ta2);
225 return;
226}
227
228
229void test_SHT()
230{
231 long int jj,i;
232 clock_t tcpu;
233 double ts, ta, ts2, ta2;
234 struct timespec t1, t2;
235 double gflop = 1e-6 * (NLAT*(NLM*4 +(MMAX+1)*2 + MMAX*log2(MMAX+1) + 5*NPHI*log2(NPHI))); // Million floating point ops
236
237 for (i=0;i<NLM;i++) Slm[i] = Slm0[i]; // restore test case...
238
239 tcpu = clock();
240 clock_gettime(CLOCK_MONOTONIC, &t1);
241 for (jj=0; jj< SHT_ITER; jj++) {
242 SH_to_spat(shtns, Slm,Sh);
243 }
244 clock_gettime(CLOCK_MONOTONIC, &t2);
245 tcpu = clock() - tcpu;
246 ts = tcpu / (1000.*SHT_ITER);
247 ts2 = tdiff(&t1, &t2);
248
249 clock_gettime(CLOCK_MONOTONIC, &t1);
250 tcpu = clock();
251 spat_to_SH(shtns, Sh,Slm);
252 for (jj=1; jj< SHT_ITER; jj++) {
253 spat_to_SH(shtns, Sh,Tlm);
254 }
255 tcpu = clock() - tcpu;
256 clock_gettime(CLOCK_MONOTONIC, &t2);
257 ta = tcpu / (1000.*SHT_ITER);
258 ta2 = tdiff(&t1, &t2);
259 #ifdef _OPENMP
260 printf(" SHT time (lmax=%d): \t synthesis = %.5f ms [cpu %.3f] [%.3f Gflops] \t analysis = %.5f ms [cpu %.3f] [%.3f Gflops] \n", LMAX, ts2, ts, gflop/ts2, ta2, ta, gflop/ta2);
261 #else
262 printf(" SHT time (lmax=%d): \t synthesis = %f ms [%f Gflops] \t analysis = %f ms [%f Gflops] \n", LMAX, ts2, gflop/ts2, ta2, gflop/ta2);
263 #endif
264 scal_error(Slm, Slm0, LMAX);
265 return;
266}
267
268void test_SHT_m0()
269{
270 long int jj,i;
271 double ts, ta;
272 struct timespec t1, t2;
273
274 for (i=0;i<NLM;i++) Slm[i] = Slm0[i]; // restore test case...
275
276 clock_gettime(CLOCK_MONOTONIC, &t1);
277 for (jj=0; jj< SHT_ITER; jj++) {
278 SHsph_to_spat(shtns, Slm,Sh,NULL);
279 }
280 clock_gettime(CLOCK_MONOTONIC, &t2);
281 ts = tdiff(&t1, &t2);
282
283 clock_gettime(CLOCK_MONOTONIC, &t1);
284 SHtor_to_spat(shtns, Slm, NULL, Sh);
285 for (jj=1; jj< SHT_ITER; jj++) {
286 SHtor_to_spat(shtns, Slm, NULL, Sh);
287 }
288 clock_gettime(CLOCK_MONOTONIC, &t2);
289 ta = tdiff(&t1, &t2);
290 printf(" SHT time : \t spheroidal = %f ms \t torodial = %f ms\n", ts, ta);
291
292 return;
293}
294
295void test_SHT_l(int ltr)
296{
297 int jj,i;
298 double ts, ta;
299 struct timespec t1, t2;
300
301 for (i=0;i<NLM;i++) Slm[i] = Slm0[i]; // restore test case...
302
303
304 clock_gettime(CLOCK_MONOTONIC, &t1);
305 for (jj=0; jj< SHT_ITER; jj++) {
306 SH_to_spat_l(shtns, Slm,Sh,ltr);
307 }
308 clock_gettime(CLOCK_MONOTONIC, &t2);
309 ts = tdiff(&t1, &t2);
310
311 clock_gettime(CLOCK_MONOTONIC, &t1);
312 spat_to_SH_l(shtns, Sh,Slm,ltr);
313 for (jj=1; jj< SHT_ITER; jj++) {
314 spat_to_SH_l(shtns, Sh,Tlm,ltr);
315 }
316 clock_gettime(CLOCK_MONOTONIC, &t2);
317 ta = tdiff(&t1, &t2);
318 printf(" SHT time truncated at l=%d : synthesis = %f ms, analysis = %f ms\n", ltr, ts, ta);
319
320 scal_error(Slm, Slm0, ltr);
321 return;
322}
323
324void test_SHT_vect_l(int ltr)
325{
326 int jj,i;
327 double ts, ta;
328 struct timespec t1, t2;
329
330 complex double *S2 = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
331 complex double *T2 = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
332
333 for (i=0;i<NLM;i++) {
334 Slm[i] = Slm0[i]; Tlm[i] = Tlm0[i];
335 }
336 clock_gettime(CLOCK_MONOTONIC, &t1);
337 for (jj=0; jj< SHT_ITER; jj++) {
338 SHsphtor_to_spat_l(shtns, Slm,Tlm,Sh,Th,ltr);
339 }
340 clock_gettime(CLOCK_MONOTONIC, &t2);
341 ts = tdiff(&t1, &t2);
342
343 clock_gettime(CLOCK_MONOTONIC, &t1);
344 spat_to_SHsphtor_l(shtns, Sh,Th,Slm,Tlm, ltr);
345 for (jj=1; jj< SHT_ITER; jj++) {
346 spat_to_SHsphtor_l(shtns, Sh,Th,S2,T2, ltr);
347 }
348 clock_gettime(CLOCK_MONOTONIC, &t2);
349 ta = tdiff(&t1, &t2);
350 printf(" vector SHT time trucated at l=%d : \t synthesis %f ms \t analysis %f ms\n", ltr, ts, ta);
351
352 fftw_free(T2); fftw_free(S2);
353 vect_error(Slm, Tlm, Slm0, Tlm0, ltr);
354 return;
355}
356
357void test_SHT_vect()
358{
359 int jj,i;
360 double ts, ta;
361 struct timespec t1, t2;
362
363 complex double *S2 = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
364 complex double *T2 = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
365
366 for (i=0;i<NLM;i++) {
367 Slm[i] = Slm0[i]; Tlm[i] = Tlm0[i];
368 }
369 clock_gettime(CLOCK_MONOTONIC, &t1);
370 for (jj=0; jj< SHT_ITER; jj++) {
371 SHsphtor_to_spat(shtns, Slm,Tlm,Sh,Th);
372 }
373 clock_gettime(CLOCK_MONOTONIC, &t2);
374 ts = tdiff(&t1, &t2);
375
376 clock_gettime(CLOCK_MONOTONIC, &t1);
377 spat_to_SHsphtor(shtns, Sh,Th,Slm,Tlm);
378 for (jj=1; jj< SHT_ITER; jj++) {
379 spat_to_SHsphtor(shtns, Sh,Th,S2,T2);
380 }
381 clock_gettime(CLOCK_MONOTONIC, &t2);
382 ta = tdiff(&t1, &t2);
383 printf(" vector SHT time (lmax=%d) : \t synthesis %f ms \t analysis %f ms\n", LMAX, ts, ta);
384
385 fftw_free(T2); fftw_free(S2);
386 vect_error(Slm, Tlm, Slm0, Tlm0, LMAX);
387 return;
388}
389
390void test_SHT_vect3d_l(int ltr)
391{
392 int jj,i;
393 double ts, ta;
394 struct timespec t1, t2;
395
396 complex double *Q2 = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
397 complex double *S2 = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
398 complex double *T2 = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
399
400 for (i=0;i<NLM;i++) {
401 Slm[i] = Slm0[i]; Tlm[i] = Tlm0[i]; Qlm[i] = Tlm0[i];
402 }
403
404 clock_gettime(CLOCK_MONOTONIC, &t1);
405 for (jj=0; jj< SHT_ITER; jj++) {
406 SHqst_to_spat_l(shtns, Qlm,Slm,Tlm,NL,Sh,Th, ltr);
407 }
408 clock_gettime(CLOCK_MONOTONIC, &t2);
409 ts = tdiff(&t1, &t2);
410
411 clock_gettime(CLOCK_MONOTONIC, &t1);
412 spat_to_SHqst_l(shtns, NL,Sh,Th,Qlm,Slm,Tlm, ltr);
413 for (jj=1; jj< SHT_ITER; jj++) {
414 spat_to_SHqst_l(shtns, NL,Sh,Th,Q2,S2,T2, ltr);
415 }
416 clock_gettime(CLOCK_MONOTONIC, &t2);
417 ta = tdiff(&t1, &t2);
418 printf(" 3D vector SHT time : \t synthesis %f ms \t analysis %f ms\n", ts, ta);
419
420 fftw_free(T2); fftw_free(S2); fftw_free(Q2);
421 vect_error(Slm, Tlm, Slm0, Tlm0, ltr);
422 scal_error(Qlm, Tlm0, ltr);
423 return;
424}
425
426void test_SHT_vect3d()
427{
428 int jj,i;
429 double ts, ta;
430 struct timespec t1, t2;
431
432 complex double *Q2 = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
433 complex double *S2 = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
434 complex double *T2 = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
435
436 for (i=0;i<NLM;i++) {
437 Slm[i] = Slm0[i]; Tlm[i] = Tlm0[i]; Qlm[i] = Tlm0[i];
438 }
439
440 clock_gettime(CLOCK_MONOTONIC, &t1);
441 for (jj=0; jj< SHT_ITER; jj++) {
442 SHqst_to_spat(shtns, Qlm,Slm,Tlm,NL,Sh,Th);
443 }
444 clock_gettime(CLOCK_MONOTONIC, &t2);
445 ts = tdiff(&t1, &t2);
446
447 clock_gettime(CLOCK_MONOTONIC, &t1);
448 spat_to_SHqst(shtns, NL,Sh,Th,Qlm,Slm,Tlm);
449 for (jj=1; jj< SHT_ITER; jj++) {
450 spat_to_SHqst(shtns, NL,Sh,Th,Q2,S2,T2);
451 }
452 clock_gettime(CLOCK_MONOTONIC, &t2);
453 ta = tdiff(&t1, &t2);
454 printf(" 3D vector SHT time (lmax=%d): \t synthesis %f ms \t analysis %f ms\n", LMAX, ts, ta);
455
456 fftw_free(T2); fftw_free(S2); fftw_free(Q2);
457 vect_error(Slm, Tlm, Slm0, Tlm0, LMAX);
458 scal_error(Qlm, Tlm0, LMAX);
459 return;
460}
461
462/*
463fftw_plan ifft_in, ifft_out;
464fftw_plan fft_in, fft_out;
465fftw_plan fft_tr, ifft_tr;
466
467
468// we want to test if in-place is faster than out-of place or not.
469init_fft_tests()
470{
471 complex double *ShF, *Shout;
472 double *Sh;
473 int nfft, ncplx, nreal;
474 unsigned fftw_plan_mode = FFTW_EXHAUSTIVE; // defines the default FFTW planner mode.
475
476 nfft = NPHI;
477 ncplx = NPHI/2 +1;
478 nreal = 2*ncplx;
479
480// Allocate dummy Spatial Fields.
481 ShF = (complex double *) fftw_malloc(ncplx * NLAT * sizeof(complex double));
482 Sh = (double *) ShF;
483
484// IFFT : unnormalized
485 ifft_in = fftw_plan_many_dft_c2r(1, &nfft, NLAT, ShF, &ncplx, NLAT, 1, Sh, &nreal, NLAT, 1, fftw_plan_mode);
486 if (ifft_in == NULL) printf("ifft_in failed\n");
487// FFT : must be normalized.
488 fft_in = fftw_plan_many_dft_r2c(1, &nfft, NLAT, Sh, &nreal, NLAT, 1, ShF, &ncplx, NLAT, 1, fftw_plan_mode);
489 if (fft_in == NULL) printf("fft_in failed\n");
490printf("in-place done\n");
491 printf("** ifft in-place :\n"); fftw_print_plan(ifft_in);
492 printf("\n** fft in-place :\n"); fftw_print_plan(fft_in);
493
494 Shout = (complex double *) fftw_malloc(ncplx * NLAT * sizeof(complex double));
495 ifft_out = fftw_plan_many_dft_c2r(1, &nfft, NLAT, Shout, &ncplx, NLAT, 1, Sh, &nfft, NLAT, 1, fftw_plan_mode);
496 if (ifft_out == NULL) printf("ifft_out failed\n");
497 fft_out = fftw_plan_many_dft_r2c(1, &nfft, NLAT, Sh, &nfft, NLAT, 1, Shout, &ncplx, NLAT, 1, fftw_plan_mode);
498 if (fft_out == NULL) printf("fft_out failed\n");
499printf("\nout-of-place done\n");
500 printf("** ifft out-of-place :\n"); fftw_print_plan(ifft_out);
501 printf("\n** fft out-of-place :\n"); fftw_print_plan(fft_out);
502
503 ifft_tr = fftw_plan_many_dft_c2r(1, &nfft, NLAT, Shout, &ncplx, NLAT, 1, Sh, &nfft, 1, NPHI, fftw_plan_mode);
504 if (ifft_out == NULL) printf("ifft_out failed\n");
505 fft_tr = fftw_plan_many_dft_r2c(1, &nfft, NLAT, Sh, &nfft, 1, NPHI, Shout, &ncplx, NLAT, 1, fftw_plan_mode);
506 if (fft_out == NULL) printf("fft_out failed\n");
507printf("\ntranspose done\n");
508 printf("** ifft + transpose :\n"); fftw_print_plan(ifft_tr);
509 printf("\n** fft + transpose :\n"); fftw_print_plan(fft_tr);
510
511 fftw_free(Shout); fftw_free(ShF);
512}
513
514do_fft_tests()
515{
516 complex double *Sho;
517 int jj;
518 clock_t tcpu;
519
520 tcpu = clock();
521 for (jj=0; jj< SHT_ITER; jj++) {
522 fftw_execute_dft_c2r(ifft_in, ShF, (double *) ShF);
523 }
524 tcpu = clock() - tcpu;
525 printf(" ifft in-place : %d\n", (int) tcpu);
526
527 tcpu = clock();
528 for (jj=0; jj< SHT_ITER; jj++) {
529 fftw_execute_dft_r2c(fft_in, (double *) ShF, ShF);
530 }
531 tcpu = clock() - tcpu;
532 printf(" fft in-place : %d\n", (int) tcpu);
533
534 tcpu = clock();
535 for (jj=0; jj< SHT_ITER; jj++) {
536 Sho = (complex double *) fftw_malloc( (NPHI/2+1) * NLAT * sizeof(complex double));
537 fftw_execute_dft_c2r(ifft_out, Sho, (double *) ShF);
538 fftw_free(Sho);
539 }
540 tcpu = clock() - tcpu;
541 printf(" ifft out-of-place (+malloc) : %d\n", (int) tcpu);
542
543 tcpu = clock();
544 for (jj=0; jj< SHT_ITER; jj++) {
545 Sho = (complex double *) fftw_malloc( (NPHI/2+1) * NLAT * sizeof(complex double));
546 fftw_execute_dft_r2c(fft_out, (double *) ShF, Sho);
547 fftw_free(Sho);
548 }
549 tcpu = clock() - tcpu;
550 printf(" fft out-of-place (+malloc) : %d\n", (int) tcpu);
551
552 tcpu = clock();
553 for (jj=0; jj< SHT_ITER; jj++) {
554 Sho = (complex double *) fftw_malloc( (NPHI/2+1) * NLAT * sizeof(complex double));
555 fftw_execute_dft_c2r(ifft_tr, Sho, (double *) ShF);
556 fftw_free(Sho);
557 }
558 tcpu = clock() - tcpu;
559 printf(" ifft transpose (+malloc) : %d\n", (int) tcpu);
560
561 tcpu = clock();
562 for (jj=0; jj< SHT_ITER; jj++) {
563 Sho = (complex double *) fftw_malloc( (NPHI/2+1) * NLAT * sizeof(complex double));
564 fftw_execute_dft_r2c(fft_tr, (double *) ShF, Sho);
565 fftw_free(Sho);
566 }
567 tcpu = clock() - tcpu;
568 printf(" fft transpose (+malloc) : %d\n", (int) tcpu);
569
570}
571*/
572
573void usage()
574{
575 printf("\nUsage: time_SHT lmax [options] \n");
576 printf(" where lmax is the maxiumum spherical harmonic degree.\n");
577 printf("** available options :\n");
578 printf(" -mmax=<mmax> : defines the maximum spherical harmonic order <mmax>\n");
579 printf(" -nphi=<nphi> : defines the number of azimutal (longitude) point\n");
580 printf(" -nlat=<nlat> : defines the number of grid points in theta (latitude)\n");
581 printf(" -mres=<mres> : the azimutal periodicity (1 for no symmetry; 2 for two-fold symmetry, ...)\n");
582 printf(" -polaropt=<thr> : set the threshold for polar optimization. 0 for no polar optimization, 1.e-6 for agressive.\n");
583 printf(" -iter=<n> : set the number of back-and-forth transforms to compute timings and errors.\n");
584 printf(" -gauss : force gauss grid\n");
585 printf(" -fly : force gauss grid with on-the-fly computations only\n");
586 printf(" -quickinit : force gauss grid and fast initialiation time (but suboptimal fourier transforms)\n");
587 printf(" -vector : time and test also vector transforms (2D and 3D)\n");
588 printf(" -reg : force regular grid\n");
589 printf(" -oop : force out-of-place transform\n");
590 printf(" -transpose : force transpose data (ie phi varies fastest)\n");
591 printf(" -nlorder : define non-linear order to be resolved.\n");
592 printf(" -schmidt : use schmidt semi-normalization.\n");
593 printf(" -4pi : use 4pi normalization.\n");
594 #ifdef _OPENMP
595 printf(" -nth=<n> : use n threads.\n");
596 #endif
597}
598
599int main(int argc, char *argv[])
600{
601 complex double t1, t2;
602 double t,tmax,n2;
603 int nthreads = 0;
604 int i,im,m,l;
605 clock_t tcpu;
606 ticks tik0, tik1;
607 double e0,e1;
608 double polaropt = 1.e-8; // default for polar optimization.
609 enum shtns_type shtmode = sht_auto; // default to "auto" (fastest) mode.
610 enum shtns_norm shtnorm = sht_orthonormal; // default to "orthonormal" SH.
611 int layout = SHT_NATIVE_LAYOUT;
612 int nlorder = 0;
613 int point = 0;
614 int vector = 0;
615 int loadsave = 0;
616 char name[20];
617 FILE* fw;
618
619 srand( time(NULL) ); // initialise les nombres.
620 shtns_verbose(2); // output some diagnostics.
621
622 printf("time_SHT performs some spherical harmonic transforms, and displays timings and accuracy.\n");
623 if (argc < 2) {
624 usage(); exit(1);
625 }
626
627// first argument is lmax, and is mandatory.
628 sscanf(argv[1],"%lf",&t); LMAX=t;
629 MMAX=-1; MRES=1;
630
631 for (i=2; i<argc; i++) { // parse command line
632 sscanf(argv[i],"-%[^=]=%lf",name,&t);
633 if (strcmp(name,"mmax") == 0) MMAX = t;
634 if (strcmp(name,"mres") == 0) MRES = t;
635 if (strcmp(name,"nlat") == 0) NLAT = t;
636 if (strcmp(name,"nphi") == 0) NPHI = t;
637 if (strcmp(name,"polaropt") == 0) polaropt = t;
638 if (strcmp(name,"iter") == 0) SHT_ITER = t;
639 if (strcmp(name,"nth") == 0) nthreads = t;
640 if (strcmp(name,"gauss") == 0) shtmode = sht_gauss; // force gauss grid.
641 if (strcmp(name,"fly") == 0) shtmode = sht_gauss_fly; // force gauss grid with on-the-fly computation.
642 if (strcmp(name,"reg") == 0) shtmode = sht_reg_fast; // force regular grid.
643 if (strcmp(name,"quickinit") == 0) shtmode = sht_quick_init; // Gauss grid and fast initialization time, but suboptimal fourier transforms.
644 if (strcmp(name,"schmidt") == 0) shtnorm = sht_schmidt | SHT_NO_CS_PHASE;
645 if (strcmp(name,"4pi") == 0) shtnorm = sht_fourpi | SHT_REAL_NORM;
646 if (strcmp(name,"oop") == 0) layout = SHT_THETA_CONTIGUOUS;
647 if (strcmp(name,"transpose") == 0) layout = SHT_PHI_CONTIGUOUS;
648 if (strcmp(name,"nlorder") == 0) nlorder = t;
649 if (strcmp(name,"vector") == 0) vector = 1;
650 if (strcmp(name,"point") == 0) point = 1;
651 if (strcmp(name,"loadsave") == 0) loadsave = 1;
652 }
653
654 if (vector == 0) layout |= SHT_SCALAR_ONLY;
655 if (loadsave) layout |= SHT_LOAD_SAVE_CFG;
656 if (MMAX == -1) MMAX=LMAX/MRES;
657 shtns_use_threads(nthreads); // 0 : means automatically chooses the number of threads.
658 shtns = shtns_create(LMAX, MMAX, MRES, shtnorm);
659 NLM = shtns->nlm;
660 shtns_set_grid_auto(shtns, shtmode | layout, polaropt, nlorder, &NLAT, &NPHI);
661
662 shtns_print_cfg(shtns);
663
664/*
665 t1 = 1.0+2.0*I;
666 t2 = 1.0-I;
667 printf("test : %f, %f, %f, %f\n",creal(t1),cimag(t1), creal(t2),cimag(t2));
668
669 (double) t1 = 8.0 +I;
670 (double) t2 = 8.1;
671 printf("test : %f, %f, %f, %f\n",creal(t1),cimag(t1), creal(t2),cimag(t2));
672*/
673// write_vect("cost",ct,NLAT);
674// write_vect("sint",st,NLAT);
675
676 ShF = (complex double *) fftw_malloc( 2*(NPHI/2+1) * NLAT * sizeof(double));
677 Sh = (double *) ShF;
678 if (ShF == NULL) runerr("memory allocation 1 failed");
679 if (vector) {
680 ThF = (complex double *) fftw_malloc( 2*(NPHI/2+1) * NLAT * sizeof(double));
681 Th = (double *) ThF;
682 NLF = (complex double *) fftw_malloc( 2*(NPHI/2+1) * NLAT * sizeof(double));
683 NL = (double *) NLF;
684 if ((ThF == NULL)||(NLF == NULL)) runerr("memory allocation 2 failed");
685 }
686
687 Slm0 = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
688 Slm = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
689 Tlm = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
690 if ((Slm0 == NULL)||(Slm == NULL)||(Tlm == NULL)) runerr("memory allocation 3 failed");
691 if (vector) {
692 Tlm0 = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
693 Qlm = (complex double *) fftw_malloc(sizeof(complex double)* NLM);
694 if ((Tlm0 == NULL)||(Qlm == NULL)) runerr("memory allocation 4 failed");
695 }
696
697// perform fft tests.
698// init_fft_tests();
699// do_fft_tests();
700// exit(0);
701
702 if (NLM < 10000) {
703// SH_to_spat
704 for (i=0;i<NLM;i++) {
705 Slm[i] = 0.0;
706 if (vector) Tlm[i] = 0.0;
707 }
708 for (im=0;im<NPHI;im++) {
709 for (i=0;i<NLAT;i++) {
710 Sh[im*NLAT+i] = 0.0;
711 }
712 }
713 Slm[LiM(shtns, 1,0)] = sh10_ct(shtns);
714 if ((MMAX > 0)&&(MRES==1))
715 Slm[LiM(shtns, 1,1)] = sh11_st(shtns);
716// write_vect("ylm0",Slm, NLM*2);
717// SH_to_spat_ml(shtns, 0,Slm, Sh, LMAX);
718// spat_to_SH_ml(shtns, 0,Sh, Slm, LMAX);
719 SH_to_spat(shtns, Slm,Sh);
720 write_mx("spat",Sh,NPHI,NLAT);
721 if (vector) {
722 SHtor_to_spat(shtns, Slm,Sh,Th);
723 write_mx("spatt",Sh,NPHI,NLAT);
724 write_mx("spatp",Th,NPHI,NLAT);
725 SHtor_to_spat_l(shtns, Slm,Sh,Th,LMAX/2);
726 write_mx("spatt_l",Sh,NPHI,NLAT);
727 write_mx("spatp_l",Th,NPHI,NLAT);
728 }
729
730// SHqst_to_lat(Slm,Slm,Tlm,ct[0],Sh,Th,Th,NPHI/2,LMAX,MMAX);
731// write_vect("spat_lat", Sh, NPHI/2);
732
733/* for (i=0;i<(NLAT/2)*NPHI;i++) {
734 Sh[i] = 0.0;
735 }
736 SHeo_to_spat(Slm, Sh, 0);
737 write_mx("spate",Sh,NPHI,NLAT/2);
738 for (i=0;i<(NLAT/2)*NPHI;i++) {
739 Th[i] = 0.0;
740 }
741 SHeo_to_spat(Slm, Th, 1);
742 write_mx("spato",Th,NPHI,NLAT/2);
743
744 for (i=0;i<NLM;i++)
745 Slm[i] = 0.0;
746 spat_to_SHeo(Sh, Slm, 0);
747 write_vect("ylme",Slm, NLM*2);
748 for (i=0;i<NLM;i++)
749 Tlm[i] = 0.0;
750 spat_to_SHeo(Th, Slm, 1);
751 write_vect("ylmeo",Slm, NLM*2);
752*/
753// spat_to_SH
754 for (im=0;im<NPHI;im++) {
755 for (i=0;i<NLAT;i++) {
756 Sh[im*NLAT+i] = shtns->ct[i];
757 }
758 }
759 spat_to_SH(shtns, Sh,Slm);
760 write_vect("ylm",(double *)Slm,NLM*2);
761 }
762
763// test case...
764 printf("generating random test case...\n");
765 t = 1.0 / (RAND_MAX/2);
766 for (i=0;i<NLM;i++) {
767 Slm0[i] = t*((double) (rand() - RAND_MAX/2)) + I*t*((double) (rand() - RAND_MAX/2));
768 if (vector) Tlm0[i] = t*((double) (rand() - RAND_MAX/2)) + I*t*((double) (rand() - RAND_MAX/2));
769 }
770
771 if (point) {
772 test_SH_point();
773 exit(0);
774 }
775
776
777// printf("** performing %d scalar SHT with NL evaluation\n", SHT_ITER);
778 printf("** performing %d scalar SHT\n", SHT_ITER);
779 printf(":: STD\n");
780 test_SHT();
781 printf(":: LTR\n");
782 test_SHT_l(LMAX/2);
783
784 if (vector) {
785 Slm0[LM(shtns, 0,0)] = 0.0; // l=0, m=0 n'a pas de signification sph/tor
786 Tlm0[LM(shtns, 0,0)] = 0.0; // l=0, m=0 n'a pas de signification sph/tor
787 // for (i=0;i<NLM;i++) Slm0[i] = 0.0; // zero out Slm.
788
789 printf("** performing %d vector SHT\n", SHT_ITER);
790 printf(":: STD\n");
791 test_SHT_vect();
792 printf(":: LTR\n");
793 test_SHT_vect_l(LMAX/2);
794
795 printf("** performing %d 3D vector SHT\n", SHT_ITER);
796 printf(":: STD\n");
797 test_SHT_vect3d();
798 printf(":: LTR\n");
799 test_SHT_vect3d_l(LMAX/2);
800
801 if (NPHI == 1) { // test the special m=0 transforms
802 printf("** performing %d m=0 gradient SHT\n", SHT_ITER);
803 printf(":: STD\n");
804 test_SHT_m0();
805 }
806 }
807
808
809 shtns_create(LMAX, MMAX, MRES, shtnorm); // test memory allocation and management.
810// shtns_create_with_grid(shtns, MMAX/2, 1);
811
812// free memory and resources (to track memory leaks)
813 fftw_free(Slm); fftw_free(ShF);
814 if (vector) {
815 fftw_free(Qlm); fftw_free(Tlm);
816 fftw_free(Slm0); fftw_free(Tlm0);
817 fftw_free(NLF); fftw_free(ThF);
818 }
819
820 shtns_reset();
821 fftw_cleanup();
822}
823
Note: See TracBrowser for help on using the repository browser.