/* * Copyright (c) 2010-2015 Centre National de la Recherche Scientifique. * written by Nathanael Schaeffer (CNRS, ISTerre, Grenoble, France). * * nathanael.schaeffer@ujf-grenoble.fr * * This software is governed by the CeCILL license under French law and * abiding by the rules of distribution of free software. You can use, * modify and/or redistribute the software under the terms of the CeCILL * license as circulated by CEA, CNRS and INRIA at the following URL * "http://www.cecill.info". * * The fact that you are presently reading this means that you have had * knowledge of the CeCILL license and that you accept its terms. * */ # This file is meta-code for SHT.c (spherical harmonic transform). # it is intended for "make" to generate C code for similar SHT functions, # from one generic function + tags. # > See Makefile and SHT.c # Basically, there are tags at the beginning of lines that are information # to keep or remove the line depending on the function to build. # tags : # Q : line for scalar transform # V : line for vector transform (both spheroidal and toroidal) # S : line for vector transfrom, spheroidal component # T : line for vector transform, toroidal component. 3 static void GEN3(SHqst_to_spat_fly,NWAY,SUFFIX)(shtns_cfg shtns, cplx *Qlm, cplx *Slm, cplx *Tlm, double *Vr, double *Vt, double *Vp, long int llim) { QX static void GEN3(SH_to_spat_fly,NWAY,SUFFIX)(shtns_cfg shtns, cplx *Qlm, double *Vr, long int llim) { #ifndef SHT_GRAD VX static void GEN3(SHsphtor_to_spat_fly,NWAY,SUFFIX)(shtns_cfg shtns, cplx *Slm, cplx *Tlm, double *Vt, double *Vp, long int llim) { #else S static void GEN3(SHsph_to_spat_fly,NWAY,SUFFIX)(shtns_cfg shtns, cplx *Slm, double *Vt, double *Vp, long int llim) { T static void GEN3(SHtor_to_spat_fly,NWAY,SUFFIX)(shtns_cfg shtns, cplx *Tlm, double *Vt, double *Vp, long int llim) { #endif Q v2d *BrF; #ifndef SHT_AXISYM V v2d *BtF, *BpF; Q #define BR0(i) ((double *)BrF)[2*(i)] V #define BT0(i) ((double *)BtF)[2*(i)] V #define BP0(i) ((double *)BpF)[2*(i)] Q #define qr(l) vall(creal(Ql[l])) Q #define qi(l) vall(cimag(Ql[l])) S #define sr(l) vall(creal(Sl[l])) S #define si(l) vall(cimag(Sl[l])) T #define tr(l) vall(creal(Tl[l])) T #define ti(l) vall(cimag(Tl[l])) V double m_1; unsigned im, imlim; #else S v2d *BtF; T v2d *BpF; Q #define BR0(i) ((double *)BrF)[i] S #define BT0(i) ((double *)BtF)[i] T #define BP0(i) ((double *)BpF)[i] #endif long int nk, k,l,m; double *alm, *al; s2d *ct, *st; Q double Ql0[llim+1]; S double Sl0[llim]; T double Tl0[llim]; #ifndef SHT_AXISYM Q BrF = (v2d*) Vr; V BtF = (v2d*) Vt; BpF = (v2d*) Vp; #ifdef _GCC_VEC_ if (shtns->fftc_mode > 0) { // alloc memory for the FFT unsigned long nv = shtns->nspat; QX BrF = (v2d*) VMALLOC( nv * sizeof(double) ); VX BtF = (v2d*) VMALLOC( 2*nv * sizeof(double) ); VX BpF = BtF + nv/2; 3 BrF = (v2d*) VMALLOC( 3*nv * sizeof(double) ); 3 BtF = BrF + nv/2; BpF = BrF + nv; } #ifdef SHT_GRAD S k=0; do { BpF[k]=vdup(0.0); } while(++kncplx_fft > 0) { // alloc memory for the FFT QX BrF = VMALLOC( shtns->ncplx_fft * sizeof(cplx) ); VX BtF = VMALLOC( 2* shtns->ncplx_fft * sizeof(cplx) ); VX BpF = BtF + shtns->ncplx_fft; 3 BrF = VMALLOC( 3* shtns->ncplx_fft * sizeof(cplx) ); 3 BtF = BrF + shtns->ncplx_fft; BpF = BtF + shtns->ncplx_fft; } #ifdef SHT_GRAD S k=0; do { BpF[k]=vdup(0.0); } while(++k (unsigned) llim) imlim = ((unsigned) llim)/MRES; // 32bit mul and div should be faster #endif #else #ifdef SHT_GRAD S if (Vp != NULL) { k=0; do { ((v2d*)Vp)[k]=vdup(0.0); } while(++kct; st = (s2d*) shtns->st; // im=0; l=1; alm = shtns->alm; Q Ql0[0] = (double) Qlm[0]; // l=0 do { // for m=0, compress the complex Q,S,T to double Q Ql0[l] = (double) Qlm[l]; // Ql[l+1] = (double) Qlm[l+1]; S Sl0[l-1] = (double) Slm[l]; // Sl[l] = (double) Slm[l+1]; T Tl0[l-1] = (double) Tlm[l]; // Tl[l] = (double) Tlm[l+1]; ++l; } while(l<=llim); k=0; nk = NLAT_2; #if _GCC_VEC_ nk = ((unsigned)(nk+VSIZE2-1)) / VSIZE2; #endif do { l=0; al = alm; rnd cost[NWAY], y0[NWAY], y1[NWAY]; V rnd sint[NWAY], dy0[NWAY], dy1[NWAY]; Q rnd re[NWAY], ro[NWAY]; S rnd te[NWAY], to[NWAY]; T rnd pe[NWAY], po[NWAY]; for (int j=0; j>1; V m_1 = 1.0/m; //alm = shtns->alm[im]; //alm = shtns->alm[0] + im*(2*LMAX - (im-1)*MRES); // for m > 0 alm += 2*(LMAX-m+MRES); Q cplx* Ql = &Qlm[l]; // virtual pointer for l=0 and im S cplx* Sl = &Slm[l]; // virtual pointer for l=0 and im T cplx* Tl = &Tlm[l]; k=0; l=shtns->tm[im]; #if _GCC_VEC_ l>>=1; // stay on a 16 byte boundary while (k>= 1); } else { long int nsint = 0; do { // sin(theta)^m (use rescaling to avoid underflow) if (l&1) { for (int j=0; j>= 1); } for (int j=0; j SHT_ACCURACY*SHT_SCALE_FACTOR + 1.0) { // rescale when value is significant ++ny; for (int j=0; j>1) -imlim); ++k) { // padding for high m's Q BrF[k] = 0.0; V BtF[k] = 0.0; BpF[k] = 0.0; } Q BrF -= NLAT*(imlim+1); // restore original pointer V BtF -= NLAT*(imlim+1); BpF -= NLAT*(imlim+1); // restore original pointer #endif // NPHI > 1 as SHT_AXISYM is not defined. #if _GCC_VEC_ if (shtns->fftc_mode >= 0) { if (shtns->fftc_mode == 0) { Q fftw_execute_dft(shtns->ifftc, (cplx *) BrF, (cplx *) Vr); V fftw_execute_dft(shtns->ifftc, (cplx *) BtF, (cplx *) Vt); V fftw_execute_dft(shtns->ifftc, (cplx *) BpF, (cplx *) Vp); } else { // split dft Q fftw_execute_split_dft(shtns->ifftc,((double*)BrF)+1, ((double*)BrF), Vr+NPHI, Vr); V fftw_execute_split_dft(shtns->ifftc,((double*)BtF)+1, ((double*)BtF), Vt+NPHI, Vt); V fftw_execute_split_dft(shtns->ifftc,((double*)BpF)+1, ((double*)BpF), Vp+NPHI, Vp); Q VFREE(BrF); VX VFREE(BtF); // this frees also BpF. } } #else if (shtns->ncplx_fft >= 0) { Q fftw_execute_dft_c2r(shtns->ifft, (cplx *) BrF, Vr); V fftw_execute_dft_c2r(shtns->ifft, (cplx *) BtF, Vt); V fftw_execute_dft_c2r(shtns->ifft, (cplx *) BpF, Vp); if (shtns->ncplx_fft > 0) { // free memory Q VFREE(BrF); VX VFREE(BtF); // this frees also BpF. } } #endif #endif Q #undef BR0 V #undef BT0 V #undef BP0 Q #undef qr Q #undef qi S #undef sr S #undef si T #undef tr T #undef ti } #ifndef SHT_AXISYM 3 static void GEN3(SHqst_m_to_spat_fly,NWAY,SUFFIX)(shtns_cfg shtns, int im, cplx *Qlm, cplx *Slm, cplx *Tlm, cplx *Vr, cplx *Vt, cplx *Vp, long int llim) { QX static void GEN3(SH_m_to_spat_fly,NWAY,SUFFIX)(shtns_cfg shtns, int im, cplx *Qlm, cplx *Vr, long int llim) { #ifndef SHT_GRAD VX static void GEN3(SHsphtor_m_to_spat_fly,NWAY,SUFFIX)(shtns_cfg shtns, int im, cplx *Slm, cplx *Tlm, cplx *Vt, cplx *Vp, long int llim) { #else S static void GEN3(SHsph_m_to_spat_fly,NWAY,SUFFIX)(shtns_cfg shtns, int im, cplx *Slm, cplx *Vt, cplx *Vp, long int llim) { T static void GEN3(SHtor_m_to_spat_fly,NWAY,SUFFIX)(shtns_cfg shtns, int im, cplx *Tlm, cplx *Vt, cplx *Vp, long int llim) { #endif Q v2d *BrF; V v2d *BtF, *BpF; Q #define qr(l) vall(creal(Qlm[l])) Q #define qi(l) vall(cimag(Qlm[l])) S #define sr(l) vall(creal(Slm[l])) S #define si(l) vall(cimag(Slm[l])) T #define tr(l) vall(creal(Tlm[l])) T #define ti(l) vall(cimag(Tlm[l])) V double m_1; long int nk, k,l,m; double *alm, *al; s2d *ct, *st; Q BrF = (v2d*) Vr; V BtF = (v2d*) Vt; BpF = (v2d*) Vp; nk = NLAT_2; #if _GCC_VEC_ nk = ((unsigned)(nk+VSIZE2-1)) / VSIZE2; #endif ct = (s2d*) shtns->ct; st = (s2d*) shtns->st; if (im == 0) { Q double Ql0[llim+1]; S double Sl0[llim]; T double Tl0[llim]; #ifdef SHT_GRAD S k=0; do { BpF[k]=vdup(0.0); } while(++kalm; Q Ql0[0] = (double) Qlm[0]; // l=0 do { // for m=0, compress the complex Q,S,T to double Q Ql0[l] = (double) Qlm[l]; // Ql[l+1] = (double) Qlm[l+1]; S Sl0[l-1] = (double) Slm[l]; // Sl[l] = (double) Slm[l+1]; T Tl0[l-1] = (double) Tlm[l]; // Tl[l] = (double) Tlm[l+1]; ++l; } while(l<=llim); k=0; do { l=0; al = alm; rnd cost[NWAY], y0[NWAY], y1[NWAY]; V rnd sint[NWAY], dy0[NWAY], dy1[NWAY]; Q rnd re[NWAY], ro[NWAY]; S rnd te[NWAY], to[NWAY]; T rnd pe[NWAY], po[NWAY]; for (int j=0; j 0 m = im*MRES; V m_1 = 1.0/m; alm = shtns->alm + im*(2*LMAX -m+MRES); Q Qlm -= m; // virtual pointer for l=0 S Slm -= m; // virtual pointer for l=0 T Tlm -= m; k=0; l=shtns->tm[im]; while (k < l) { // polar optimization Q BrF[k] = vdup(0.0); BrF[NLAT-l+k] = vdup(0.0); V BtF[k] = vdup(0.0); BtF[NLAT-l+k] = vdup(0.0); V BpF[k] = vdup(0.0); BpF[NLAT-l+k] = vdup(0.0); ++k; } k = ((unsigned) l) / VSIZE2; do { al = alm; rnd cost[NWAY], y0[NWAY], y1[NWAY]; V rnd st2[NWAY], dy0[NWAY], dy1[NWAY]; Q rnd rer[NWAY], rei[NWAY], ror[NWAY], roi[NWAY]; V rnd ter[NWAY], tei[NWAY], tor[NWAY], toi[NWAY]; V rnd per[NWAY], pei[NWAY], por[NWAY], poi[NWAY]; for (int j=0; j>= 1); } else { long int nsint = 0; do { // sin(theta)^m (use rescaling to avoid underflow) if (l&1) { for (int j=0; j>= 1); } for (int j=0; j SHT_ACCURACY*SHT_SCALE_FACTOR + 1.0) { // rescale when value is significant ++ny; for (int j=0; j