source: CIVL/examples/omp/shtns/sht_func.c

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 23.2 KB
Line 
1/*
2 * Copyright (c) 2010-2015 Centre National de la Recherche Scientifique.
3 * written by Nathanael Schaeffer (CNRS, ISTerre, Grenoble, France).
4 *
5 * nathanael.schaeffer@ujf-grenoble.fr
6 *
7 * This software is governed by the CeCILL license under French law and
8 * abiding by the rules of distribution of free software. You can use,
9 * modify and/or redistribute the software under the terms of the CeCILL
10 * license as circulated by CEA, CNRS and INRIA at the following URL
11 * "http://www.cecill.info".
12 *
13 * The fact that you are presently reading this means that you have had
14 * knowledge of the CeCILL license and that you accept its terms.
15 *
16 */
17
18/** \internal \file sht_rot.c
19 * \brief Rotation of Spherical Harmonics.
20 */
21
22
23/** \addtogroup rotation Rotation of SH fields.
24Rotation around axis other than Z should be considered of beta quality (they have been tested but may still contain bugs).
25They also require \c mmax = \c lmax. They use an Algorithm inspired by the pseudospectral rotation described in
26Gimbutas Z. and Greengard L. 2009 "A fast and stable method for rotating spherical harmonic expansions" <i>Journal of Computational Physics</i>.
27doi:<a href="http://dx.doi.org/10.1016/j.jcp.2009.05.014">10.1016/j.jcp.2009.05.014</a>
28
29These functions do only require a call to \ref shtns_create, but not to \ref shtns_set_grid.
30*/
31//@{
32
33/// Rotate a SH representation Qlm around the z-axis by angle alpha (in radians),
34/// which is the same as rotating the reference frame by angle -alpha.
35/// Result is stored in Rlm (which can be the same array as Qlm).
36void SH_Zrotate(shtns_cfg shtns, cplx *Qlm, double alpha, cplx *Rlm)
37{
38 int im, l, lmax, mmax, mres;
39
40 lmax = shtns->lmax; mmax = shtns->mmax; mres = shtns->mres;
41
42 if (Rlm != Qlm) { // copy m=0 which does not change.
43 l=0; do { Rlm[l] = Qlm[l]; } while(++l <= lmax);
44 }
45 if (mmax > 0) {
46 im=1; do {
47 cplx eima = cos(im*mres*alpha) - I*sin(im*mres*alpha); // rotate reference frame by angle -alpha
48 for (l=im*mres; l<=lmax; ++l) Rlm[LiM(shtns, l, im)] = Qlm[LiM(shtns, l, im)] * eima;
49 } while(++im <= mmax);
50 }
51}
52
53//@}
54
55/** \internal rotation kernel used by SH_Yrotate90(), SH_Xrotate90() and SH_rotate().
56 Algorithm based on the pseudospectral rotation[1] :
57 - rotate around Z by angle dphi0.
58 - synthetize for each l the spatial description for phi=0 and phi=pi on an equispaced latitudinal grid.
59 - Fourier ananlyze as data on the equator to recover the m in the 90 degrees rotated frame.
60 - rotate around new Z by angle dphi1.
61 [1] Gimbutas Z. and Greengard L. 2009 "A fast and stable method for rotating spherical harmonic expansions" Journal of Computational Physics. **/
static void SH_rotK90(shtns_cfg shtns, cplx *Qlm, cplx *Rlm, double dphi0, double dphi1)
{
	fftw_plan fft;
	cplx *q;
	double *q0;
	long int k, m, l;
	int lmax, ntheta;
	int nrembed, ncembed;

	lmax = shtns->lmax;
	ntheta = ((lmax+2)>>1)*2;		// round up to an even number of latitudes.
	m = 2* sizeof(double)*(2*ntheta+2)*lmax;	// m reused here as the buffer size in bytes.
	q0 = VMALLOC(m);	memset(q0, 0, m);		// alloc & zero out.

	// step 1: rotate around Z by dphi0
	if (dphi0 != 0.0) {
		SH_Zrotate(shtns, Qlm, dphi0, Rlm);
		Qlm = Rlm;		// continue working from the Z-rotated coefficients.
	} else {
		Rlm[0] = Qlm[0];	// l=0 is rotation invariant.
	}

	// step 2: synthesize q(theta) on the meridians phi=0 and phi=pi (equispaced in theta).
	// Each thread fills disjoint k-slices of q0, so no synchronization is needed inside the loop.
	#pragma omp parallel private(k,m,l) num_threads(shtns->nthreads)
	{
		double yl[lmax+1];		// per-thread Legendre values for one (m, cost).
		// compute q(l) on the meridian phi=0 and phi=pi. (rotate around X)
		#pragma omp for schedule(static)
		for (k=0; k<ntheta/2; ++k) {
			double cost= cos(((0.5*M_PI)*(2*k+1))/ntheta);	// Gauss-like equispaced latitudes, symmetric about the equator.
			double sint_1 = 1.0/sqrt((1.0-cost)*(1.0+cost));	// 1/sin(theta)
			m=0;
			legendre_sphPlm_array(shtns, lmax, m, cost, yl+m);
			double sgnt = -1.0;		// (-1)^l parity sign, used to fill the mirrored latitudes.
			for (l=1; l<=lmax; ++l) {
				double qr = creal(Qlm[LiM(shtns, l, m)]) * yl[l];
				// write the 4 symmetric points at once (k, ntheta-1-k, ntheta+k, 2*ntheta-1-k):
				q0[k*2*lmax +2*(l-1)] = qr;
				q0[(ntheta-1-k)*2*lmax +2*(l-1)] = sgnt*qr;
				q0[(ntheta+k)*2*lmax +2*(l-1)] = sgnt*qr;
				q0[(2*ntheta-1-k)*2*lmax +2*(l-1)] = qr;
				sgnt *= -1.0;
			}
			#if _GCC_VEC_ && __SSE2__
			// SSE2 path: each v2d packs (q0, dq0); signs flipped via xor with sign masks.
			s2d sgnm = SIGN_MASK_HI;
			s2d sgnflip = SIGN_MASK_2;
			for (m=1; m<=lmax; ++m) {
				legendre_sphPlm_array(shtns, lmax, m, cost, yl+m);
				s2d sgnt = vdup(0.0);
				s2d m_st = vset(2.0, -2*m*sint_1);		// x2 for m>0
				sgnm = _mm_xor_pd(sgnm, sgnflip);		// (-1)^m
				for (l=m; l<=lmax; ++l) {
					v2d qc = ((v2d*)Qlm)[LiM(shtns, l, m)] * vdup(yl[l]) * m_st;	// (q0, dq0)
					((v2d*)q0)[k*lmax +(l-1)] += qc;
					((v2d*)q0)[(ntheta-1-k)*lmax +(l-1)] += (v2d)_mm_xor_pd(sgnt, qc);
					qc = _mm_xor_pd(sgnm, qc);
					((v2d*)q0)[(ntheta+k)*lmax +(l-1)] += (v2d)_mm_xor_pd( sgnt, qc );
					((v2d*)q0)[(2*ntheta-1-k)*lmax +(l-1)] += qc;
					sgnt = _mm_xor_pd(sgnt, sgnflip);		// (-1)^(l+m)
				}
			}
			#else
			// scalar path: accumulate real part (q0) and theta-derivative part (dq0) interleaved.
			double sgnm = 1.0;
			for (m=1; m<=lmax; ++m) {
				legendre_sphPlm_array(shtns, lmax, m, cost, yl+m);
				double sgnt = 1.0;
				sgnm *= -1.0;		// (-1)^m
				for (l=m; l<=lmax; ++l) {
					double qr = creal(Qlm[LiM(shtns, l, m)]) * yl[l];
					double qi = cimag(Qlm[LiM(shtns, l, m)]) * m*yl[l]*sint_1;
					qr += qr;	qi += qi;	// x2 for m>0
					q0[k*2*lmax +2*(l-1)] += qr;		// q0
					q0[k*2*lmax +2*(l-1)+1] -= qi;		// dq0
					q0[(ntheta-1-k)*2*lmax +2*(l-1)] += sgnt*qr;
					q0[(ntheta-1-k)*2*lmax +2*(l-1)+1] -= sgnt*qi;
					q0[(ntheta+k)*2*lmax +2*(l-1)] += (sgnm*sgnt)*qr;
					q0[(ntheta+k)*2*lmax +2*(l-1)+1] += (sgnm*sgnt)*qi;
					q0[(2*ntheta-1-k)*2*lmax +2*(l-1)] += sgnm*qr;
					q0[(2*ntheta-1-k)*2*lmax +2*(l-1)+1] += sgnm*qi;
					sgnt *= -1.0;
				}
			}
			#endif
		}
	}

	// step 3: Fourier analyze along the full circle (2*ntheta points) to recover the m's in the rotated frame.
	#ifdef OMP_FFTW
	k = (lmax < 63) ? 1 : shtns->nthreads;
	fftw_plan_with_nthreads(k);
	#endif
	q = (cplx*) q0;		// in-place r2c transform: complex output aliases the real input buffer.
	ntheta*=2;	nrembed = ntheta+2;	ncembed = nrembed/2;
	fft = fftw_plan_many_dft_r2c(1, &ntheta, 2*lmax, q0, &nrembed, 2*lmax, 1, q, &ncembed, 2*lmax, 1, FFTW_ESTIMATE);
	fftw_execute_dft_r2c(fft, q0, q);
	fftw_destroy_plan(fft);

	// step 4: project the Fourier modes back onto SH coefficients, evaluating at the equator,
	// and apply the final Z-rotation by dphi1 at the same time.
	double yl[lmax+1];	double dyl[lmax+1];
	m=0;
	//	legendre_sphPlm_deriv_array(shtns, lmax, m, 0.0, 1.0, yl+m, dyl+m);
	legendre_sphPlm_deriv_array_equ(shtns, lmax, m, yl+m, dyl+m);
	// parity at the equator: odd l uses the derivative dyl, even l uses yl (2-step loop).
	for (l=1; l<lmax; l+=2) {
		Rlm[LiM(shtns, l,m)] = -creal(q[m*2*lmax +2*(l-1)+1])/(dyl[l]*ntheta);
		Rlm[LiM(shtns, l+1,m)] = creal(q[m*2*lmax +2*l])/(yl[l+1]*ntheta);
	}
	if (l==lmax) {		// handle the last odd l left over by the 2-step loop.
		Rlm[LiM(shtns, l,m)] = -creal(q[m*2*lmax +2*(l-1)+1])/(dyl[l]*ntheta);
	}
	dphi1 += M_PI/ntheta;		// shift rotation angle by angle of first synthesis latitude.
	for (m=1; m<=lmax; ++m) {
		//	legendre_sphPlm_deriv_array(shtns, lmax, m, 0.0, 1.0, yl+m, dyl+m);
		legendre_sphPlm_deriv_array_equ(shtns, lmax, m, yl+m, dyl+m);
		cplx eimdp = (cos(m*dphi1) - I*sin(m*dphi1))/(ntheta);	// e^(-i.m.dphi1), with the FFT 1/N normalization folded in.
		for (l=m; l<lmax; l+=2) {
			Rlm[LiM(shtns, l,m)] = eimdp*q[m*2*lmax +2*(l-1)]*(1./yl[l]);
			Rlm[LiM(shtns, l+1,m)] = eimdp*q[m*2*lmax +2*l+1]*(-1./dyl[l+1]);
		}
		if (l==lmax) {		// last leftover l (same parity rule as above).
			Rlm[LiM(shtns, l,m)] = eimdp*q[m*2*lmax +2*(l-1)]*(1./yl[l]);
		}
	}
	VFREE(q0);
}
183
184
185/// \addtogroup rotation
186//@{
187
188/// rotate Qlm by 90 degrees around X axis and store the result in Rlm.
189/// shtns->mres MUST be 1, and lmax=mmax.
190void SH_Xrotate90(shtns_cfg shtns, cplx *Qlm, cplx *Rlm)
191{
192 int lmax= shtns->lmax;
193 if ((shtns->mres != 1) || (shtns->mmax < lmax)) shtns_runerr("truncature makes rotation not closed.");
194
195 if (lmax == 1) {
196 Rlm[0] = Qlm[0]; // l=0 is invariant.
197 int l=1; // rotation matrix for rotX(90), l=1 : m=[0, 1r, 1i]
198 double q0 = creal(Qlm[LiM(shtns, l, 0)]);
199 Rlm[LiM(shtns, l, 0)] = sqrt(2.0) * cimag(Qlm[LiM(shtns, l, 1)]); //[m=0] 0 0 sqrt(2)
200 Rlm[LiM(shtns, l ,1)] = creal(Qlm[LiM(shtns, l, 1)]) - I*(sqrt(0.5)*q0); //[m=1r] 0 1 0
201 return; //[m=1i] -sqrt(2)/2 0 0
202 }
203
204 SH_rotK90(shtns, Qlm, Rlm, 0.0, -M_PI/2);
205}
206
207/// rotate Qlm by 90 degrees around Y axis and store the result in Rlm.
208/// shtns->mres MUST be 1, and lmax=mmax.
209void SH_Yrotate90(shtns_cfg shtns, cplx *Qlm, cplx *Rlm)
210{
211 int lmax= shtns->lmax;
212 if ((shtns->mres != 1) || (shtns->mmax < lmax)) shtns_runerr("truncature makes rotation not closed.");
213
214 if (lmax == 1) {
215 Rlm[0] = Qlm[0]; // l=0 is invariant.
216 int l=1; // rotation matrix for rotY(90), l=1 : m=[0, 1r, 1i]
217 double q0 = creal(Qlm[LiM(shtns, l, 0)]); //[m=0] 0 0 sqrt(2)
218 Rlm[LiM(shtns, l, 0)] = sqrt(2.0) * creal(Qlm[LiM(shtns, l, 1)]); //[m=1r] -sqrt(2)/2 0 0
219 Rlm[LiM(shtns, l ,1)] = I*cimag(Qlm[LiM(shtns, l, 1)]) - sqrt(0.5) * q0; //[m=1i] 0 0 1
220 return;
221 }
222
223 SH_rotK90(shtns, Qlm, Rlm, -M_PI/2, 0.0);
224}
225
226/// rotate Qlm around Y axis by arbitrary angle, using composition of rotations. Store the result in Rlm.
227void SH_Yrotate(shtns_cfg shtns, cplx *Qlm, double alpha, cplx *Rlm)
228{
229 if ((shtns->mres != 1) || (shtns->mmax < shtns->lmax)) shtns_runerr("truncature makes rotation not closed.");
230
231 SH_rotK90(shtns, Qlm, Rlm, 0.0, M_PI/2 + alpha); // Zrotate(pi/2) + Yrotate90 + Zrotate(pi+alpha)
232 SH_rotK90(shtns, Rlm, Rlm, 0.0, M_PI/2); // Yrotate90 + Zrotate(pi/2)
233}
234
235//@}
236
237
238
239/** \addtogroup operators Special operators
240 * Apply special operators in spectral space: multiplication by cos(theta), sin(theta).d/dtheta.
241*/
242//@{
243
244
245
246
/// fill mx with the coefficients for multiplication by cos(theta)
/// \param mx : an array of 2*NLM double that will be filled with the matrix coefficients.
/// xq[lm] = mx[2*lm] * q[lm-1] + mx[2*lm+1] * q[lm+1];
/// (mx[2*lm] couples to the l-1 neighbour, mx[2*lm+1] to the l+1 neighbour of the same m.)
void mul_ct_matrix(shtns_cfg shtns, double* mx)
{
	long int im,l,lm;
	double a_1;

	if (SHT_NORM == sht_schmidt) {
		// Schmidt semi-normalized case: coefficients derived from the recurrence table alm.
		lm=0;
		for (im=0; im<=MMAX; im++) {
			double* al = alm_im(shtns,im);	// recurrence coefficients for this m; consumed 2 per l (al += 2).
			long int m=im*MRES;
			mx[2*lm] = 0.0;			// l = m has no l-1 neighbour.
			a_1 = 1.0 / al[1];
			l=m;
			while(++l < LMAX) {
				al+=2;
				mx[2*lm+2] = a_1;		// l-1 coupling of the NEXT row, from the previous 1/al[1].
				a_1 = 1.0 / al[1];
				mx[2*lm+1] = -a_1*al[0];	// = -al[2*(lm+1)] / al[2*(lm+1)+1];
				lm++;
			}
			if (l == LMAX) {	// the last one needs to be computed (closed-form, not in the table).
				mx[2*lm+2] = a_1;
				mx[2*lm+1] = sqrt((l+m)*(l-m))/(2*l+1);
				lm++;
			}
			mx[2*lm +1] = 0.0;		// l = LMAX has no l+1 neighbour (truncation).
			lm++;
		}
	} else {
		// orthonormal case: upper and lower couplings share the same value 1/al[1].
		lm=0;
		for (im=0; im<=MMAX; im++) {
			double* al = alm_im(shtns, im);
			l=im*MRES;
			mx[2*lm] = 0.0;			// l = m has no l-1 neighbour.
			while(++l <= LMAX) {
				a_1 = 1.0 / al[1];
				mx[2*lm+1] = a_1;		// specific to orthonormal.
				mx[2*lm+2] = a_1;
				lm++;	al+=2;
			}
			mx[2*lm +1] = 0.0;		// l = LMAX has no l+1 neighbour (truncation).
			lm++;
		}
	}
}
295
296/// fill mx with the coefficients of operator sin(theta).d/dtheta
297/// \param mx : an array of 2*NLM double that will be filled with the matrix coefficients.
298/// stdq[lm] = mx[2*lm] * q[lm-1] + mx[2*lm+1] * q[lm+1];
299void st_dt_matrix(shtns_cfg shtns, double* mx)
300{
301 mul_ct_matrix(shtns, mx);
302 for (int lm=0; lm<NLM; lm++) {
303 mx[2*lm] *= shtns->li[lm] - 1;
304 mx[2*lm+1] *= -(shtns->li[lm] + 2);
305 }
306}
307
308/// Multiplication of Qlm by a matrix involving l+1 and l-1 only.
309/// The result is stored in Rlm, which MUST be different from Qlm.
310/// mx is an array of 2*NLM values as returned by \ref mul_ct_matrix or \ref st_dt_matrix
311/// compute: Rlm[lm] = mx[2*lm] * Qlm[lm-1] + mx[2*lm+1] * Qlm[lm+1];
312void SH_mul_mx(shtns_cfg shtns, double* mx, cplx *Qlm, cplx *Rlm)
313{
314 long int nlmlim, lm;
315 v2d* vq = (v2d*) Qlm;
316 v2d* vr = (v2d*) Rlm;
317 nlmlim = NLM-1;
318 lm = 0;
319 s2d mxu = vdup(mx[1]);
320 vr[0] = mxu*vq[1];
321 for (lm=1; lm<nlmlim; lm++) {
322 s2d mxl = vdup(mx[2*lm]); s2d mxu = vdup(mx[2*lm+1]);
323 vr[lm] = mxl*vq[lm-1] + mxu*vq[lm+1];
324 }
325 lm = nlmlim;
326 s2d mxl = vdup(mx[2*lm]);
327 vr[lm] = mxl*vq[lm-1];
328}
329
330//@}
331
332// truncation at LMAX and MMAX
333#define LTR LMAX
334#define MTR MMAX
335
336/** \addtogroup local Local and partial evaluation of SH fields.
337 * These do only require a call to \ref shtns_create, but not to \ref shtns_set_grid.
338 * These functions are not optimized and can be relatively slow, but they provide good
339 * reference implemenation for the transforms.
340*/
341//@{
342
/// Evaluate scalar SH representation \b Qlm at physical point defined by \b cost = cos(theta) and \b phi
/// \return the value of the field at that point.
double SH_to_point(shtns_cfg shtns, cplx *Qlm, double cost, double phi)
{
	double yl[LMAX+1];
	double vr0, vr1;		// two accumulators (even/odd l) reduce dependency chains in the unrolled loop.
	long int l,m,im;

	vr0 = 0.0;	vr1 = 0.0;
	m=0;	im=0;
	legendre_sphPlm_array(shtns, LTR, im, cost, &yl[m]);
	// m=0 : only the real part of Qlm contributes. Loop unrolled by 2 over l.
	for (l=m; l<LTR; l+=2) {
		vr0 += yl[l] * creal( Qlm[l] );
		vr1 += yl[l+1] * creal( Qlm[l+1] );
	}
	if (l==LTR) {		// leftover l when (LTR - m) is even.
		vr0 += yl[l] * creal( Qlm[l] );
	}
	vr0 += vr1;
	if (MTR>0) {
		cplx eip, eimp;
		// eimp tracks 2*e^(i.m.phi), built incrementally by repeated multiplication by eip = e^(i.MRES.phi).
		eip = cos(phi*MRES) + I*sin(phi*MRES);	eimp = 2.0;		// x2 accounts for the +/-m symmetry of real fields.
		im = 1;	do {
			m = im*MRES;
			legendre_sphPlm_array(shtns, LTR, im, cost, &yl[m]);
			v2d* Ql = (v2d*) &Qlm[LiM(shtns, 0,im)];	// virtual pointer for l=0 and im
			v2d vrm0 = vdup(0.0);	v2d vrm1 = vdup(0.0);
			for (l=m; l<LTR; l+=2) {		// unrolled by 2 over l, complex accumulation.
				vrm0 += vdup(yl[l]) * Ql[l];
				vrm1 += vdup(yl[l+1]) * Ql[l+1];
			}
//			eimp = 2.*(cos(m*phi) + I*sin(m*phi));
			eimp *= eip;		// not so accurate, but it should be enough for rendering uses.
			vrm0 += vrm1;
			if (l==LTR) {		// leftover l.
				vrm0 += vdup(yl[l]) * Ql[l];
			}
			vr0 += vcplx_real(vrm0)*creal(eimp) - vcplx_imag(vrm0)*cimag(eimp);	// Re( vrm0 * e^(i.m.phi) ) * 2
		} while(++im <= MTR);
	}
	return vr0;
}
384
/// Evaluate the gradient of a scalar SH field at a physical point (cost = cos(theta), phi).
/// \param DrSlm : SH coefficients of the radial derivative dS/dr.
/// \param Slm : SH coefficients of the scalar field S.
/// \param[out] gr, gt, gp : the three spherical components of the gradient at that point.
void SH_to_grad_point(shtns_cfg shtns, cplx *DrSlm, cplx *Slm, double cost, double phi,
					double *gr, double *gt, double *gp)
{
	double yl[LMAX+1];
	double dtyl[LMAX+1];
	double vtt, vpp, vr0, vrm;
	long int l,m,im;

	const double sint = sqrt((1.-cost)*(1.+cost));	// sin(theta) >= 0
	vtt = 0.;	vpp = 0.;	vr0 = 0.;	vrm = 0.;
	m=0;	im=0;
	legendre_sphPlm_deriv_array(shtns, LTR, im, cost, sint, &yl[m], &dtyl[m]);
	// m=0 contribution: only real parts, no phi-derivative.
	for (l=m; l<=LTR; ++l) {
		vr0 += yl[l] * creal( DrSlm[l] );
		vtt += dtyl[l] * creal( Slm[l] );
	}
	if (MTR>0) {
		cplx eip, eimp, imeimp;
		// eimp tracks 2*e^(i.m.phi) built incrementally (see SH_to_point); x2 for the +/-m symmetry.
		eip = cos(phi*MRES) + I*sin(phi*MRES);	eimp = 2.0;
		im=1;	do {
			m = im*MRES;
			legendre_sphPlm_deriv_array(shtns, LTR, im, cost, sint, &yl[m], &dtyl[m]);
//			eimp = 2.*(cos(m*phi) + I*sin(m*phi));
			eimp *= eip;		// not so accurate, but it should be enough for rendering uses.
			imeimp = eimp*m*I;	// i.m.e^(i.m.phi) : factor for the d/dphi terms.
			l = LiM(shtns, 0,im);
			v2d* Ql = (v2d*) &DrSlm[l];	v2d* Sl = (v2d*) &Slm[l];
			v2d qm = vdup(0.0);
			v2d dsdt = vdup(0.0);	v2d dsdp = vdup(0.0);
			for (l=m; l<=LTR; ++l) {
				qm += vdup(yl[l]) * Ql[l];
				dsdt += vdup(dtyl[l]) * Sl[l];
				dsdp += vdup(yl[l]) * Sl[l];
			}
			vrm += vcplx_real(qm)*creal(eimp) - vcplx_imag(qm)*cimag(eimp);		// dS/dr
			vtt += vcplx_real(dsdt)*creal(eimp) - vcplx_imag(dsdt)*cimag(eimp);	// dS/dt
			vpp += vcplx_real(dsdp)*creal(imeimp) - vcplx_imag(dsdp)*cimag(imeimp);	// + I.m/sint *S
		} while (++im <= MTR);
		vr0 += vrm*sint;	// NOTE(review): m>0 radial part scaled by sint here — presumably matches the normalization of yl for m>0; verify against legendre_sphPlm_array.
	}
	*gr = vr0;	// Gr = dS/dr
	*gt = vtt;	// Gt = dS/dt
	*gp = vpp;	// Gp = I.m/sint *S
}
429
/// Evaluate vector SH representation \b Qlm at physical point defined by \b cost = cos(theta) and \b phi
/// \param Qlm, Slm, Tlm : radial/spheroidal/toroidal SH coefficients of the vector field.
/// \param[out] vr, vt, vp : the three spherical components of the vector at that point.
void SHqst_to_point(shtns_cfg shtns, cplx *Qlm, cplx *Slm, cplx *Tlm, double cost, double phi,
					double *vr, double *vt, double *vp)
{
	double yl[LMAX+1];
	double dtyl[LMAX+1];
	double vtt, vpp, vr0, vrm;
	long int l,m,im;

	const double sint = sqrt((1.-cost)*(1.+cost));	// sin(theta) >= 0
	vtt = 0.;	vpp = 0.;	vr0 = 0.;	vrm = 0.;
	m=0;	im=0;
	legendre_sphPlm_deriv_array(shtns, LTR, im, cost, sint, &yl[m], &dtyl[m]);
	// m=0 contribution: only real parts, no phi-derivative terms.
	for (l=m; l<=LTR; ++l) {
		vr0 += yl[l] * creal( Qlm[l] );
		vtt += dtyl[l] * creal( Slm[l] );
		vpp -= dtyl[l] * creal( Tlm[l] );
	}
	if (MTR>0) {
		cplx eip, eimp, imeimp;
		// eimp tracks 2*e^(i.m.phi) built incrementally (see SH_to_point); x2 for the +/-m symmetry.
		eip = cos(phi*MRES) + I*sin(phi*MRES);	eimp = 2.0;
		im=1;	do {
			m = im*MRES;
			legendre_sphPlm_deriv_array(shtns, LTR, im, cost, sint, &yl[m], &dtyl[m]);
//			eimp = 2.*(cos(m*phi) + I*sin(m*phi));
			eimp *= eip;		// not so accurate, but it should be enough for rendering uses.
			imeimp = eimp*m*I;	// i.m.e^(i.m.phi) : factor for the d/dphi terms.
			l = LiM(shtns, 0,im);
			v2d* Ql = (v2d*) &Qlm[l];	v2d* Sl = (v2d*) &Slm[l];	v2d* Tl = (v2d*) &Tlm[l];
			v2d qm = vdup(0.0);
			v2d dsdt = vdup(0.0);	v2d dtdt = vdup(0.0);
			v2d dsdp = vdup(0.0);	v2d dtdp = vdup(0.0);
			for (l=m; l<=LTR; ++l) {
				qm += vdup(yl[l]) * Ql[l];
				dsdt += vdup(dtyl[l]) * Sl[l];
				dtdt += vdup(dtyl[l]) * Tl[l];
				dsdp += vdup(yl[l]) * Sl[l];
				dtdp += vdup(yl[l]) * Tl[l];
			}
			vrm += vcplx_real(qm)*creal(eimp) - vcplx_imag(qm)*cimag(eimp);
			vtt += (vcplx_real(dtdp)*creal(imeimp) - vcplx_imag(dtdp)*cimag(imeimp))	// + I.m/sint *T
					+ (vcplx_real(dsdt)*creal(eimp) - vcplx_imag(dsdt)*cimag(eimp));	// + dS/dt
			vpp += (vcplx_real(dsdp)*creal(imeimp) - vcplx_imag(dsdp)*cimag(imeimp))	// + I.m/sint *S
					- (vcplx_real(dtdt)*creal(eimp) - vcplx_imag(dtdt)*cimag(eimp));	// - dT/dt
		} while (++im <= MTR);
		vr0 += vrm*sint;	// NOTE(review): m>0 radial part scaled by sint — presumably matches the m>0 normalization of yl; verify against legendre_sphPlm_array.
	}
	*vr = vr0;
	*vt = vtt;	// Bt = I.m/sint *T + dS/dt
	*vp = vpp;	// Bp = I.m/sint *S - dT/dt
}
481//@}
482
483#undef LTR
484#undef MTR
485
486
487
488/*
489 SYNTHESIS AT A GIVEN LATITUDE
490 (does not require a previous call to shtns_set_grid)
491*/
492
// Cached state shared by SHqst_to_lat / SH_to_lat. This global cache is what makes
// those functions NOT thread-safe (as stated in their doc comments).
fftw_plan ifft_lat = NULL;	///< fftw plan for SHqst_to_lat; rebuilt when nphi changes.
int nphi_lat = 0;		///< nphi of previous SHqst_to_lat call (plan cache key).
double* ylm_lat = NULL;		///< cached Legendre values for the last latitude (lazily allocated, never freed).
double* dylm_lat;		///< cached Legendre derivatives; points into the ylm_lat allocation.
double ct_lat = 2.0;		///< cos(theta) of last call; 2.0 can never equal a real cos, forcing computation on first call.
double st_lat;			///< sin(theta) of last call.
499
/// synthesis at a given latitude, on nphi equispaced longitude points.
/// vr, vt, and vp arrays must have nphi+2 doubles allocated (fftw requirement).
/// It does not require a previous call to shtns_set_grid, but it is NOT thread-safe
/// (it relies on the file-level cache: ifft_lat, nphi_lat, ylm_lat, ct_lat).
/// \param ltr, mtr : degree/order truncation; clamped to LMAX/MMAX and to what nphi can resolve.
/// \ingroup local
void SHqst_to_lat(shtns_cfg shtns, cplx *Qlm, cplx *Slm, cplx *Tlm, double cost,
					double *vr, double *vt, double *vp, int nphi, int ltr, int mtr)
{
	cplx vst, vtt, vsp, vtp, vrr;
	cplx *vrc, *vtc, *vpc;
	long int m, l, j;

	if (ltr > LMAX) ltr=LMAX;
	if (mtr > MMAX) mtr=MMAX;
	if (mtr*MRES > ltr) mtr=ltr/MRES;
	if (mtr*2*MRES >= nphi) mtr = (nphi-1)/(2*MRES);	// avoid aliasing: need 2*m < nphi.

	// output arrays double as in-place fftw c2r buffers (hence the nphi+2 allocation requirement).
	vrc = (cplx *) vr;
	vtc = (cplx *) vt;
	vpc = (cplx *) vp;

	if ((nphi != nphi_lat)||(ifft_lat == NULL)) {		// (re)build the cached c2r plan when nphi changes.
		if (ifft_lat != NULL) fftw_destroy_plan(ifft_lat);
		#ifdef OMP_FFTW
			fftw_plan_with_nthreads(1);
		#endif
		ifft_lat = fftw_plan_dft_c2r_1d(nphi, vrc, vr, FFTW_ESTIMATE);
		nphi_lat = nphi;
	}
	if (ylm_lat == NULL) {		// lazy allocation of the Legendre cache. NOTE(review): malloc result is not checked.
		ylm_lat = (double *) malloc(sizeof(double)* NLM*2);
		dylm_lat = ylm_lat + NLM;
	}
	if (cost != ct_lat) {		// don't recompute if same latitude (ie equatorial disc rendering)
		st_lat = sqrt((1.-cost)*(1.+cost));	// sin(theta)
		for (m=0,j=0; m<=mtr; ++m) {
			legendre_sphPlm_deriv_array(shtns, ltr, m, cost, st_lat, &ylm_lat[j], &dylm_lat[j]);
			j += LMAX -m*MRES +1;	// stride of one m-block in the cache (up to LMAX, independent of ltr).
		}
	}

	for (m = 0; m<nphi/2+1; ++m) {	// init with zeros
		vrc[m] = 0.0;	vtc[m] = 0.0;	vpc[m] = 0.0;
	}
	j=0;
	m=0;
	// m=0 Fourier mode: real contributions only.
	vrr=0;	vtt=0;	vst=0;
	for(l=m; l<=ltr; ++l, ++j) {
		vrr += ylm_lat[j] * creal(Qlm[j]);
		vst += dylm_lat[j] * creal(Slm[j]);
		vtt += dylm_lat[j] * creal(Tlm[j]);
	}
	j += (LMAX-ltr);	// skip the unused tail of the m-block in the cache.
	vrc[m] = vrr;
	vtc[m] = vst;	// Vt = dS/dt
	vpc[m] = -vtt;	// Vp = - dT/dt
	for (m=MRES; m<=mtr*MRES; m+=MRES) {
		vrr=0;	vtt=0;	vst=0;	vsp=0;	vtp=0;
		for(l=m; l<=ltr; ++l, ++j) {
			vrr += ylm_lat[j] * Qlm[j];
			vst += dylm_lat[j] * Slm[j];
			vtt += dylm_lat[j] * Tlm[j];
			vsp += ylm_lat[j] * Slm[j];
			vtp += ylm_lat[j] * Tlm[j];
		}
		j+=(LMAX-ltr);	// skip the unused tail of the m-block.
		vrc[m] = vrr*st_lat;
		vtc[m] = I*m*vtp + vst;	// Vt = I.m/sint *T + dS/dt
		vpc[m] = I*m*vsp - vtt;	// Vp = I.m/sint *S - dT/dt
	}
	// inverse FFT along longitude: Fourier modes -> nphi physical values, in place.
	fftw_execute_dft_c2r(ifft_lat,vrc,vr);
	fftw_execute_dft_c2r(ifft_lat,vtc,vt);
	fftw_execute_dft_c2r(ifft_lat,vpc,vp);
//	free(ylm_lat);		// kept allocated on purpose: reused across calls (latitude cache).
}
574
/// synthesis at a given latitude, on nphi equispaced longitude points.
/// vr arrays must have nphi+2 doubles allocated (fftw requirement).
/// It does not require a previous call to shtns_set_grid, but it is NOT thread-safe
/// (it shares the file-level cache ifft_lat/nphi_lat/ylm_lat/ct_lat with SHqst_to_lat).
/// \param ltr, mtr : degree/order truncation; clamped to LMAX/MMAX and to what nphi can resolve.
/// \ingroup local
void SH_to_lat(shtns_cfg shtns, cplx *Qlm, double cost,
					double *vr, int nphi, int ltr, int mtr)
{
	cplx vrr;
	cplx *vrc;
	long int m, l, j;

	if (ltr > LMAX) ltr=LMAX;
	if (mtr > MMAX) mtr=MMAX;
	if (mtr*MRES > ltr) mtr=ltr/MRES;
	if (mtr*2*MRES >= nphi) mtr = (nphi-1)/(2*MRES);	// avoid aliasing: need 2*m < nphi.

	vrc = (cplx *) vr;	// output array doubles as the in-place fftw c2r buffer.

	if ((nphi != nphi_lat)||(ifft_lat == NULL)) {		// (re)build the cached c2r plan when nphi changes.
		if (ifft_lat != NULL) fftw_destroy_plan(ifft_lat);
		#ifdef OMP_FFTW
			fftw_plan_with_nthreads(1);
		#endif
		ifft_lat = fftw_plan_dft_c2r_1d(nphi, vrc, vr, FFTW_ESTIMATE);
		nphi_lat = nphi;
	}
	if (ylm_lat == NULL) {		// lazy allocation of the Legendre cache. NOTE(review): malloc result is not checked.
		ylm_lat = (double *) malloc(sizeof(double)* NLM*2);
		dylm_lat = ylm_lat + NLM;
	}
	if (cost != ct_lat) {		// don't recompute if same latitude (ie equatorial disc rendering)
		st_lat = sqrt((1.-cost)*(1.+cost));	// sin(theta)
		for (m=0,j=0; m<=mtr; ++m) {
			legendre_sphPlm_deriv_array(shtns, ltr, m, cost, st_lat, &ylm_lat[j], &dylm_lat[j]);
			j += LMAX -m*MRES +1;	// stride of one m-block in the cache.
		}
	}

	for (m = 0; m<nphi/2+1; ++m) {	// init with zeros
		vrc[m] = 0.0;
	}
	j=0;
	m=0;
	// m=0 Fourier mode: real contributions only.
	vrr=0;
	for(l=m; l<=ltr; ++l, ++j) {
		vrr += ylm_lat[j] * creal(Qlm[j]);
	}
	j += (LMAX-ltr);	// skip the unused tail of the m-block in the cache.
	vrc[m] = vrr;
	for (m=MRES; m<=mtr*MRES; m+=MRES) {
		vrr=0;
		for(l=m; l<=ltr; ++l, ++j) {
			vrr += ylm_lat[j] * Qlm[j];
		}
		j+=(LMAX-ltr);	// skip the unused tail of the m-block.
		vrc[m] = vrr*st_lat;
	}
	// inverse FFT along longitude: Fourier modes -> nphi physical values, in place.
	fftw_execute_dft_c2r(ifft_lat,vrc,vr);
//	free(ylm_lat);		// kept allocated on purpose: reused across calls (latitude cache).
}
635
636
/// complex scalar transform (analysis).
/// in: complex spatial field z.
/// out: alm[l*(l+1)+m] is the SH coefficient of degree l and order m (with -l <= m <= l)
/// for a total of (LMAX+1)^2 coefficients.
/// Implemented as two real transforms (real and imaginary parts of z) whose coefficients
/// are then recombined using the conjugate symmetry of real-field coefficients.
void spat_cplx_to_SH(shtns_cfg shtns, cplx *z, cplx *alm)
{
	long int nspat = shtns->nspat;
	double *re, *im;
	cplx *rlm, *ilm;

	if (MMAX != LMAX) shtns_runerr("complex SH requires lmax=mmax and mres=1.");

	// alloc temporary fields: one buffer carved into [re | im | rlm | ilm].
	re = (double*) VMALLOC( 2*(nspat + NLM*2)*sizeof(double) );
	im = re + nspat;
	rlm = (cplx*) (re + 2*nspat);
	ilm = rlm + NLM;

	// split z into real and imag parts.
	for (int k=0; k<nspat; k++) {
		re[k] = creal(z[k]);	im[k] = cimag(z[k]);
	}

	// perform two real transforms:
	spat_to_SH(shtns, re, rlm);
	spat_to_SH(shtns, im, ilm);

	// combine into complex coefficients, indexed by ll+m with ll = l*(l+1).
	int ll = 0;
	int lm = 0;
	for (int l=0; l<=LMAX; l++) {
		ll += 2*l;		// ll = l*(l+1)
		alm[ll] = creal(rlm[lm]) + I*creal(ilm[lm]);	// m=0 : coefficients of real fields are real.
		lm++;
	}
	for (int m=1; m<=MMAX; m++) {
		ll = (m-1)*m;	// restart ll at l = m-1, so the first "ll += 2*l" below lands on l = m.
		for (int l=m; l<=LMAX; l++) {
			ll += 2*l;		// ll = l*(l+1)
			cplx rr = rlm[lm];
			cplx ii = ilm[lm];
			alm[ll+m] = rr + I*ii;			// m>0
			rr = conj(rr) + I*conj(ii);		// m<0, m even  (conjugate symmetry of real-field coefficients)
			if (m&1) rr = -rr;				// m<0, m odd   (extra (-1)^m factor)
			alm[ll-m] = rr;
			lm++;
		}
	}

	VFREE(re);
}
688
/// complex scalar transform (synthesis).
/// in: alm[l*(l+1)+m] is the SH coefficient of degree l and order m (with -l <= m <= l)
/// for a total of (LMAX+1)^2 coefficients.
/// out: complex spatial field z.
/// Inverse of spat_cplx_to_SH: splits alm into the coefficients of two real fields
/// (real and imaginary parts of z), then performs two real synthesis transforms.
void SH_to_spat_cplx(shtns_cfg shtns, cplx *alm, cplx *z)
{
	long int nspat = shtns->nspat;
	double *re, *im;
	cplx *rlm, *ilm;

	if (MMAX != LMAX) shtns_runerr("complex SH requires lmax=mmax and mres=1.");

	// alloc temporary fields: one buffer carved into [re | im | rlm | ilm].
	re = (double*) VMALLOC( 2*(nspat + NLM*2)*sizeof(double) );
	im = re + nspat;
	rlm = (cplx*) (re + 2*nspat);
	ilm = rlm + NLM;

	// extract complex coefficients corresponding to real and imag parts (inverse of the
	// recombination done in spat_cplx_to_SH), indexed by ll+m with ll = l*(l+1).
	int ll = 0;
	int lm = 0;
	for (int l=0; l<=LMAX; l++) {
		ll += 2*l;		// ll = l*(l+1)
		rlm[lm] = creal(alm[ll]);		// m=0
		ilm[lm] = cimag(alm[ll]);
		lm++;
	}
	double half_parity = 0.5;
	for (int m=1; m<=MMAX; m++) {
		ll = (m-1)*m;	// restart ll at l = m-1, so the first "ll += 2*l" below lands on l = m.
		half_parity = -half_parity;		// (-1)^m * 0.5
		for (int l=m; l<=LMAX; l++) {
			ll += 2*l;		// ll = l*(l+1)
			cplx b = alm[ll-m] * half_parity;	// (-1)^m for m negative.
			cplx a = alm[ll+m] * 0.5;
			rlm[lm] = (conj(b) + a);		// real part
			ilm[lm] = (conj(b) - a)*I;		// imag part
			lm++;
		}
	}

	// perform two real transforms:
	SH_to_spat(shtns, rlm, re);
	SH_to_spat(shtns, ilm, im);

	// combine into z
	for (int k=0; k<nspat; k++)
		z[k] = re[k] + I*im[k];

	VFREE(re);
}
740
741/*
742void SH_to_spat_grad(shtns_cfg shtns, cplx *alm, double *gt, double *gp)
743{
744 double *mx;
745 cplx *blm, *clm;
746
747 blm = (cplx*) VMALLOC( 3*NLM*sizeof(cplx) );
748 clm = blm + NLM;
749 mx = (double*)(clm + NLM);
750
751 st_dt_matrix(shtns, mx);
752 SH_mul_mx(shtns, mx, alm, blm);
753 int lm=0;
754 for (int im=0; im<=MMAX; im++) {
755 int m = im*MRES;
756 for (int l=m; l<=LMAX; l++) {
757 clm[lm] = alm[lm] * I*m;
758 lm++;
759 }
760 }
761 SH_to_spat(shtns, blm, gt);
762 SH_to_spat(shtns, clm, gp);
763 for (int ip=0; ip<NPHI; ip++) {
764 for (int it=0; it<NLAT; it++) {
765 gt[ip*NLAT+it] /= shtns->st[it];
766 gp[ip*NLAT+it] /= shtns->st[it];
767 }
768 }
769 VFREE(blm);
770}
771*/
Note: See TracBrowser for help on using the repository browser.