Context Navigation

hyb_spat_to_SH.gen.c

main

Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 16.4 KB

Line
1	/*
2	* Copyright (c) 2010-2015 Centre National de la Recherche Scientifique.
3	* written by Nathanael Schaeffer (CNRS, ISTerre, Grenoble, France).
4	*
5	* nathanael.schaeffer@ujf-grenoble.fr
6	*
7	* This software is governed by the CeCILL license under French law and
8	* abiding by the rules of distribution of free software. You can use,
9	* modify and/or redistribute the software under the terms of the CeCILL
10	* license as circulated by CEA, CNRS and INRIA at the following URL
11	* "http://www.cecill.info".
12	*
13	* The fact that you are presently reading this means that you have had
14	* knowledge of the CeCILL license and that you accept its terms.
15	*
16	*/
17
18	# This file is meta-code for SHT.c (spherical harmonic transform).
19	# it is intended for "make" to generate C code for 3 similar SHT functions,
20	# (namely spat_to_SH [Q tag]), spat_to_SHsphtor [V tag], spat_to_SH3 [both Q&V tags])
21	# from one generic function + tags.
22	# Basically, there are tags at the beginning of lines (Q,V) that are information
23	# to keep or remove the line depending on the function to build. (Q for scalar, V for vector, # for comment)
24	#
25	//////////////////////////////////////////////////
26	#ifdef SHT_AXISYM
27	/// The spatial field is assumed to be \b axisymmetric (spatial size NLAT), and only the m=0 harmonics are written to output.
28	#endif
29
30	/// Truncation and spatial discretization are defined by \ref shtns_create and \ref shtns_set_grid_*
31	/// \param[in] shtns = a configuration created by \ref shtns_create with a grid set by shtns_set_grid_*
32	Q/// \param[in] Vr = spatial scalar field : double array.
33	V/// \param[in] Vt, Vp = spatial (theta, phi) vector components : double arrays.
34	Q/// \param[out] Qlm = spherical harmonics coefficients :
35	Q/// complex double arrays of size shtns->nlm.
36	V/// \param[out] Slm,Tlm = spherical harmonics coefficients of \b Spheroidal and \b Toroidal scalars :
37	V/// complex double arrays of size shtns->nlm.
38	#ifdef SHT_VAR_LTR
39	/// \param[in] llim = specify maximum degree of spherical harmonic. llim must be at most shtns->lmax, and all spherical harmonic degree higher than llim are set to zero.
40	#endif
41
42	QX static void GEN3(spat_to_SH_,ID_NME,SUFFIX)(shtns_cfg shtns, double Vr, cplx Qlm, long int llim) {
43	3 static void GEN3(spat_to_SHqst_,ID_NME,SUFFIX)(shtns_cfg shtns, double Vr, double Vt, double Vp, cplx Qlm, cplx Slm, cplx Tlm, long int llim) {
44	#ifndef SHT_GRAD
45	VX static void GEN3(spat_to_SHsphtor_,ID_NME,SUFFIX)(shtns_cfg shtns, double Vt, double Vp, cplx Slm, cplx Tlm, long int llim) {
46	#else
47	S static void GEN3(spat_to_SHsph_,ID_NME,SUFFIX)(shtns_cfg shtns, double Vt, cplx Slm, long int llim) {
48	T static void GEN3(spat_to_SHtor_,ID_NME,SUFFIX)(shtns_cfg shtns, double Vp, cplx Tlm, long int llim) {
49	#endif
50
51	Q double *zl;
52	V double *dzl0;
53	V struct DtDp *dzl;
54	long int i,i0, ni,l;
55	#ifndef SHT_AXISYM
56	unsigned im, imlim;
57	Q cplx *BrF; // contains the Fourier transformed data
58	V cplx BtF, BpF; // contains the Fourier transformed data
59	Q v2d reo[2*NLAT_2]; // symmetric (even) and anti-symmetric (odd) parts, interleaved.
60	V v2d tpeo[4*NLAT_2]; // theta and phi even and odd parts
61	Q #define reo0 ((double*)reo)
62	V #define tpeo0 ((double*)tpeo)
63	V #define te0(i) tpeo0[4*(i)]
64	V #define to0(i) tpeo0[4*(i)+1]
65	V #define pe0(i) tpeo0[4*(i)+2]
66	V #define po0(i) tpeo0[4*(i)+3]
67	V #define vteo0(i) tpeo[2*(i)]
68	V #define vpeo0(i) tpeo[2*(i)+1]
69	#else
70	Q double reo0[2*NLAT_2+2] SSE; // symmetric (even) and anti-symmetric (odd) parts, interleaved.
71	S double teo0[2*NLAT_2+2] SSE;
72	T double peo0[2*NLAT_2+2] SSE;
73	S #define te0(i) teo0[2*(i)]
74	S #define to0(i) teo0[2*(i)+1]
75	T #define pe0(i) peo0[2*(i)]
76	T #define po0(i) peo0[2*(i)+1]
77	S #define vteo0(i) ((s2d*) teo0)[i]
78	T #define vpeo0(i) ((s2d*) peo0)[i]
79	#endif
80
81	QX #define ZL(i) vdup(zl[i])
82	3 #define ZL(i) vdup(dzl[i].p)
83
84	// defines how to access even and odd parts of data
85	Q #define re(i) reo[2*(i)]
86	Q #define ro(i) reo[2*(i)+1]
87	V #define te(i) tpeo[4*(i)]
88	V #define to(i) tpeo[4*(i)+1]
89	V #define pe(i) tpeo[4*(i)+2]
90	V #define po(i) tpeo[4*(i)+3]
91	Q #define re0(i) reo0[2*(i)+1]
92	Q #define ro0(i) reo0[2*(i)]
93
94	#ifndef SHT_AXISYM
95	Q BrF = (cplx *) Vr;
96	S BtF = (cplx *) Vt;
97	T BpF = (cplx *) Vp;
98	if (shtns->ncplx_fft >= 0) {
99	if (shtns->ncplx_fft > 0) { // alloc memory for the FFT
100	QX BrF = VMALLOC( shtns->ncplx_fft * sizeof(cplx) );
101	VX BtF = VMALLOC( 2* shtns->ncplx_fft * sizeof(cplx) );
102	VX BpF = BtF + shtns->ncplx_fft;
103	3 BrF = VMALLOC( 3* shtns->ncplx_fft * sizeof(cplx) );
104	3 BtF = BrF + shtns->ncplx_fft; BpF = BtF + shtns->ncplx_fft;
105	}
106	Q fftw_execute_dft_r2c(shtns->fft,Vr, BrF);
107	V fftw_execute_dft_r2c(shtns->fft,Vt, BtF);
108	V fftw_execute_dft_r2c(shtns->fft,Vp, BpF);
109	}
110	imlim = MTR;
111	#ifdef SHT_VAR_LTR
112	if (imlim*MRES > (unsigned) llim) imlim = ((unsigned) llim)/MRES; // 32bit mul and div should be faster
113	#endif
114	#endif
115
116	ni = NLAT_2; // copy NLAT_2 to a local variable for faster access (inner loop limit)
117	// im = 0; // dzl.p = 0.0 : and evrything is REAL
118	#ifndef SHT_NO_DCT
119	V double* st_1 = shtns->st_1;
120	#ifndef SHT_AXISYM
121	Q #define BR0 ((double *)reo)
122	V #define BT0 ((double *)tpeo)
123	V #define BP0 ((double *)tpeo + NLAT)
124	V i=0; do { // we assume NPHI>1 (else SHT_AXISYM should be defined).
125	V double sin_1 = st_1[i];
126	V ((double )BtF)[i2] = sin_1; ((double )BpF)[i2] = sin_1;
127	V } while (++i<NLAT);
128	Q fftw_execute_r2r(shtns->dct_m0,(double *) BrF, BR0); // DCT out-of-place.
129	V fftw_execute_r2r(shtns->dct_m0,(double *) BtF, BT0); // DCT out-of-place.
130	V fftw_execute_r2r(shtns->dct_m0,(double *) BpF, BP0); // DCT out-of-place.
131	#else
132	Q #define BR0 reo0
133	S #define BT0 teo0
134	T #define BP0 peo0
135	V i=0; do {
136	V #ifdef _GCC_VEC_
137	V s2d sin_1 = ((s2d *)st_1)[i];
138	S ((s2d) Vt)[i] = sin_1;
139	T ((s2d) Vp)[i] = sin_1;
140	V #else
141	V double sin_1 = st_1[2i]; double sin_2 = st_1[2i+1];
142	S Vt[2i] = sin_1; Vt[2i+1] = sin_2;
143	T Vp[2i] = sin_1; Vp[2i+1] = sin_2;
144	V #endif
145	V } while (++i<ni);
146	Q fftw_execute_r2r(shtns->dct_m0,Vr, BR0); // DCT out-of-place.
147	S fftw_execute_r2r(shtns->dct_m0,Vt, BT0); // DCT out-of-place.
148	T fftw_execute_r2r(shtns->dct_m0,Vp, BP0); // DCT out-of-place.
149	#endif
150	l=0;
151	long int klim = shtns->klim;
152	#ifdef SHT_VAR_LTR
153	i = (llim * SHT_NL_ORDER) + 2; // sum truncation
154	if (i < klim) klim = i;
155	#endif
156	Q v2d* Ql = (v2d*) Qlm;
157	S v2d* Sl = (v2d*) Slm;
158	T v2d* Tl = (v2d*) Tlm;
159	Q zl = shtns->zlm_dct0;
160	V dzl0 = shtns->dzlm_dct0;
161	V #ifndef _GCC_VEC_
162	S cplx s1 = 0.0; // l=0 : Sl = 0
163	T cplx t1 = 0.0; // l=0 : Tl = 0
164	V #else
165	S v2d s = vdup(0.0); // l=0 : Sl = 0
166	T v2d t = vdup(0.0); // l=0 : Tl = 0
167	V #endif
168	QX BR0[klim] = 0; BR0[klim+1] = 0; // allow some overflow.
169	#ifdef SHT_VAR_LTR
170	while(l < llim) {
171	#else
172	do { // l < LMAX
173	#endif
174	i=l; // l < klim
175	#ifndef _GCC_VEC_
176	S Sl[l] = s1;
177	T Tl[l] = t1;
178	Q cplx q0 = 0.0; cplx q1 = 0.0;
179	S cplx s0 = 0.0; s1 = 0.0;
180	T cplx t0 = 0.0; t1 = 0.0;
181	do {
182	Q q0 += BR0[i] * zl[i];
183	Q q1 += BR0[i+1] * zl[i+1];
184	S s0 += BT0[i] * dzl0[i];
185	T t0 -= BP0[i] * dzl0[i];
186	S s1 += BT0[i+1] * dzl0[i+1];
187	T t1 -= BP0[i+1] * dzl0[i+1];
188	i+=2;
189	} while(i<klim);
190	Q Ql[l] = q0; Ql[l+1] = q1;
191	S Sl[l+1] = s0;
192	T Tl[l+1] = t0;
193	#else
194	S Sl[l] = vhi_to_cplx(s);
195	T Tl[l] = vhi_to_cplx(t);
196	S s = vdup(0.0);
197	T t = vdup(0.0);
198	Q s2d q = vdup(0.0);
199	QX s2d q1 = vdup(0.0);
200	i >>= 1; // i = i/2
201	do {
202	Q q += ((s2d) zl)[i] ((s2d*) BR0)[i];
203	S s += ((s2d) dzl0)[i] ((s2d*) BT0)[i];
204	T t -= ((s2d) dzl0)[i] ((s2d*) BP0)[i];
205	++i;
206	QX q1 += ((s2d) zl)[i] ((s2d*) BR0)[i];
207	QX ++i;
208	} while(2*i < klim);
209	QX q += q1;
210	Q Ql[l] = vlo_to_cplx(q); Ql[l+1] = vhi_to_cplx(q);
211	S Sl[l+1] = vlo_to_cplx(s);
212	T Tl[l+1] = vlo_to_cplx(t);
213	#endif
214	l+=2;
215	Q zl += (shtns->klim - l);
216	V dzl0 += (shtns->klim -l);
217	#ifndef SHT_VAR_LTR
218	} while(l<llim);
219	#else
220	}
221	#endif
222	if (l == llim) {
223	V #ifndef _GCC_VEC_
224	S Sl[l] = s1;
225	T Tl[l] = t1;
226	V #else
227	S ((v2d*) Sl)[l] = vhi_to_cplx(s);
228	T ((v2d*) Tl)[l] = vhi_to_cplx(t);
229	V #endif
230	Q cplx q0 = 0.0;
231	Q i=l; // l < klim
232	Q do {
233	Q q0 += BR0[i] * zl[i];
234	Q i+=2;
235	Q } while(i<klim);
236	Q ((cplx *) Ql)[l] = q0;
237	++l;
238	}
239	Q #undef BR0
240	S #undef BT0
241	T #undef BP0
242	#ifdef SHT_VAR_LTR
243	while( l<=LMAX ) {
244	Q Ql[l] = vdup(0.0);
245	S Sl[l] = vdup(0.0);
246	T Tl[l] = vdup(0.0);
247	++l;
248	}
249	#endif
250	#else // ifndef SHT_NO_DCT
251	i=0;
252	Q zl = shtns->zlm[0];
253	// stride of source data : we assume NPHI>1 (else SHT_AXISYM should be defined).
254	#ifndef SHT_AXISYM
255	Q #define BR0(i) ((double)BrF)[(i)2]
256	S #define BT0(i) ((double)BtF)[(i)2]
257	T #define BP0(i) ((double)BpF)[(i)2]
258	#else
259	Q #define BR0(i) Vr[i]
260	S #define BT0(i) Vt[i]
261	T #define BP0(i) Vp[i]
262	#endif
263	#if _GCC_VEC_ && __SSE3__
264	Q s2d r0v = vdup(0.0);
265	do { // compute symmetric and antisymmetric parts.
266	Q s2d a = vdup(BR0(i)); s2d b = vdup(BR0(NLAT-1-i));
267	QX s2d g = vdup(BR0(i+1)); s2d h = vdup(BR0(NLAT-2-i));
268	Q a = subadd(a,b);
269	QX g = subadd(g,h);
270	Q ((s2d*) reo0)[i] = a; // assume odd is first, then even.
271	3 r0v += vdup(zl[i]) * a; // even part is used.
272	QX ((s2d*) reo0)[i+1] = g; // assume odd is first, then even.
273	QX a = _mm_unpackhi_pd(a, g);
274	QX r0v += ((s2d)(zl+i)) * a; // even part is used, reduce data dependency
275	S s2d c = vdup(BT0(i)); s2d d = vdup(BT0(NLAT-1-i));
276	S c = subadd(c,d); vteo0(i) = vxchg(c);
277	T s2d e = vdup(BP0(i)); s2d f = vdup(BP0(NLAT-1-i));
278	T e = subadd(e,f); vpeo0(i) = vxchg(e);
279	V ++i;
280	QX i+=2;
281	} while(i<ni);
282	QX r0v += vxchg(r0v);
283	QX ((s2d*) reo0)[ni] = vdup(0.0); // allow some overflow.
284	Q ((v2d*)Qlm)[0] = vhi_to_cplx(r0v);
285	#else
286	Q double r0 = 0.0;
287	QX double r1 = 0.0;
288	do { // compute symmetric and antisymmetric parts.
289	Q double a = BR0(i); double b = BR0(NLAT-1-i);
290	Q ro0(i) = (a-b); re0(i) = (a+b);
291	Q r0 += zl[i] * (a+b);
292	S double c = BT0(i); double d = BT0(NLAT-1-i);
293	S te0(i) = (c+d); to0(i) = (c-d);
294	T double e = BP0(i); double f = BP0(NLAT-1-i);
295	T pe0(i) = (e+f); po0(i) = (e-f);
296	} while(++i<ni);
297	QX r0 += r1;
298	QX ro0(ni) = 0.0; re0(ni) = 0.0; // allow some overflow.
299	Q Qlm[0] = r0;
300	#endif
301	Q #undef BR0
302	S #undef BT0
303	T #undef BP0
304	Q zl += ni + (ni&1); // SSE alignement
305	l=1; // l=0 is zero for the vector transform.
306	Q v2d* Ql = (v2d*) Qlm; // virtual pointer for l=0 and im
307	S v2d* Sl = (v2d*) Slm; // virtual pointer for l=0 and im
308	T v2d* Tl = (v2d*) Tlm; // virtual pointer for l=0 and im
309	V dzl0 = (double *) shtns->dzlm[0]; // only theta derivative (d/dphi = 0 for m=0)
310	S Sl[0] = vdup(0.0); // l=0 is zero for the vector transform.
311	T Tl[0] = vdup(0.0); // l=0 is zero for the vector transform.
312	#ifdef SHT_VAR_LTR
313	while (l<llim) { // ops : NLAT/2 * (2*(LMAX-m+1) + 4) : almost twice as fast.
314	#else
315	do {
316	#endif
317	i=0;
318	#ifndef _GCC_VEC_
319	Q double q0 = 0.0;
320	Q double q1 = 0.0;
321	S double s0 = 0.0; double s1 = 0.0;
322	T double t0 = 0.0; double t1 = 0.0;
323	do {
324	Q q0 += zl[0] * ro0(i); // Qlm[LiM(l,im)] += zlm[im][(l-m)NLAT/2 + i] fp[i];
325	Q q1 += zl[1] * re0(i); // Qlm[LiM(l+1,im)] += zlm[im][(l+1-m)NLAT/2 + i] fm[i];
326	S s0 += dzl0[0] * te0(i);
327	T t0 -= dzl0[0] * pe0(i);
328	S s1 += dzl0[1] * to0(i);
329	T t1 -= dzl0[1] * po0(i);
330	Q zl +=2;
331	V dzl0 +=2;
332	} while(++i < ni);
333	Q Ql[l] = q0;
334	Q Ql[l+1] = q1;
335	S Sl[l] = s0; Sl[l+1] = s1;
336	T Tl[l] = t0; Tl[l+1] = t1;
337	#else
338	S s2d s = vdup(0.0);
339	T s2d t = vdup(0.0);
340	Q s2d q = vdup(0.0);
341	QX s2d q1 = vdup(0.0);
342	do {
343	S s += ((s2d) dzl0)[i] vteo0(i);
344	T t -= ((s2d) dzl0)[i] vpeo0(i);
345	Q q += ((s2d) zl)[i] ((s2d*) reo0)[i];
346	++i;
347	QX q1 += ((s2d) zl)[i] ((s2d*) reo0)[i]; // reduce dependency
348	QX ++i;
349	} while(i < ni);
350	QX q += q1;
351	Q zl += 2*ni;
352	V dzl0 += 2*ni;
353	Q Ql[l] = vlo_to_cplx(q); Ql[l+1] = vhi_to_cplx(q);
354	S Sl[l] = vlo_to_cplx(s); Sl[l+1] = vhi_to_cplx(s);
355	T Tl[l] = vlo_to_cplx(t); Tl[l+1] = vhi_to_cplx(t);
356	#endif
357	l+=2;
358	#ifndef SHT_VAR_LTR
359	} while (l<llim);
360	#else
361	}
362	#endif
363	if (l==llim) {
364	long int lstride=1;
365	#ifdef SHT_VAR_LTR
366	if (l != LMAX) lstride=2;
367	#endif
368	QX double q1 = 0.0; // reduce dependency by unrolling loop
369	Q double q0 = 0.0;
370	S double s0 = 0.0;
371	T double t0 = 0.0;
372	i=0; do {
373	Q q0 += zl[0] * ro0(i); // Qlm[LiM(l,im)] += zlm[im][(l-m)NLAT/2 + i] fp[i];
374	S s0 += dzl0[0] * te0(i);
375	T t0 -= dzl0[0] * pe0(i);
376	Q zl += lstride;
377	V dzl0 += lstride;
378	++i;
379	QX q1 += zl[0] * ro0(i);
380	QX zl += lstride;
381	QX ++i;
382	} while(i<ni);
383	QX q0 += q1;
384	Q ((cplx *)Ql)[l] = q0;
385	S ((cplx *)Sl)[l] = s0;
386	T ((cplx *)Tl)[l] = t0;
387	#ifdef SHT_VAR_LTR
388	++l;
389	}
390	while( l<=LMAX ) {
391	Q Ql[l] = vdup(0.0);
392	S Sl[l] = vdup(0.0);
393	T Tl[l] = vdup(0.0);
394	++l;
395	#endif
396	}
397	#endif // ifndef SHT_NO_DCT
398	#ifndef SHT_AXISYM
399	for (im=1;im<=imlim;++im) {
400	Q BrF += NLAT;
401	V BtF += NLAT; BpF += NLAT;
402	i0 = shtns->tm[im];
403	i=i0;
404	do { // compute symmetric and antisymmetric parts.
405	3 s2d sin = vdup(shtns->st[i]);
406	3 v2d q0 = ((v2d )BrF)[i]; v2d q1 = ((v2d )BrF)[NLAT-1-i]; re(i) = (q0+q1)sin; ro(i) = (q0-q1)sin;
407	QX v2d q0 = ((v2d )BrF)[i]; v2d q1 = ((v2d )BrF)[NLAT-1-i]; re(i) = q0+q1; ro(i) = q0-q1;
408	V v2d t0 = ((v2d )BtF)[i]; v2d t1 = ((v2d )BtF)[NLAT-1-i]; te(i) = t0+t1; to(i) = t0-t1;
409	V v2d s0 = ((v2d )BpF)[i]; v2d s1 = ((v2d )BpF)[NLAT-1-i]; pe(i) = s0+s1; po(i) = s0-s1;
410	} while (++i<ni);
411	l = LiM(shtns, 0,im);
412	Q v2d* Ql = (v2d*) &Qlm[l]; // virtual pointer for l=0 and im
413	V v2d* Sl = (v2d) &Slm[l]; v2d Tl = (v2d*) &Tlm[l];
414	l=im*MRES;
415	3 double m_1 = 1.0/l;
416	Q zl = shtns->zlm[im];
417	V dzl = shtns->dzlm[im];
418	while (l<llim) { // ops : NLAT/2 * (2*(LMAX-m+1) + 4) : almost twice as fast.
419	Q v2d q0 = vdup(0.0);
420	Q v2d q1 = vdup(0.0);
421	V v2d s0 = vdup(0.0); v2d t1 = vdup(0.0); v2d s0i = vdup(0.0); v2d t1i = vdup(0.0);
422	V v2d t0 = vdup(0.0); v2d s1 = vdup(0.0); v2d t0i = vdup(0.0); v2d s1i = vdup(0.0);
423	i=i0; do { // tm[im] : polar optimization
424	V s0 += vdup(dzl[0].t) *to(i); // ref: these E. Dormy p 72.
425	V t0 -= vdup(dzl[0].t) *po(i);
426	V s1 += vdup(dzl[1].t) *te(i);
427	V t1 -= vdup(dzl[1].t) *pe(i);
428	V s0i -= vdup(dzl[0].p) *pe(i);
429	V t0i -= vdup(dzl[0].p) *te(i);
430	V s1i -= vdup(dzl[1].p) *po(i);
431	V t1i -= vdup(dzl[1].p) *to(i);
432	Q q0 += re(i) * ZL(0); // Qlm[LiM(l,im)] += zlm[im][(l-m)NLAT/2 + i] fp[i];
433	Q q1 += ro(i) * ZL(1); // Qlm[LiM(l+1,im)] += zlm[im][(l+1-m)NLAT/2 + i] fm[i];
434	Q zl +=2;
435	V dzl +=2;
436	} while (++i < ni);
437	3 q0 = vdup((l(l+1))*m_1);
438	3 q1 = vdup(((l+1)(l+2))*m_1);
439	V Sl[l] = addi(s0,s0i); Tl[l+1] = addi(t1,t1i);
440	V Tl[l] = addi(t0,t0i); Sl[l+1] = addi(s1,s1i);
441	Q Ql[l] = q0;
442	Q Ql[l+1] = q1;
443	l+=2;
444	}
445	if (l==llim) {
446	long int lstride=1;
447	#ifdef SHT_VAR_LTR
448	if (l != LMAX) lstride=2;
449	#endif
450	Q v2d q0 = vdup(0.0); // Qlm[LiM(l,im)] = 0.0;
451	V v2d s0 = vdup(0.0); v2d s0i = vdup(0.0);
452	V v2d t0 = vdup(0.0); v2d t0i = vdup(0.0);
453	i=i0; do { // tm[im] : polar optimization
454	V s0 += vdup(dzl[0].t) *to(i);
455	V t0 -= vdup(dzl[0].t) *po(i);
456	V s0i -= vdup(dzl[0].p) *pe(i);
457	V t0i -= vdup(dzl[0].p) *te(i);
458	Q q0 += re(i) * ZL(0); // Qlm[LiM(l,im)] += zlm[im][(l-m)NLAT/2 + i] fp[i];
459	Q zl += lstride;
460	V dzl += lstride;
461	} while(++i<ni);
462	3 q0 = vdup((l(l+1))*m_1);
463	V Sl[l] = addi(s0,s0i);
464	V Tl[l] = addi(t0,t0i);
465	Q Ql[l] = q0;
466	#ifdef SHT_VAR_LTR
467	++l;
468	}
469	while( l<=LMAX ) {
470	Q Ql[l] = vdup(0.0);
471	V Sl[l] = vdup(0.0); Tl[l] = vdup(0.0);
472	++l;
473	#endif
474	}
475	}
476	#ifdef SHT_VAR_LTR
477	if (imlim < MMAX) {
478	im = imlim+1;
479	l = LiM(shtns, im*MRES, im);
480	do {
481	Q ((v2d*)Qlm)[l] = vdup(0.0);
482	V ((v2d)Slm)[l] = vdup(0.0); ((v2d)Tlm)[l] = vdup(0.0);
483	} while(++l < shtns->nlm);
484	}
485	#endif
486
487	if (shtns->ncplx_fft > 0) { // free memory
488	Q VFREE(BrF - NLAT*imlim);
489	VX VFREE(BtF - NLAT*imlim); // this frees also BpF.
490	}
491	#endif
492
493	Q #undef ZL
494	Q #undef re
495	Q #undef ro
496	V #undef te
497	V #undef to
498	V #undef pe
499	V #undef po
500	Q #undef re0
501	Q #undef ro0
502	V #undef te0
503	V #undef to0
504	V #undef pe0
505	V #undef po0
506	Q #undef reo0
507	V #undef teo0
508	V #undef peo0
509	Q #undef reo0
510	V #undef tpeo0
511	V #undef vteo0
512	V #undef vpeo0
513	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: CIVL/examples/omp/shtns/SHT/hyb_spat_to_SH.gen.c

Download in other formats: