Context Navigation

ziggurat_openmp.c@ bb03188

main test-branch

Last change on this file since bb03188 was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 27.1 KB

Line
1	# include <stdlib.h>
2	# include <stdio.h>
3	# include <math.h>
4	# include <time.h>
5	# include <stdint.h>
6	# include <omp.h>
7
8	int main ( );
9	void test01 ( );
10	void test02 ( );
11	void test03 ( );
12	void test04 ( );
13	float r4_exp ( uint32_t *jsr, uint32_t ke[256], float fe[256], float we[256] );
14	void r4_exp_setup ( uint32_t ke[256], float fe[256], float we[256] );
15	float r4_nor ( uint32_t *jsr, uint32_t kn[128], float fn[128], float wn[128] );
16	void r4_nor_setup ( uint32_t kn[128], float fn[128], float wn[128] );
17	float r4_uni ( uint32_t *jsr );
18	uint32_t shr3_seeded ( uint32_t *jsr );
19	void timestamp ( );
20
21	/******************************************************************************/
22
23	int main ( )
24
25	/******************************************************************************/
26	/*
27	Purpose:
28
29	MAIN is the main program for ZIGGURAT_OPENMP.
30
31	Licensing:
32
33	This code is distributed under the GNU LGPL license.
34
35	Modified:
36
37	04 October 2013
38
39	Author:
40
41	John Burkardt
42	*/
43	{
44	timestamp ( );
45	printf ( "\n" );
46	printf ( "ZIGGURAT_OPENMP:\n" );
47	printf ( " C version\n" );
48
49	printf ( "\n" );
50	printf ( " Number of processors available = %d\n", omp_get_num_procs ( ) );
51	printf ( " Number of threads = %d\n", omp_get_max_threads ( ) );
52
53	test01 ( );
54	test02 ( );
55	test03 ( );
56	test04 ( );
57	/*
58	Terminate.
59	*/
60	printf ( "\n" );
61	printf ( "ZIGGURAT_OPENMP:\n" );
62	printf ( " Normal end of execution.\n" );
63	printf ( "\n" );
64	timestamp ( );
65
66	return 0;
67	}
68	/******************************************************************************/
69
70	void test01 ( )
71
72	/******************************************************************************/
73	/*
74	Purpose:
75
76	TEST01 tests SHR3_SEEDED.
77
78	Licensing:
79
80	This code is distributed under the GNU LGPL license.
81
82	Modified:
83
84	04 October 2013
85
86	Author:
87
88	John Burkardt
89	*/
90	{
91	uint32_t jsr;
92	uint32_t jsr_value;
93	double mega_rate_par;
94	double mega_rate_seq;
95	int r;
96	int r_num = 1000;
97	int *result_par;
98	int *result_seq;
99	int s;
100	int s_num = 10000;
101	uint32_t *seed;
102	int thread;
103	int thread_num;
104	double wtime_par;
105	double wtime_seq;
106
107	printf ( "\n" );
108	printf ( "TEST01\n" );
109	printf ( " SHR3_SEEDED computes random integers.\n" );
110	printf ( " Since the output is completely determined\n" );
111	printf ( " by the input value of SEED, we can run in\n" );
112	printf ( " parallel as long as we make an array of seeds.\n" );
113	/*
114	Set up the SEED array, which will be used for both sequential and
115	parallel computations.
116	*/
117	# pragma omp parallel
118	{
119	# pragma omp master
120	{
121	thread_num = omp_get_num_threads ( );
122
123	printf ( "\n" );
124	printf ( " The number of threads is %d\n", thread_num );
125	}
126	}
127	seed = ( uint32_t * ) malloc ( thread_num * sizeof ( uint32_t ) );
128	result_seq = ( int * ) malloc ( thread_num * sizeof ( int ) );
129	result_par = ( int * ) malloc ( thread_num * sizeof ( int ) );
130	/*
131	Sequential execution.
132	The sequential execution will only match the parallel execution if we can
133	guarantee that the parallel threads are scheduled to execute the R loop
134	consecutively.
135	*/
136	jsr = 123456789;
137
138	for ( thread = 0; thread < thread_num; thread++ )
139	{
140	seed[thread] = shr3_seeded ( &jsr );
141	}
142
143	wtime_seq = omp_get_wtime ( );
144
145	for ( r = 0; r < r_num; r++ )
146	{
147	thread = ( r % thread_num );
148
149	jsr = seed[thread];
150
151	for ( s = 0; s < s_num; s++ )
152	{
153	jsr_value = shr3_seeded ( &jsr );
154	}
155	result_seq[thread] = jsr_value;
156	seed[thread] = jsr;
157	}
158
159	wtime_seq = omp_get_wtime ( ) - wtime_seq;
160
161	mega_rate_seq = ( double ) ( r_num ) * ( double ) ( s_num ) / wtime_seq
162	/ 1000000.0;
163	/*
164	Parallel.
165	*/
166	jsr = 123456789;
167
168	for ( thread = 0; thread < thread_num; thread++ )
169	{
170	seed[thread] = shr3_seeded ( &jsr );
171	}
172
173	wtime_par = omp_get_wtime ( );
174
175	# pragma omp parallel \
176	shared ( result_par, seed ) \
177	private ( jsr, jsr_value, r, s, thread )
178	{
179
180	# pragma omp for schedule ( static, 1 )
181
182	for ( r = 0; r < r_num; r++ )
183	{
184	thread = omp_get_thread_num ( );
185
186	jsr = seed[thread];
187
188	for ( s = 0; s < s_num; s++ )
189	{
190	jsr_value = shr3_seeded ( &jsr );
191	}
192	result_par[thread] = jsr_value;
193	seed[thread] = jsr;
194	}
195	}
196
197	wtime_par = omp_get_wtime ( ) - wtime_par;
198
199	mega_rate_par = ( double ) ( r_num ) * ( double ) ( s_num ) / wtime_par
200	/ 1000000.0;
201	/*
202	Report.
203	*/
204	printf ( "\n" );
205	printf ( " Correctness check:\n" );
206	printf ( "\n" );
207	printf ( " Computing values sequentially should reach the\n" );
208	printf ( " same result as doing it in parallel:\n" );
209	printf ( "\n" );
210	printf ( " THREAD Sequential Parallel Difference\n" );
211	printf ( "\n" );
212
213	for ( thread = 0; thread < thread_num; thread++ )
214	{
215	printf ( " %8d %12d %12d %12d\n", thread, result_seq[thread],
216	result_par[thread], result_seq[thread] - result_par[thread] );
217	}
218
219	printf ( "\n" );
220	printf ( " Efficiency check:\n" );
221	printf ( "\n" );
222	printf ( " Computing values in parallel should be faster:\n" );
223	printf ( "\n" );
224	printf ( " Sequential Parallel\n" );
225	printf ( "\n" );
226	printf ( " TIME: %14f %14f\n", wtime_seq, wtime_par );
227	printf ( " RATE: %14f %14f\n", mega_rate_seq, mega_rate_par );
228	/*
229	Free memory.
230	*/
231	free ( result_par );
232	free ( result_seq );
233	free ( seed );
234
235	return;
236	}
237	/******************************************************************************/
238
239	void test02 ( )
240
241	/******************************************************************************/
242	/*
243	Purpose:
244
245	TEST02 tests R4_UNI.
246
247	Licensing:
248
249	This code is distributed under the GNU LGPL license.
250
251	Modified:
252
253	04 October 2013
254
255	Author:
256
257	John Burkardt
258	*/
259	{
260	uint32_t jsr;
261	uint32_t jsr_value;
262	double mega_rate_par;
263	double mega_rate_seq;
264	int r;
265	int r_num = 1000;
266	float r4_value;
267	float *result_par;
268	float *result_seq;
269	int s;
270	int s_num = 10000;
271	uint32_t *seed;
272	int thread;
273	int thread_num;
274	double wtime_par;
275	double wtime_seq;
276
277	printf ( "\n" );
278	printf ( "TEST02\n" );
279	printf ( " R4_UNI computes uniformly random single precision real values.\n" );
280	printf ( " Since the output is completely determined\n" );
281	printf ( " by the input value of SEED, we can run in\n" );
282	printf ( " parallel as long as we make an array of seeds.\n" );
283	/*
284	Set up the SEED array, which will be used for both sequential and
285	parallel computations.
286	*/
287	# pragma omp parallel
288	{
289	# pragma omp master
290	{
291	thread_num = omp_get_num_threads ( );
292
293	printf ( "\n" );
294	printf ( " The number of threads is %d\n", thread_num );
295	}
296	}
297	seed = ( uint32_t * ) malloc ( thread_num * sizeof ( uint32_t ) );
298	result_seq = ( float * ) malloc ( thread_num * sizeof ( float ) );
299	result_par = ( float * ) malloc ( thread_num * sizeof ( float ) );
300	/*
301	Sequential execution.
302	The sequential execution will only match the parallel execution if we can
303	guarantee that the parallel threads are scheduled to execute the R loop
304	consecutively.
305	*/
306	jsr = 123456789;
307
308	for ( thread = 0; thread < thread_num; thread++ )
309	{
310	seed[thread] = shr3_seeded ( &jsr );
311	}
312
313	wtime_seq = omp_get_wtime ( );
314
315	for ( r = 0; r < r_num; r++ )
316	{
317	thread = ( r % thread_num );
318
319	jsr = seed[thread];
320
321	for ( s = 0; s < s_num; s++ )
322	{
323	r4_value = r4_uni ( &jsr );
324	}
325	result_seq[thread] = r4_value;
326	seed[thread] = jsr;
327	}
328
329	wtime_seq = omp_get_wtime ( ) - wtime_seq;
330
331	mega_rate_seq = ( double ) ( r_num ) * ( double ) ( s_num ) / wtime_seq / 1000000.0;
332	/*
333	Parallel.
334	*/
335	jsr = 123456789;
336
337	for ( thread = 0; thread < thread_num; thread++ )
338	{
339	seed[thread] = shr3_seeded ( &jsr );
340	}
341
342	wtime_par = omp_get_wtime ( );
343
344	# pragma omp parallel \
345	shared ( result_par, seed ) \
346	private ( jsr, r, r4_value, s, thread )
347	{
348
349	# pragma omp for schedule ( static, 1 )
350
351	for ( r = 0; r < r_num; r++ )
352	{
353	thread = omp_get_thread_num ( );
354
355	jsr = seed[thread];
356
357	for ( s = 0; s < s_num; s++ )
358	{
359	r4_value = r4_uni ( &jsr );
360	}
361	result_par[thread] = r4_value;
362	seed[thread] = jsr;
363	}
364	}
365
366	wtime_par = omp_get_wtime ( ) - wtime_par;
367
368	mega_rate_par = ( double ) ( r_num ) * ( double ) ( s_num ) / wtime_par / 1000000.0;
369	/*
370	Report.
371	*/
372	printf ( "\n" );
373	printf ( " Correctness check:\n" );
374	printf ( "\n" );
375	printf ( " Computing values sequentially should reach the\n" );
376	printf ( " same result as doing it in parallel:\n" );
377	printf ( "\n" );
378	printf ( " THREAD Sequential Parallel Difference\n" );
379	printf ( "\n" );
380
381	for ( thread = 0; thread < thread_num; thread++ )
382	{
383	printf ( " %8d %14f %14f %14f\n", thread, result_seq[thread],
384	result_par[thread], result_seq[thread] - result_par[thread] );
385	}
386
387	printf ( "\n" );
388	printf ( " Efficiency check:\n" );
389	printf ( "\n" );
390	printf ( " Computing values in parallel should be faster:'\n" );
391	printf ( "\n" );
392	printf ( " Sequential Parallel\n" );
393	printf ( "\n" );
394	printf ( " TIME: %14f %14f\n", wtime_seq, wtime_par );
395	printf ( " RATE: %14f %14f\n", mega_rate_seq, mega_rate_par );
396	/*
397	Free memory.
398	*/
399	free ( result_par );
400	free ( result_seq );
401	free ( seed );
402
403	return;
404	}
405	/******************************************************************************/
406
407	void test03 ( )
408
409	/******************************************************************************/
410	/*
411	Purpose:
412
413	TEST03 tests R4_NOR.
414
415	Discussion:
416
417	The arrays FN, KN and WN, once set up by R4_NOR_SETUP, are "read only" when
418	accessed by R4_NOR. So we only need to have one copy of these arrays,
419	and they can be shared.
420
421	Licensing:
422
423	This code is distributed under the GNU LGPL license.
424
425	Modified:
426
427	04 October 2013
428
429	Author:
430
431	John Burkardt
432	*/
433	{
434	float fn[128];
435	uint32_t jsr;
436	uint32_t jsr_value;
437	uint32_t kn[128];
438	double mega_rate_par;
439	double mega_rate_seq;
440	int r;
441	int r_num = 1000;
442	float r4_value;
443	float *result_par;
444	float *result_seq;
445	int s;
446	int s_num = 10000;
447	uint32_t *seed;
448	int thread;
449	int thread_num;
450	float wn[128];
451	double wtime_par;
452	double wtime_seq;
453
454	printf ( "\n" );
455	printf ( "TEST03\n" );
456	printf ( " R4_NOR computes normal random single precision real values.\n" );
457	printf ( " Since the output is completely determined\n" );
458	printf ( " by the input value of SEED and the tables, we can run in\n" );
459	printf ( " parallel as long as we make an array of seeds and share the tables.\n" );
460	/*
461	Set up the SEED array and the tables, which will be used for both sequential
462	and parallel computations.
463	*/
464	# pragma omp parallel
465	{
466	# pragma omp master
467	{
468	thread_num = omp_get_num_threads ( );
469
470	printf ( "\n" );
471	printf ( " The number of threads is %d\n", thread_num );
472	}
473	}
474	seed = ( uint32_t * ) malloc ( thread_num * sizeof ( uint32_t ) );
475	result_seq = ( float * ) malloc ( thread_num * sizeof ( float ) );
476	result_par = ( float * ) malloc ( thread_num * sizeof ( float ) );
477
478	r4_nor_setup ( kn, fn, wn );
479	/*
480	Sequential execution.
481	The sequential execution will only match the parallel execution if we can
482	guarantee that the parallel threads are scheduled to execute the R loop
483	consecutively.
484	*/
485	jsr = 123456789;
486
487	for ( thread = 0; thread < thread_num; thread++ )
488	{
489	seed[thread] = shr3_seeded ( &jsr );
490	}
491
492	wtime_seq = omp_get_wtime ( );
493
494	for ( r = 0; r < r_num; r++ )
495	{
496	thread = ( r % thread_num );
497
498	jsr = seed[thread];
499
500	for ( s = 0; s < s_num; s++ )
501	{
502	r4_value = r4_nor ( &jsr, kn, fn, wn );
503	}
504	result_seq[thread] = r4_value;
505	seed[thread] = jsr;
506	}
507
508	wtime_seq = omp_get_wtime ( ) - wtime_seq;
509
510	mega_rate_seq = ( double ) ( r_num ) * ( double ) ( s_num ) / wtime_seq / 1000000.0;
511	/*
512	Parallel.
513	*/
514	jsr = 123456789;
515
516	for ( thread = 0; thread < thread_num; thread++ )
517	{
518	seed[thread] = shr3_seeded ( &jsr );
519	}
520
521	wtime_par = omp_get_wtime ( );
522
523	# pragma omp parallel \
524	shared ( result_par, seed ) \
525	private ( jsr, r, r4_value, s, thread )
526	{
527
528	# pragma omp for schedule ( static, 1 )
529
530	for ( r = 0; r < r_num; r++ )
531	{
532	thread = omp_get_thread_num ( );
533
534	jsr = seed[thread];
535
536	for ( s = 0; s < s_num; s++ )
537	{
538	r4_value = r4_nor ( &jsr, kn, fn, wn );
539	}
540	result_par[thread] = r4_value;
541	seed[thread] = jsr;
542	}
543	}
544
545	wtime_par = omp_get_wtime ( ) - wtime_par;
546
547	mega_rate_par = ( double ) ( r_num ) * ( double ) ( s_num ) / wtime_par
548	/ 1000000.0;
549	/*
550	Report.
551	*/
552	printf ( "\n" );
553	printf ( " Correctness check:\n" );
554	printf ( "\n" );
555	printf ( " Computing values sequentially should reach the\n" );
556	printf ( " same result as doing it in parallel:\n" );
557	printf ( "\n" );
558	printf ( " THREAD Sequential Parallel Difference\n" );
559	printf ( "\n" );
560
561	for ( thread = 0; thread < thread_num; thread++ )
562	{
563	printf ( " %8d %14f %14f %14f\n", thread, result_seq[thread],
564	result_par[thread], result_seq[thread] - result_par[thread] );
565	}
566
567	printf ( "\n" );
568	printf ( " Efficiency check:\n" );
569	printf ( "\n" );
570	printf ( " Computing values in parallel should be faster:\n" );
571	wprintf ( "\n" );
572	printf ( " Sequential Parallel\n" );
573	printf ( "\n" );
574	printf ( " TIME: %14f %14f\n", wtime_seq, wtime_par );
575	printf ( " RATE: %14f %14f\n", mega_rate_seq, mega_rate_par );
576	/*
577	Free memory.
578	*/
579	free ( result_par );
580	free ( result_seq );
581	free ( seed );
582
583	return;
584	}
585	/******************************************************************************/
586
587	void test04 ( )
588
589	/******************************************************************************/
590	/*
591	Purpose:
592
593	TEST04 tests R4_EXP.
594
595	Discussion:
596
597	The arrays FE, KE and WE, once set up by R4_EXP_SETUP, are "read only" when
598	accessed by R4_EXP. So we only need to have one copy of these arrays,
599	and they can be shared.
600
601	Licensing:
602
603	This code is distributed under the GNU LGPL license.
604
605	Modified:
606
607	19 October 2013
608
609	Author:
610
611	John Burkardt
612	*/
613	{
614	float fe[256];
615	uint32_t jsr;
616	uint32_t jsr_value;
617	uint32_t ke[256];
618	double mega_rate_par;
619	double mega_rate_seq;
620	int r;
621	int r_num = 1000;
622	float r4_value;
623	float *result_par;
624	float *result_seq;
625	int s;
626	int s_num = 10000;
627	uint32_t *seed;
628	int thread;
629	int thread_num;
630	float we[256];
631	double wtime_par;
632	double wtime_seq;
633
634	printf ( "\n" );
635	printf ( "TEST04\n" );
636	printf ( " R4_EXP computes exponential random single precision real values.\n" );
637	printf ( " Since the output is completely determined\n" );
638	printf ( " by the input value of SEED and the tables, we can run in\n" );
639	printf ( " parallel as long as we make an array of seeds and share the tables.\n" );
640	/*
641	Set up the SEED array and the tables, which will be used for both sequential
642	and parallel computations.
643	*/
644	# pragma omp parallel
645	{
646	# pragma omp master
647	{
648	thread_num = omp_get_num_threads ( );
649
650	printf ( "\n" );
651	printf ( " The number of threads is %d\n", thread_num );
652	}
653	}
654	seed = ( uint32_t * ) malloc ( thread_num * sizeof ( uint32_t ) );
655	result_seq = ( float * ) malloc ( thread_num * sizeof ( float ) );
656	result_par = ( float * ) malloc ( thread_num * sizeof ( float ) );
657
658	r4_exp_setup ( ke, fe, we );
659	/*
660	Sequential execution.
661	The sequential execution will only match the parallel execution if we can
662	guarantee that the parallel threads are scheduled to execute the R loop
663	consecutively.
664	*/
665	jsr = 123456789;
666
667	for ( thread = 0; thread < thread_num; thread++ )
668	{
669	seed[thread] = shr3_seeded ( &jsr );
670	}
671
672	wtime_seq = omp_get_wtime ( );
673
674	for ( r = 0; r < r_num; r++ )
675	{
676	thread = ( r % thread_num );
677
678	jsr = seed[thread];
679
680	for ( s = 0; s < s_num; s++ )
681	{
682	r4_value = r4_exp ( &jsr, ke, fe, we );
683	}
684	result_seq[thread] = r4_value;
685	seed[thread] = jsr;
686	}
687
688	wtime_seq = omp_get_wtime ( ) - wtime_seq;
689
690	mega_rate_seq = ( double ) ( r_num ) * ( double ) ( s_num ) / wtime_seq
691	/ 1000000.0;
692	/*
693	Parallel.
694	*/
695	jsr = 123456789;
696
697	for ( thread = 0; thread < thread_num; thread++ )
698	{
699	seed[thread] = shr3_seeded ( &jsr );
700	}
701
702	wtime_par = omp_get_wtime ( );
703
704	# pragma omp parallel \
705	shared ( result_par, seed ) \
706	private ( jsr, r, r4_value, s, thread )
707	{
708
709	# pragma omp for schedule ( static, 1 )
710
711	for ( r = 0; r < r_num; r++ )
712	{
713	thread = omp_get_thread_num ( );
714
715	jsr = seed[thread];
716
717	for ( s = 0; s < s_num; s++ )
718	{
719	r4_value = r4_exp ( &jsr, ke, fe, we );
720	}
721	result_par[thread] = r4_value;
722	seed[thread] = jsr;
723	}
724	}
725
726	wtime_par = omp_get_wtime ( ) - wtime_par;
727
728	mega_rate_par = ( double ) ( r_num ) * ( double ) ( s_num ) / wtime_par
729	/ 1000000.0;
730	/*
731	Report.
732	*/
733	printf ( "\n" );
734	printf ( " Correctness check:\n" );
735	printf ( "\n" );
736	printf ( " Computing values sequentially should reach the\n" );
737	printf ( " same result as doing it in parallel:\n" );
738	printf ( "\n" );
739	printf ( " THREAD Sequential Parallel Difference\n" );
740	printf ( "\n" );
741
742	for ( thread = 0; thread < thread_num; thread++ )
743	{
744	printf ( " %8d %14f %14f %14f\n", thread, result_seq[thread],
745	result_par[thread], result_seq[thread] - result_par[thread] );
746	}
747
748	printf ( "\n" );
749	printf ( " Efficiency check:\n" );
750	printf ( "\n" );
751	printf ( " Computing values in parallel should be faster:\n" );
752	printf ( "\n" );
753	printf ( " Sequential Parallel\n" );
754	printf ( "\n" );
755	printf ( " TIME: %14f %14f\n", wtime_seq, wtime_par );
756	printf ( " RATE: %14f %14f\n", mega_rate_seq, mega_rate_par );
757	/*
758	Free memory.
759	*/
760	free ( result_par );
761	free ( result_seq );
762	free ( seed );
763
764	return;
765	}
766	/******************************************************************************/
767
768	float r4_exp ( uint32_t *jsr, uint32_t ke[256], float fe[256], float we[256] )
769
770	/******************************************************************************/
771	/*
772	Purpose:
773
774	R4_EXP returns an exponentially distributed single precision real value.
775
776	Discussion:
777
778	The underlying algorithm is the ziggurat method.
779
780	Before the first call to this function, the user must call R4_EXP_SETUP
781	to determine the values of KE, FE and WE.
782
783	Licensing:
784
785	This code is distributed under the GNU LGPL license.
786
787	Modified:
788
789	15 October 2013
790
791	Author:
792
793	John Burkardt
794
795	Reference:
796
797	George Marsaglia, Wai Wan Tsang,
798	The Ziggurat Method for Generating Random Variables,
799	Journal of Statistical Software,
800	Volume 5, Number 8, October 2000, seven pages.
801
802	Parameters:
803
804	Input/output, uint32_t *JSR, the seed.
805
806	Input, uint32_t KE[256], data computed by R4_EXP_SETUP.
807
808	Input, float FE[256], WE[256], data computed by R4_EXP_SETUP.
809
810	Output, float R4_EXP, an exponentially distributed random value.
811	*/
812	{
813	uint32_t iz;
814	uint32_t jz;
815	float value;
816	float x;
817
818	jz = shr3_seeded ( jsr );
819	iz = ( jz & 255 );
820
821	if ( jz < ke[iz] )
822	{
823	value = ( float ) ( jz ) * we[iz];
824	}
825	else
826	{
827	for ( ; ; )
828	{
829	if ( iz == 0 )
830	{
831	value = 7.69711 - log ( r4_uni ( jsr ) );
832	break;
833	}
834
835	x = ( float ) ( jz ) * we[iz];
836
837	if ( fe[iz] + r4_uni ( jsr ) * ( fe[iz-1] - fe[iz] ) < exp ( - x ) )
838	{
839	value = x;
840	break;
841	}
842
843	jz = shr3_seeded ( jsr );
844	iz = ( jz & 255 );
845
846	if ( jz < ke[iz] )
847	{
848	value = ( float ) ( jz ) * we[iz];
849	break;
850	}
851	}
852	}
853	return value;
854	}
855	/******************************************************************************/
856
857	void r4_exp_setup ( uint32_t ke[256], float fe[256], float we[256] )
858
859	/******************************************************************************/
860	/*
861	Purpose:
862
863	R4_EXP_SETUP sets data needed by R4_EXP.
864
865	Licensing:
866
867	This code is distributed under the GNU LGPL license.
868
869	Modified:
870
871	14 October 2013
872
873	Author:
874
875	John Burkardt
876
877	Reference:
878
879	George Marsaglia, Wai Wan Tsang,
880	The Ziggurat Method for Generating Random Variables,
881	Journal of Statistical Software,
882	Volume 5, Number 8, October 2000, seven pages.
883
884	Parameters:
885
886	Output, uint32_t KE[256], data needed by R4_EXP.
887
888	Output, float FE[256], WE[256], data needed by R4_EXP.
889	*/
890	{
891	double de = 7.697117470131487;
892	int i;
893	const double m2 = 2147483648.0;
894	double q;
895	double te = 7.697117470131487;
896	const double ve = 3.949659822581572E-03;
897
898	q = ve / exp ( - de );
899
900	ke[0] = ( uint32_t ) ( ( de / q ) * m2 );
901	ke[1] = 0;
902
903	we[0] = ( float ) ( q / m2 );
904	we[255] = ( float ) ( de / m2 );
905
906	fe[0] = 1.0;
907	fe[255] = ( float ) ( exp ( - de ) );
908
909	for ( i = 254; 1 <= i; i-- )
910	{
911	de = - log ( ve / de + exp ( - de ) );
912	ke[i+1] = ( uint32_t ) ( ( de / te ) * m2 );
913	te = de;
914	fe[i] = ( float ) ( exp ( - de ) );
915	we[i] = ( float ) ( de / m2 );
916	}
917	return;
918	}
919	/******************************************************************************/
920
921	float r4_nor ( uint32_t *jsr, uint32_t kn[128], float fn[128], float wn[128] )
922
923	/******************************************************************************/
924	/*
925	Purpose:
926
927	R4_NOR returns a normally distributed single precision real value.
928
929	Discussion:
930
931	The value returned is generated from a distribution with mean 0 and
932	variance 1.
933
934	The underlying algorithm is the ziggurat method.
935
936	Before the first call to this function, the user must call R4_NOR_SETUP
937	to determine the values of KN, FN and WN.
938
939	Thanks to Chad Wagner, 21 July 2014, for noticing a bug of the form
940	if ( x * x <= y * y ); <-- Stray semicolon!
941	{
942	break;
943	}
944
945	Licensing:
946
947	This code is distributed under the GNU LGPL license.
948
949	Modified:
950
951	21 July 2014
952
953	Author:
954
955	John Burkardt
956
957	Reference:
958
959	George Marsaglia, Wai Wan Tsang,
960	The Ziggurat Method for Generating Random Variables,
961	Journal of Statistical Software,
962	Volume 5, Number 8, October 2000, seven pages.
963
964	Parameters:
965
966	Input/output, uint32_t *JSR, the seed.
967
968	Input, uint32_t KN[128], data computed by R4_NOR_SETUP.
969
970	Input, float FN[128], WN[128], data computed by R4_NOR_SETUP.
971
972	Output, float R4_NOR, a normally distributed random value.
973	*/
974	{
975	int hz;
976	uint32_t iz;
977	const float r = 3.442620;
978	float value;
979	float x;
980	float y;
981
982	hz = ( int ) shr3_seeded ( jsr );
983	iz = ( hz & 127 );
984
985	if ( fabs ( hz ) < kn[iz] )
986	{
987	value = ( float ) ( hz ) * wn[iz];
988	}
989	else
990	{
991	for ( ; ; )
992	{
993	if ( iz == 0 )
994	{
995	for ( ; ; )
996	{
997	x = - 0.2904764 * log ( r4_uni ( jsr ) );
998	y = - log ( r4_uni ( jsr ) );
999	if ( x * x <= y + y )
1000	{
1001	break;
1002	}
1003	}
1004
1005	if ( hz <= 0 )
1006	{
1007	value = - r - x;
1008	}
1009	else
1010	{
1011	value = + r + x;
1012	}
1013	break;
1014	}
1015
1016	x = ( float ) ( hz ) * wn[iz];
1017
1018	if ( fn[iz] + r4_uni ( jsr ) * ( fn[iz-1] - fn[iz] )
1019	< exp ( - 0.5 * x * x ) )
1020	{
1021	value = x;
1022	break;
1023	}
1024
1025	hz = ( int ) shr3_seeded ( jsr );
1026	iz = ( hz & 127 );
1027
1028	if ( fabs ( hz ) < kn[iz] )
1029	{
1030	value = ( float ) ( hz ) * wn[iz];
1031	break;
1032	}
1033	}
1034	}
1035
1036	return value;
1037	}
1038	/******************************************************************************/
1039
1040	void r4_nor_setup ( uint32_t kn[128], float fn[128], float wn[128] )
1041
1042	/******************************************************************************/
1043	/*
1044	Purpose:
1045
1046	R4_NOR_SETUP sets data needed by R4_NOR.
1047
1048	Licensing:
1049
1050	This code is distributed under the GNU LGPL license.
1051
1052	Modified:
1053
1054	14 October 2013
1055
1056	Author:
1057
1058	John Burkardt
1059
1060	Reference:
1061
1062	George Marsaglia, Wai Wan Tsang,
1063	The Ziggurat Method for Generating Random Variables,
1064	Journal of Statistical Software,
1065	Volume 5, Number 8, October 2000, seven pages.
1066
1067	Parameters:
1068
1069	Output, uint32_t KN[128], data needed by R4_NOR.
1070
1071	Output, float FN[128], WN[128], data needed by R4_NOR.
1072	*/
1073	{
1074	double dn = 3.442619855899;
1075	int i;
1076	const double m1 = 2147483648.0;
1077	double q;
1078	double tn = 3.442619855899;
1079	const double vn = 9.91256303526217E-03;
1080
1081	q = vn / exp ( - 0.5 * dn * dn );
1082
1083	kn[0] = ( uint32_t ) ( ( dn / q ) * m1 );
1084	kn[1] = 0;
1085
1086	wn[0] = ( float ) ( q / m1 );
1087	wn[127] = ( float ) ( dn / m1 );
1088
1089	fn[0] = 1.0;
1090	fn[127] = ( float ) ( exp ( - 0.5 * dn * dn ) );
1091
1092	for ( i = 126; 1 <= i; i-- )
1093	{
1094	dn = sqrt ( - 2.0 * log ( vn / dn + exp ( - 0.5 * dn * dn ) ) );
1095	kn[i+1] = ( uint32_t ) ( ( dn / tn ) * m1 );
1096	tn = dn;
1097	fn[i] = ( float ) ( exp ( - 0.5 * dn * dn ) );
1098	wn[i] = ( float ) ( dn / m1 );
1099	}
1100
1101	return;
1102	}
1103	/******************************************************************************/
1104
1105	float r4_uni ( uint32_t *jsr )
1106
1107	/******************************************************************************/
1108	/*
1109	Purpose:
1110
1111	R4_UNI returns a uniformly distributed real value.
1112
1113	Licensing:
1114
1115	This code is distributed under the GNU LGPL license.
1116
1117	Modified:
1118
1119	04 October 2013
1120
1121	Author:
1122
1123	John Burkardt
1124
1125	Reference:
1126
1127	George Marsaglia, Wai Wan Tsang,
1128	The Ziggurat Method for Generating Random Variables,
1129	Journal of Statistical Software,
1130	Volume 5, Number 8, October 2000, seven pages.
1131
1132	Parameters:
1133
1134	Input/output, uint32_t *JSR, the seed.
1135
1136	Output, float R4_UNI, a uniformly distributed random value in
1137	the range [0,1].
1138	*/
1139	{
1140	uint32_t jsr_input;
1141	float value;
1142
1143	jsr_input = *jsr;
1144
1145	jsr = ( jsr ^ ( *jsr << 13 ) );
1146	jsr = ( jsr ^ ( *jsr >> 17 ) );
1147	jsr = ( jsr ^ ( *jsr << 5 ) );
1148
1149	value = fmod ( 0.5 + ( float ) ( jsr_input + *jsr ) / 65536.0 / 65536.0, 1.0 );
1150
1151	return value;
1152	}
1153	/******************************************************************************/
1154
1155	uint32_t shr3_seeded ( uint32_t *jsr )
1156
1157	/******************************************************************************/
1158	/*
1159	Purpose:
1160
1161	SHR3_SEEDED evaluates the SHR3 generator for integers.
1162
1163	Discussion:
1164
1165	Thanks to Dirk Eddelbuettel for pointing out that this code needed to
1166	use the uint32_t data type in order to execute properly in 64 bit mode,
1167	03 October 2013.
1168
1169	Licensing:
1170
1171	This code is distributed under the GNU LGPL license.
1172
1173	Modified:
1174
1175	04 October 2013
1176
1177	Author:
1178
1179	John Burkardt
1180
1181	Reference:
1182
1183	George Marsaglia, Wai Wan Tsang,
1184	The Ziggurat Method for Generating Random Variables,
1185	Journal of Statistical Software,
1186	Volume 5, Number 8, October 2000, seven pages.
1187
1188	Parameters:
1189
1190	Input/output, uint32_t *JSR, the seed, which is updated
1191	on each call.
1192
1193	Output, uint32_t SHR3_SEEDED, the new value.
1194	*/
1195	{
1196	uint32_t value;
1197
1198	value = *jsr;
1199
1200	jsr = ( jsr ^ ( *jsr << 13 ) );
1201	jsr = ( jsr ^ ( *jsr >> 17 ) );
1202	jsr = ( jsr ^ ( *jsr << 5 ) );
1203
1204	value = value + *jsr;
1205
1206	return value;
1207	}
1208	/******************************************************************************/
1209
1210	void timestamp ( void )
1211
1212	/******************************************************************************/
1213	/*
1214	Purpose:
1215
1216	TIMESTAMP prints the current YMDHMS date as a time stamp.
1217
1218	Example:
1219
1220	31 May 2001 09:45:54 AM
1221
1222	Licensing:
1223
1224	This code is distributed under the GNU LGPL license.
1225
1226	Modified:
1227
1228	24 September 2003
1229
1230	Author:
1231
1232	John Burkardt
1233
1234	Parameters:
1235
1236	None
1237	*/
1238	{
1239	# define TIME_SIZE 40
1240
1241	static char time_buffer[TIME_SIZE];
1242	const struct tm *tm;
1243	size_t len;
1244	time_t now;
1245
1246	now = time ( NULL );
1247	tm = localtime ( &now );
1248
1249	len = strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm );
1250
1251	printf ( "%s\n", time_buffer );
1252
1253	return;
1254	# undef TIME_SIZE
1255	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: CIVL/examples/omp/ziggurat_openmp.c@ bb03188

Download in other formats: