Context Navigation

sgefa_openmp.c

main

Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 29.7 KB

Rev	Line
[5f600f6]	1	# include <stdlib.h>
	2	# include <stdio.h>
	3	# include <math.h>
	4	# include <omp.h>
	5	# include <time.h>
	6
	7	int main ( void );
	8	void test01 ( int n );
	9	void test02 ( int n );
	10	void test03 ( int n );
	11
	12	int isamax ( int n, float x[], int incx );
	13	void matgen ( int lda, int n, float a[], float x[], float b[] );
	14	void msaxpy ( int nr, int nc, float a[], int n, float x[], float y[] );
	15	void msaxpy2 ( int nr, int nc, float a[], int n, float x[], float y[] );
	16	int msgefa ( float a[], int lda, int n, int ipvt[] );
	17	int msgefa2 ( float a[], int lda, int n, int ipvt[] );
	18	void saxpy ( int n, float a, float x[], int incx, float y[], int incy );
	19	float sdot ( int n, float x[], int incx, float y[], int incy );
	20	int sgefa ( float a[], int lda, int n, int ipvt[] );
	21	void sgesl ( float a[], int lda, int n, int ipvt[], float b[], int job );
	22	void sscal ( int n, float a, float x[], int incx );
	23	void sswap ( int n, float x[], int incx, float y[], int incy );
	24	void timestamp ( void );
	25
	26	/******************************************************************************/
	27
	28	int main ( void )
	29
	30	/******************************************************************************/
	31	/*
	32	Purpose:
	33
	34	MAIN is the main program for the SGEFA_OPENMP test program.
	35
	36	Discussion:
	37
	38	We want to compare methods of solving the linear system A*x=b.
	39
	40	The first way uses the standard sequential algorithm "SGEFA".
	41
	42	The second way uses a variant of SGEFA that has been modified to
	43	take advantage of OpenMP.
	44
	45	The third way reruns the variant code, but with OpenMP turned off.
	46
	47	Modified:
	48
	49	17 April 2009
	50
	51	Author:
	52
	53	John Burkardt
	54	*/
	55	{
	56	int n;
	57
	58	timestamp ( );
	59
	60	printf ( "\n" );
	61	printf ( "SGEFA_OPENMP\n" );
	62	printf ( " C + OpenMP version\n" );
	63
	64	printf ( "\n" );
	65	printf ( " Number of processors available = %d\n", omp_get_num_procs ( ) );
	66	printf ( " Number of threads = %d\n", omp_get_max_threads ( ) );
	67
	68	printf ( "\n" );
	69	printf ( " Algorithm Mode N Error Time\n" );
	70
	71	printf ( "\n" );
	72	n = 10;
	73	test01 ( n );
	74	test02 ( n );
	75	test03 ( n );
[e479e1e]	76	/*
[5f600f6]	77	printf ( "\n" );
	78	n = 100;
	79	test01 ( n );
	80	test02 ( n );
	81	test03 ( n );
	82
	83	printf ( "\n" );
	84	n = 1000;
	85	test01 ( n );
	86	test02 ( n );
	87	test03 ( n );
[e479e1e]	88	*/
[5f600f6]	89	printf ( "\n" );
	90	printf ( "SGEFA_OPENMP\n" );
	91	printf ( " Normal end of execution.\n" );
	92
	93	printf ( "\n" );
	94	timestamp ( );
	95
	96	return 0;
	97	}
	98	/******************************************************************************/
	99
	100	void test01 ( int n )
	101
	102	/******************************************************************************/
	103	/*
	104	Purpose:
	105
	106	TEST01 runs the sequential version of SGEFA.
	107
	108	Modified:
	109
	110	07 April 2008
	111
	112	Author:
	113
	114	John Burkardt
	115	*/
	116	{
	117	float *a;
	118	float *b;
	119	float err;
	120	int i;
	121	int info;
	122	int *ipvt;
	123	int job;
	124	int lda;
	125	double wtime;
	126	float *x;
	127	/*
	128	Generate the linear system A * x = b.
	129	*/
	130	lda = n;
	131	a = ( float * ) malloc ( lda * n * sizeof ( float ) );
	132	b = ( float * ) malloc ( n * sizeof ( float ) );
	133	x = ( float * ) malloc ( n * sizeof ( float ) );
	134
	135	matgen ( lda, n, a, x, b );
	136	/*
	137	Factor the linear system.
	138	*/
	139	ipvt = ( int * ) malloc ( n * sizeof ( int ) );
	140
	141	wtime = omp_get_wtime ( );
	142	info = sgefa ( a, lda, n, ipvt );
	143	wtime = omp_get_wtime ( ) - wtime;
	144
	145	if ( info != 0 )
	146	{
	147	printf ( "\n" );
	148	printf ( "TEST01 - Fatal error!\n" );
	149	printf ( " SGEFA reports the matrix is singular.\n" );
	150	exit ( 1 );
	151	}
	152	/*
	153	Solve the linear system.
	154	*/
	155	job = 0;
	156	sgesl ( a, lda, n, ipvt, b, job );
	157
	158	err = 0.0;
	159	for ( i = 0; i < n; i++ )
	160	{
	161	err = err + fabs ( x[i] - b[i] );
	162	}
	163	printf ( " Original Sequential %8d %10.4e %10.4e\n", n, err, wtime );
	164
	165	free ( a );
	166	free ( b );
	167	free ( ipvt );
	168	free ( x );
	169
	170	return;
	171	}
	172	/******************************************************************************/
	173
	174	void test02 ( int n )
	175
	176	/******************************************************************************/
	177	/*
	178	Purpose:
	179
	180	TEST02 runs the revised version of SGEFA in parallel.
	181
	182	Modified:
	183
	184	07 April 2008
	185
	186	Author:
	187
	188	John Burkardt
	189	*/
	190	{
	191	float *a;
	192	float *b;
	193	float err;
	194	int i;
	195	int info;
	196	int *ipvt;
	197	int job;
	198	int lda;
	199	double wtime;
	200	float *x;
	201	/*
	202	Generate the linear system A * x = b.
	203	*/
	204	lda = n;
	205	a = ( float * ) malloc ( lda * n * sizeof ( float ) );
	206	b = ( float * ) malloc ( n * sizeof ( float ) );
	207	x = ( float * ) malloc ( n * sizeof ( float ) );
	208
	209	matgen ( lda, n, a, x, b );
	210	/*
	211	Factor the linear system.
	212	*/
	213	ipvt = ( int * ) malloc ( n * sizeof ( int ) );
	214
	215	wtime = omp_get_wtime ( );
	216	info = msgefa ( a, lda, n, ipvt );
	217	wtime = omp_get_wtime ( ) - wtime;
	218
	219	if ( info != 0 )
	220	{
	221	printf ( "\n" );
	222	printf ( "TEST02 - Fatal error!\n" );
	223	printf ( " MSGEFA reports the matrix is singular.\n" );
	224	exit ( 1 );
	225	}
	226	/*
	227	Solve the linear system.
	228	*/
	229	job = 0;
	230	sgesl ( a, lda, n, ipvt, b, job );
	231
	232	err = 0.0;
	233	for ( i = 0; i < n; i++ )
	234	{
	235	err = err + fabs ( x[i] - b[i] );
	236	}
	237
	238	printf ( " Revised Parallel %8d %10.4e %10.4e\n", n, err, wtime );
	239
	240	free ( a );
	241	free ( b );
	242	free ( ipvt );
	243	free ( x );
	244
	245	return;
	246	}
	247	/******************************************************************************/
	248
	249	void test03 ( int n )
	250
	251	/******************************************************************************/
	252	/*
	253	Purpose:
	254
	255	TEST03 runs the revised version of SGEFA in sequential mode.
	256
	257	Modified:
	258
	259	07 April 2008
	260
	261	Author:
	262
	263	John Burkardt
	264	*/
	265	{
	266	float *a;
	267	float *b;
	268	float err;
	269	int i;
	270	int info;
	271	int *ipvt;
	272	int job;
	273	int lda;
	274	double wtime;
	275	float *x;
	276	/*
	277	Generate the linear system A * x = b.
	278	*/
	279	lda = n;
	280	a = ( float * ) malloc ( lda * n * sizeof ( float ) );
	281	b = ( float * ) malloc ( n * sizeof ( float ) );
	282	x = ( float * ) malloc ( n * sizeof ( float ) );
	283
	284	matgen ( lda, n, a, x, b );
	285	/*
	286	Factor the linear system.
	287	*/
	288	ipvt = ( int * ) malloc ( n * sizeof ( int ) );
	289
	290	wtime = omp_get_wtime ( );
	291	info = msgefa2 ( a, lda, n, ipvt );
	292	wtime = omp_get_wtime ( ) - wtime;
	293
	294	if ( info != 0 )
	295	{
	296	printf ( "\n" );
	297	printf ( "TEST03 - Fatal error!\n" );
	298	printf ( " MSGEFA2 reports the matrix is singular.\n" );
	299	exit ( 1 );
	300	}
	301	/*
	302	Solve the linear system.
	303	*/
	304	job = 0;
	305	sgesl ( a, lda, n, ipvt, b, job );
	306
	307	err = 0.0;
	308	for ( i = 0; i < n; i++ )
	309	{
	310	err = err + fabs ( x[i] - b[i] );
	311	}
	312
	313	printf ( " Revised Sequential %8d %10.4e %10.4e\n", n, err, wtime );
	314
	315	free ( a );
	316	free ( b );
	317	free ( ipvt );
	318	free ( x );
	319
	320	return;
	321	}
	322	/******************************************************************************/
	323
	324	int isamax ( int n, float x[], int incx )
	325
	326	/******************************************************************************/
	327	/*
	328	Purpose:
	329
	330	ISAMAX finds the index of the vector element of maximum absolute value.
	331
	332	Discussion:
	333
	334	WARNING: This index is a 1-based index, not a 0-based index!
	335
	336	Modified:
	337
	338	07 April 2008
	339
	340	Author:
	341
	342	FORTRAN77 original version by Lawson, Hanson, Kincaid, Krogh.
	343	C version by John Burkardt
	344
	345	Reference:
	346
	347	Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
	348	LINPACK User's Guide,
	349	SIAM, 1979,
	350	ISBN13: 978-0-898711-72-1,
	351	LC: QA214.L56.
	352
	353	Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
	354	Algorithm 539:
	355	Basic Linear Algebra Subprograms for Fortran Usage,
	356	ACM Transactions on Mathematical Software,
	357	Volume 5, Number 3, September 1979, pages 308-323.
	358
	359	Parameters:
	360
	361	Input, int N, the number of entries in the vector.
	362
	363	Input, float X[*], the vector to be examined.
	364
	365	Input, int INCX, the increment between successive entries of SX.
	366
	367	Output, int ISAMAX, the index of the element of maximum
	368	absolute value.
	369	*/
	370	{
	371	float xmax;
	372	int i;
	373	int ix;
	374	int value;
	375
	376	value = 0;
	377
	378	if ( n < 1 \|\| incx <= 0 )
	379	{
	380	return value;
	381	}
	382
	383	value = 1;
	384
	385	if ( n == 1 )
	386	{
	387	return value;
	388	}
	389
	390	if ( incx == 1 )
	391	{
	392	xmax = fabs ( x[0] );
	393
	394	for ( i = 1; i < n; i++ )
	395	{
	396	if ( xmax < fabs ( x[i] ) )
	397	{
	398	value = i + 1;
	399	xmax = fabs ( x[i] );
	400	}
	401	}
	402	}
	403	else
	404	{
	405	ix = 0;
	406	xmax = fabs ( x[0] );
	407	ix = ix + incx;
	408
	409	for ( i = 1; i < n; i++ )
	410	{
	411	if ( xmax < fabs ( x[ix] ) )
	412	{
	413	value = i + 1;
	414	xmax = fabs ( x[ix] );
	415	}
	416	ix = ix + incx;
	417	}
	418	}
	419
	420	return value;
	421	}
	422	/*******************************************************************************/
	423
	424	void matgen ( int lda, int n, float a[], float x[], float b[] )
	425
	426	/*******************************************************************************/
	427	/*
	428	Purpose:
	429
	430	MATGEN generates a "random" matrix for testing.
	431
	432	Modified:
	433
	434	27 April 2008
	435
	436	Author:
	437
	438	John Burkardt
	439
	440	Parameters:
	441
	442	Input, int LDA, the leading dimension of the matrix.
	443
	444	Input, int N, the order of the matrix, and the length of the vector.
	445
	446	Output, float A[LDA*N], the matrix.
	447
	448	Output, float X[N], the solution vector.
	449
	450	Output, float B[N], the right hand side vector.
	451	*/
	452	{
	453	int i;
	454	int j;
	455	int seed;
	456	float value;
	457
	458	seed = 1325;
	459	/*
	460	Set the matrix A.
	461	*/
	462	for ( j = 0; j < n; j++ )
	463	{
	464	for ( i = 0; i < n; i++ )
	465	{
	466	seed = ( 3125 * seed ) % 65536;
	467	value = ( ( float ) seed - 32768.0 ) / 16384.0;
	468	a[i+j*lda] = value;
	469	}
	470	}
	471	/*
	472	Set x.
	473	*/
	474	for ( i = 0; i < n; i++ )
	475	{
	476	x[i] = ( float ) ( i + 1 ) / ( ( float ) n );
	477	}
	478	/*
	479	Set b = A * x.
	480	*/
	481	for ( i = 0; i < n; i++ )
	482	{
	483	b[i] = 0.0;
	484	for ( j = 0; j < n; j++ )
	485	{
	486	b[i] = b[i] + a[i+jlda] x[j];
	487	}
	488	}
	489	return;
	490	}
	491	/******************************************************************************/
	492
	493	void msaxpy ( int nr, int nc, float a[], int n, float x[], float y[] )
	494
	495	/******************************************************************************/
	496	/*
	497	Purpose:
	498
	499	MSAXPY carries out multiple "SAXPY" operations.
	500
	501	Discussion:
	502
	503	This routine carries out the step of Gaussian elimination where multiples
	504	of the pivot row are added to the rows below the pivot row.
	505
	506	A single call to MSAXPY replaces multiple calls to SAXPY.
	507
	508	Modified:
	509
	510	07 April 2008
	511
	512	Author:
	513
	514	Wesley Petersen
	515
	516	Parameters:
	517
	518	Input, int NR, NC, the number of rows and columns in the matrix.
	519
	520	Input, float A[*], ...
	521
	522	Input, int N, ...
	523
	524	Input, float X[*], ...
	525
	526	Output, float Y[*], ...
	527	*/
	528	{
	529	int i,j;
	530
	531	# pragma omp parallel \
	532	shared ( a, nc, nr, x, y ) \
	533	private ( i, j )
	534
	535	# pragma omp for
	536	for ( j = 0; j < nc; j++)
	537	{
	538	for ( i = 0; i < nr; i++ )
	539	{
	540	y[i+jn] += a[jn] * x[i];
	541	}
	542	}
	543	return;
	544	}
	545	/******************************************************************************/
	546
	547	void msaxpy2 ( int nr, int nc, float a[], int n, float x[], float y[] )
	548
	549	/******************************************************************************/
	550	/*
	551	Purpose:
	552
	553	MSAXPY2 carries out multiple "SAXPY" operations.
	554
	555	Discussion:
	556
	557	This routine carries out the step of Gaussian elimination where multiples
	558	of the pivot row are added to the rows below the pivot row.
	559
	560	A single call to MSAXPY replaces multiple calls to SAXPY.
	561
	562	Modified:
	563
	564	07 April 2008
	565
	566	Author:
	567
	568	Wesley Petersen
	569
	570	Parameters:
	571
	572	Input, int NR, NC, the number of rows and columns in the matrix.
	573
	574	Input, float A[*], ...
	575
	576	Input, int N, ...
	577
	578	Input, float X[*], ...
	579
	580	Output, float Y[*], ...
	581	*/
	582	{
	583	int i,j;
	584
	585	for ( j = 0; j < nc; j++)
	586	{
	587	for ( i = 0; i < nr; i++ )
	588	{
	589	y[i+jn] += a[jn] * x[i];
	590	}
	591	}
	592	return;
	593	}
	594	/******************************************************************************/
	595
	596	int msgefa ( float a[], int lda, int n, int ipvt[] )
	597
	598	/******************************************************************************/
	599	/*
	600	Purpose:
	601
	602	MSGEFA factors a matrix by gaussian elimination.
	603
	604	Discussion:
	605
	606	Matrix references which would, mathematically, be written A(I,J)
	607	must be written here as:
	608	* A[I+J*LDA], when the value is needed, or
	609	* A+I+J*LDA, when the address is needed.
	610
	611	This variant of SGEFA uses OpenMP for improved parallel execution.
	612	The step in which multiples of the pivot row are added to individual
	613	rows has been replaced by a single call which updates the entire
	614	matrix sub-block.
	615
	616	Modified:
	617
	618	07 March 2008
	619
	620	Author:
	621
	622	FORTRAN77 original version by Cleve Moler.
	623	C version by Wesley Petersen.
	624
	625	Reference:
	626
	627	Jack Dongarra, Jim Bunch, Cleve Moler, Pete Stewart,
	628	LINPACK User's Guide,
	629	SIAM, 1979,
	630	ISBN13: 978-0-898711-72-1,
	631	LC: QA214.L56.
	632
	633	Parameters:
	634
	635	Input/output, float A[LDA*N]. On input, the matrix to be factored.
	636	On output, an upper triangular matrix and the multipliers which were
	637	used to obtain it. The factorization can be written A = L * U where
	638	L is a product of permutation and unit lower triangular matrices and
	639	U is upper triangular.
	640
	641	Input, int LDA, the leading dimension of the matrix.
	642
	643	Input, int N, the order of the matrix.
	644
	645	Output, int IPVT[N], the pivot indices.
	646
	647	Output, int MSGEFA, indicates singularity.
	648	If 0, this is the normal value, and the algorithm succeeded.
	649	If K, then on the K-th elimination step, a zero pivot was encountered.
	650	The matrix is numerically not invertible.
	651	*/
	652	{
	653	int info;
	654	int k,kp1,l,nm1;
	655	float t;
	656
	657	info = 0;
	658	nm1 = n - 1;
	659	for ( k = 0; k < nm1; k++ )
	660	{
	661	kp1 = k + 1;
	662	l = isamax ( n-k, a+k+k*lda, 1 ) + k - 1;
	663	ipvt[k] = l + 1;
	664
	665	if ( a[l+k*lda] == 0.0 )
	666	{
	667	info = k + 1;
	668	return info;
	669	}
	670
	671	if ( l != k )
	672	{
	673	t = a[l+k*lda];
	674	a[l+klda] = a[k+klda];
	675	a[k+k*lda] = t;
	676	}
	677	t = -1.0 / a[k+k*lda];
	678	sscal ( n-k-1, t, a+kp1+k*lda, 1 );
	679	/*
	680	Interchange the pivot row and the K-th row.
	681	*/
	682	if ( l != k )
	683	{
	684	sswap ( n-k-1, a+l+kp1lda, lda, a+k+kp1lda, lda );
	685	}
	686	/*
	687	Add multiples of the K-th row to rows K+1 through N.
	688	*/
	689	msaxpy ( n-k-1, n-k-1, a+k+kp1lda, n, a+kp1+klda, a+kp1+kp1*lda );
	690	}
	691
	692	ipvt[n-1] = n;
	693
	694	if ( a[n-1+(n-1)*lda] == 0.0 )
	695	{
	696	info = n;
	697	}
	698
	699	return info;
	700	}
	701	/******************************************************************************/
	702
	703	int msgefa2 ( float a[], int lda, int n, int ipvt[] )
	704
	705	/******************************************************************************/
	706	/*
	707	Purpose:
	708
	709	MSGEFA2 factors a matrix by gaussian elimination.
	710
	711	Discussion:
	712
	713	Matrix references which would, mathematically, be written A(I,J)
	714	must be written here as:
	715	* A[I+J*LDA], when the value is needed, or
	716	* A+I+J*LDA, when the address is needed.
	717
	718	This variant of SGEFA uses OpenMP for improved parallel execution.
	719	The step in which multiples of the pivot row are added to individual
	720	rows has been replaced by a single call which updates the entire
	721	matrix sub-block.
	722
	723	Modified:
	724
	725	07 March 2008
	726
	727	Author:
	728
	729	FORTRAN77 original version by Cleve Moler.
	730	C version by Wesley Petersen.
	731
	732	Reference:
	733
	734	Jack Dongarra, Jim Bunch, Cleve Moler, Pete Stewart,
	735	LINPACK User's Guide,
	736	SIAM, 1979,
	737	ISBN13: 978-0-898711-72-1,
	738	LC: QA214.L56.
	739
	740	Parameters:
	741
	742	Input/output, float A[LDA*N]. On input, the matrix to be factored.
	743	On output, an upper triangular matrix and the multipliers which were
	744	used to obtain it. The factorization can be written A = L * U where
	745	L is a product of permutation and unit lower triangular matrices and
	746	U is upper triangular.
	747
	748	Input, int LDA, the leading dimension of the matrix.
	749
	750	Input, int N, the order of the matrix.
	751
	752	Output, int IPVT[N], the pivot indices.
	753
	754	Output, int MSGEFA, indicates singularity.
	755	If 0, this is the normal value, and the algorithm succeeded.
	756	If K, then on the K-th elimination step, a zero pivot was encountered.
	757	The matrix is numerically not invertible.
	758	*/
	759	{
	760	int info;
	761	int k,kp1,l,nm1;
	762	float t;
	763
	764	info = 0;
	765	nm1 = n - 1;
	766	for ( k = 0; k < nm1; k++ )
	767	{
	768	kp1 = k + 1;
	769	l = isamax ( n-k, a+k+k*lda, 1 ) + k - 1;
	770	ipvt[k] = l + 1;
	771
	772	if ( a[l+k*lda] == 0.0 )
	773	{
	774	info = k + 1;
	775	return info;
	776	}
	777
	778	if ( l != k )
	779	{
	780	t = a[l+k*lda];
	781	a[l+klda] = a[k+klda];
	782	a[k+k*lda] = t;
	783	}
	784	t = -1.0 / a[k+k*lda];
	785	sscal ( n-k-1, t, a+kp1+k*lda, 1 );
	786	/*
	787	Interchange the pivot row and the K-th row.
	788	*/
	789	if ( l != k )
	790	{
	791	sswap ( n-k-1, a+l+kp1lda, lda, a+k+kp1lda, lda );
	792	}
	793	/*
	794	Add multiples of the K-th row to rows K+1 through N.
	795	*/
	796	msaxpy2 ( n-k-1, n-k-1, a+k+kp1lda, n, a+kp1+klda, a+kp1+kp1*lda );
	797	}
	798
	799	ipvt[n-1] = n;
	800
	801	if ( a[n-1+(n-1)*lda] == 0.0 )
	802	{
	803	info = n;
	804	}
	805
	806	return info;
	807	}
	808	/******************************************************************************/
	809
	810	void saxpy ( int n, float a, float x[], int incx, float y[], int incy )
	811
	812	/******************************************************************************/
	813	/*
	814	Purpose:
	815
	816	SAXPY computes float constant times a vector plus a vector.
	817
	818	Discussion:
	819
	820	This routine uses unrolled loops for increments equal to one.
	821
	822	Modified:
	823
	824	23 February 2006
	825
	826	Author:
	827
	828	FORTRAN77 original version by Dongarra, Moler, Bunch, Stewart.
	829	C version by John Burkardt
	830
	831	Reference:
	832
	833	Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
	834	LINPACK User's Guide,
	835	SIAM, 1979,
	836	ISBN13: 978-0-898711-72-1,
	837	LC: QA214.L56.
	838
	839	Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
	840	Basic Linear Algebra Subprograms for Fortran Usage,
	841	Algorithm 539,
	842	ACM Transactions on Mathematical Software,
	843	Volume 5, Number 3, September 1979, pages 308-323.
	844
	845	Parameters:
	846
	847	Input, int N, the number of elements in X and Y.
	848
	849	Input, float A, the multiplier of X.
	850
	851	Input, float X[*], the first vector.
	852
	853	Input, int INCX, the increment between successive entries of X.
	854
	855	Input/output, float Y[*], the second vector.
	856	On output, Y[] has been replaced by Y[] + A * X[*].
	857
	858	Input, int INCY, the increment between successive entries of Y.
	859	*/
	860	{
	861	int i;
	862	int ix;
	863	int iy;
	864	int m;
	865
	866	if ( n <= 0 )
	867	{
	868	return;
	869	}
	870
	871	if ( a == 0.0 )
	872	{
	873	return;
	874	}
	875	/*
	876	Code for unequal increments or equal increments
	877	not equal to 1.
	878	*/
	879	if ( incx != 1 \|\| incy != 1 )
	880	{
	881	if ( 0 <= incx )
	882	{
	883	ix = 0;
	884	}
	885	else
	886	{
	887	ix = ( - n + 1 ) * incx;
	888	}
	889
	890	if ( 0 <= incy )
	891	{
	892	iy = 0;
	893	}
	894	else
	895	{
	896	iy = ( - n + 1 ) * incy;
	897	}
	898
	899	for ( i = 0; i < n; i++ )
	900	{
	901	y[iy] = y[iy] + a * x[ix];
	902	ix = ix + incx;
	903	iy = iy + incy;
	904	}
	905	}
	906	/*
	907	Code for both increments equal to 1.
	908	*/
	909	else
	910	{
	911	m = n % 4;
	912
	913	for ( i = 0; i < m; i++ )
	914	{
	915	y[i] = y[i] + a * x[i];
	916	}
	917
	918	for ( i = m; i < n; i = i + 4 )
	919	{
	920	y[i ] = y[i ] + a * x[i ];
	921	y[i+1] = y[i+1] + a * x[i+1];
	922	y[i+2] = y[i+2] + a * x[i+2];
	923	y[i+3] = y[i+3] + a * x[i+3];
	924	}
	925	}
	926
	927	return;
	928	}
	929	/******************************************************************************/
	930
	931	float sdot ( int n, float x[], int incx, float y[], int incy )
	932
	933	/******************************************************************************/
	934	/*
	935	Purpose:
	936
	937	SDOT forms the dot product of two vectors.
	938
	939	Discussion:
	940
	941	This routine uses unrolled loops for increments equal to one.
	942
	943	Modified:
	944
	945	23 February 2006
	946
	947	Author:
	948
	949	FORTRAN77 original version by Dongarra, Moler, Bunch, Stewart
	950	C version by John Burkardt
	951
	952	Reference:
	953
	954	Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
	955	LINPACK User's Guide,
	956	SIAM, 1979.
	957
	958	Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
	959	Basic Linear Algebra Subprograms for Fortran Usage,
	960	Algorithm 539,
	961	ACM Transactions on Mathematical Software,
	962	Volume 5, Number 3, September 1979, pages 308-323.
	963
	964	Parameters:
	965
	966	Input, int N, the number of entries in the vectors.
	967
	968	Input, float X[*], the first vector.
	969
	970	Input, int INCX, the increment between successive entries in X.
	971
	972	Input, float Y[*], the second vector.
	973
	974	Input, int INCY, the increment between successive entries in Y.
	975
	976	Output, float SDOT, the sum of the product of the corresponding
	977	entries of X and Y.
	978	*/
	979	{
	980	int i;
	981	int ix;
	982	int iy;
	983	int m;
	984	float temp;
	985
	986	temp = 0.0;
	987
	988	if ( n <= 0 )
	989	{
	990	return temp;
	991	}
	992	/*
	993	Code for unequal increments or equal increments
	994	not equal to 1.
	995	*/
	996	if ( incx != 1 \|\| incy != 1 )
	997	{
	998	if ( 0 <= incx )
	999	{
	1000	ix = 0;
	1001	}
	1002	else
	1003	{
	1004	ix = ( - n + 1 ) * incx;
	1005	}
	1006
	1007	if ( 0 <= incy )
	1008	{
	1009	iy = 0;
	1010	}
	1011	else
	1012	{
	1013	iy = ( - n + 1 ) * incy;
	1014	}
	1015
	1016	for ( i = 0; i < n; i++ )
	1017	{
	1018	temp = temp + x[ix] * y[iy];
	1019	ix = ix + incx;
	1020	iy = iy + incy;
	1021	}
	1022	}
	1023	/*
	1024	Code for both increments equal to 1.
	1025	*/
	1026	else
	1027	{
	1028	m = n % 5;
	1029
	1030	for ( i = 0; i < m; i++ )
	1031	{
	1032	temp = temp + x[i] * y[i];
	1033	}
	1034
	1035	for ( i = m; i < n; i = i + 5 )
	1036	{
	1037	temp = temp + x[i ] * y[i ]
	1038	+ x[i+1] * y[i+1]
	1039	+ x[i+2] * y[i+2]
	1040	+ x[i+3] * y[i+3]
	1041	+ x[i+4] * y[i+4];
	1042	}
	1043	}
	1044
	1045	return temp;
	1046	}
	1047	/*******************************************************************************/
	1048
	1049	int sgefa ( float a[], int lda, int n, int ipvt[] )
	1050
	1051	/*******************************************************************************/
	1052	/*
	1053	Purpose:
	1054
	1055	SGEFA factors a matrix by gaussian elimination.
	1056
	1057	Discussion:
	1058
	1059	Matrix references which would, mathematically, be written A(I,J)
	1060	must be written here as:
	1061	* A[I+J*LDA], when the value is needed, or
	1062	* A+I+J*LDA, when the address is needed.
	1063
	1064	Modified:
	1065
	1066	07 March 2008
	1067
	1068	Author:
	1069
	1070	FORTRAN77 original version by Cleve Moler.
	1071	C version by John Burkardt.
	1072
	1073	Reference:
	1074
	1075	Jack Dongarra, Jim Bunch, Cleve Moler, Pete Stewart,
	1076	LINPACK User's Guide,
	1077	SIAM, 1979,
	1078	ISBN13: 978-0-898711-72-1,
	1079	LC: QA214.L56.
	1080
	1081	Parameters:
	1082
	1083	Input/output, float A[LDA*N]. On input, the matrix to be factored.
	1084	On output, an upper triangular matrix and the multipliers which were
	1085	used to obtain it. The factorization can be written A = L * U where
	1086	L is a product of permutation and unit lower triangular matrices and
	1087	U is upper triangular.
	1088
	1089	Input, int LDA, the leading dimension of the matrix.
	1090
	1091	Input, int N, the order of the matrix.
	1092
	1093	Output, int IPVT[N], the pivot indices.
	1094
	1095	Output, int SGEFA, indicates singularity.
	1096	If 0, this is the normal value, and the algorithm succeeded.
	1097	If K, then on the K-th elimination step, a zero pivot was encountered.
	1098	The matrix is numerically not invertible.
	1099	*/
	1100	{
	1101	int j;
	1102	int info;
	1103	int k;
	1104	int kp1;
	1105	int l;
	1106	int nm1;
	1107	float t;
	1108
	1109	info = 0;
	1110
	1111	for ( k = 1; k <= n - 1; k++ )
	1112	{
	1113	/*
	1114	Find l = pivot index.
	1115	*/
	1116	l = isamax ( n-k+1, &a[k-1+(k-1)*lda], 1 ) + k - 1;
	1117	ipvt[k-1] = l;
	1118	/*
	1119	Zero pivot implies this column already triangularized.
	1120	*/
	1121	if ( a[l-1+(k-1)*lda] != 0.0 )
	1122	{
	1123	/*
	1124	Interchange if necessary.
	1125	*/
	1126	if ( l != k )
	1127	{
	1128	t = a[l-1+(k-1)*lda];
	1129	a[l-1+(k-1)lda] = a[k-1+(k-1)lda];
	1130	a[k-1+(k-1)*lda] = t;
	1131	}
	1132	/*
	1133	Compute multipliers.
	1134	*/
	1135	t = - 1.0 / a[k-1+(k-1)*lda];
	1136	sscal ( n-k, t, &a[k+(k-1)*lda], 1 );
	1137	/*
	1138	Row elimination with column indexing.
	1139	*/
	1140	for ( j = k + 1; j <= n; j++ )
	1141	{
	1142	t = a[l-1+(j-1)*lda];
	1143	if (l != k)
	1144	{
	1145	a[l-1+(j-1)lda] = a[k-1+(j-1)lda];
	1146	a[k-1+(j-1)*lda] = t;
	1147	}
	1148	saxpy ( n-k, t, &a[k+(k-1)lda], 1, &a[k+(j-1)lda], 1 );
	1149	}
	1150	}
	1151	else
	1152	{
	1153	info = k;
	1154	}
	1155	}
	1156	ipvt[n-1] = n;
	1157
	1158	if (a[n-1+(n-1)*lda] == 0.0 )
	1159	{
	1160	info = n - 1;
	1161	}
	1162	return info;
	1163	}
	1164	/******************************************************************************/
	1165
	1166	void sgesl ( float a[], int lda, int n, int ipvt[], float b[], int job )
	1167
	1168	/******************************************************************************/
	1169	/*
	1170	Purpose:
	1171
	1172	SGESL solves a real general linear system A * X = B.
	1173
	1174	Discussion:
	1175
	1176	SGESL can solve either of the systems A * X = B or A' * X = B.
	1177
	1178	The system matrix must have been factored by SGECO or SGEFA.
	1179
	1180	A division by zero will occur if the input factor contains a
	1181	zero on the diagonal. Technically this indicates singularity
	1182	but it is often caused by improper arguments or improper
	1183	setting of LDA. It will not occur if the subroutines are
	1184	called correctly and if SGECO has set 0.0 < RCOND
	1185	or SGEFA has set INFO == 0.
	1186
	1187	Modified:
	1188
	1189	04 April 2006
	1190
	1191	Author:
	1192
	1193	FORTRAN77 original by Dongarra, Moler, Bunch and Stewart.
	1194	C translation by John Burkardt.
	1195
	1196	Reference:
	1197
	1198	Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
	1199	LINPACK User's Guide,
	1200	SIAM, (Society for Industrial and Applied Mathematics),
	1201	3600 University City Science Center,
	1202	Philadelphia, PA, 19104-2688.
	1203	ISBN: 0-89871-172-X
	1204
	1205	Parameters:
	1206
	1207	Input, float A[LDA*N], the output from SGECO or SGEFA.
	1208
	1209	Input, int LDA, the leading dimension of A.
	1210
	1211	Input, int N, the order of the matrix A.
	1212
	1213	Input, int IPVT[N], the pivot vector from SGECO or SGEFA.
	1214
	1215	Input/output, float B[N].
	1216	On input, the right hand side vector.
	1217	On output, the solution vector.
	1218
	1219	Input, int JOB.
	1220	0, solve A * X = B;
	1221	nonzero, solve A' * X = B.
	1222	*/
	1223	{
	1224	int k;
	1225	int l;
	1226	float t;
	1227	/*
	1228	Solve A * X = B.
	1229	*/
	1230	if ( job == 0 )
	1231	{
	1232	for ( k = 1; k <= n-1; k++ )
	1233	{
	1234	l = ipvt[k-1];
	1235	t = b[l-1];
	1236
	1237	if ( l != k )
	1238	{
	1239	b[l-1] = b[k-1];
	1240	b[k-1] = t;
	1241	}
	1242	saxpy ( n-k, t, a+k+(k-1)*lda, 1, b+k, 1 );
	1243	}
	1244
	1245	for ( k = n; 1 <= k; k-- )
	1246	{
	1247	b[k-1] = b[k-1] / a[k-1+(k-1)*lda];
	1248	t = -b[k-1];
	1249	saxpy ( k-1, t, a+0+(k-1)*lda, 1, b, 1 );
	1250	}
	1251	}
	1252	/*
	1253	Solve A' * X = B.
	1254	*/
	1255	else
	1256	{
	1257	for ( k = 1; k <= n; k++ )
	1258	{
	1259	t = sdot ( k-1, a+0+(k-1)*lda, 1, b, 1 );
	1260	b[k-1] = ( b[k-1] - t ) / a[k-1+(k-1)*lda];
	1261	}
	1262
	1263	for ( k = n-1; 1 <= k; k-- )
	1264	{
	1265	b[k-1] = b[k-1] + sdot ( n-k, a+k+(k-1)*lda, 1, b+k, 1 );
	1266	l = ipvt[k-1];
	1267
	1268	if ( l != k )
	1269	{
	1270	t = b[l-1];
	1271	b[l-1] = b[k-1];
	1272	b[k-1] = t;
	1273	}
	1274	}
	1275	}
	1276	return;
	1277	}
	1278	/******************************************************************************/
	1279
	1280	void sscal ( int n, float sa, float x[], int incx )
	1281
	1282	/******************************************************************************/
	1283	/*
	1284	Purpose:
	1285
	1286	SSCAL scales a float vector by a constant.
	1287
	1288	Modified:
	1289
	1290	23 February 2006
	1291
	1292	Author:
	1293
	1294	Jack Dongarra
	1295	C version by John Burkardt
	1296
	1297	Reference:
	1298
	1299	Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
	1300	LINPACK User's Guide,
	1301	SIAM, 1979,
	1302	ISBN13: 978-0-898711-72-1,
	1303	LC: QA214.L56.
	1304
	1305	Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
	1306	Basic Linear Algebra Subprograms for Fortran Usage,
	1307	Algorithm 539,
	1308	ACM Transactions on Mathematical Software,
	1309	Volume 5, Number 3, September 1979, pages 308-323.
	1310
	1311	Parameters:
	1312
	1313	Input, int N, the number of entries in the vector.
	1314
	1315	Input, float SA, the multiplier.
	1316
	1317	Input/output, float X[*], the vector to be scaled.
	1318
	1319	Input, int INCX, the increment between successive entries of X.
	1320	*/
	1321	{
	1322	int i;
	1323	int ix;
	1324	int m;
	1325
	1326	if ( n <= 0 )
	1327	{
	1328	}
	1329	else if ( incx == 1 )
	1330	{
	1331	m = n % 5;
	1332
	1333	for ( i = 0; i < m; i++ )
	1334	{
	1335	x[i] = sa * x[i];
	1336	}
	1337
	1338	for ( i = m; i < n; i = i + 5 )
	1339	{
	1340	x[i] = sa * x[i];
	1341	x[i+1] = sa * x[i+1];
	1342	x[i+2] = sa * x[i+2];
	1343	x[i+3] = sa * x[i+3];
	1344	x[i+4] = sa * x[i+4];
	1345	}
	1346	}
	1347	else
	1348	{
	1349	if ( 0 <= incx )
	1350	{
	1351	ix = 0;
	1352	}
	1353	else
	1354	{
	1355	ix = ( - n + 1 ) * incx;
	1356	}
	1357
	1358	for ( i = 0; i < n; i++ )
	1359	{
	1360	x[ix] = sa * x[ix];
	1361	ix = ix + incx;
	1362	}
	1363
	1364	}
	1365
	1366	return;
	1367	}
	1368	/******************************************************************************/
	1369
	1370	void sswap ( int n, float x[], int incx, float y[], int incy )
	1371
	1372	/******************************************************************************/
	1373	/*
	1374	Purpose:
	1375
	1376	SSWAP interchanges two float vectors.
	1377
	1378	Modified:
	1379
	1380	23 February 2006
	1381
	1382	Author:
	1383
	1384	C version by John Burkardt
	1385
	1386	Reference:
	1387
	1388	Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
	1389	LINPACK User's Guide,
	1390	SIAM, 1979,
	1391	ISBN13: 978-0-898711-72-1,
	1392	LC: QA214.L56.
	1393
	1394	Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
	1395	Basic Linear Algebra Subprograms for Fortran Usage,
	1396	Algorithm 539,
	1397	ACM Transactions on Mathematical Software,
	1398	Volume 5, Number 3, September 1979, pages 308-323.
	1399
	1400	Parameters:
	1401
	1402	Input, int N, the number of entries in the vectors.
	1403
	1404	Input/output, float X[*], one of the vectors to swap.
	1405
	1406	Input, int INCX, the increment between successive entries of X.
	1407
	1408	Input/output, float Y[*], one of the vectors to swap.
	1409
	1410	Input, int INCY, the increment between successive elements of Y.
	1411	*/
	1412	{
	1413	int i;
	1414	int ix;
	1415	int iy;
	1416	int m;
	1417	float temp;
	1418
	1419	if ( n <= 0 )
	1420	{
	1421	}
	1422	else if ( incx == 1 && incy == 1 )
	1423	{
	1424	m = n % 3;
	1425
	1426	for ( i = 0; i < m; i++ )
	1427	{
	1428	temp = x[i];
	1429	x[i] = y[i];
	1430	y[i] = temp;
	1431	}
	1432
	1433	for ( i = m; i < n; i = i + 3 )
	1434	{
	1435	temp = x[i];
	1436	x[i] = y[i];
	1437	y[i] = temp;
	1438
	1439	temp = x[i+1];
	1440	x[i+1] = y[i+1];
	1441	y[i+1] = temp;
	1442
	1443	temp = x[i+2];
	1444	x[i+2] = y[i+2];
	1445	y[i+2] = temp;
	1446	}
	1447	}
	1448	else
	1449	{
	1450	if ( 0 <= incx )
	1451	{
	1452	ix = 0;
	1453	}
	1454	else
	1455	{
	1456	ix = ( - n + 1 ) * incx;
	1457	}
	1458
	1459	if ( 0 <= incy )
	1460	{
	1461	iy = 0;
	1462	}
	1463	else
	1464	{
	1465	iy = ( - n + 1 ) * incy;
	1466	}
	1467
	1468	for ( i = 0; i < n; i++ )
	1469	{
	1470	temp = x[ix];
	1471	x[ix] = y[iy];
	1472	y[iy] = temp;
	1473	ix = ix + incx;
	1474	iy = iy + incy;
	1475	}
	1476	}
	1477	return;
	1478	}
	1479	/******************************************************************************/
	1480
	1481	void timestamp ( void )
	1482
	1483	/******************************************************************************/
	1484	/*
	1485	Purpose:
	1486
	1487	TIMESTAMP prints the current YMDHMS date as a time stamp.
	1488
	1489	Example:
	1490
	1491	31 May 2001 09:45:54 AM
	1492
	1493	Modified:
	1494
	1495	24 September 2003
	1496
	1497	Author:
	1498
	1499	John Burkardt
	1500
	1501	Parameters:
	1502
	1503	None
	1504	*/
	1505	{
	1506	# define TIME_SIZE 40
	1507
	1508	static char time_buffer[TIME_SIZE];
	1509	const struct tm *tm;
	1510	size_t len;
	1511	time_t now;
	1512
[e479e1e]	1513	//now = time ( NULL );
	1514	//tm = localtime ( &now );
[5f600f6]	1515
[e479e1e]	1516	//len = strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm );
[5f600f6]	1517
[e479e1e]	1518	//printf ( "%s\n", time_buffer );
[5f600f6]	1519
	1520	return;
	1521	# undef TIME_SIZE
	1522	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: CIVL/examples/omp/sgefa_openmp.c

Download in other formats: