Context Navigation

csr_matvec.c

main

Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 18.5 KB

Line
1	/BHEADER*********************************************************************
2	* Copyright (c) 2008, Lawrence Livermore National Security, LLC.
3	* Produced at the Lawrence Livermore National Laboratory.
4	* This file is part of HYPRE. See file COPYRIGHT for details.
5	*
6	* HYPRE is free software; you can redistribute it and/or modify it under the
7	* terms of the GNU Lesser General Public License (as published by the Free
8	* Software Foundation) version 2.1 dated February 1999.
9	*
10	* $Revision: 2.4 $
11	**********************************************************************EHEADER/
12
13
14	/******************************************************************************
15	*
16	* Matvec functions for hypre_CSRMatrix class.
17	*
18	*****************************************************************************/
19
20	#include "headers.h"
21	#include <assert.h>
22
23	/*--------------------------------------------------------------------------
24	* hypre_CSRMatrixMatvec
25	--------------------------------------------------------------------------/
26
27	int
28	hypre_CSRMatrixMatvec( double alpha,
29	hypre_CSRMatrix *A,
30	hypre_Vector *x,
31	double beta,
32	hypre_Vector *y )
33	{
34	double *A_data = hypre_CSRMatrixData(A);
35	int *A_i = hypre_CSRMatrixI(A);
36	int *A_j = hypre_CSRMatrixJ(A);
37	int num_rows = hypre_CSRMatrixNumRows(A);
38	int num_cols = hypre_CSRMatrixNumCols(A);
39
40	int *A_rownnz = hypre_CSRMatrixRownnz(A);
41	int num_rownnz = hypre_CSRMatrixNumRownnz(A);
42
43	double *x_data = hypre_VectorData(x);
44	double *y_data = hypre_VectorData(y);
45	int x_size = hypre_VectorSize(x);
46	int y_size = hypre_VectorSize(y);
47	int num_vectors = hypre_VectorNumVectors(x);
48	int idxstride_y = hypre_VectorIndexStride(y);
49	int vecstride_y = hypre_VectorVectorStride(y);
50	int idxstride_x = hypre_VectorIndexStride(x);
51	int vecstride_x = hypre_VectorVectorStride(x);
52
53	double temp, tempx;
54
55	int i, j, jj;
56
57	int m;
58
59	double xpar=0.7;
60
61	int ierr = 0;
62
63
64	/*---------------------------------------------------------------------
65	* Check for size compatibility. Matvec returns ierr = 1 if
66	* length of X doesn't equal the number of columns of A,
67	* ierr = 2 if the length of Y doesn't equal the number of rows
68	* of A, and ierr = 3 if both are true.
69	*
70	* Because temporary vectors are often used in Matvec, none of
71	* these conditions terminates processing, and the ierr flag
72	* is informational only.
73	--------------------------------------------------------------------/
74
75	//hypre_assert( num_vectors == hypre_VectorNumVectors(y) );
76
77	if (num_cols != x_size)
78	ierr = 1;
79
80	if (num_rows != y_size)
81	ierr = 2;
82
83	if (num_cols != x_size && num_rows != y_size)
84	ierr = 3;
85
86	/*-----------------------------------------------------------------------
87	* Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
88	-----------------------------------------------------------------------/
89
90	if (alpha == 0.0)
91	{
92	#ifdef HYPRE_USING_OPENMP
93	#pragma omp parallel for private(i) schedule(static)
94	#endif
95	for (i = 0; i < num_rows*num_vectors; i++)
96	y_data[i] *= beta;
97
98	return ierr;
99	}
100
101	/*-----------------------------------------------------------------------
102	* y = (beta/alpha)*y
103	-----------------------------------------------------------------------/
104
105	temp = beta / alpha;
106
107	if (temp != 1.0)
108	{
109	if (temp == 0.0)
110	{
111	#ifdef HYPRE_USING_OPENMP
112	#pragma omp parallel for private(i) schedule(static)
113	#endif
114	for (i = 0; i < num_rows*num_vectors; i++)
115	y_data[i] = 0.0;
116	}
117	else
118	{
119	#ifdef HYPRE_USING_OPENMP
120	#pragma omp parallel for private(i) schedule(static)
121	#endif
122	for (i = 0; i < num_rows*num_vectors; i++)
123	y_data[i] *= temp;
124	}
125	}
126
127	/*-----------------------------------------------------------------
128	* y += A*x
129	-----------------------------------------------------------------/
130
131	if (num_rownnz < xpar*(num_rows))
132	{
133
134	/* use rownnz pointer to do the Ax multiplication when num_rownnz is smaller than num_rows /
135	#ifdef HYPRE_USING_OPENMP
136	#pragma omp parallel for private(i,jj,j,m,tempx) schedule(static)
137	#endif
138	for (i = 0; i < num_rownnz; i++)
139	{
140	m = A_rownnz[i];
141
142	/*
143	* for (jj = A_i[m]; jj < A_i[m+1]; jj++)
144	* {
145	* j = A_j[jj];
146	* y_data[m] += A_data[jj] * x_data[j];
147	* } */
148	if ( num_vectors==1 )
149	{
150	tempx = y_data[m];
151	for (jj = A_i[m]; jj < A_i[m+1]; jj++)
152	tempx += A_data[jj] * x_data[A_j[jj]];
153	y_data[m] = tempx;
154	}
155	else
156	for ( j=0; j<num_vectors; ++j )
157	{
158	tempx = y_data[ jvecstride_y + midxstride_y ];
159	for (jj = A_i[m]; jj < A_i[m+1]; jj++)
160	tempx += A_data[jj] * x_data[ jvecstride_x + A_j[jj]idxstride_x ];
161	y_data[ jvecstride_y + midxstride_y] = tempx;
162	}
163	}
164
165	}
166	else
167	{
168	#ifdef HYPRE_USING_OPENMP
169	#pragma omp parallel for private(i,jj,temp,j) schedule(static)
170	#endif
171	for (i = 0; i < num_rows; i++)
172	{
173	if ( num_vectors==1 )
174	{
175	temp = y_data[i];
176	for (jj = A_i[i]; jj < A_i[i+1]; jj++)
177	temp += A_data[jj] * x_data[A_j[jj]];
178	y_data[i] = temp;
179	}
180	else
181	for ( j=0; j<num_vectors; ++j )
182	{
183	temp = y_data[ jvecstride_y + iidxstride_y ];
184	for (jj = A_i[i]; jj < A_i[i+1]; jj++)
185	{
186	temp += A_data[jj] * x_data[ jvecstride_x + A_j[jj]idxstride_x ];
187	}
188	y_data[ jvecstride_y + iidxstride_y ] = temp;
189	}
190	}
191	}
192
193
194	/*-----------------------------------------------------------------
195	* y = alpha*y
196	-----------------------------------------------------------------/
197
198	if (alpha != 1.0)
199	{
200	#ifdef HYPRE_USING_OPENMP
201	#pragma omp parallel for private(i) schedule(static)
202	#endif
203	for (i = 0; i < num_rows*num_vectors; i++)
204	y_data[i] *= alpha;
205	}
206
207	return ierr;
208	}
209
210	/*--------------------------------------------------------------------------
211	* hypre_CSRMatrixMatvecT
212	*
213	* This version is using a different (more efficient) threading scheme
214
215	* Performs y <- alpha * A^T * x + beta * y
216	*
217	* From Van Henson's modification of hypre_CSRMatrixMatvec.
218	--------------------------------------------------------------------------/
219
220	int
221	hypre_CSRMatrixMatvecT( double alpha,
222	hypre_CSRMatrix *A,
223	hypre_Vector *x,
224	double beta,
225	hypre_Vector *y )
226	{
227	double *A_data = hypre_CSRMatrixData(A);
228	int *A_i = hypre_CSRMatrixI(A);
229	int *A_j = hypre_CSRMatrixJ(A);
230	int num_rows = hypre_CSRMatrixNumRows(A);
231	int num_cols = hypre_CSRMatrixNumCols(A);
232
233	double *x_data = hypre_VectorData(x);
234	double *y_data = hypre_VectorData(y);
235	int x_size = hypre_VectorSize(x);
236	int y_size = hypre_VectorSize(y);
237	int num_vectors = hypre_VectorNumVectors(x);
238	int idxstride_y = hypre_VectorIndexStride(y);
239	int vecstride_y = hypre_VectorVectorStride(y);
240	int idxstride_x = hypre_VectorIndexStride(x);
241	int vecstride_x = hypre_VectorVectorStride(x);
242
243	double temp;
244
245	double *y_data_expand = NULL;
246	int offset = 0;
247	#ifdef HYPRE_USING_OPENMP
248	int my_thread_num = 0;
249	#endif
250
251	int i, j, jv, jj;
252	int num_threads;
253
254	int ierr = 0;
255
256	/*---------------------------------------------------------------------
257	* Check for size compatibility. MatvecT returns ierr = 1 if
258	* length of X doesn't equal the number of rows of A,
259	* ierr = 2 if the length of Y doesn't equal the number of
260	* columns of A, and ierr = 3 if both are true.
261	*
262	* Because temporary vectors are often used in MatvecT, none of
263	* these conditions terminates processing, and the ierr flag
264	* is informational only.
265	--------------------------------------------------------------------/
266
267	//hypre_assert( num_vectors == hypre_VectorNumVectors(y) );
268
269	if (num_rows != x_size)
270	ierr = 1;
271
272	if (num_cols != y_size)
273	ierr = 2;
274
275	if (num_rows != x_size && num_cols != y_size)
276	ierr = 3;
277	/*-----------------------------------------------------------------------
278	* Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
279	-----------------------------------------------------------------------/
280
281	if (alpha == 0.0)
282	{
283	#ifdef HYPRE_USING_OPENMP
284	#pragma omp parallel for private(i) schedule(static)
285	#endif
286	for (i = 0; i < num_cols*num_vectors; i++)
287	y_data[i] *= beta;
288
289	return ierr;
290	}
291
292	/*-----------------------------------------------------------------------
293	* y = (beta/alpha)*y
294	-----------------------------------------------------------------------/
295
296	temp = beta / alpha;
297
298	if (temp != 1.0)
299	{
300	if (temp == 0.0)
301	{
302	#ifdef HYPRE_USING_OPENMP
303	#pragma omp parallel for private(i) schedule(static)
304	#endif
305	for (i = 0; i < num_cols*num_vectors; i++)
306	y_data[i] = 0.0;
307	}
308	else
309	{
310	#ifdef HYPRE_USING_OPENMP
311	#pragma omp parallel for private(i) schedule(static)
312	#endif
313	for (i = 0; i < num_cols*num_vectors; i++)
314	y_data[i] *= temp;
315	}
316	}
317
318	/*-----------------------------------------------------------------
319	* y += A^T*x
320	-----------------------------------------------------------------/
321	num_threads = hypre_NumThreads();
322	if (num_threads > 1)
323	{
324	y_data_expand = hypre_CTAlloc(double, num_threads*y_size);
325
326	if ( num_vectors==1 )
327	{
328
329	#ifdef HYPRE_USING_OPENMP
330	#pragma omp parallel private(i,jj,j, my_thread_num, offset)
331	{
332	my_thread_num = omp_get_thread_num();
333	offset = y_size*my_thread_num;
334	#pragma omp for schedule(static)
335	#endif
336	for (i = 0; i < num_rows; i++)
337	{
338	for (jj = A_i[i]; jj < A_i[i+1]; jj++)
339	{
340	j = A_j[jj];
341	y_data_expand[offset + j] += A_data[jj] * x_data[i];
342	}
343	}
344	#ifdef HYPRE_USING_OPENMP
345	/* implied barrier */
346	#pragma omp for schedule(static)
347	#endif
348	for (i = 0; i < y_size; i++)
349	{
350	for (j = 0; j < num_threads; j++)
351	{
352	y_data[i] += y_data_expand[j*y_size + i];
353	/y_data_expand[jy_size + i] = 0; //zero out for next time */
354	}
355	}
356	#ifdef HYPRE_USING_OPENMP
357	} /* end parallel region */
358	#endif
359	hypre_TFree(y_data_expand);
360	}
361	else
362	{
363	/* MULTIPLE VECTORS NOT THREADED YET */
364	for (i = 0; i < num_rows; i++)
365	{
366	for ( jv=0; jv<num_vectors; ++jv )
367	{
368	for (jj = A_i[i]; jj < A_i[i+1]; jj++)
369	{
370	j = A_j[jj];
371	y_data[ jidxstride_y + jvvecstride_y ] +=
372	A_data[jj] * x_data[ iidxstride_x + jvvecstride_x];
373	}
374	}
375	}
376	}
377
378	hypre_TFree(y_data_expand);
379	}
380	else
381	{
382	for (i = 0; i < num_rows; i++)
383	{
384	if ( num_vectors==1 )
385	{
386	for (jj = A_i[i]; jj < A_i[i+1]; jj++)
387	{
388	j = A_j[jj];
389	y_data[j] += A_data[jj] * x_data[i];
390	}
391	}
392	else
393	{
394	for ( jv=0; jv<num_vectors; ++jv )
395	{
396	for (jj = A_i[i]; jj < A_i[i+1]; jj++)
397	{
398	j = A_j[jj];
399	y_data[ jidxstride_y + jvvecstride_y ] +=
400	A_data[jj] * x_data[ iidxstride_x + jvvecstride_x ];
401	}
402	}
403	}
404	}
405	}
406	/*-----------------------------------------------------------------
407	* y = alpha*y
408	-----------------------------------------------------------------/
409
410	if (alpha != 1.0)
411	{
412	#ifdef HYPRE_USING_OPENMP
413	#pragma omp parallel for private(i) schedule(static)
414	#endif
415	for (i = 0; i < num_cols*num_vectors; i++)
416	y_data[i] *= alpha;
417	}
418
419	return ierr;
420	}
421
422
423	/*--------------------------------------------------------------------------
424	* hypre_CSRMatrixMatvec_FF
425	--------------------------------------------------------------------------/
426
427	int
428	hypre_CSRMatrixMatvec_FF( double alpha,
429	hypre_CSRMatrix *A,
430	hypre_Vector *x,
431	double beta,
432	hypre_Vector *y,
433	int *CF_marker_x,
434	int *CF_marker_y,
435	int fpt )
436	{
437	double *A_data = hypre_CSRMatrixData(A);
438	int *A_i = hypre_CSRMatrixI(A);
439	int *A_j = hypre_CSRMatrixJ(A);
440	int num_rows = hypre_CSRMatrixNumRows(A);
441	int num_cols = hypre_CSRMatrixNumCols(A);
442
443	double *x_data = hypre_VectorData(x);
444	double *y_data = hypre_VectorData(y);
445	int x_size = hypre_VectorSize(x);
446	int y_size = hypre_VectorSize(y);
447
448	double temp;
449
450	int i, jj;
451
452	int ierr = 0;
453
454
455	/*---------------------------------------------------------------------
456	* Check for size compatibility. Matvec returns ierr = 1 if
457	* length of X doesn't equal the number of columns of A,
458	* ierr = 2 if the length of Y doesn't equal the number of rows
459	* of A, and ierr = 3 if both are true.
460	*
461	* Because temporary vectors are often used in Matvec, none of
462	* these conditions terminates processing, and the ierr flag
463	* is informational only.
464	--------------------------------------------------------------------/
465
466	if (num_cols != x_size)
467	ierr = 1;
468
469	if (num_rows != y_size)
470	ierr = 2;
471
472	if (num_cols != x_size && num_rows != y_size)
473	ierr = 3;
474
475	/*-----------------------------------------------------------------------
476	* Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
477	-----------------------------------------------------------------------/
478
479	if (alpha == 0.0)
480	{
481	#ifdef HYPRE_USING_OPENMP
482	#pragma omp parallel for private(i) schedule(static)
483	#endif
484	for (i = 0; i < num_rows; i++)
485	if (CF_marker_x[i] == fpt) y_data[i] *= beta;
486
487	return ierr;
488	}
489
490	/*-----------------------------------------------------------------------
491	* y = (beta/alpha)*y
492	-----------------------------------------------------------------------/
493
494	temp = beta / alpha;
495
496	if (temp != 1.0)
497	{
498	if (temp == 0.0)
499	{
500	#ifdef HYPRE_USING_OPENMP
501	#pragma omp parallel for private(i) schedule(static)
502	#endif
503	for (i = 0; i < num_rows; i++)
504	if (CF_marker_x[i] == fpt) y_data[i] = 0.0;
505	}
506	else
507	{
508	#ifdef HYPRE_USING_OPENMP
509	#pragma omp parallel for private(i) schedule(static)
510	#endif
511	for (i = 0; i < num_rows; i++)
512	if (CF_marker_x[i] == fpt) y_data[i] *= temp;
513	}
514	}
515
516	/*-----------------------------------------------------------------
517	* y += A*x
518	-----------------------------------------------------------------/
519
520	#ifdef HYPRE_USING_OPENMP
521	#pragma omp parallel for private(i,jj,temp) schedule(static)
522	#endif
523	for (i = 0; i < num_rows; i++)
524	{
525	if (CF_marker_x[i] == fpt)
526	{
527	temp = y_data[i];
528	for (jj = A_i[i]; jj < A_i[i+1]; jj++)
529	if (CF_marker_y[A_j[jj]] == fpt) temp += A_data[jj] * x_data[A_j[jj]];
530	y_data[i] = temp;
531	}
532	}
533
534
535	/*-----------------------------------------------------------------
536	* y = alpha*y
537	-----------------------------------------------------------------/
538
539	if (alpha != 1.0)
540	{
541	#ifdef HYPRE_USING_OPENMP
542	#pragma omp parallel for private(i) schedule(static)
543	#endif
544	for (i = 0; i < num_rows; i++)
545	if (CF_marker_x[i] == fpt) y_data[i] *= alpha;
546	}
547
548	return ierr;
549	}
550

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: CIVL/examples/mpi-omp/AMG2013/seq_mv/csr_matvec.c

Download in other formats: