source: CIVL/examples/mpi-omp/AMG2013/seq_mv/csr_matvec.c

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 18.5 KB
Line 
1/*BHEADER**********************************************************************
2 * Copyright (c) 2008, Lawrence Livermore National Security, LLC.
3 * Produced at the Lawrence Livermore National Laboratory.
4 * This file is part of HYPRE. See file COPYRIGHT for details.
5 *
6 * HYPRE is free software; you can redistribute it and/or modify it under the
7 * terms of the GNU Lesser General Public License (as published by the Free
8 * Software Foundation) version 2.1 dated February 1999.
9 *
10 * $Revision: 2.4 $
11 ***********************************************************************EHEADER*/
12
13
14/******************************************************************************
15 *
16 * Matvec functions for hypre_CSRMatrix class.
17 *
18 *****************************************************************************/
19
20#include "headers.h"
21#include <assert.h>
22
23/*--------------------------------------------------------------------------
24 * hypre_CSRMatrixMatvec
25 *--------------------------------------------------------------------------*/
26
27int
28hypre_CSRMatrixMatvec( double alpha,
29 hypre_CSRMatrix *A,
30 hypre_Vector *x,
31 double beta,
32 hypre_Vector *y )
33{
34 double *A_data = hypre_CSRMatrixData(A);
35 int *A_i = hypre_CSRMatrixI(A);
36 int *A_j = hypre_CSRMatrixJ(A);
37 int num_rows = hypre_CSRMatrixNumRows(A);
38 int num_cols = hypre_CSRMatrixNumCols(A);
39
40 int *A_rownnz = hypre_CSRMatrixRownnz(A);
41 int num_rownnz = hypre_CSRMatrixNumRownnz(A);
42
43 double *x_data = hypre_VectorData(x);
44 double *y_data = hypre_VectorData(y);
45 int x_size = hypre_VectorSize(x);
46 int y_size = hypre_VectorSize(y);
47 int num_vectors = hypre_VectorNumVectors(x);
48 int idxstride_y = hypre_VectorIndexStride(y);
49 int vecstride_y = hypre_VectorVectorStride(y);
50 int idxstride_x = hypre_VectorIndexStride(x);
51 int vecstride_x = hypre_VectorVectorStride(x);
52
53 double temp, tempx;
54
55 int i, j, jj;
56
57 int m;
58
59 double xpar=0.7;
60
61 int ierr = 0;
62
63
64 /*---------------------------------------------------------------------
65 * Check for size compatibility. Matvec returns ierr = 1 if
66 * length of X doesn't equal the number of columns of A,
67 * ierr = 2 if the length of Y doesn't equal the number of rows
68 * of A, and ierr = 3 if both are true.
69 *
70 * Because temporary vectors are often used in Matvec, none of
71 * these conditions terminates processing, and the ierr flag
72 * is informational only.
73 *--------------------------------------------------------------------*/
74
75 //hypre_assert( num_vectors == hypre_VectorNumVectors(y) );
76
77 if (num_cols != x_size)
78 ierr = 1;
79
80 if (num_rows != y_size)
81 ierr = 2;
82
83 if (num_cols != x_size && num_rows != y_size)
84 ierr = 3;
85
86 /*-----------------------------------------------------------------------
87 * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
88 *-----------------------------------------------------------------------*/
89
90 if (alpha == 0.0)
91 {
92#ifdef HYPRE_USING_OPENMP
93#pragma omp parallel for private(i) schedule(static)
94#endif
95 for (i = 0; i < num_rows*num_vectors; i++)
96 y_data[i] *= beta;
97
98 return ierr;
99 }
100
101 /*-----------------------------------------------------------------------
102 * y = (beta/alpha)*y
103 *-----------------------------------------------------------------------*/
104
105 temp = beta / alpha;
106
107 if (temp != 1.0)
108 {
109 if (temp == 0.0)
110 {
111#ifdef HYPRE_USING_OPENMP
112#pragma omp parallel for private(i) schedule(static)
113#endif
114 for (i = 0; i < num_rows*num_vectors; i++)
115 y_data[i] = 0.0;
116 }
117 else
118 {
119#ifdef HYPRE_USING_OPENMP
120#pragma omp parallel for private(i) schedule(static)
121#endif
122 for (i = 0; i < num_rows*num_vectors; i++)
123 y_data[i] *= temp;
124 }
125 }
126
127 /*-----------------------------------------------------------------
128 * y += A*x
129 *-----------------------------------------------------------------*/
130
131 if (num_rownnz < xpar*(num_rows))
132 {
133
134/* use rownnz pointer to do the A*x multiplication when num_rownnz is smaller than num_rows */
135#ifdef HYPRE_USING_OPENMP
136#pragma omp parallel for private(i,jj,j,m,tempx) schedule(static)
137#endif
138 for (i = 0; i < num_rownnz; i++)
139 {
140 m = A_rownnz[i];
141
142 /*
143 * for (jj = A_i[m]; jj < A_i[m+1]; jj++)
144 * {
145 * j = A_j[jj];
146 * y_data[m] += A_data[jj] * x_data[j];
147 * } */
148 if ( num_vectors==1 )
149 {
150 tempx = y_data[m];
151 for (jj = A_i[m]; jj < A_i[m+1]; jj++)
152 tempx += A_data[jj] * x_data[A_j[jj]];
153 y_data[m] = tempx;
154 }
155 else
156 for ( j=0; j<num_vectors; ++j )
157 {
158 tempx = y_data[ j*vecstride_y + m*idxstride_y ];
159 for (jj = A_i[m]; jj < A_i[m+1]; jj++)
160 tempx += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
161 y_data[ j*vecstride_y + m*idxstride_y] = tempx;
162 }
163 }
164
165 }
166 else
167 {
168#ifdef HYPRE_USING_OPENMP
169#pragma omp parallel for private(i,jj,temp,j) schedule(static)
170#endif
171 for (i = 0; i < num_rows; i++)
172 {
173 if ( num_vectors==1 )
174 {
175 temp = y_data[i];
176 for (jj = A_i[i]; jj < A_i[i+1]; jj++)
177 temp += A_data[jj] * x_data[A_j[jj]];
178 y_data[i] = temp;
179 }
180 else
181 for ( j=0; j<num_vectors; ++j )
182 {
183 temp = y_data[ j*vecstride_y + i*idxstride_y ];
184 for (jj = A_i[i]; jj < A_i[i+1]; jj++)
185 {
186 temp += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
187 }
188 y_data[ j*vecstride_y + i*idxstride_y ] = temp;
189 }
190 }
191 }
192
193
194 /*-----------------------------------------------------------------
195 * y = alpha*y
196 *-----------------------------------------------------------------*/
197
198 if (alpha != 1.0)
199 {
200#ifdef HYPRE_USING_OPENMP
201#pragma omp parallel for private(i) schedule(static)
202#endif
203 for (i = 0; i < num_rows*num_vectors; i++)
204 y_data[i] *= alpha;
205 }
206
207 return ierr;
208}
209
210/*--------------------------------------------------------------------------
211 * hypre_CSRMatrixMatvecT
212 *
213 * This version is using a different (more efficient) threading scheme
214
215 * Performs y <- alpha * A^T * x + beta * y
216 *
217 * From Van Henson's modification of hypre_CSRMatrixMatvec.
218 *--------------------------------------------------------------------------*/
219
220int
221hypre_CSRMatrixMatvecT( double alpha,
222 hypre_CSRMatrix *A,
223 hypre_Vector *x,
224 double beta,
225 hypre_Vector *y )
226{
227 double *A_data = hypre_CSRMatrixData(A);
228 int *A_i = hypre_CSRMatrixI(A);
229 int *A_j = hypre_CSRMatrixJ(A);
230 int num_rows = hypre_CSRMatrixNumRows(A);
231 int num_cols = hypre_CSRMatrixNumCols(A);
232
233 double *x_data = hypre_VectorData(x);
234 double *y_data = hypre_VectorData(y);
235 int x_size = hypre_VectorSize(x);
236 int y_size = hypre_VectorSize(y);
237 int num_vectors = hypre_VectorNumVectors(x);
238 int idxstride_y = hypre_VectorIndexStride(y);
239 int vecstride_y = hypre_VectorVectorStride(y);
240 int idxstride_x = hypre_VectorIndexStride(x);
241 int vecstride_x = hypre_VectorVectorStride(x);
242
243 double temp;
244
245 double *y_data_expand = NULL;
246 int offset = 0;
247#ifdef HYPRE_USING_OPENMP
248 int my_thread_num = 0;
249#endif
250
251 int i, j, jv, jj;
252 int num_threads;
253
254 int ierr = 0;
255
256 /*---------------------------------------------------------------------
257 * Check for size compatibility. MatvecT returns ierr = 1 if
258 * length of X doesn't equal the number of rows of A,
259 * ierr = 2 if the length of Y doesn't equal the number of
260 * columns of A, and ierr = 3 if both are true.
261 *
262 * Because temporary vectors are often used in MatvecT, none of
263 * these conditions terminates processing, and the ierr flag
264 * is informational only.
265 *--------------------------------------------------------------------*/
266
267 //hypre_assert( num_vectors == hypre_VectorNumVectors(y) );
268
269 if (num_rows != x_size)
270 ierr = 1;
271
272 if (num_cols != y_size)
273 ierr = 2;
274
275 if (num_rows != x_size && num_cols != y_size)
276 ierr = 3;
277 /*-----------------------------------------------------------------------
278 * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
279 *-----------------------------------------------------------------------*/
280
281 if (alpha == 0.0)
282 {
283#ifdef HYPRE_USING_OPENMP
284#pragma omp parallel for private(i) schedule(static)
285#endif
286 for (i = 0; i < num_cols*num_vectors; i++)
287 y_data[i] *= beta;
288
289 return ierr;
290 }
291
292 /*-----------------------------------------------------------------------
293 * y = (beta/alpha)*y
294 *-----------------------------------------------------------------------*/
295
296 temp = beta / alpha;
297
298 if (temp != 1.0)
299 {
300 if (temp == 0.0)
301 {
302#ifdef HYPRE_USING_OPENMP
303#pragma omp parallel for private(i) schedule(static)
304#endif
305 for (i = 0; i < num_cols*num_vectors; i++)
306 y_data[i] = 0.0;
307 }
308 else
309 {
310#ifdef HYPRE_USING_OPENMP
311#pragma omp parallel for private(i) schedule(static)
312#endif
313 for (i = 0; i < num_cols*num_vectors; i++)
314 y_data[i] *= temp;
315 }
316 }
317
318 /*-----------------------------------------------------------------
319 * y += A^T*x
320 *-----------------------------------------------------------------*/
321 num_threads = hypre_NumThreads();
322 if (num_threads > 1)
323 {
324 y_data_expand = hypre_CTAlloc(double, num_threads*y_size);
325
326 if ( num_vectors==1 )
327 {
328
329#ifdef HYPRE_USING_OPENMP
330#pragma omp parallel private(i,jj,j, my_thread_num, offset)
331 {
332 my_thread_num = omp_get_thread_num();
333 offset = y_size*my_thread_num;
334#pragma omp for schedule(static)
335#endif
336 for (i = 0; i < num_rows; i++)
337 {
338 for (jj = A_i[i]; jj < A_i[i+1]; jj++)
339 {
340 j = A_j[jj];
341 y_data_expand[offset + j] += A_data[jj] * x_data[i];
342 }
343 }
344#ifdef HYPRE_USING_OPENMP
345 /* implied barrier */
346#pragma omp for schedule(static)
347#endif
348 for (i = 0; i < y_size; i++)
349 {
350 for (j = 0; j < num_threads; j++)
351 {
352 y_data[i] += y_data_expand[j*y_size + i];
353 /*y_data_expand[j*y_size + i] = 0; //zero out for next time */
354 }
355 }
356#ifdef HYPRE_USING_OPENMP
357 } /* end parallel region */
358#endif
359 hypre_TFree(y_data_expand);
360 }
361 else
362 {
363 /* MULTIPLE VECTORS NOT THREADED YET */
364 for (i = 0; i < num_rows; i++)
365 {
366 for ( jv=0; jv<num_vectors; ++jv )
367 {
368 for (jj = A_i[i]; jj < A_i[i+1]; jj++)
369 {
370 j = A_j[jj];
371 y_data[ j*idxstride_y + jv*vecstride_y ] +=
372 A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x];
373 }
374 }
375 }
376 }
377
378 hypre_TFree(y_data_expand);
379 }
380 else
381 {
382 for (i = 0; i < num_rows; i++)
383 {
384 if ( num_vectors==1 )
385 {
386 for (jj = A_i[i]; jj < A_i[i+1]; jj++)
387 {
388 j = A_j[jj];
389 y_data[j] += A_data[jj] * x_data[i];
390 }
391 }
392 else
393 {
394 for ( jv=0; jv<num_vectors; ++jv )
395 {
396 for (jj = A_i[i]; jj < A_i[i+1]; jj++)
397 {
398 j = A_j[jj];
399 y_data[ j*idxstride_y + jv*vecstride_y ] +=
400 A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x ];
401 }
402 }
403 }
404 }
405 }
406 /*-----------------------------------------------------------------
407 * y = alpha*y
408 *-----------------------------------------------------------------*/
409
410 if (alpha != 1.0)
411 {
412#ifdef HYPRE_USING_OPENMP
413#pragma omp parallel for private(i) schedule(static)
414#endif
415 for (i = 0; i < num_cols*num_vectors; i++)
416 y_data[i] *= alpha;
417 }
418
419 return ierr;
420}
421
422
423/*--------------------------------------------------------------------------
424 * hypre_CSRMatrixMatvec_FF
425 *--------------------------------------------------------------------------*/
426
427int
428hypre_CSRMatrixMatvec_FF( double alpha,
429 hypre_CSRMatrix *A,
430 hypre_Vector *x,
431 double beta,
432 hypre_Vector *y,
433 int *CF_marker_x,
434 int *CF_marker_y,
435 int fpt )
436{
437 double *A_data = hypre_CSRMatrixData(A);
438 int *A_i = hypre_CSRMatrixI(A);
439 int *A_j = hypre_CSRMatrixJ(A);
440 int num_rows = hypre_CSRMatrixNumRows(A);
441 int num_cols = hypre_CSRMatrixNumCols(A);
442
443 double *x_data = hypre_VectorData(x);
444 double *y_data = hypre_VectorData(y);
445 int x_size = hypre_VectorSize(x);
446 int y_size = hypre_VectorSize(y);
447
448 double temp;
449
450 int i, jj;
451
452 int ierr = 0;
453
454
455 /*---------------------------------------------------------------------
456 * Check for size compatibility. Matvec returns ierr = 1 if
457 * length of X doesn't equal the number of columns of A,
458 * ierr = 2 if the length of Y doesn't equal the number of rows
459 * of A, and ierr = 3 if both are true.
460 *
461 * Because temporary vectors are often used in Matvec, none of
462 * these conditions terminates processing, and the ierr flag
463 * is informational only.
464 *--------------------------------------------------------------------*/
465
466 if (num_cols != x_size)
467 ierr = 1;
468
469 if (num_rows != y_size)
470 ierr = 2;
471
472 if (num_cols != x_size && num_rows != y_size)
473 ierr = 3;
474
475 /*-----------------------------------------------------------------------
476 * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
477 *-----------------------------------------------------------------------*/
478
479 if (alpha == 0.0)
480 {
481#ifdef HYPRE_USING_OPENMP
482#pragma omp parallel for private(i) schedule(static)
483#endif
484 for (i = 0; i < num_rows; i++)
485 if (CF_marker_x[i] == fpt) y_data[i] *= beta;
486
487 return ierr;
488 }
489
490 /*-----------------------------------------------------------------------
491 * y = (beta/alpha)*y
492 *-----------------------------------------------------------------------*/
493
494 temp = beta / alpha;
495
496 if (temp != 1.0)
497 {
498 if (temp == 0.0)
499 {
500#ifdef HYPRE_USING_OPENMP
501#pragma omp parallel for private(i) schedule(static)
502#endif
503 for (i = 0; i < num_rows; i++)
504 if (CF_marker_x[i] == fpt) y_data[i] = 0.0;
505 }
506 else
507 {
508#ifdef HYPRE_USING_OPENMP
509#pragma omp parallel for private(i) schedule(static)
510#endif
511 for (i = 0; i < num_rows; i++)
512 if (CF_marker_x[i] == fpt) y_data[i] *= temp;
513 }
514 }
515
516 /*-----------------------------------------------------------------
517 * y += A*x
518 *-----------------------------------------------------------------*/
519
520#ifdef HYPRE_USING_OPENMP
521#pragma omp parallel for private(i,jj,temp) schedule(static)
522#endif
523 for (i = 0; i < num_rows; i++)
524 {
525 if (CF_marker_x[i] == fpt)
526 {
527 temp = y_data[i];
528 for (jj = A_i[i]; jj < A_i[i+1]; jj++)
529 if (CF_marker_y[A_j[jj]] == fpt) temp += A_data[jj] * x_data[A_j[jj]];
530 y_data[i] = temp;
531 }
532 }
533
534
535 /*-----------------------------------------------------------------
536 * y = alpha*y
537 *-----------------------------------------------------------------*/
538
539 if (alpha != 1.0)
540 {
541#ifdef HYPRE_USING_OPENMP
542#pragma omp parallel for private(i) schedule(static)
543#endif
544 for (i = 0; i < num_rows; i++)
545 if (CF_marker_x[i] == fpt) y_data[i] *= alpha;
546 }
547
548 return ierr;
549}
550
Note: See TracBrowser for help on using the repository browser.