Context Navigation

par_relax_more.c

main

Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 75.6 KB

Line
1	/BHEADER*********************************************************************
2	* Copyright (c) 2008, Lawrence Livermore National Security, LLC.
3	* Produced at the Lawrence Livermore National Laboratory.
4	* This file is part of HYPRE. See file COPYRIGHT for details.
5	*
6	* HYPRE is free software; you can redistribute it and/or modify it under the
7	* terms of the GNU Lesser General Public License (as published by the Free
8	* Software Foundation) version 2.1 dated February 1999.
9	*
10	* $Revision: 2.4 $
11	**********************************************************************EHEADER/
12
13
14
15
16	/******************************************************************************
17	*
18	* a few more relaxation schemes: Chebychev, FCF-Jacobi, CG and Steepest
19	* Descent
20	*
21	*****************************************************************************/
22
23	#include "headers.h"
24	#include "float.h"
25
26	#define DBL_EPSILON 2.2204460492503131e-16
27
28	int hypre_LINPACKcgtql1(int,double ,double ,int );
29
30	/******************************************************************************
31	*
32	*use max norm to estimate largest eigenvalue
33	*
34	*****************************************************************************/
35
36
37	int hypre_ParCSRMaxEigEstimate(hypre_ParCSRMatrix A, / matrix to relax with */
38	int scale, /* scale by diagonal?*/
39	double *max_eig)
40	{
41
42	double e_max;
43	double row_sum, max_norm;
44	double *col_val;
45	double temp;
46	double diag_value;
47
48	int pos_diag, neg_diag;
49	HYPRE_BigInt start_row, end_row;
50	int row_length;
51	HYPRE_BigInt *col_ind;
52	int j;
53	HYPRE_BigInt i;
54
55
56	/* estimate with the inf-norm of A - should be ok for SPD matrices */
57
58	start_row = hypre_ParCSRMatrixFirstRowIndex(A);
59	end_row = hypre_ParCSRMatrixLastRowIndex(A);
60
61	max_norm = 0.0;
62
63	pos_diag = neg_diag = 0;
64
65	for ( i = start_row; i <= end_row; i++ )
66	{
67	HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) A, i, &row_length, &col_ind, &col_val);
68
69	row_sum = 0.0;
70
71	for (j = 0; j < row_length; j++)
72	{
73	if (j==0) diag_value = fabs(col_val[j]);
74
75	row_sum += fabs(col_val[j]);
76
77	if ( col_ind[j] == i && col_val[j] > 0.0 ) pos_diag++;
78	if ( col_ind[j] == i && col_val[j] < 0.0 ) neg_diag++;
79	}
80	if (scale)
81	{
82	if (diag_value != 0.0)
83	row_sum = row_sum/diag_value;
84	}
85
86
87	if ( row_sum > max_norm ) max_norm = row_sum;
88
89	HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) A, i, &row_length, &col_ind, &col_val);
90	}
91
92	/* get max across procs */
93	MPI_Allreduce(&max_norm, &temp, 1, MPI_DOUBLE, MPI_MAX, hypre_ParCSRMatrixComm(A));
94	max_norm = temp;
95
96	/* from Charles */
97	if ( pos_diag == 0 && neg_diag > 0 ) max_norm = - max_norm;
98
99	/* eig estimates */
100	e_max = max_norm;
101
102	/* return */
103	*max_eig = e_max;
104
105	return hypre_error_flag;
106
107	}
108
109	/******************************************************************************
110	use CG to get the eigenvalue estimate
111	scale means get eig est of (D^{-1/2} A D^{-1/2}
112	******************************************************************************/
113
114	int hypre_ParCSRMaxEigEstimateCG(hypre_ParCSRMatrix A, / matrix to relax with */
115	int scale, /* scale by diagonal?*/
116	int max_iter,
117	double *max_eig,
118	double *min_eig)
119	{
120
121	int i, j, err;
122
123	hypre_ParVector *p;
124	hypre_ParVector *s;
125	hypre_ParVector *r;
126	hypre_ParVector *ds;
127	hypre_ParVector *u;
128
129
130	double *tridiag;
131	double *trioffd;
132
133	double lambda_max, max_row_sum;
134
135	double beta, gamma = 0.0, alpha, sdotp, gamma_old, alphainv;
136
137	double diag;
138
139	double lambda_min;
140
141	double s_data, p_data, ds_data, u_data;
142
143	int local_size = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A));
144
145	hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
146	double *A_diag_data = hypre_CSRMatrixData(A_diag);
147	int *A_diag_i = hypre_CSRMatrixI(A_diag);
148
149
150	/* check the size of A - don't iterate more than the size */
151	HYPRE_BigInt size = hypre_ParCSRMatrixGlobalNumRows(A);
152
153	if (size < (HYPRE_BigInt) max_iter)
154	max_iter = (int) size;
155
156	/* create some temp vectors: p, s, r , ds, u*/
157
158	r = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
159	hypre_ParCSRMatrixGlobalNumRows(A),
160	hypre_ParCSRMatrixRowStarts(A));
161	hypre_ParVectorInitialize(r);
162	hypre_ParVectorSetPartitioningOwner(r,0);
163
164	p = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
165	hypre_ParCSRMatrixGlobalNumRows(A),
166	hypre_ParCSRMatrixRowStarts(A));
167	hypre_ParVectorInitialize(p);
168	hypre_ParVectorSetPartitioningOwner(p,0);
169
170
171	s = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
172	hypre_ParCSRMatrixGlobalNumRows(A),
173	hypre_ParCSRMatrixRowStarts(A));
174	hypre_ParVectorInitialize(s);
175	hypre_ParVectorSetPartitioningOwner(s,0);
176
177
178	ds = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
179	hypre_ParCSRMatrixGlobalNumRows(A),
180	hypre_ParCSRMatrixRowStarts(A));
181	hypre_ParVectorInitialize(ds);
182	hypre_ParVectorSetPartitioningOwner(ds,0);
183
184	u = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
185	hypre_ParCSRMatrixGlobalNumRows(A),
186	hypre_ParCSRMatrixRowStarts(A));
187	hypre_ParVectorInitialize(u);
188	hypre_ParVectorSetPartitioningOwner(u,0);
189
190
191	/* point to local data */
192	s_data = hypre_VectorData(hypre_ParVectorLocalVector(s));
193	p_data = hypre_VectorData(hypre_ParVectorLocalVector(p));
194	ds_data = hypre_VectorData(hypre_ParVectorLocalVector(ds));
195	u_data = hypre_VectorData(hypre_ParVectorLocalVector(u));
196
197	/* make room for tri-diag matrix */
198	tridiag = hypre_CTAlloc(double, max_iter+1);
199	trioffd = hypre_CTAlloc(double, max_iter+1);
200	for (i=0; i < max_iter + 1; i++)
201	{
202	tridiag[i] = 0;
203	trioffd[i] = 0;
204	}
205
206
207	/* set residual to random */
208	hypre_ParVectorSetRandomValues(r,1);
209
210	if (scale)
211	{
212	for (i = 0; i < local_size; i++)
213	{
214	diag = A_diag_data[A_diag_i[i]];
215	ds_data[i] = 1/sqrt(diag);
216	}
217
218	}
219	else
220	{
221	/* set ds to 1 */
222	hypre_ParVectorSetConstantValues(ds,1.0);
223	}
224
225
226	/* gamma = <r,Cr> */
227	gamma = hypre_ParVectorInnerProd(r,p);
228
229	/* for the initial filling of the tridiag matrix */
230	beta = 1.0;
231	max_row_sum = 0.0;
232
233	i = 0;
234	while (i < max_iter)
235	{
236
237	/* s = Cr /
238	/* TO DO: C = diag scale */
239	hypre_ParVectorCopy(r, s);
240
241	/gamma = <r,Cr> /
242	gamma_old = gamma;
243	gamma = hypre_ParVectorInnerProd(r,s);
244
245	if (i==0)
246	{
247	beta = 1.0;
248	/* p_0 = Cr /
249	hypre_ParVectorCopy(s, p);
250	}
251	else
252	{
253	/* beta = gamma / gamma_old */
254	beta = gamma / gamma_old;
255
256	/* p = s + beta p */
257	#ifdef HYPRE_USING_OPENMP
258	#pragma omp parallel for private(j) schedule(static)
259	#endif
260	for (j=0; j < local_size; j++)
261	{
262	p_data[j] = s_data[j] + beta*p_data[j];
263	}
264	}
265
266	if (scale)
267	{
268	/* s = D^{-1/2}AD^{-1/2}p */
269	for (j = 0; j < local_size; j++)
270	{
271	u_data[j] = ds_data[j] * p_data[j];
272	}
273	hypre_ParCSRMatrixMatvec(1.0, A, u, 0.0, s);
274	for (j = 0; j < local_size; j++)
275	{
276	s_data[j] = ds_data[j] * s_data[j];
277	}
278
279
280	}
281	else
282	{
283	/* s = Ap /
284	hypre_ParCSRMatrixMatvec(1.0, A, p, 0.0, s);
285	}
286
287	/* <s,p> */
288	sdotp = hypre_ParVectorInnerProd(s,p);
289
290	/* alpha = gamma / <s,p> */
291	alpha = gamma/sdotp;
292
293	/* get tridiagonal matrix */
294	alphainv = 1.0/alpha;
295
296	tridiag[i+1] = alphainv;
297	tridiag[i] *= beta;
298	tridiag[i] += alphainv;
299
300	trioffd[i+1] = alphainv;
301	trioffd[i] *= sqrt(beta);
302
303	/* x = x + alphap /
304	/* don't need */
305
306	/* r = r - alphas /
307	hypre_ParVectorAxpy( -alpha, s, r);
308
309	i++;
310
311	}
312
313	/* eispack routine - eigenvalues return in tridiag and ordered*/
314	hypre_LINPACKcgtql1(&i,tridiag,trioffd,&err);
315
316	lambda_max = tridiag[i-1];
317	lambda_min = tridiag[0];
318	/* printf("linpack max eig est = %g\n", lambda_max);*/
319	/* printf("linpack min eig est = %g\n", lambda_min);*/
320
321
322	hypre_ParVectorDestroy(r);
323	hypre_ParVectorDestroy(s);
324	hypre_ParVectorDestroy(p);
325	hypre_ParVectorDestroy(ds);
326	hypre_ParVectorDestroy(u);
327
328	/* return */
329	*max_eig = lambda_max;
330	*min_eig = lambda_min;
331
332	return hypre_error_flag;
333
334	}
335
336	/******************************************************************************
337
338
339	Chebyshev relaxation - iterative implementation
340	(See Saad "Iterative Methods for Sparse Systems", Alg. 12.1
341	plus we can scale residual by inv(M) = 1/diag(A) so that we have Chebyshev
342	accelerated jacobi)
343
344
345	NOT USED CURRENTLY
346
347	******************************************************************************/
348
349	int hypre_ParCSRRelax_Cheby3(hypre_ParCSRMatrix A, / matrix to relax with */
350	hypre_ParVector f, / right-hand side */
351	double max_eig, /* u.b = max. e-val est.1.1 /
352	double eig_ratio, /* l.b = max_eig/eig ratio */
353	int order, /* polynomial order */
354	hypre_ParVector u, / initial/updated approximation */
355	hypre_ParVector v / temporary vector */,
356	hypre_ParVector v2 /another temp vector */ )
357	{
358
359	/* See Saad "Iterative Methods for Sparse Systems", Alg. 12.1 */
360	/* plus we can scale residual by inv(M) = 1/diag(A) so that we have Chebyshev
361	accelerated jacobi */
362
363	hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
364	double *A_diag_data = hypre_CSRMatrixData(A_diag);
365	int *A_diag_i = hypre_CSRMatrixI(A_diag);
366
367	double *u_data = hypre_VectorData(hypre_ParVectorLocalVector(u));
368	double *v_data = hypre_VectorData(hypre_ParVectorLocalVector(v));
369
370	double *dk = hypre_VectorData(hypre_ParVectorLocalVector(v2));
371
372	double theta, delta, sigma;
373	double p_k, p_kp1, temp1, temp2, diag, scale;
374	double upper_bound, lower_bound;
375
376	int i, j;
377	int num_rows = hypre_CSRMatrixNumRows(A_diag);
378
379
380	/* make sure we are large enough - Adams et al. 2003 */
381	upper_bound = max_eig * 1.1;
382	lower_bound = max_eig/eig_ratio;
383
384
385	/* parameters */
386	theta = (upper_bound + lower_bound)/2;
387	delta = (upper_bound - lower_bound)/2;
388	sigma = theta/delta;
389
390	/* set v = f */
391	hypre_ParVectorCopy(f, v);
392
393	/* get residual: v = f-Au /
394	hypre_ParCSRMatrixMatvec(-1.0, A, u, 1.0, v);
395
396	/* p_0*/
397	p_k = 1/sigma;
398
399	/first order /
400	temp1 = 1/theta;
401
402	/d_0 = 1/theta * inv(M)r_0 - M is Jacobi*/
403	/* x_1 = x_0 + d_0 */
404	#ifdef HYPRE_USING_OPENMP
405	#pragma omp parallel for private(i,diag,scale) schedule(static)
406	#endif
407	for (i = 0; i < num_rows; i++)
408	{
409	diag = A_diag_data[A_diag_i[i]];
410
411	scale = temp1/diag;
412	dk[i] = scale*v_data[i];
413	u_data[i] += dk[i];
414
415	}
416
417	/* higher order */
418	for (j = 1; j < order; j++)
419	{
420	/* get residual: v = f-Au /
421	hypre_ParVectorCopy(f, v);
422	hypre_ParCSRMatrixMatvec(-1.0, A, u, 1.0, v);
423
424	p_kp1 = 1.0/(2.0*sigma - p_k);
425	temp1 = p_kp1*p_k;
426	temp2 = 2.0*p_kp1/delta;
427	#ifdef HYPRE_USING_OPENMP
428	#pragma omp parallel for private(i,diag,scale) schedule(static)
429	#endif
430	for (i = 0; i < num_rows; i++)
431	{
432	diag = A_diag_data[A_diag_i[i]];
433
434	scale = temp2/diag;
435	dk[i] = temp1dk[i] + scalev_data[i];
436	u_data[i] += dk[i];
437	}
438	p_k = p_kp1;
439	}
440
441
442	return hypre_error_flag;
443
444
445	}
446
447
448
449	/******************************************************************************
450
451	Chebyshev relaxation
452
453
454	Can specify order 1-4 (this is the order of the resid polynomial)- here we
455	explicitly code the coefficients (instead of
456	iteratively determining)
457
458
459	variant 0: standard chebyshev
460	this is rlx 11 if scale = 0, and 16 if scale == 1
461
462	variant 1: modified cheby: T(t)* f(t) where f(t) = (1-b/t)
463	this is rlx 15 if scale = 0, and 17 if scale == 1
464
465	ratio indicates the percentage of the whole spectrum to use (so .5
466	means half, and .1 means 10percent)
467
468
469	*******************************************************************************/
470
471	int hypre_ParCSRRelax_Cheby(hypre_ParCSRMatrix A, / matrix to relax with */
472	hypre_ParVector f, / right-hand side */
473	double max_eig,
474	double min_eig,
475	double eig_ratio,
476	int order, /* polynomial order */
477	int scale, /* scale by diagonal?*/
478	int variant,
479	hypre_ParVector u, / initial/updated approximation */
480	hypre_ParVector v / temporary vector */,
481	hypre_ParVector r /another temp vector */ )
482	{
483
484
485	hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
486	double *A_diag_data = hypre_CSRMatrixData(A_diag);
487	int *A_diag_i = hypre_CSRMatrixI(A_diag);
488
489	double *u_data = hypre_VectorData(hypre_ParVectorLocalVector(u));
490	double *f_data = hypre_VectorData(hypre_ParVectorLocalVector(f));
491	double *v_data = hypre_VectorData(hypre_ParVectorLocalVector(v));
492
493	double *r_data = hypre_VectorData(hypre_ParVectorLocalVector(r));
494
495	double theta, delta;
496
497	double den;
498	double upper_bound, lower_bound;
499
500	int i, j;
501	int num_rows = hypre_CSRMatrixNumRows(A_diag);
502
503	double coefs[5];
504	double mult;
505	double *orig_u;
506
507	double tmp_d;
508
509	int cheby_order;
510
511	double ds_data, tmp_data;
512	double diag;
513
514	hypre_ParVector *ds;
515	hypre_ParVector *tmp_vec;
516
517	/* u = u + p(A)r */
518
519	if (order > 4)
520	order = 4;
521	if (order < 1)
522	order = 1;
523
524	/* we are using the order of p(A) */
525	cheby_order = order -1;
526
527	/* make sure we are large enough - Adams et al. 2003 */
528	upper_bound = max_eig * 1.1;
529	/* lower_bound = max_eig/eig_ratio; */
530	lower_bound = (upper_bound - min_eig)* eig_ratio + min_eig;
531
532
533	/* theta and delta */
534	theta = (upper_bound + lower_bound)/2;
535	delta = (upper_bound - lower_bound)/2;
536
537	if (variant == 1 )
538	{
539	switch ( cheby_order ) /* these are the corresponding cheby polynomials: u = u_o + s(A)r_0 - so order is
540	one less that resid poly: r(t) = 1 - ts(t) /
541	{
542	case 0:
543	coefs[0] = 1.0/theta;
544
545	break;
546
547	case 1: /* (del - t + 2th)/(th^2 + delth) */
548	den = (thetatheta + deltatheta);
549
550	coefs[0] = (delta + 2*theta)/den;
551	coefs[1] = -1.0/den;
552
553	break;
554
555	case 2: /* (4delth - del^2 - t(2del + 6th) + 2t^2 + 6th^2)/(2delth^2 - del^2th - del^3 + 2th^3)/
556	den = 2deltathetatheta - deltadeltatheta - pow(delta,3) + 2pow(theta,3);
557
558	coefs[0] = (4deltatheta - pow(delta,2) + 6*pow(theta,2))/den;
559	coefs[1] = -(2delta + 6theta)/den;
560	coefs[2] = 2/den;
561
562	break;
563
564	case 3: /* -(6del^2th - 12delth^2 - t^2(4del + 16th) + t(12delth - 3del^2 + 24th^2) + 3del^3 + 4t^3 - 16th^3)/(4delth^3 - 3del^2th^2 - 3del^3th + 4th^4)*/
565	den = - (4deltapow(theta,3) - 3pow(delta,2)pow(theta,2) - 3pow(delta,3)theta + 4*pow(theta,4) );
566
567	coefs[0] = (6pow(delta,2)theta - 12deltapow(theta,2) + 3pow(delta,3) - 16pow(theta,3) )/den;
568	coefs[1] = (12deltatheta - 3pow(delta,2) + 24pow(theta,2))/den;
569	coefs[2] = -( 4delta + 16theta)/den;
570	coefs[3] = 4/den;
571
572	break;
573	}
574	}
575
576	else /* standard chebyshev */
577	{
578
579	switch ( cheby_order ) /* these are the corresponding cheby polynomials: u = u_o + s(A)r_0 - so order is
580	one less thatn resid poly: r(t) = 1 - ts(t) /
581	{
582	case 0:
583	coefs[0] = 1.0/theta;
584	break;
585
586	case 1: /* ( 2t - 4th)/(del^2 - 2th^2) /
587	den = deltadelta - 2theta*theta;
588
589	coefs[0] = -4*theta/den;
590	coefs[1] = 2/den;
591
592	break;
593
594	case 2: /* (3del^2 - 4t^2 + 12tth - 12th^2)/(3del^2th - 4th^3)*/
595	den = 3(deltadelta)theta - 4(thetathetatheta);
596
597	coefs[0] = (3deltadelta - 12 thetatheta)/den;
598	coefs[1] = 12*theta/den;
599	coefs[2] = -4/den;
600
601	break;
602
603	case 3: /(t(8del^2 - 48th^2) - 16del^2th + 32t^2th - 8t^3 + 32th^3)/(del^4 - 8del^2th^2 + 8th^4)/
604	den = pow(delta,4) - 8deltadeltathetatheta + 8*pow(theta,4);
605
606	coefs[0] = (32pow(theta,3)- 16deltadeltatheta)/den;
607	coefs[1] = (8deltadelta - 48thetatheta)/den;
608	coefs[2] = 32*theta/den;
609	coefs[3] = -8/den;
610
611	break;
612	}
613	}
614
615	orig_u = hypre_CTAlloc(double, num_rows);
616
617	if (!scale)
618	{
619	/* get residual: r = f - Au /
620	hypre_ParVectorCopy(f, r);
621	hypre_ParCSRMatrixMatvec(-1.0, A, u, 1.0, r);
622
623
624
625	for ( i = 0; i < num_rows; i++ )
626	{
627	orig_u[i] = u_data[i];
628	u_data[i] = r_data[i] * coefs[cheby_order];
629	}
630	for (i = cheby_order - 1; i >= 0; i-- )
631	{
632	hypre_ParCSRMatrixMatvec(1.0, A, u, 0.0, v);
633	mult = coefs[i];
634
635	#ifdef HYPRE_USING_OPENMP
636	#pragma omp parallel for private(j) schedule(static)
637	#endif
638
639	for ( j = 0; j < num_rows; j++ )
640	{
641	u_data[j] = mult * r_data[j] + v_data[j];
642	}
643
644	}
645
646	#ifdef HYPRE_USING_OPENMP
647	#pragma omp parallel for private(i) schedule(static)
648	#endif
649	for ( i = 0; i < num_rows; i++ )
650	{
651	u_data[i] = orig_u[i] + u_data[i];
652	}
653
654
655	}
656	else /* scaling! */
657	{
658
659	/grab 1/sqrt(diagonal) /
660
661	ds = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
662	hypre_ParCSRMatrixGlobalNumRows(A),
663	hypre_ParCSRMatrixRowStarts(A));
664	hypre_ParVectorInitialize(ds);
665	hypre_ParVectorSetPartitioningOwner(ds,0);
666	ds_data = hypre_VectorData(hypre_ParVectorLocalVector(ds));
667
668	tmp_vec = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
669	hypre_ParCSRMatrixGlobalNumRows(A),
670	hypre_ParCSRMatrixRowStarts(A));
671	hypre_ParVectorInitialize(tmp_vec);
672	hypre_ParVectorSetPartitioningOwner(tmp_vec,0);
673	tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(tmp_vec));
674
675	/* get ds_data and get scaled residual: r = D^(-1/2)f -
676	* D^(-1/2)Au /
677
678
679	#ifdef HYPRE_USING_OPENMP
680	#pragma omp parallel for private(j,diag) schedule(static)
681	#endif
682	for (j = 0; j < num_rows; j++)
683	{
684	diag = A_diag_data[A_diag_i[j]];
685	ds_data[j] = 1/sqrt(diag);
686
687	r_data[j] = ds_data[j] * f_data[j];
688	}
689
690	hypre_ParCSRMatrixMatvec(-1.0, A, u, 0.0, tmp_vec);
691	#ifdef HYPRE_USING_OPENMP
692	#pragma omp parallel for private(j) schedule(static)
693	#endif
694	for ( j = 0; j < num_rows; j++ )
695	{
696	r_data[j] += ds_data[j] * tmp_data[j];
697	}
698
699	/* save original u, then start
700	the iteration by multiplying r by the cheby coef.*/
701
702	#ifdef HYPRE_USING_OPENMP
703	#pragma omp parallel for private(j) schedule(static)
704	#endif
705	for ( j = 0; j < num_rows; j++ )
706	{
707	orig_u[j] = u_data[j]; /* orig, unscaled u */
708
709	u_data[j] = r_data[j] * coefs[cheby_order];
710	}
711
712	/* now do the other coefficients */
713	for (i = cheby_order - 1; i >= 0; i-- )
714	{
715	/* v = D^(-1/2)AD^(-1/2)u */
716	#ifdef HYPRE_USING_OPENMP
717	#pragma omp parallel for private(j) schedule(static)
718	#endif
719	for ( j = 0; j < num_rows; j++ )
720	{
721	tmp_data[j] = ds_data[j] * u_data[j];
722	}
723	hypre_ParCSRMatrixMatvec(1.0, A, tmp_vec, 0.0, v);
724
725	/* u_new = coefr + v/
726	mult = coefs[i];
727
728	#ifdef HYPRE_USING_OPENMP
729	#pragma omp parallel for private(j,tmp_d) schedule(static)
730	#endif
731	for ( j = 0; j < num_rows; j++ )
732	{
733	tmp_d = ds_data[j]* v_data[j];
734	u_data[j] = mult * r_data[j] + tmp_d;
735	}
736
737	} /* end of cheby_order loop */
738
739
740	/* now we have to scale u_data before adding it to u_orig*/
741
742
743	#ifdef HYPRE_USING_OPENMP
744	#pragma omp parallel for private(j) schedule(static)
745	#endif
746	for ( j = 0; j < num_rows; j++ )
747	{
748	u_data[j] = orig_u[j] + ds_data[j]*u_data[j];
749	}
750
751	hypre_ParVectorDestroy(ds);
752	hypre_ParVectorDestroy(tmp_vec);
753
754
755	}/* end of scaling code */
756
757
758
759	hypre_TFree(orig_u);
760
761
762
763
764	return hypre_error_flag;
765
766
767	}
768
769	/*--------------------------------------------------------------------------
770	* hypre_BoomerAMGRelax_FCFJacobi
771	--------------------------------------------------------------------------/
772
773	int hypre_BoomerAMGRelax_FCFJacobi( hypre_ParCSRMatrix *A,
774	hypre_ParVector *f,
775	int *cf_marker,
776	double relax_weight,
777	hypre_ParVector *u,
778	hypre_ParVector *Vtemp)
779	{
780
781	int i;
782	int relax_points[3];
783	int relax_type = 0;
784
785	hypre_ParVector *Ztemp = NULL;
786
787
788	relax_points[0] = -1; /F /
789	relax_points[1] = 1; /C /
790	relax_points[2] = -1; /F /
791
792	/* if we are on the coarsest level ,the cf_marker will be null
793	and we just do one sweep regular jacobi */
794	if (cf_marker == NULL)
795	{
796	hypre_BoomerAMGRelax(A,
797	f,
798	cf_marker,
799	relax_type,
800	0,
801	relax_weight,
802	0.0,
803	NULL,
804	u,
805	Vtemp, Ztemp);
806	}
807	else
808	{
809	for (i=0; i < 3; i++)
810	hypre_BoomerAMGRelax(A,
811	f,
812	cf_marker,
813	relax_type,
814	relax_points[i],
815	relax_weight,
816	0.0,
817	NULL,
818	u,
819	Vtemp, Ztemp);
820	}
821
822
823	return hypre_error_flag;
824
825	}
826
827	/*--------------------------------------------------------------------------
828	* CG Smoother - if the CG setup is cheap, we can just do it here - for
829	* now we are doing it in the setup, so this function is a
830	* bit unnecessary ...
831	*
832	--------------------------------------------------------------------------/
833
834	int hypre_ParCSRRelax_CG( HYPRE_Solver solver,
835	hypre_ParCSRMatrix *A,
836	hypre_ParVector *f,
837	hypre_ParVector *u,
838	int num_its)
839	{
840	int num_iterations;
841	double final_res_norm;
842
843	HYPRE_PCGSetMaxIter(solver, num_its); /* max iterations */
844
845	HYPRE_ParCSRPCGSolve(solver, (HYPRE_ParCSRMatrix)A, (HYPRE_ParVector)f, (HYPRE_ParVector)u);
846
847	HYPRE_PCGGetNumIterations(solver, &num_iterations);
848	HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
849
850	#if 0
851	{
852	int myid;
853	MPI_Comm_rank(MPI_COMM_WORLD, &myid);
854	if (myid ==0)
855	{
856	printf(" -----CG PCG Iterations = %d\n", num_iterations);
857	printf(" -----CG PCG Final Relative Residual Norm = %e\n", final_res_norm);
858	}
859
860	}
861	#endif
862
863
864
865	return hypre_error_flag;
866
867	}
868
869
870
871
872	/*--------------------------------------------------------------------------
873	* Steepest Descent (Smoother) (Not used)
874	*
875	* We don't check for convergence - just do a fixed number of iterations
876	--------------------------------------------------------------------------/
877
878	int hypre_ParCSRRelax_SD( hypre_ParCSRMatrix A,/ matrix to relax with */
879	hypre_ParVector f, / right-hand side */
880	hypre_ParVector u,/ initial/updated approximation */
881	hypre_ParVector r, / temporary vector */
882	hypre_ParVector p, /another temp vector */
883	int num_its)
884	{
885
886	int i;
887	double alpha, tmp1, tmp2;
888
889
890	/* get residual: r = f - Au /
891	hypre_ParVectorCopy(f, r); /* copy f into r */
892	hypre_ParCSRMatrixMatvec(-1.0, A, u, 1.0, r);
893
894	for (i = 0; i < num_its; i++)
895	{
896
897	/p = Ar */
898	hypre_ParCSRMatrixMatvec(1.0, A, r, 0.0, p);
899
900	/* alpha = (r,r)/(p,r) */
901	tmp1 = hypre_ParVectorInnerProd( r, r);
902
903	tmp2 = hypre_ParVectorInnerProd( p, r);
904
905	if (tmp2 == 0.0)
906	break;
907
908	alpha = tmp1/tmp2;
909
910	/* u = u + alphar /
911	hypre_ParVectorAxpy( alpha, r, u);
912
913	/* r = r - alpha * p */
914	hypre_ParVectorAxpy( -alpha, p, r);
915
916
917	}
918
919	return hypre_error_flag;
920
921	}
922
923
924
925	/* tql1.f --
926
927	this is the eispack translation - from Barry Smith in Petsc
928
929	Note that this routine always uses real numbers (not complex) even
930	if the underlying matrix is Hermitian. This is because the Lanczos
931	process applied to Hermitian matrices always produces a real,
932	symmetric tridiagonal matrix.
933	*/
934
935	double hypre_LINPACKcgpthy(double,double);
936
937
938	int hypre_LINPACKcgtql1(int n,double d,double e,int ierr)
939	{
940	/* System generated locals */
941	int i__1,i__2;
942	double d__1,d__2,c_b10 = 1.0;
943
944	/* Local variables */
945	double c,f,g,h;
946	int i,j,l,m;
947	double p,r,s,c2,c3 = 0.0;
948	int l1,l2;
949	double s2 = 0.0;
950	int ii;
951	double dl1,el1;
952	int mml;
953	double tst1,tst2;
954
955	/* THIS SUBROUTINE IS A TRANSLATION OF THE ALGOL PROCEDURE TQL1, */
956	/* NUM. MATH. 11, 293-306(1968) BY BOWDLER, MARTIN, REINSCH, AND */
957	/* WILKINSON. */
958	/* HANDBOOK FOR AUTO. COMP., VOL.II-LINEAR ALGEBRA, 227-240(1971). */
959
960	/* THIS SUBROUTINE FINDS THE EIGENVALUES OF A SYMMETRIC */
961	/* TRIDIAGONAL MATRIX BY THE QL METHOD. */
962
963	/* ON INPUT */
964
965	/* N IS THE ORDER OF THE MATRIX. */
966
967	/* D CONTAINS THE DIAGONAL ELEMENTS OF THE INPUT MATRIX. */
968
969	/* E CONTAINS THE SUBDIAGONAL ELEMENTS OF THE INPUT MATRIX */
970	/* IN ITS LAST N-1 POSITIONS. E(1) IS ARBITRARY. */
971
972	/* ON OUTPUT */
973
974	/* D CONTAINS THE EIGENVALUES IN ASCENDING ORDER. IF AN */
975	/* ERROR EXIT IS MADE, THE EIGENVALUES ARE CORRECT AND */
976	/* ORDERED FOR INDICES 1,2,...IERR-1, BUT MAY NOT BE */
977	/* THE SMALLEST EIGENVALUES. */
978
979	/* E HAS BEEN DESTROYED. */
980
981	/* IERR IS SET TO */
982	/* ZERO FOR NORMAL RETURN, */
983	/* J IF THE J-TH EIGENVALUE HAS NOT BEEN */
984	/* DETERMINED AFTER 30 ITERATIONS. */
985
986	/* CALLS CGPTHY FOR DSQRT(AA + BB) . */
987
988	/* QUESTIONS AND COMMENTS SHOULD BE DIRECTED TO BURTON S. GARBOW, */
989	/* MATHEMATICS AND COMPUTER SCIENCE DIV, ARGONNE NATIONAL LABORATORY
990	*/
991
992	/* THIS VERSION DATED AUGUST 1983. */
993
994	/* ------------------------------------------------------------------
995	*/
996	double ds;
997
998	--e;
999	--d;
1000
1001	*ierr = 0;
1002	if (*n == 1) {
1003	goto L1001;
1004	}
1005
1006	i__1 = *n;
1007	for (i = 2; i <= i__1; ++i) {
1008	e[i - 1] = e[i];
1009	}
1010
1011	f = 0.;
1012	tst1 = 0.;
1013	e[*n] = 0.;
1014
1015	i__1 = *n;
1016	for (l = 1; l <= i__1; ++l) {
1017	j = 0;
1018	h = (d__1 = d[l],fabs(d__1)) + (d__2 = e[l],fabs(d__2));
1019	if (tst1 < h) {
1020	tst1 = h;
1021	}
1022	/* .......... LOOK FOR SMALL SUB-DIAGONAL ELEMENT .......... */
1023	i__2 = *n;
1024	for (m = l; m <= i__2; ++m) {
1025	tst2 = tst1 + (d__1 = e[m],fabs(d__1));
1026	if (tst2 == tst1) {
1027	goto L120;
1028	}
1029	/* .......... E(N) IS ALWAYS ZERO,SO THERE IS NO EXIT */
1030	/* THROUGH THE BOTTOM OF THE LOOP .......... */
1031	}
1032	L120:
1033	if (m == l) {
1034	goto L210;
1035	}
1036	L130:
1037	if (j == 30) {
1038	goto L1000;
1039	}
1040	++j;
1041	/* .......... FORM SHIFT .......... */
1042	l1 = l + 1;
1043	l2 = l1 + 1;
1044	g = d[l];
1045	p = (d[l1] - g) / (e[l] * 2.);
1046	r = hypre_LINPACKcgpthy(&p,&c_b10);
1047	ds = 1.0; if (p < 0.0) ds = -1.0;
1048	d[l] = e[l] / (p + ds*r);
1049	d[l1] = e[l] * (p + ds*r);
1050	dl1 = d[l1];
1051	h = g - d[l];
1052	if (l2 > *n) {
1053	goto L145;
1054	}
1055
1056	i__2 = *n;
1057	for (i = l2; i <= i__2; ++i) {
1058	d[i] -= h;
1059	}
1060
1061	L145:
1062	f += h;
1063	/* .......... QL TRANSFORMATION .......... */
1064	p = d[m];
1065	c = 1.;
1066	c2 = c;
1067	el1 = e[l1];
1068	s = 0.;
1069	mml = m - l;
1070	/* .......... FOR I=M-1 STEP -1 UNTIL L DO -- .......... */
1071	i__2 = mml;
1072	for (ii = 1; ii <= i__2; ++ii) {
1073	c3 = c2;
1074	c2 = c;
1075	s2 = s;
1076	i = m - ii;
1077	g = c * e[i];
1078	h = c * p;
1079	r = hypre_LINPACKcgpthy(&p,&e[i]);
1080	e[i + 1] = s * r;
1081	s = e[i] / r;
1082	c = p / r;
1083	p = c * d[i] - s * g;
1084	d[i + 1] = h + s * (c * g + s * d[i]);
1085	}
1086
1087	p = -s * s2 * c3 * el1 * e[l] / dl1;
1088	e[l] = s * p;
1089	d[l] = c * p;
1090	tst2 = tst1 + (d__1 = e[l],fabs(d__1));
1091	if (tst2 > tst1) {
1092	goto L130;
1093	}
1094	L210:
1095	p = d[l] + f;
1096	/* .......... ORDER EIGENVALUES .......... */
1097	if (l == 1) {
1098	goto L250;
1099	}
1100	/* .......... FOR I=L STEP -1 UNTIL 2 DO -- .......... */
1101	i__2 = l;
1102	for (ii = 2; ii <= i__2; ++ii) {
1103	i = l + 2 - ii;
1104	if (p >= d[i - 1]) {
1105	goto L270;
1106	}
1107	d[i] = d[i - 1];
1108	}
1109
1110	L250:
1111	i = 1;
1112	L270:
1113	d[i] = p;
1114	}
1115
1116	goto L1001;
1117	/* .......... SET ERROR -- NO CONVERGENCE TO AN */
1118	/* EIGENVALUE AFTER 30 ITERATIONS .......... */
1119	L1000:
1120	*ierr = l;
1121	L1001:
1122	return 0;
1123
1124	} /* cgtql1_ */
1125
1126
1127	double hypre_LINPACKcgpthy(double a,double b)
1128	{
1129	/* System generated locals */
1130	double ret_val,d__1,d__2,d__3;
1131
1132	/* Local variables */
1133	double p,r,s,t,u;
1134
1135
1136	/* FINDS DSQRT(A2+B2) WITHOUT OVERFLOW OR DESTRUCTIVE UNDERFLOW */
1137
1138
1139	/* Computing MAX */
1140	d__1 = fabs(a),d__2 = fabs(b);
1141	p = hypre_max(d__1,d__2);
1142	if (!p) {
1143	goto L20;
1144	}
1145	/* Computing MIN */
1146	d__2 = fabs(a),d__3 = fabs(b);
1147	/* Computing 2nd power */
1148	d__1 = hypre_min(d__2,d__3) / p;
1149	r = d__1 * d__1;
1150	L10:
1151	t = r + 4.;
1152	if (t == 4.) {
1153	goto L20;
1154	}
1155	s = r / t;
1156	u = s * 2. + 1.;
1157	p = u * p;
1158	/* Computing 2nd power */
1159	d__1 = s / u;
1160	r = d__1 * d__1 * r;
1161	goto L10;
1162	L20:
1163	ret_val = p;
1164
1165	return ret_val;
1166	} /* cgpthy_ */
1167
1168
1169	#if 0
1170
1171	int hypre_ParCSRRelax_Cheby2(hypre_ParCSRMatrix A, / matrix to relax with */
1172	hypre_ParVector f, / right-hand side */
1173	double max_eig, /* u.b = max. e-val est.1.1 /
1174	double eig_ratio, /* l.b = max_eig/eig ratio */
1175	int order, /* polynomial order */
1176	hypre_ParVector u, / initial/updated approximation */
1177	hypre_ParVector v / temporary vector */,
1178	hypre_ParVector v2 /another temp vector */ )
1179	{
1180
1181	/* See Saad "Iterative Methods for Sparse Systems", Alg. 12.1 */
1182	/* r_m = Tm(r_0) - plus we scale residual by SCALE = (1-A/u.b.) */
1183
1184	hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1185	double *A_diag_data = hypre_CSRMatrixData(A_diag);
1186	int *A_diag_i = hypre_CSRMatrixI(A_diag);
1187
1188	double *u_data = hypre_VectorData(hypre_ParVectorLocalVector(u));
1189	double *f_data = hypre_VectorData(hypre_ParVectorLocalVector(f));
1190	double *v_data = hypre_VectorData(hypre_ParVectorLocalVector(v));
1191
1192	double *dk = hypre_VectorData(hypre_ParVectorLocalVector(v2));
1193
1194	double theta, delta, sigma;
1195	double p_k, p_kp1, temp1, temp2, diag, scale;
1196	double zero = 0.0;
1197	double upper_bound, lower_bound;
1198
1199	int i, j;
1200	int num_rows = hypre_CSRMatrixNumRows(A_diag);
1201
1202	hypre_ParVector *Ztemp;
1203
1204
1205	Ztemp = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
1206	hypre_ParCSRMatrixGlobalNumRows(A),
1207	hypre_ParCSRMatrixRowStarts(A));
1208	hypre_ParVectorInitialize(Ztemp);
1209	hypre_ParVectorSetPartitioningOwner(Ztemp,0);
1210
1211	/* make sure we are large enough - Adams et al. 2003 */
1212	upper_bound = max_eig * 1.1;
1213	lower_bound = max_eig/eig_ratio;
1214
1215
1216	/* parameters */
1217	theta = (upper_bound + lower_bound)/2;
1218	delta = (upper_bound - lower_bound)/2;
1219	sigma = theta/delta;
1220
1221	/* set v = f */
1222	hypre_ParVectorCopy(f, v);
1223
1224	/* get residual: v = f-Au /
1225	hypre_ParCSRMatrixMatvec(-1.0, A, u, 1.0, v);
1226
1227	/* p_0*/
1228	p_k = 1/sigma;
1229
1230	/first order /
1231	temp1 = 1/theta;
1232
1233	/d_0 = 1/theta * SCALEr_0 /
1234	/* x_1 = x_0 + d_0 */
1235
1236	/* NEW PART*/
1237	/* z = Av /
1238	hypre_ParCSRMatrixMatvec(1.0, A, v, 0.0, Ztemp);
1239	/* v = v - Ztemp/u.b. */
1240	scale = -1.0/upper_bound;
1241	hypre_ParVectorAxpy(scale, Ztemp, v);
1242	/* END NEW */
1243
1244	#ifdef HYPRE_USING_OPENMP
1245	#pragma omp parallel for private(i,diag,scale) schedule(static)
1246	#endif
1247	for (i = 0; i < num_rows; i++)
1248	{
1249	diag = 1;
1250	scale = temp1/diag;
1251	dk[i] = scale*v_data[i];
1252	u_data[i] += dk[i];
1253
1254	}
1255
1256	/* higher order */
1257	for (j = 1; j < order; j++)
1258	{
1259	/* get residual: v = f-Au /
1260	hypre_ParVectorCopy(f, v);
1261	hypre_ParCSRMatrixMatvec(-1.0, A, u, 1.0, v);
1262
1263	p_kp1 = 1.0/(2.0*sigma - p_k);
1264	temp1 = p_kp1*p_k;
1265	temp2 = 2.0*p_kp1/delta;
1266
1267	/* NEW PART*/
1268	/* still do jacobi */
1269
1270	/* z = Av /
1271	hypre_ParCSRMatrixMatvec(1.0, A, v, 0.0, Ztemp);
1272	/* v = v - Ztemp/u.b. */
1273	scale = -1.0/upper_bound;
1274	hypre_ParVectorAxpy(scale, Ztemp, v);
1275	/* END NEW */
1276
1277
1278	#ifdef HYPRE_USING_OPENMP
1279	#pragma omp parallel for private(i,diag,scale) schedule(static)
1280	#endif
1281	for (i = 0; i < num_rows; i++)
1282	{
1283	diag = 1;
1284	scale = temp2/diag;
1285	dk[i] = temp1dk[i] + scalev_data[i];
1286	u_data[i] += dk[i];
1287	}
1288	p_k = p_kp1;
1289	}
1290
1291
1292	hypre_ParVectorDestroy(Ztemp);
1293
1294	return hypre_error_flag;
1295
1296
1297	}
1298	#endif
1299
1300	/*------------------------------------------------------------------------
1301
1302	theta = a_ii /sum off_d((a_ij))
1303	we want the min.
1304
1305	--------------------------------------------------------------------------/
1306	int hypre_ParCSRComputeTheta(hypre_ParCSRMatrix *A,
1307	double *theta_est)
1308
1309	{
1310	int i, j;
1311	int num_rows = hypre_ParCSRMatrixNumRows(A);
1312
1313	hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1314	int *A_diag_I = hypre_CSRMatrixI(A_diag);
1315	int *A_diag_J = hypre_CSRMatrixJ(A_diag);
1316	double *A_diag_data = hypre_CSRMatrixData(A_diag);
1317
1318	hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
1319	int *A_offd_I = hypre_CSRMatrixI(A_offd);
1320	int *A_offd_J = hypre_CSRMatrixJ(A_offd);
1321	double *A_offd_data = hypre_CSRMatrixData(A_offd);
1322	int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
1323
1324	double diag, offd_sum;
1325	double theta, ratio;
1326	int min_row = 0;
1327
1328	int my_id;
1329	MPI_Comm_rank(MPI_COMM_WORLD,&my_id);
1330
1331	theta = 1e9;
1332
1333
1334	for (i = 0; i < num_rows; i++)
1335	{
1336
1337	/* get the diag element of the ith row */
1338	for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1339	{
1340	if (A_diag_J[j] == i)
1341	{
1342	diag = A_diag_data[j];
1343	/* break; */
1344	}
1345	else
1346	{
1347	if (A_diag_data[j] > 0.0)
1348	{
1349	printf("MYID = %d, row = %d, DIAG_col = %d, val = %g \n", my_id, i, A_diag_J[j], A_diag_data[j]);
1350	}
1351	}
1352
1353	}
1354
1355	/* get the offd part of the ith row */
1356	offd_sum = 0.0;
1357	if (num_cols_offd )
1358	{
1359	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1360	{
1361	offd_sum += fabs(A_offd_data[j]);
1362	if (A_offd_data[j] > 0.0)
1363	{
1364	printf("MYID = %d, row = %d, OFFD_col = %d, val = %g \n", my_id, i, A_offd_J[j], A_offd_data[j]);
1365	}
1366
1367
1368	}
1369
1370	}
1371	if (offd_sum > 0.0)
1372	{
1373	ratio = diag/offd_sum;
1374	theta = hypre_min(theta, ratio);
1375
1376	if (theta == ratio)
1377	min_row = i;
1378	}
1379
1380	}
1381
1382	printf("MYID = %d, Min Row = %d\n",my_id, min_row);
1383
1384
1385
1386	*theta_est = theta;
1387
1388	return hypre_error_flag;
1389
1390
1391	}
1392
1393
1394	/*--------------------------------------------------------------------------
1395	* hypre_ParCSRComputeL1Norms Threads
1396	*
1397	* Compute the l1 norms of the rows of a given matrix, depending on
1398	* the option parameter:
1399	*
1400	* option 1 = Compute the l1 norm of the rows
1401	* option 2 = Compute the l1 norm of the (processor) off-diagonal
1402	* part of the rows plus the diagonal of A
1403	* option 3 = Compute the l2 norm^2 of the rows
1404	* option 4 = Truncated version of option 2 based on Remark 6.2 in "Multigrid
1405	* Smoothers for Ultra-Parallel Computing" (with or without CF)
1406	--------------------------------------------------------------------------/
1407
1408	int hypre_ParCSRComputeL1Norms(hypre_ParCSRMatrix *A,
1409	int option,
1410	int *cf_marker,
1411	double **l1_norm_ptr)
1412	{
1413	int i, j;
1414	int num_rows = hypre_ParCSRMatrixNumRows(A);
1415
1416	hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1417	int *A_diag_I = hypre_CSRMatrixI(A_diag);
1418	int *A_diag_J = hypre_CSRMatrixJ(A_diag);
1419	double *A_diag_data = hypre_CSRMatrixData(A_diag);
1420
1421	hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
1422	int *A_offd_I = hypre_CSRMatrixI(A_offd);
1423	int *A_offd_J = hypre_CSRMatrixJ(A_offd);
1424	double *A_offd_data = hypre_CSRMatrixData(A_offd);
1425	int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
1426
1427	double *l1_norm = hypre_CTAlloc(double, num_rows);
1428
1429	int *cf_marker_offd = NULL;
1430
1431	int cf_diag;
1432	double diag;
1433
1434
1435	if (cf_marker != NULL)
1436	{
1437	/*-------------------------------------------------------------------
1438	* Get the CF_marker data for the off-processor columns of A
1439	-------------------------------------------------------------------/
1440	int index;
1441	int num_sends;
1442	int start;
1443	int *int_buf_data = NULL;
1444
1445	hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1446	hypre_ParCSRCommHandle *comm_handle;
1447
1448	if (num_cols_offd)
1449	cf_marker_offd = hypre_CTAlloc(int, num_cols_offd);
1450
1451	num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1452
1453	if (hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends))
1454	int_buf_data = hypre_CTAlloc(int,
1455	hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
1456	index = 0;
1457
1458	for (i = 0; i < num_sends; i++)
1459	{
1460	start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1461	for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
1462	{
1463	int_buf_data[index++] = cf_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1464	}
1465
1466	}
1467	comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
1468	cf_marker_offd);
1469
1470	hypre_ParCSRCommHandleDestroy(comm_handle);
1471
1472	hypre_TFree(int_buf_data);
1473	}
1474
1475	if (option == 1)
1476	{
1477	for (i = 0; i < num_rows; i++)
1478	{
1479	l1_norm[i] = 0.0;
1480	if (cf_marker == NULL)
1481	{
1482	/* Add the l1 norm of the diag part of the ith row */
1483	for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1484	l1_norm[i] += fabs(A_diag_data[j]);
1485	/* Add the l1 norm of the offd part of the ith row */
1486	if (num_cols_offd)
1487	{
1488	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1489	l1_norm[i] += fabs(A_offd_data[j]);
1490	}
1491	}
1492	else
1493	{
1494	cf_diag = cf_marker[i];
1495	/* Add the CF l1 norm of the diag part of the ith row */
1496	for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1497	if (cf_diag == cf_marker[A_diag_J[j]])
1498	l1_norm[i] += fabs(A_diag_data[j]);
1499	/* Add the CF l1 norm of the offd part of the ith row */
1500	if (num_cols_offd)
1501	{
1502	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1503	if (cf_diag == cf_marker_offd[A_offd_J[j]])
1504	l1_norm[i] += fabs(A_offd_data[j]);
1505	}
1506	}
1507	}
1508	}
1509	else if (option == 2)
1510	{
1511	for (i = 0; i < num_rows; i++)
1512	{
1513	/* Add the diag element of the ith row */
1514	l1_norm[i] = fabs(A_diag_data[A_diag_I[i]]);
1515	if (cf_marker == NULL)
1516	{
1517	/* Add the l1 norm of the offd part of the ith row */
1518	if (num_cols_offd)
1519	{
1520	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1521	l1_norm[i] += fabs(A_offd_data[j]);
1522	}
1523	}
1524	else
1525	{
1526	cf_diag = cf_marker[i];
1527	/* Add the CF l1 norm of the offd part of the ith row */
1528	if (num_cols_offd)
1529	{
1530	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1531	if (cf_diag == cf_marker_offd[A_offd_J[j]])
1532	l1_norm[i] += fabs(A_offd_data[j]);
1533	}
1534	}
1535	}
1536	}
1537	else if (option == 3)
1538	{
1539	for (i = 0; i < num_rows; i++)
1540	{
1541	l1_norm[i] = 0.0;
1542	for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1543	l1_norm[i] += A_diag_data[j] * A_diag_data[j];
1544	if (num_cols_offd)
1545	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1546	l1_norm[i] += A_offd_data[j] * A_offd_data[j];
1547	}
1548	}
1549	else if (option == 4)
1550	{
1551	for (i = 0; i < num_rows; i++)
1552	{
1553	/* Add the diag element of the ith row */
1554	diag = l1_norm[i] = fabs(A_diag_data[A_diag_I[i]]);
1555	if (cf_marker == NULL)
1556	{
1557	/* Add the scaled l1 norm of the offd part of the ith row */
1558	if (num_cols_offd)
1559	{
1560	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1561	l1_norm[i] += 0.5*fabs(A_offd_data[j]);
1562	}
1563	}
1564	else
1565	{
1566	cf_diag = cf_marker[i];
1567	/* Add the scaled CF l1 norm of the offd part of the ith row */
1568	if (num_cols_offd)
1569	{
1570	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1571	if (cf_diag == cf_marker_offd[A_offd_J[j]])
1572	l1_norm[i] += 0.5*fabs(A_offd_data[j]);
1573	}
1574	}
1575
1576	/* Truncate according to Remark 6.2 */
1577	if (l1_norm[i] <= 4.0/3.0*diag)
1578	l1_norm[i] = diag;
1579	}
1580	}
1581
1582	/* Handle negative definite matrices */
1583	for (i = 0; i < num_rows; i++)
1584	if (A_diag_data[A_diag_I[i]] < 0)
1585	l1_norm[i] = -l1_norm[i];
1586
1587	for (i = 0; i < num_rows; i++)
1588	if (fabs(l1_norm[i]) < DBL_EPSILON)
1589	{
1590	hypre_error_in_arg(1);
1591	break;
1592	}
1593
1594	hypre_TFree(cf_marker_offd);
1595
1596	*l1_norm_ptr = l1_norm;
1597
1598	return hypre_error_flag;
1599	}
1600
1601	/*--------------------------------------------------------------------------
1602	* hypre_ParCSRComputeL1Norms Threads
1603	*
1604	* Compute the l1 norms of the rows of a given matrix, depending on
1605	* the option parameter:
1606	*
1607	* option 1 = Compute the l1 norm of the rows
1608	* option 2 = Compute the l1 norm of the (processor) off-diagonal
1609	* part of the rows plus the diagonal of A
1610	* option 3 = Compute the l2 norm^2 of the rows
1611	* option 4 = Truncated version of option 2 based on Remark 6.2 in "Multigrid
1612	* Smoothers for Ultra-Parallel Computing" (with or without CF)
1613	--------------------------------------------------------------------------/
1614
1615	int hypre_ParCSRComputeL1NormsThreads(hypre_ParCSRMatrix *A,
1616	int option,
1617	int num_threads,
1618	int *cf_marker,
1619	double **l1_norm_ptr)
1620	{
1621	int i, j, k;
1622	int num_rows = hypre_ParCSRMatrixNumRows(A);
1623
1624	hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1625	int *A_diag_I = hypre_CSRMatrixI(A_diag);
1626	int *A_diag_J = hypre_CSRMatrixJ(A_diag);
1627	double *A_diag_data = hypre_CSRMatrixData(A_diag);
1628
1629	hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
1630	int *A_offd_I = hypre_CSRMatrixI(A_offd);
1631	int *A_offd_J = hypre_CSRMatrixJ(A_offd);
1632	double *A_offd_data = hypre_CSRMatrixData(A_offd);
1633	int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
1634
1635	double *l1_norm = hypre_CTAlloc(double, num_rows);
1636	int ii, ns, ne, rest, size;
1637	double res;
1638
1639	int *cf_marker_offd = NULL;
1640
1641	int cf_diag;
1642	double diag;
1643
1644
1645	if (cf_marker != NULL)
1646	{
1647	/*-------------------------------------------------------------------
1648	* Get the CF_marker data for the off-processor columns of A
1649	-------------------------------------------------------------------/
1650	int index;
1651	int num_sends;
1652	int start;
1653	int *int_buf_data = NULL;
1654
1655	hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1656	hypre_ParCSRCommHandle *comm_handle;
1657
1658	if (num_cols_offd)
1659	cf_marker_offd = hypre_CTAlloc(int, num_cols_offd);
1660
1661	num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1662
1663	if (hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends))
1664	int_buf_data = hypre_CTAlloc(int,
1665	hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
1666	index = 0;
1667
1668	for (i = 0; i < num_sends; i++)
1669	{
1670	start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1671	for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
1672	{
1673	int_buf_data[index++] = cf_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1674	}
1675
1676	}
1677	comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
1678	cf_marker_offd);
1679
1680	hypre_ParCSRCommHandleDestroy(comm_handle);
1681
1682	hypre_TFree(int_buf_data);
1683	}
1684
1685
1686	#define HYPRE_SMP_PRIVATE i,ii,j,k,ns,ne,res,rest,size,cf_diag,diag
1687	#include "../utilities/hypre_smp_forloop.h"
1688	for (k = 0; k < num_threads; k++)
1689	{
1690	size = num_rows/num_threads;
1691	rest = num_rows - size*num_threads;
1692	if (k < rest)
1693	{
1694	ns = k*size+k;
1695	ne = (k+1)*size+k+1;
1696	}
1697	else
1698	{
1699	ns = k*size+rest;
1700	ne = (k+1)*size+rest;
1701	}
1702	if (option == 1)
1703	{
1704	for (i = ns; i < ne; i++)
1705	{
1706	l1_norm[i] = 0.0;
1707	if (cf_marker == NULL)
1708	{
1709	/* Add the l1 norm of the diag part of the ith row */
1710	for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1711	l1_norm[i] += fabs(A_diag_data[j]);
1712	/* Add the l1 norm of the offd part of the ith row */
1713	if (num_cols_offd)
1714	{
1715	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1716	l1_norm[i] += fabs(A_offd_data[j]);
1717	}
1718	}
1719	else
1720	{
1721	cf_diag = cf_marker[i];
1722	/* Add the CF l1 norm of the diag part of the ith row */
1723	for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1724	if (cf_diag == cf_marker[A_diag_J[j]])
1725	l1_norm[i] += fabs(A_diag_data[j]);
1726	/* Add the CF l1 norm of the offd part of the ith row */
1727	if (num_cols_offd)
1728	{
1729	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1730	if (cf_diag == cf_marker_offd[A_offd_J[j]])
1731	l1_norm[i] += fabs(A_offd_data[j]);
1732	}
1733	}
1734	}
1735	}
1736	else if (option == 2)
1737	{
1738	for (i = ns; i < ne; i++)
1739	{
1740	l1_norm[i] = 0.0;
1741	if (cf_marker == NULL)
1742	{
1743	/* Add the diagonal and the local off-thread part of the ith row */
1744	for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1745	{
1746	ii = A_diag_J[j];
1747	if (ii == i \|\| ii < ns \|\| ii >= ne)
1748	l1_norm[i] += fabs(A_diag_data[j]);
1749	}
1750	/* Add the l1 norm of the offd part of the ith row */
1751	if (num_cols_offd)
1752	{
1753	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1754	l1_norm[i] += fabs(A_offd_data[j]);
1755	}
1756	}
1757	else
1758	{
1759	cf_diag = cf_marker[i];
1760	/* Add the diagonal and the local off-thread part of the ith row */
1761	for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1762	{
1763	ii = A_diag_J[j];
1764	if ((ii == i \|\| ii < ns \|\| ii >= ne) &&
1765	(cf_diag == cf_marker[A_diag_J[j]]))
1766	l1_norm[i] += fabs(A_diag_data[j]);
1767	}
1768	/* Add the CF l1 norm of the offd part of the ith row */
1769	if (num_cols_offd)
1770	{
1771	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1772	if (cf_diag == cf_marker_offd[A_offd_J[j]])
1773	l1_norm[i] += fabs(A_offd_data[j]);
1774	}
1775	}
1776	}
1777	}
1778	else if (option == 3)
1779	{
1780	for (i = ns; i < ne; i++)
1781	{
1782	l1_norm[i] = 0.0;
1783	for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1784	l1_norm[i] += A_diag_data[j] * A_diag_data[j];
1785	if (num_cols_offd)
1786	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1787	l1_norm[i] += A_offd_data[j] * A_offd_data[j];
1788	}
1789	}
1790	else if (option == 4)
1791	{
1792	for (i = ns; i < ne; i++)
1793	{
1794	l1_norm[i] = 0.0;
1795	if (cf_marker == NULL)
1796	{
1797	/* Add the diagonal and the local off-thread part of the ith row */
1798	for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1799	{
1800	ii = A_diag_J[j];
1801	if (ii == i \|\| ii < ns \|\| ii >= ne)
1802	{
1803	if (ii == i)
1804	{
1805	diag = fabs(A_diag_data[j]);
1806	l1_norm[i] += fabs(A_diag_data[j]);
1807	}
1808	else
1809	l1_norm[i] += 0.5*fabs(A_diag_data[j]);
1810	}
1811	}
1812	/* Add the l1 norm of the offd part of the ith row */
1813	if (num_cols_offd)
1814	{
1815	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1816	l1_norm[i] += 0.5*fabs(A_offd_data[j]);
1817	}
1818	}
1819	else
1820	{
1821	cf_diag = cf_marker[i];
1822	/* Add the diagonal and the local off-thread part of the ith row */
1823	for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
1824	{
1825	ii = A_diag_J[j];
1826	if ((ii == i \|\| ii < ns \|\| ii >= ne) &&
1827	(cf_diag == cf_marker[A_diag_J[j]]))
1828	{
1829	if (ii == i)
1830	{
1831	diag = fabs(A_diag_data[j]);
1832	l1_norm[i] += fabs(A_diag_data[j]);
1833	}
1834	else
1835	l1_norm[i] += 0.5*fabs(A_diag_data[j]);
1836	}
1837	}
1838	/* Add the CF l1 norm of the offd part of the ith row */
1839	if (num_cols_offd)
1840	{
1841	for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
1842	if (cf_diag == cf_marker_offd[A_offd_J[j]])
1843	l1_norm[i] += 0.5*fabs(A_offd_data[j]);
1844	}
1845	}
1846
1847	/* Truncate according to Remark 6.2 */
1848	if (l1_norm[i] <= 4.0/3.0*diag)
1849	l1_norm[i] = diag;
1850	}
1851	}
1852
1853	/* Handle negative definite matrices */
1854	for (i = ns; i < ne; i++)
1855	if (A_diag_data[A_diag_I[i]] < 0)
1856	l1_norm[i] = -l1_norm[i];
1857
1858	for (i = ns; i < ne; i++)
1859	if (fabs(l1_norm[i]) < DBL_EPSILON)
1860	{
1861	hypre_error_in_arg(1);
1862	break;
1863	}
1864	}
1865
1866	hypre_TFree(cf_marker_offd);
1867
1868	*l1_norm_ptr = l1_norm;
1869
1870	return hypre_error_flag;
1871	}
1872
1873
1874	/*--------------------------------------------------------------------------
1875	* hypre_ParCSRRelax_L1 (Symm GS / SSOR)
1876	--------------------------------------------------------------------------/
1877
1878	int hypre_ParCSRRelax_L1( hypre_ParCSRMatrix *A,
1879	hypre_ParVector *f,
1880	double relax_weight,
1881	double omega,
1882	double *l1_norms,
1883	hypre_ParVector *u,
1884	hypre_ParVector *Vtemp,
1885	hypre_ParVector *Ztemp)
1886	{
1887	MPI_Comm comm = hypre_ParCSRMatrixComm(A);
1888	hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
1889	double *A_diag_data = hypre_CSRMatrixData(A_diag);
1890	int *A_diag_i = hypre_CSRMatrixI(A_diag);
1891	int *A_diag_j = hypre_CSRMatrixJ(A_diag);
1892	hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
1893	int *A_offd_i = hypre_CSRMatrixI(A_offd);
1894	double *A_offd_data = hypre_CSRMatrixData(A_offd);
1895	int *A_offd_j = hypre_CSRMatrixJ(A_offd);
1896	hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
1897	hypre_ParCSRCommHandle *comm_handle;
1898
1899	int n = hypre_CSRMatrixNumRows(A_diag);
1900	int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
1901
1902	hypre_Vector *u_local = hypre_ParVectorLocalVector(u);
1903	double *u_data = hypre_VectorData(u_local);
1904
1905	hypre_Vector *f_local = hypre_ParVectorLocalVector(f);
1906	double *f_data = hypre_VectorData(f_local);
1907
1908	hypre_Vector *Vtemp_local = hypre_ParVectorLocalVector(Vtemp);
1909	double *Vtemp_data = hypre_VectorData(Vtemp_local);
1910	double *Vext_data;
1911	double *v_buf_data;
1912	double *tmp_data;
1913
1914	int i, j;
1915	int ii, jj;
1916	int ns, ne, size, rest;
1917	int relax_error = 0;
1918	int num_sends;
1919	int index, start;
1920	int num_procs, num_threads, my_id ;
1921
1922	double zero = 0.0;
1923	double res, res2;
1924
1925	hypre_Vector *Ztemp_local;
1926	double *Ztemp_data;
1927
1928
1929	MPI_Comm_size(comm,&num_procs);
1930	MPI_Comm_rank(comm,&my_id);
1931	num_threads = hypre_NumThreads();
1932	/*-----------------------------------------------------------------
1933	* Copy current approximation into temporary vector.
1934	-----------------------------------------------------------------/
1935	if (num_procs > 1)
1936	{
1937	num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
1938
1939	v_buf_data = hypre_CTAlloc(double,
1940	hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
1941
1942	Vext_data = hypre_CTAlloc(double,num_cols_offd);
1943
1944	if (num_cols_offd)
1945	{
1946	A_offd_j = hypre_CSRMatrixJ(A_offd);
1947	A_offd_data = hypre_CSRMatrixData(A_offd);
1948	}
1949
1950	index = 0;
1951	for (i = 0; i < num_sends; i++)
1952	{
1953	start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
1954	for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++)
1955	v_buf_data[index++]
1956	= u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
1957	}
1958
1959	comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data,
1960	Vext_data);
1961
1962	/*-----------------------------------------------------------------
1963	* Copy current approximation into temporary vector.
1964	-----------------------------------------------------------------/
1965	hypre_ParCSRCommHandleDestroy(comm_handle);
1966	comm_handle = NULL;
1967	}
1968
1969	/*-----------------------------------------------------------------
1970	* Relax all points.
1971	-----------------------------------------------------------------/
1972
1973	Ztemp_local = hypre_ParVectorLocalVector(Ztemp);
1974	Ztemp_data = hypre_VectorData(Ztemp_local);
1975
1976
1977	if (relax_weight == 1 && omega == 1)
1978	{
1979	/tmp_data = hypre_CTAlloc(double,n);/
1980	tmp_data = Ztemp_data;
1981	#define HYPRE_SMP_PRIVATE i
1982	#include "../utilities/hypre_smp_forloop.h"
1983	for (i = 0; i < n; i++)
1984	tmp_data[i] = u_data[i];
1985	#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
1986	#include "../utilities/hypre_smp_forloop.h"
1987	for (j = 0; j < num_threads; j++)
1988	{
1989	size = n/num_threads;
1990	rest = n - size*num_threads;
1991	if (j < rest)
1992	{
1993	ns = j*size+j;
1994	ne = (j+1)*size+j+1;
1995	}
1996	else
1997	{
1998	ns = j*size+rest;
1999	ne = (j+1)*size+rest;
2000	}
2001	for (i = ns; i < ne; i++) /* interior points first */
2002	{
2003
2004	/*-----------------------------------------------------------
2005	* If diagonal is nonzero, relax point i; otherwise, skip it.
2006	-----------------------------------------------------------/
2007
2008	if ( A_diag_data[A_diag_i[i]] != zero)
2009	{
2010	res = f_data[i];
2011	for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2012	{
2013	ii = A_diag_j[jj];
2014	if (ii >= ns && ii < ne)
2015	{
2016	res -= A_diag_data[jj] * u_data[ii];
2017	}
2018	else
2019	res -= A_diag_data[jj] * tmp_data[ii];
2020	}
2021	for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2022	{
2023	ii = A_offd_j[jj];
2024	res -= A_offd_data[jj] * Vext_data[ii];
2025	}
2026	u_data[i] += res / l1_norms[i];
2027	}
2028	}
2029	for (i = ne-1; i > ns-1; i--) /* interior points first */
2030	{
2031
2032	/*-----------------------------------------------------------
2033	* If diagonal is nonzero, relax point i; otherwise, skip it.
2034	-----------------------------------------------------------/
2035
2036	if ( A_diag_data[A_diag_i[i]] != zero)
2037	{
2038	res = f_data[i];
2039	for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2040	{
2041	ii = A_diag_j[jj];
2042	if (ii >= ns && ii < ne)
2043	{
2044	res -= A_diag_data[jj] * u_data[ii];
2045	}
2046	else
2047	res -= A_diag_data[jj] * tmp_data[ii];
2048	}
2049	for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2050	{
2051	ii = A_offd_j[jj];
2052	res -= A_offd_data[jj] * Vext_data[ii];
2053	}
2054	u_data[i] += res / l1_norms[i];
2055	}
2056	}
2057	}
2058	}
2059	else
2060	{
2061	double c1 = omega*relax_weight;
2062	double c2 = omega*(1.0-relax_weight);
2063	/* tmp_data = hypre_CTAlloc(double,n); */
2064	tmp_data = Ztemp_data;
2065	#define HYPRE_SMP_PRIVATE i
2066	#include "../utilities/hypre_smp_forloop.h"
2067	for (i = 0; i < n; i++)
2068	{
2069	tmp_data[i] = u_data[i];
2070	}
2071	#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
2072	#include "../utilities/hypre_smp_forloop.h"
2073	for (j = 0; j < num_threads; j++)
2074	{
2075	size = n/num_threads;
2076	rest = n - size*num_threads;
2077	if (j < rest)
2078	{
2079	ns = j*size+j;
2080	ne = (j+1)*size+j+1;
2081	}
2082	else
2083	{
2084	ns = j*size+rest;
2085	ne = (j+1)*size+rest;
2086	}
2087	for (i = ns; i < ne; i++) /* interior points first */
2088	{
2089
2090	/*-----------------------------------------------------------
2091	* If diagonal is nonzero, relax point i; otherwise, skip it.
2092	-----------------------------------------------------------/
2093
2094	if ( A_diag_data[A_diag_i[i]] != zero)
2095	{
2096	res2 = 0.0;
2097	res = f_data[i];
2098	Vtemp_data[i] = u_data[i];
2099	for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2100	{
2101	ii = A_diag_j[jj];
2102	if (ii >= ns && ii < ne)
2103	{
2104	res -= A_diag_data[jj] * u_data[ii];
2105	if (ii < i)
2106	res2 += A_diag_data[jj] * (Vtemp_data[ii] - u_data[ii]);
2107	}
2108	else
2109	res -= A_diag_data[jj] * tmp_data[ii];
2110	}
2111	for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2112	{
2113	ii = A_offd_j[jj];
2114	res -= A_offd_data[jj] * Vext_data[ii];
2115	}
2116	u_data[i] += (c1res + c2res2) / l1_norms[i];
2117	}
2118	}
2119	for (i = ne-1; i > ns-1; i--) /* interior points first */
2120	{
2121
2122	/*-----------------------------------------------------------
2123	* If diagonal is nonzero, relax point i; otherwise, skip it.
2124	-----------------------------------------------------------/
2125
2126	if ( A_diag_data[A_diag_i[i]] != zero)
2127	{
2128	res2 = 0.0;
2129	res = f_data[i];
2130	for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2131	{
2132	ii = A_diag_j[jj];
2133	if (ii >= ns && ii < ne)
2134	{
2135	res -= A_diag_data[jj] * u_data[ii];
2136	if (ii > i)
2137	res2 += A_diag_data[jj] * (Vtemp_data[ii] - u_data[ii]);
2138	}
2139	else
2140	res -= A_diag_data[jj] * tmp_data[ii];
2141	}
2142	for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2143	{
2144	ii = A_offd_j[jj];
2145	res -= A_offd_data[jj] * Vext_data[ii];
2146	}
2147	u_data[i] += (c1res + c2res2) / l1_norms[i];
2148	}
2149	}
2150	}
2151	}
2152	if (num_procs > 1)
2153	{
2154	hypre_TFree(Vext_data);
2155	hypre_TFree(v_buf_data);
2156	}
2157
2158	return(relax_error);
2159	}
2160
2161	/*--------------------------------------------------------------------------
2162	* hypre_ParCSRRelax_L1_GS (GS / SOR) (NOT SYM)
2163	--------------------------------------------------------------------------/
2164
2165	int hypre_ParCSRRelax_L1_GS( hypre_ParCSRMatrix *A,
2166	hypre_ParVector *f,
2167	double relax_weight,
2168	double omega,
2169	double *l1_norms,
2170	hypre_ParVector *u,
2171	hypre_ParVector *Vtemp,
2172	hypre_ParVector *Ztemp)
2173	{
2174	MPI_Comm comm = hypre_ParCSRMatrixComm(A);
2175	hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
2176	double *A_diag_data = hypre_CSRMatrixData(A_diag);
2177	int *A_diag_i = hypre_CSRMatrixI(A_diag);
2178	int *A_diag_j = hypre_CSRMatrixJ(A_diag);
2179	hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
2180	int *A_offd_i = hypre_CSRMatrixI(A_offd);
2181	double *A_offd_data = hypre_CSRMatrixData(A_offd);
2182	int *A_offd_j = hypre_CSRMatrixJ(A_offd);
2183	hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
2184	hypre_ParCSRCommHandle *comm_handle;
2185
2186	int n = hypre_CSRMatrixNumRows(A_diag);
2187	int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
2188
2189	hypre_Vector *u_local = hypre_ParVectorLocalVector(u);
2190	double *u_data = hypre_VectorData(u_local);
2191
2192	hypre_Vector *f_local = hypre_ParVectorLocalVector(f);
2193	double *f_data = hypre_VectorData(f_local);
2194
2195	hypre_Vector *Vtemp_local = hypre_ParVectorLocalVector(Vtemp);
2196	double *Vtemp_data = hypre_VectorData(Vtemp_local);
2197	double *Vext_data;
2198	double *v_buf_data;
2199	double *tmp_data;
2200
2201	int i, j;
2202	int ii, jj;
2203	int ns, ne, size, rest;
2204	int relax_error = 0;
2205	int num_sends;
2206	int index, start;
2207	int num_procs, num_threads, my_id ;
2208
2209	double zero = 0.0;
2210	double res, res2;
2211
2212	hypre_Vector *Ztemp_local;
2213	double *Ztemp_data;
2214
2215
2216	MPI_Comm_size(comm,&num_procs);
2217	MPI_Comm_rank(comm,&my_id);
2218	num_threads = hypre_NumThreads();
2219	/*-----------------------------------------------------------------
2220	* Copy current approximation into temporary vector.
2221	-----------------------------------------------------------------/
2222	if (num_procs > 1)
2223	{
2224	num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
2225
2226	v_buf_data = hypre_CTAlloc(double,
2227	hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
2228
2229	Vext_data = hypre_CTAlloc(double,num_cols_offd);
2230
2231	if (num_cols_offd)
2232	{
2233	A_offd_j = hypre_CSRMatrixJ(A_offd);
2234	A_offd_data = hypre_CSRMatrixData(A_offd);
2235	}
2236
2237	index = 0;
2238	for (i = 0; i < num_sends; i++)
2239	{
2240	start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
2241	for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++)
2242	v_buf_data[index++]
2243	= u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
2244	}
2245
2246	comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data,
2247	Vext_data);
2248
2249	/*-----------------------------------------------------------------
2250	* Copy current approximation into temporary vector.
2251	-----------------------------------------------------------------/
2252	hypre_ParCSRCommHandleDestroy(comm_handle);
2253	comm_handle = NULL;
2254	}
2255
2256	/*-----------------------------------------------------------------
2257	* Relax all points.
2258	-----------------------------------------------------------------/
2259
2260	Ztemp_local = hypre_ParVectorLocalVector(Ztemp);
2261	Ztemp_data = hypre_VectorData(Ztemp_local);
2262
2263
2264	if (relax_weight == 1 && omega == 1)
2265	{
2266	/tmp_data = hypre_CTAlloc(double,n);/
2267	tmp_data = Ztemp_data;
2268	#define HYPRE_SMP_PRIVATE i
2269	#include "../utilities/hypre_smp_forloop.h"
2270	for (i = 0; i < n; i++)
2271	tmp_data[i] = u_data[i];
2272	#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
2273	#include "../utilities/hypre_smp_forloop.h"
2274	for (j = 0; j < num_threads; j++)
2275	{
2276	size = n/num_threads;
2277	rest = n - size*num_threads;
2278	if (j < rest)
2279	{
2280	ns = j*size+j;
2281	ne = (j+1)*size+j+1;
2282	}
2283	else
2284	{
2285	ns = j*size+rest;
2286	ne = (j+1)*size+rest;
2287	}
2288	for (i = ns; i < ne; i++) /* interior points first */
2289	{
2290
2291	/*-----------------------------------------------------------
2292	* If diagonal is nonzero, relax point i; otherwise, skip it.
2293	-----------------------------------------------------------/
2294
2295	if ( A_diag_data[A_diag_i[i]] != zero)
2296	{
2297	res = f_data[i];
2298	for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2299	{
2300	ii = A_diag_j[jj];
2301	if (ii >= ns && ii < ne)
2302	{
2303	res -= A_diag_data[jj] * u_data[ii];
2304	}
2305	else
2306	res -= A_diag_data[jj] * tmp_data[ii];
2307	}
2308	for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2309	{
2310	ii = A_offd_j[jj];
2311	res -= A_offd_data[jj] * Vext_data[ii];
2312	}
2313	u_data[i] += res / l1_norms[i];
2314	}
2315	}
2316	}
2317	}
2318	else
2319	{
2320	double c1 = omega*relax_weight;
2321	double c2 = omega*(1.0-relax_weight);
2322	/* tmp_data = hypre_CTAlloc(double,n); */
2323	tmp_data = Ztemp_data;
2324	#define HYPRE_SMP_PRIVATE i
2325	#include "../utilities/hypre_smp_forloop.h"
2326	for (i = 0; i < n; i++)
2327	{
2328	tmp_data[i] = u_data[i];
2329	}
2330	#define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size
2331	#include "../utilities/hypre_smp_forloop.h"
2332	for (j = 0; j < num_threads; j++)
2333	{
2334	size = n/num_threads;
2335	rest = n - size*num_threads;
2336	if (j < rest)
2337	{
2338	ns = j*size+j;
2339	ne = (j+1)*size+j+1;
2340	}
2341	else
2342	{
2343	ns = j*size+rest;
2344	ne = (j+1)*size+rest;
2345	}
2346	for (i = ns; i < ne; i++) /* interior points first */
2347	{
2348
2349	/*-----------------------------------------------------------
2350	* If diagonal is nonzero, relax point i; otherwise, skip it.
2351	-----------------------------------------------------------/
2352
2353	if ( A_diag_data[A_diag_i[i]] != zero)
2354	{
2355	res2 = 0.0;
2356	res = f_data[i];
2357	Vtemp_data[i] = u_data[i];
2358	for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2359	{
2360	ii = A_diag_j[jj];
2361	if (ii >= ns && ii < ne)
2362	{
2363	res -= A_diag_data[jj] * u_data[ii];
2364	if (ii < i)
2365	res2 += A_diag_data[jj] * (Vtemp_data[ii] - u_data[ii]);
2366	}
2367	else
2368	res -= A_diag_data[jj] * tmp_data[ii];
2369	}
2370	for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2371	{
2372	ii = A_offd_j[jj];
2373	res -= A_offd_data[jj] * Vext_data[ii];
2374	}
2375	u_data[i] += (c1res + c2res2) / l1_norms[i];
2376	}
2377	}
2378	}
2379	}
2380	if (num_procs > 1)
2381	{
2382	hypre_TFree(Vext_data);
2383	hypre_TFree(v_buf_data);
2384	}
2385
2386	return(relax_error);
2387	}
2388
2389	/*--------------------------------------------------------------------------
2390	* hypre_ParCSRRelax_L1_Jacobi (allows CF)
2391
2392
2393	u += w D^{-1}(f - A u), where D_ii = \|\|A(i,:)\|\|_1
2394	--------------------------------------------------------------------------/
2395
2396	int hypre_ParCSRRelax_L1_Jacobi( hypre_ParCSRMatrix *A,
2397	hypre_ParVector *f,
2398	int *cf_marker,
2399	int relax_points,
2400	double relax_weight,
2401	double *l1_norms,
2402	hypre_ParVector *u,
2403	hypre_ParVector *Vtemp )
2404
2405	{
2406
2407
2408	MPI_Comm comm = hypre_ParCSRMatrixComm(A);
2409	hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
2410	double *A_diag_data = hypre_CSRMatrixData(A_diag);
2411	int *A_diag_i = hypre_CSRMatrixI(A_diag);
2412	int *A_diag_j = hypre_CSRMatrixJ(A_diag);
2413	hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
2414	int *A_offd_i = hypre_CSRMatrixI(A_offd);
2415	double *A_offd_data = hypre_CSRMatrixData(A_offd);
2416	int *A_offd_j = hypre_CSRMatrixJ(A_offd);
2417	hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
2418	hypre_ParCSRCommHandle *comm_handle;
2419
2420	int n = hypre_CSRMatrixNumRows(A_diag);
2421	int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
2422
2423	hypre_Vector *u_local = hypre_ParVectorLocalVector(u);
2424	double *u_data = hypre_VectorData(u_local);
2425
2426	hypre_Vector *f_local = hypre_ParVectorLocalVector(f);
2427	double *f_data = hypre_VectorData(f_local);
2428
2429	hypre_Vector *Vtemp_local = hypre_ParVectorLocalVector(Vtemp);
2430	double *Vtemp_data = hypre_VectorData(Vtemp_local);
2431	double *Vext_data;
2432	double *v_buf_data;
2433
2434	int i, j;
2435	int ii, jj;
2436	int num_sends;
2437	int index, start;
2438	int num_procs, my_id ;
2439
2440	double zero = 0.0;
2441	double res;
2442
2443
2444	MPI_Comm_size(comm,&num_procs);
2445	MPI_Comm_rank(comm,&my_id);
2446
2447	if (num_procs > 1)
2448	{
2449	num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
2450
2451	v_buf_data = hypre_CTAlloc(double,
2452	hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
2453
2454	Vext_data = hypre_CTAlloc(double,num_cols_offd);
2455
2456	if (num_cols_offd)
2457	{
2458	A_offd_j = hypre_CSRMatrixJ(A_offd);
2459	A_offd_data = hypre_CSRMatrixData(A_offd);
2460	}
2461
2462	index = 0;
2463	for (i = 0; i < num_sends; i++)
2464	{
2465	start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
2466	for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
2467	v_buf_data[index++]
2468	= u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
2469	}
2470
2471	comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data,
2472	Vext_data);
2473	}
2474
2475	/*-----------------------------------------------------------------
2476	* Copy current approximation into temporary vector.
2477	-----------------------------------------------------------------/
2478
2479	#define HYPRE_SMP_PRIVATE i
2480	#include "../utilities/hypre_smp_forloop.h"
2481	for (i = 0; i < n; i++)
2482	{
2483	Vtemp_data[i] = u_data[i];
2484	}
2485
2486	if (num_procs > 1)
2487	{
2488	hypre_ParCSRCommHandleDestroy(comm_handle);
2489	comm_handle = NULL;
2490	}
2491
2492	/*-----------------------------------------------------------------
2493	* Relax all points.
2494	-----------------------------------------------------------------/
2495
2496	if (relax_points == 0 \|\| cf_marker == NULL)
2497	{
2498	#define HYPRE_SMP_PRIVATE i,ii,jj,res
2499	#include "../utilities/hypre_smp_forloop.h"
2500	for (i = 0; i < n; i++)
2501	{
2502
2503	/*-----------------------------------------------------------
2504	* If diagonal is nonzero, relax point i; otherwise, skip it.
2505	-----------------------------------------------------------/
2506	if (A_diag_data[A_diag_i[i]] != zero)
2507	{
2508	res = f_data[i];
2509	for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2510	{
2511	ii = A_diag_j[jj];
2512	res -= A_diag_data[jj] * Vtemp_data[ii];
2513	}
2514	for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2515	{
2516	ii = A_offd_j[jj];
2517	res -= A_offd_data[jj] * Vext_data[ii];
2518	}
2519	u_data[i] += (relax_weight*res)/l1_norms[i];
2520	}
2521	}
2522	}
2523
2524	/*-----------------------------------------------------------------
2525	* Relax only C or F points as determined by relax_points.
2526	-----------------------------------------------------------------/
2527	else
2528	{
2529	#define HYPRE_SMP_PRIVATE i,ii,jj,res
2530	#include "../utilities/hypre_smp_forloop.h"
2531	for (i = 0; i < n; i++)
2532	{
2533
2534	/*-----------------------------------------------------------
2535	* If i is of the right type ( C or F ) and diagonal is
2536	* nonzero, relax point i; otherwise, skip it.
2537	-----------------------------------------------------------/
2538
2539	if (cf_marker[i] == relax_points
2540	&& A_diag_data[A_diag_i[i]] != zero)
2541	{
2542	res = f_data[i];
2543	for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
2544	{
2545	ii = A_diag_j[jj];
2546	res -= A_diag_data[jj] * Vtemp_data[ii];
2547	}
2548	for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
2549	{
2550	ii = A_offd_j[jj];
2551	res -= A_offd_data[jj] * Vext_data[ii];
2552	}
2553	u_data[i] += (relax_weight * res)/l1_norms[i];
2554	}
2555	}
2556	}
2557	if (num_procs > 1)
2558	{
2559	hypre_TFree(Vext_data);
2560	hypre_TFree(v_buf_data);
2561	}
2562
2563	return 0;
2564
2565	}
2566
2567

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: CIVL/examples/mpi-omp/AMG2013/parcsr_ls/par_relax_more.c

Download in other formats: