/*BHEADER********************************************************************** * Copyright (c) 2008, Lawrence Livermore National Security, LLC. * Produced at the Lawrence Livermore National Laboratory. * This file is part of HYPRE. See file COPYRIGHT for details. * * HYPRE is free software; you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License (as published by the Free * Software Foundation) version 2.1 dated February 1999. * * $Revision: 2.4 $ ***********************************************************************EHEADER*/ /****************************************************************************** * * Relaxation scheme * *****************************************************************************/ #include "headers.h" /*-------------------------------------------------------------------------- * hypre_BoomerAMGRelax *--------------------------------------------------------------------------*/ int hypre_BoomerAMGRelax( hypre_ParCSRMatrix *A, hypre_ParVector *f, int *cf_marker, int relax_type, int relax_points, double relax_weight, double omega, double *l1_norms, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); double *A_diag_data = hypre_CSRMatrixData(A_diag); int *A_diag_i = hypre_CSRMatrixI(A_diag); int *A_diag_j = hypre_CSRMatrixJ(A_diag); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); int *A_offd_i = hypre_CSRMatrixI(A_offd); double *A_offd_data = hypre_CSRMatrixData(A_offd); int *A_offd_j = hypre_CSRMatrixJ(A_offd); hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_ParCSRCommHandle *comm_handle; HYPRE_BigInt global_num_rows = hypre_ParCSRMatrixGlobalNumRows(A); int n = hypre_CSRMatrixNumRows(A_diag); int num_cols_offd = hypre_CSRMatrixNumCols(A_offd); HYPRE_BigInt first_index = hypre_ParVectorFirstIndex(u); hypre_Vector *u_local = hypre_ParVectorLocalVector(u); double *u_data = hypre_VectorData(u_local); hypre_Vector *f_local = hypre_ParVectorLocalVector(f); double *f_data = hypre_VectorData(f_local); hypre_Vector *Vtemp_local = hypre_ParVectorLocalVector(Vtemp); double *Vtemp_data = hypre_VectorData(Vtemp_local); double *Vext_data; double *v_buf_data; double *tmp_data; hypre_Vector *Ztemp_local; double *Ztemp_data; hypre_CSRMatrix *A_CSR; int *A_CSR_i; int *A_CSR_j; double *A_CSR_data; hypre_Vector *f_vector; double *f_vector_data; int i, j, jr; int ii, jj, i1; int ns, ne, size, rest; int column; int relax_error = 0; int num_sends; int num_recvs; int index, start; int num_procs, num_threads, my_id, ip, p; int vec_start, vec_len; int n_coarse; int n_start, n_end; MPI_Status *status; MPI_Request *requests; double *A_mat; double *b_vec; double zero = 0.0; double res, res0, res2; double one_minus_weight; double one_minus_omega; double prod; one_minus_weight = 1.0 - relax_weight; one_minus_omega = 1.0 - omega; MPI_Comm_size(comm,&num_procs); MPI_Comm_rank(comm,&my_id); num_threads = hypre_NumThreads(); n_start = 0; n_end = n; if (relax_points < -1) { n_start = -relax_points; relax_points = 0; } if (relax_points > 1) { n_end = relax_points; relax_points = 0; } /*----------------------------------------------------------------------- * Switch statement to direct control based on relax_type: * relax_type = 0 -> Jacobi or CF-Jacobi * relax_type = 1 -> Gauss-Seidel <--- very slow, sequential * relax_type = 2 -> Gauss_Seidel: interior points in parallel , * boundary sequential * relax_type = 3 -> hybrid: SOR-J mix off-processor, SOR on-processor * with outer relaxation parameters (forward solve) * relax_type = 4 -> hybrid: SOR-J mix off-processor, SOR on-processor * with outer relaxation parameters (backward solve) * relax_type = 5 -> hybrid: GS-J mix off-processor, chaotic GS on-node * relax_type = 6 -> hybrid: SSOR-J mix off-processor, SSOR on-processor * with outer relaxation parameters * relax_type = 7 -> Jacobi (uses Matvec), only needed in CGNR * relax_type = 9 -> Direct Solve * relax_type = 20 -> L1 hybrid GS (new one) *-----------------------------------------------------------------------*/ switch (relax_type) { case 0: /* Weighted Jacobi */ { if (num_procs > 1) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); v_buf_data = hypre_CTAlloc(double, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); Vext_data = hypre_CTAlloc(double,num_cols_offd); if (num_cols_offd) { A_offd_j = hypre_CSRMatrixJ(A_offd); A_offd_data = hypre_CSRMatrixData(A_offd); } index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) v_buf_data[index++] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data, Vext_data); } /*----------------------------------------------------------------- * Copy current approximation into temporary vector. *-----------------------------------------------------------------*/ #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { Vtemp_data[i] = u_data[i]; } if (num_procs > 1) { hypre_ParCSRCommHandleDestroy(comm_handle); comm_handle = NULL; } /*----------------------------------------------------------------- * Relax all points. *-----------------------------------------------------------------*/ if (relax_points == 0) { #define HYPRE_SMP_PRIVATE i,ii,jj,res #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= one_minus_weight; u_data[i] += relax_weight * res / A_diag_data[A_diag_i[i]]; } } } /*----------------------------------------------------------------- * Relax only C or F points as determined by relax_points. *-----------------------------------------------------------------*/ else { #define HYPRE_SMP_PRIVATE i,ii,jj,res #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= one_minus_weight; u_data[i] += relax_weight * res / A_diag_data[A_diag_i[i]]; } } } if (num_procs > 1) { hypre_TFree(Vext_data); hypre_TFree(v_buf_data); } } break; case 20: /* hybrid L1 Symm. Gauss-Seidel */ { if (num_threads > 1) { Ztemp_local = hypre_ParVectorLocalVector(Ztemp); Ztemp_data = hypre_VectorData(Ztemp_local); } /*----------------------------------------------------------------- * Copy current approximation into temporary vector. *-----------------------------------------------------------------*/ if (num_procs > 1) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); v_buf_data = hypre_CTAlloc(double, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); Vext_data = hypre_CTAlloc(double,num_cols_offd); if (num_cols_offd) { A_offd_j = hypre_CSRMatrixJ(A_offd); A_offd_data = hypre_CSRMatrixData(A_offd); } index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++) v_buf_data[index++] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data, Vext_data); /*----------------------------------------------------------------- * Copy current approximation into temporary vector. *-----------------------------------------------------------------*/ hypre_ParCSRCommHandleDestroy(comm_handle); comm_handle = NULL; } /*----------------------------------------------------------------- * Relax all points. *-----------------------------------------------------------------*/ if (relax_weight == 1 && omega == 1) { if (relax_points == 0) { if (num_threads > 1) { tmp_data = Ztemp_data; #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ns; i < ne; i++) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( l1_norms[i] != zero) { res = f_data[i]; for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res -= A_diag_data[jj] * u_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] += res / l1_norms[i]; } } for (i = ne-1; i > ns-1; i--) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( l1_norms[i] != zero) { res = f_data[i]; for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res -= A_diag_data[jj] * u_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] += res / l1_norms[i]; } } } } else { for (i = 0; i < n; i++) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( l1_norms[i] != zero) { res = f_data[i]; for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] += res / l1_norms[i]; } } for (i = n-1; i > -1; i--) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( l1_norms[i] != zero) { res = f_data[i]; for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] += res / l1_norms[i]; } } } } /*----------------------------------------------------------------- * Relax only C or F points as determined by relax_points. *-----------------------------------------------------------------*/ else { if (num_threads > 1) { tmp_data = Ztemp_data; #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ns; i < ne; i++) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && l1_norms[i] != zero) { res = f_data[i]; for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res -= A_diag_data[jj] * u_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] += res / l1_norms[i]; } } for (i = ne-1; i > ns-1; i--) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && l1_norms[i] != zero) { res = f_data[i]; for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res -= A_diag_data[jj] * u_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] += res / l1_norms[i]; } } } } else { for (i = 0; i < n; i++) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && l1_norms[i] != zero) { res = f_data[i]; for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] += res / l1_norms[i]; } } for (i = n-1; i > -1; i--) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && l1_norms[i] != zero) { res = f_data[i]; for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] += res / l1_norms[i]; } } } } } else { #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { Vtemp_data[i] = u_data[i]; } prod = (1.0-relax_weight*omega); if (relax_points == 0) { if (num_threads > 1) { tmp_data = Ztemp_data; #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ns; i < ne; i++) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( l1_norms[i] != zero) { res0 = 0.0; res2 = 0.0; res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / l1_norms[i]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / l1_norms[i];*/ } } for (i = ne-1; i > ns-1; i--) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( l1_norms[i] != zero) { res0 = 0.0; res2 = 0.0; res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / l1_norms[i]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / l1_norms[i];*/ } } } } else { for (i = 0; i < n; i++) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( l1_norms[i] != zero) { res0 = 0.0; res = f_data[i]; res2 = 0.0; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / l1_norms[i]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / l1_norms[i];*/ } } for (i = n-1; i > -1; i--) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( l1_norms[i] != zero) { res0 = 0.0; res = f_data[i]; res2 = 0.0; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / l1_norms[i]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / l1_norms[i];*/ } } } } /*----------------------------------------------------------------- * Relax only C or F points as determined by relax_points. *-----------------------------------------------------------------*/ else { if (num_threads > 1) { tmp_data = Ztemp_data; #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ns; i < ne; i++) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && l1_norms[i] != zero) { res0 = 0.0; res2 = 0.0; res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res2 += A_diag_data[jj] * Vtemp_data[ii]; res0 -= A_diag_data[jj] * u_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / l1_norms[i]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / l1_norms[i];*/ } } for (i = ne-1; i > ns-1; i--) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && l1_norms[i] != zero) { res0 = 0.0; res2 = 0.0; res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res2 += A_diag_data[jj] * Vtemp_data[ii]; res0 -= A_diag_data[jj] * u_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / l1_norms[i]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / l1_norms[i];*/ } } } } else { for (i = 0; i < n; i++) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && l1_norms[i] != zero) { res = f_data[i]; res0 = 0.0; res2 = 0.0; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / l1_norms[i]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / l1_norms[i];*/ } } for (i = n-1; i > -1; i--) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && l1_norms[i] != zero) { res = f_data[i]; res0 = 0.0; res2 = 0.0; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / l1_norms[i]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / l1_norms[i];*/ } } } } } if (num_procs > 1) { hypre_TFree(Vext_data); hypre_TFree(v_buf_data); } } break; case 5: /* Hybrid: Jacobi off-processor, chaotic Gauss-Seidel on-processor */ { if (num_procs > 1) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); v_buf_data = hypre_CTAlloc(double, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); Vext_data = hypre_CTAlloc(double,num_cols_offd); if (num_cols_offd) { A_offd_j = hypre_CSRMatrixJ(A_offd); A_offd_data = hypre_CSRMatrixData(A_offd); } index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++) v_buf_data[index++] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data, Vext_data); /*----------------------------------------------------------------- * Copy current approximation into temporary vector. *-----------------------------------------------------------------*/ hypre_ParCSRCommHandleDestroy(comm_handle); comm_handle = NULL; } /*----------------------------------------------------------------- * Relax all points. *-----------------------------------------------------------------*/ if (relax_points == 0) { #define HYPRE_SMP_PRIVATE i,ii,jj,res #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } /*----------------------------------------------------------------- * Relax only C or F points as determined by relax_points. *-----------------------------------------------------------------*/ else if (relax_points > 1) { n_coarse = relax_points; #define HYPRE_SMP_PRIVATE i,ii,jj,i1,res #include "../utilities/hypre_smp_forloop.h" for (i1 = 0; i1 < n_coarse; i1++) { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ i = cf_marker[i1]; if (A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= one_minus_weight; u_data[i] += relax_weight * res / A_diag_data[A_diag_i[i]]; } } } else { #define HYPRE_SMP_PRIVATE i,ii,jj,res #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } if (num_procs > 1) { hypre_TFree(Vext_data); hypre_TFree(v_buf_data); } } break; case 3: /* Hybrid: Jacobi off-processor, Gauss-Seidel on-processor (forward loop) */ { Ztemp_local = hypre_ParVectorLocalVector(Ztemp); Ztemp_data = hypre_VectorData(Ztemp_local); if (num_procs > 1) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); v_buf_data = hypre_CTAlloc(double, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); Vext_data = hypre_CTAlloc(double,num_cols_offd); if (num_cols_offd) { A_offd_j = hypre_CSRMatrixJ(A_offd); A_offd_data = hypre_CSRMatrixData(A_offd); } index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++) v_buf_data[index++] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data, Vext_data); /*----------------------------------------------------------------- * Copy current approximation into temporary vector. *-----------------------------------------------------------------*/ hypre_ParCSRCommHandleDestroy(comm_handle); comm_handle = NULL; } /*----------------------------------------------------------------- * Relax all points. *-----------------------------------------------------------------*/ if (relax_weight == 1 && omega == 1) { if (relax_points == 0) { if (num_threads > 1) { /* tmp_data = hypre_CTAlloc(double,n); */ tmp_data = Ztemp_data; #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ns; i < ne; i++) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) res -= A_diag_data[jj] * u_data[ii]; else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } /* hypre_TFree(tmp_data);*/ } else { for (i = 0; i < n; i++) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } } /*----------------------------------------------------------------- * Relax only C or F points as determined by relax_points. *-----------------------------------------------------------------*/ else { if (num_threads > 1) { /* tmp_data = hypre_CTAlloc(double,n); */ tmp_data = Ztemp_data; #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ns; i < ne; i++) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) res -= A_diag_data[jj] * u_data[ii]; else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } /* hypre_TFree(tmp_data); */ } else { for (i = 0; i < n; i++) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } } } else { #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { Vtemp_data[i] = u_data[i]; } prod = (1.0-relax_weight*omega); if (relax_points == 0) { if (num_threads > 1) { /* tmp_data = hypre_CTAlloc(double,n); */ tmp_data = Ztemp_data; #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ns; i < ne; i++) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; res0 = 0.0; res2 = 0.0; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } } /* hypre_TFree(tmp_data); */ } else { for (i = 0; i < n; i++) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res0 = 0.0; res2 = 0.0; res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } } } /*----------------------------------------------------------------- * Relax only C or F points as determined by relax_points. *-----------------------------------------------------------------*/ else { if (num_threads > 1) { /*tmp_data = hypre_CTAlloc(double,n);*/ tmp_data = Ztemp_data; #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ns; i < ne; i++) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res0 = 0.0; res2 = 0.0; res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } } /* hypre_TFree(tmp_data); */ } else { for (i = 0; i < n; i++) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; res0 = 0.0; res2 = 0.0; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } } } } if (num_procs > 1) { hypre_TFree(Vext_data); hypre_TFree(v_buf_data); } } break; case 1: /* Gauss-Seidel VERY SLOW */ { if (num_procs > 1) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); v_buf_data = hypre_CTAlloc(double, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); Vext_data = hypre_CTAlloc(double,num_cols_offd); status = hypre_CTAlloc(MPI_Status,num_recvs+num_sends); requests= hypre_CTAlloc(MPI_Request, num_recvs+num_sends); if (num_cols_offd) { A_offd_j = hypre_CSRMatrixJ(A_offd); A_offd_data = hypre_CSRMatrixData(A_offd); } /*----------------------------------------------------------------- * Copy current approximation into temporary vector. *-----------------------------------------------------------------*/ /* for (i = 0; i < n; i++) { Vtemp_data[i] = u_data[i]; } */ } /*----------------------------------------------------------------- * Relax all points. *-----------------------------------------------------------------*/ for (p = 0; p < num_procs; p++) { jr = 0; if (p != my_id) { for (i = 0; i < num_sends; i++) { ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); if (ip == p) { vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1)-vec_start; for (j=vec_start; j < vec_start+vec_len; j++) v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; MPI_Isend(&v_buf_data[vec_start], vec_len, MPI_DOUBLE, ip, 0, comm, &requests[jr++]); } } MPI_Waitall(jr,requests,status); MPI_Barrier(comm); } else { if (num_procs > 1) { for (i = 0; i < num_recvs; i++) { ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start; MPI_Irecv(&Vext_data[vec_start], vec_len, MPI_DOUBLE, ip, 0, comm, &requests[jr++]); } MPI_Waitall(jr,requests,status); } if (relax_points == 0) { for (i = 0; i < n; i++) { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } /*----------------------------------------------------------------- * Relax only C or F points as determined by relax_points. *-----------------------------------------------------------------*/ else { for (i = 0; i < n; i++) { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } if (num_procs > 1) MPI_Barrier(comm); } } if (num_procs > 1) { hypre_TFree(Vext_data); hypre_TFree(v_buf_data); hypre_TFree(status); hypre_TFree(requests); } } break; case 2: /* Gauss-Seidel: relax interior points in parallel, boundary sequentially */ { if (num_procs > 1) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); v_buf_data = hypre_CTAlloc(double, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); Vext_data = hypre_CTAlloc(double,num_cols_offd); status = hypre_CTAlloc(MPI_Status,num_recvs+num_sends); requests= hypre_CTAlloc(MPI_Request, num_recvs+num_sends); if (num_cols_offd) { A_offd_j = hypre_CSRMatrixJ(A_offd); A_offd_data = hypre_CSRMatrixData(A_offd); } } /*----------------------------------------------------------------- * Copy current approximation into temporary vector. *-----------------------------------------------------------------*/ /* for (i = 0; i < n; i++) { Vtemp_data[i] = u_data[i]; } */ /*----------------------------------------------------------------- * Relax interior points first *-----------------------------------------------------------------*/ if (relax_points == 0) { for (i = 0; i < n; i++) { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ((A_offd_i[i+1]-A_offd_i[i]) == zero && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } else { for (i = 0; i < n; i++) { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && (A_offd_i[i+1]-A_offd_i[i]) == zero && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } for (p = 0; p < num_procs; p++) { jr = 0; if (p != my_id) { for (i = 0; i < num_sends; i++) { ip = hypre_ParCSRCommPkgSendProc(comm_pkg, i); if (ip == p) { vec_start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1)-vec_start; for (j=vec_start; j < vec_start+vec_len; j++) v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; MPI_Isend(&v_buf_data[vec_start], vec_len, MPI_DOUBLE, ip, 0, comm, &requests[jr++]); } } MPI_Waitall(jr,requests,status); MPI_Barrier(comm); } else { if (num_procs > 1) { for (i = 0; i < num_recvs; i++) { ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg,i+1)-vec_start; MPI_Irecv(&Vext_data[vec_start], vec_len, MPI_DOUBLE, ip, 0, comm, &requests[jr++]); } MPI_Waitall(jr,requests,status); } if (relax_points == 0) { for (i = 0; i < n; i++) { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ((A_offd_i[i+1]-A_offd_i[i]) != zero && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } /*----------------------------------------------------------------- * Relax only C or F points as determined by relax_points. *-----------------------------------------------------------------*/ else { for (i = 0; i < n; i++) { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && (A_offd_i[i+1]-A_offd_i[i]) != zero && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } if (num_procs > 1) MPI_Barrier(comm); } } if (num_procs > 1) { hypre_TFree(Vext_data); hypre_TFree(v_buf_data); hypre_TFree(status); hypre_TFree(requests); } } break; case 4: /* Hybrid: Jacobi off-processor, Gauss-Seidel/SOR on-processor (backward loop) */ { if (num_procs > 1) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); v_buf_data = hypre_CTAlloc(double, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); Vext_data = hypre_CTAlloc(double,num_cols_offd); if (num_cols_offd) { A_offd_j = hypre_CSRMatrixJ(A_offd); A_offd_data = hypre_CSRMatrixData(A_offd); } index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++) v_buf_data[index++] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data, Vext_data); /*----------------------------------------------------------------- * Copy current approximation into temporary vector. *-----------------------------------------------------------------*/ hypre_ParCSRCommHandleDestroy(comm_handle); comm_handle = NULL; } /*----------------------------------------------------------------- * Relax all points. *-----------------------------------------------------------------*/ if (relax_weight == 1 && omega == 1) { if (relax_points == 0) { if (num_threads > 1) { tmp_data = hypre_CTAlloc(double,n); #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ne-1; i > ns-1; i--) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) res -= A_diag_data[jj] * u_data[ii]; else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } hypre_TFree(tmp_data); } else { for (i = n-1; i > -1; i--) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } } /*----------------------------------------------------------------- * Relax only C or F points as determined by relax_points. *-----------------------------------------------------------------*/ else { if (num_threads > 1) { tmp_data = hypre_CTAlloc(double,n); #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ne-1; i > ns-1; i--) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) res -= A_diag_data[jj] * u_data[ii]; else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } hypre_TFree(tmp_data); } else { for (i = n-1; i > -1; i--) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } } } else { #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { Vtemp_data[i] = u_data[i]; } prod = (1.0-relax_weight*omega); if (relax_points == 0) { if (num_threads > 1) { tmp_data = hypre_CTAlloc(double,n); #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ne-1; i > ns-1; i--) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; res0 = 0.0; res2 = 0.0; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } } hypre_TFree(tmp_data); } else { for (i = n-1; i > -1; i--) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res0 = 0.0; res2 = 0.0; res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } } } /*----------------------------------------------------------------- * Relax only C or F points as determined by relax_points. *-----------------------------------------------------------------*/ else { if (num_threads > 1) { tmp_data = hypre_CTAlloc(double,n); #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ne-1; i > ns-1; i--) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res0 = 0.0; res2 = 0.0; res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } } hypre_TFree(tmp_data); } else { for (i = n-1; i > -1; i--) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; res0 = 0.0; res2 = 0.0; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } } } } if (num_procs > 1) { hypre_TFree(Vext_data); hypre_TFree(v_buf_data); } } break; case 6: /* Hybrid: Jacobi off-processor, Symm. Gauss-Seidel/ SSOR on-processor with outer relaxation parameter */ { Ztemp_local = hypre_ParVectorLocalVector(Ztemp); Ztemp_data = hypre_VectorData(Ztemp_local); /*----------------------------------------------------------------- * Copy current approximation into temporary vector. *-----------------------------------------------------------------*/ if (num_procs > 1) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); v_buf_data = hypre_CTAlloc(double, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); Vext_data = hypre_CTAlloc(double,num_cols_offd); if (num_cols_offd) { A_offd_j = hypre_CSRMatrixJ(A_offd); A_offd_data = hypre_CSRMatrixData(A_offd); } index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++) v_buf_data[index++] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg, v_buf_data, Vext_data); /*----------------------------------------------------------------- * Copy current approximation into temporary vector. *-----------------------------------------------------------------*/ hypre_ParCSRCommHandleDestroy(comm_handle); comm_handle = NULL; } /*----------------------------------------------------------------- * Relax all points. *-----------------------------------------------------------------*/ if (relax_weight == 1 && omega == 1) { if (relax_points == 0) { if (num_threads > 1) { /* tmp_data = hypre_CTAlloc(double,n); */ tmp_data = Ztemp_data; #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ns; i < ne; i++) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res -= A_diag_data[jj] * u_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } for (i = ne-1; i > ns-1; i--) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res -= A_diag_data[jj] * u_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } /* hypre_TFree(tmp_data); */ } else { for (i = 0; i < n; i++) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } for (i = n-1; i > -1; i--) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } } /*----------------------------------------------------------------- * Relax only C or F points as determined by relax_points. *-----------------------------------------------------------------*/ else { if (num_threads > 1) { /* tmp_data = hypre_CTAlloc(double,n); */ tmp_data = Ztemp_data; #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ns; i < ne; i++) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res -= A_diag_data[jj] * u_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } for (i = ne-1; i > ns-1; i--) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res -= A_diag_data[jj] * u_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } /* hypre_TFree(tmp_data);*/ } else { for (i = 0; i < n; i++) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } for (i = n-1; i > -1; i--) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res -= A_diag_data[jj] * u_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] = res / A_diag_data[A_diag_i[i]]; } } } } } else { #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { Vtemp_data[i] = u_data[i]; } prod = (1.0-relax_weight*omega); if (relax_points == 0) { if (num_threads > 1) { /* tmp_data = hypre_CTAlloc(double,n); */ tmp_data = Ztemp_data; #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ns; i < ne; i++) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res0 = 0.0; res2 = 0.0; res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } for (i = ne-1; i > ns-1; i--) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res0 = 0.0; res2 = 0.0; res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } } /* hypre_TFree(tmp_data); */ } else { for (i = 0; i < n; i++) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res0 = 0.0; res = f_data[i]; res2 = 0.0; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } for (i = n-1; i > -1; i--) /* interior points first */ { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if ( A_diag_data[A_diag_i[i]] != zero) { res0 = 0.0; res = f_data[i]; res2 = 0.0; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } } } /*----------------------------------------------------------------- * Relax only C or F points as determined by relax_points. *-----------------------------------------------------------------*/ else { if (num_threads > 1) { /* tmp_data = hypre_CTAlloc(double,n); */ tmp_data = Ztemp_data; #define HYPRE_SMP_PRIVATE i #include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) tmp_data[i] = u_data[i]; #define HYPRE_SMP_PRIVATE i,ii,j,jj,ns,ne,res,rest,size #include "../utilities/hypre_smp_forloop.h" for (j = 0; j < num_threads; j++) { size = n/num_threads; rest = n - size*num_threads; if (j < rest) { ns = j*size+j; ne = (j+1)*size+j+1; } else { ns = j*size+rest; ne = (j+1)*size+rest; } for (i = ns; i < ne; i++) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res0 = 0.0; res2 = 0.0; res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res2 += A_diag_data[jj] * Vtemp_data[ii]; res0 -= A_diag_data[jj] * u_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } for (i = ne-1; i > ns-1; i--) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res0 = 0.0; res2 = 0.0; res = f_data[i]; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; if (ii >= ns && ii < ne) { res2 += A_diag_data[jj] * Vtemp_data[ii]; res0 -= A_diag_data[jj] * u_data[ii]; } else res -= A_diag_data[jj] * tmp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } } /* hypre_TFree(tmp_data); */ } else { for (i = 0; i < n; i++) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; res0 = 0.0; res2 = 0.0; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } for (i = n-1; i > -1; i--) /* relax interior points */ { /*----------------------------------------------------------- * If i is of the right type ( C or F ) and diagonal is * nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (cf_marker[i] == relax_points && A_diag_data[A_diag_i[i]] != zero) { res = f_data[i]; res0 = 0.0; res2 = 0.0; for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) { ii = A_diag_j[jj]; res0 -= A_diag_data[jj] * u_data[ii]; res2 += A_diag_data[jj] * Vtemp_data[ii]; } for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) { ii = A_offd_j[jj]; res -= A_offd_data[jj] * Vext_data[ii]; } u_data[i] *= prod; u_data[i] += relax_weight*(omega*res + res0 + one_minus_omega*res2) / A_diag_data[A_diag_i[i]]; /*u_data[i] += omega*(relax_weight*res + res0 + one_minus_weight*res2) / A_diag_data[A_diag_i[i]];*/ } } } } } if (num_procs > 1) { hypre_TFree(Vext_data); hypre_TFree(v_buf_data); } } break; case 7: /* Jacobi (uses ParMatvec) */ { /*----------------------------------------------------------------- * Copy f into temporary vector. *-----------------------------------------------------------------*/ hypre_ParVectorCopy(f,Vtemp); /*----------------------------------------------------------------- * Perform Matvec Vtemp=f-Au *-----------------------------------------------------------------*/ hypre_ParCSRMatrixMatvec(-1.0,A, u, 1.0, Vtemp); for (i = 0; i < n; i++) { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. *-----------------------------------------------------------*/ if (A_diag_data[A_diag_i[i]] != zero) { u_data[i] += relax_weight * Vtemp_data[i] / A_diag_data[A_diag_i[i]]; } } } break; case 9: /* Direct solve: use gaussian elimination */ { int n_global = (int)global_num_rows; /*----------------------------------------------------------------- * Generate CSR matrix from ParCSRMatrix A *-----------------------------------------------------------------*/ #ifdef HYPRE_NO_GLOBAL_PARTITION /* all processors are needed for these routines */ A_CSR = hypre_ParCSRMatrixToCSRMatrixAll(A); f_vector = hypre_ParVectorToVectorAll(f); if (n) { #else if (n) { A_CSR = hypre_ParCSRMatrixToCSRMatrixAll(A); f_vector = hypre_ParVectorToVectorAll(f); #endif A_CSR_i = hypre_CSRMatrixI(A_CSR); A_CSR_j = hypre_CSRMatrixJ(A_CSR); A_CSR_data = hypre_CSRMatrixData(A_CSR); f_vector_data = hypre_VectorData(f_vector); A_mat = hypre_CTAlloc(double, n_global*n_global); b_vec = hypre_CTAlloc(double, n_global); /*--------------------------------------------------------------- * Load CSR matrix into A_mat. *---------------------------------------------------------------*/ for (i = 0; i < n_global; i++) { for (jj = A_CSR_i[i]; jj < A_CSR_i[i+1]; jj++) { column = A_CSR_j[jj]; A_mat[i*n_global+column] = A_CSR_data[jj]; } b_vec[i] = f_vector_data[i]; } relax_error = gselim(A_mat,b_vec,n_global); for (i = 0; i < n; i++) { u_data[i] = b_vec[first_index+i]; } hypre_TFree(A_mat); hypre_TFree(b_vec); hypre_CSRMatrixDestroy(A_CSR); A_CSR = NULL; hypre_SeqVectorDestroy(f_vector); f_vector = NULL; } #ifdef HYPRE_NO_GLOBAL_PARTITION else { hypre_CSRMatrixDestroy(A_CSR); A_CSR = NULL; hypre_SeqVectorDestroy(f_vector); f_vector = NULL; } #endif } break; } return(relax_error); } /*------------------------------------------------------------------------- * * Gaussian Elimination * *------------------------------------------------------------------------ */ int gselim(A,x,n) double *A; double *x; int n; { int err_flag = 0; int j,k,m; double factor; if (n==1) /* A is 1x1 */ { if (A[0] != 0.0) { x[0] = x[0]/A[0]; return(err_flag); } else { err_flag = 1; return(err_flag); } } else /* A is nxn. Forward elimination */ { for (k = 0; k < n-1; k++) { if (A[k*n+k] != 0.0) { for (j = k+1; j < n; j++) { if (A[j*n+k] != 0.0) { factor = A[j*n+k]/A[k*n+k]; for (m = k+1; m < n; m++) { A[j*n+m] -= factor * A[k*n+m]; } /* Elimination step for rhs */ x[j] -= factor * x[k]; } } } } /* Back Substitution */ for (k = n-1; k > 0; --k) { x[k] /= A[k*n+k]; for (j = 0; j < k; j++) { if (A[j*n+k] != 0.0) { x[j] -= x[k] * A[j*n+k]; } } } x[0] /= A[0]; return(err_flag); } }