| 1 | /*BHEADER**********************************************************************
|
|---|
| 2 | * Copyright (c) 2008, Lawrence Livermore National Security, LLC.
|
|---|
| 3 | * Produced at the Lawrence Livermore National Laboratory.
|
|---|
| 4 | * This file is part of HYPRE. See file COPYRIGHT for details.
|
|---|
| 5 | *
|
|---|
| 6 | * HYPRE is free software; you can redistribute it and/or modify it under the
|
|---|
| 7 | * terms of the GNU Lesser General Public License (as published by the Free
|
|---|
| 8 | * Software Foundation) version 2.1 dated February 1999.
|
|---|
| 9 | *
|
|---|
| 10 | * $Revision: 2.4 $
|
|---|
| 11 | ***********************************************************************EHEADER*/
|
|---|
| 12 |
|
|---|
| 13 |
|
|---|
| 14 |
|
|---|
| 15 | #include "headers.h"
|
|---|
| 16 |
|
|---|
| 17 | /*--------------------------------------------------------------------------
|
|---|
| 18 | * hypre_ParAMGBuildMultipass
|
|---|
| 19 | * This routine implements Stuben's direct interpolation with multiple passes.
|
|---|
| 20 | *--------------------------------------------------------------------------*/
|
|---|
| 21 |
|
|---|
| 22 | int
|
|---|
| 23 | hypre_BoomerAMGBuildMultipass( hypre_ParCSRMatrix *A,
|
|---|
| 24 | int *CF_marker,
|
|---|
| 25 | hypre_ParCSRMatrix *S,
|
|---|
| 26 | HYPRE_BigInt *num_cpts_global,
|
|---|
| 27 | int num_functions,
|
|---|
| 28 | int *dof_func,
|
|---|
| 29 | int debug_flag,
|
|---|
| 30 | double trunc_factor,
|
|---|
| 31 | int P_max_elmts,
|
|---|
| 32 | int weight_option,
|
|---|
| 33 | int *col_offd_S_to_A,
|
|---|
| 34 | hypre_ParCSRMatrix **P_ptr )
|
|---|
| 35 | {
|
|---|
| 36 | MPI_Comm comm = hypre_ParCSRMatrixComm(A);
|
|---|
| 37 | hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(S);
|
|---|
| 38 | hypre_ParCSRCommHandle *comm_handle;
|
|---|
| 39 | hypre_ParCSRCommPkg *tmp_comm_pkg;
|
|---|
| 40 |
|
|---|
| 41 | hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
|
|---|
| 42 | double *A_diag_data = hypre_CSRMatrixData(A_diag);
|
|---|
| 43 | int *A_diag_i = hypre_CSRMatrixI(A_diag);
|
|---|
| 44 | int *A_diag_j = hypre_CSRMatrixJ(A_diag);
|
|---|
| 45 |
|
|---|
| 46 | hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
|
|---|
| 47 | double *A_offd_data = NULL;
|
|---|
| 48 | int *A_offd_i = hypre_CSRMatrixI(A_offd);
|
|---|
| 49 | int *A_offd_j = NULL;
|
|---|
| 50 | HYPRE_BigInt *col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A);
|
|---|
| 51 | int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
|
|---|
| 52 |
|
|---|
| 53 | hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
|
|---|
| 54 | int *S_diag_i = hypre_CSRMatrixI(S_diag);
|
|---|
| 55 | int *S_diag_j = hypre_CSRMatrixJ(S_diag);
|
|---|
| 56 |
|
|---|
| 57 | hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
|
|---|
| 58 | int *S_offd_i = hypre_CSRMatrixI(S_offd);
|
|---|
| 59 | int *S_offd_j = NULL;
|
|---|
| 60 | HYPRE_BigInt *col_map_offd_S = hypre_ParCSRMatrixColMapOffd(S);
|
|---|
| 61 | int num_cols_offd_S = hypre_CSRMatrixNumCols(S_offd);
|
|---|
| 62 | HYPRE_BigInt *col_map_offd = NULL;
|
|---|
| 63 | int num_cols_offd;
|
|---|
| 64 |
|
|---|
| 65 | hypre_ParCSRMatrix *P;
|
|---|
| 66 | hypre_CSRMatrix *P_diag;
|
|---|
| 67 | double *P_diag_data;
|
|---|
| 68 | int *P_diag_i; /*at first counter of nonzero cols for each row,
|
|---|
| 69 | finally will be pointer to start of row */
|
|---|
| 70 | int *P_diag_j;
|
|---|
| 71 |
|
|---|
| 72 | hypre_CSRMatrix *P_offd;
|
|---|
| 73 | double *P_offd_data = NULL;
|
|---|
| 74 | int *P_offd_i; /*at first counter of nonzero cols for each row,
|
|---|
| 75 | finally will be pointer to start of row */
|
|---|
| 76 | int *P_offd_j = NULL;
|
|---|
| 77 |
|
|---|
| 78 | int num_sends = 0;
|
|---|
| 79 | int *int_buf_data = NULL;
|
|---|
| 80 | HYPRE_BigInt *big_buf_data = NULL;
|
|---|
| 81 | int *send_map_start;
|
|---|
| 82 | int *send_map_elmt;
|
|---|
| 83 | int *send_procs;
|
|---|
| 84 | int num_recvs = 0;
|
|---|
| 85 | int *recv_vec_start;
|
|---|
| 86 | int *recv_procs;
|
|---|
| 87 | int *new_recv_vec_start = NULL;
|
|---|
| 88 | int **Pext_send_map_start = NULL;
|
|---|
| 89 | int **Pext_recv_vec_start = NULL;
|
|---|
| 90 | int *Pext_start = NULL;
|
|---|
| 91 | int *P_ncols = NULL;
|
|---|
| 92 |
|
|---|
| 93 | int *CF_marker_offd = NULL;
|
|---|
| 94 | int *dof_func_offd = NULL;
|
|---|
| 95 | int *P_marker;
|
|---|
| 96 | int *P_marker_offd = NULL;
|
|---|
| 97 | int *C_array;
|
|---|
| 98 | int *C_array_offd = NULL;
|
|---|
| 99 | int *pass_array = NULL; /* contains points ordered according to pass */
|
|---|
| 100 | int *pass_pointer = NULL; /* pass_pointer[j] contains pointer to first
|
|---|
| 101 | point of pass j contained in pass_array */
|
|---|
| 102 | int *P_diag_start;
|
|---|
| 103 | int *P_offd_start = NULL;
|
|---|
| 104 | int **P_diag_pass;
|
|---|
| 105 | int **P_offd_pass = NULL;
|
|---|
| 106 | int **Pext_pass = NULL;
|
|---|
| 107 | HYPRE_BigInt *big_temp_pass = NULL;
|
|---|
| 108 | HYPRE_BigInt **new_elmts = NULL; /* new neighbors generated in each pass */
|
|---|
| 109 | int *new_counter = NULL; /* contains no. of new neighbors for
|
|---|
| 110 | each pass */
|
|---|
| 111 | int *loc = NULL; /* contains locations for new neighbor
|
|---|
| 112 | connections in int_o_buffer to avoid searching */
|
|---|
| 113 | int *Pext_i = NULL; /*contains P_diag_i and P_offd_i info for nonzero
|
|---|
| 114 | cols of off proc neighbors */
|
|---|
| 115 | HYPRE_BigInt *Pext_send_buffer = NULL; /* used to collect global nonzero
|
|---|
| 116 | col ids in P_diag for send_map_elmts */
|
|---|
| 117 |
|
|---|
| 118 | int *map_S_to_new = NULL;
|
|---|
| 119 | /*int *map_A_to_new = NULL;*/
|
|---|
| 120 | int *map_A_to_S = NULL;
|
|---|
| 121 | HYPRE_BigInt *new_col_map_offd = NULL;
|
|---|
| 122 | HYPRE_BigInt *col_map_offd_P = NULL;
|
|---|
| 123 | HYPRE_BigInt *big_permute = NULL;
|
|---|
| 124 | int *permute = NULL;
|
|---|
| 125 |
|
|---|
| 126 | int cnt;
|
|---|
| 127 | int cnt_nz;
|
|---|
| 128 | int total_nz;
|
|---|
| 129 | int pass;
|
|---|
| 130 | int num_passes;
|
|---|
| 131 | int max_num_passes = 10;
|
|---|
| 132 |
|
|---|
| 133 | int n_fine;
|
|---|
| 134 | int n_coarse = 0;
|
|---|
| 135 | int n_coarse_offd = 0;
|
|---|
| 136 | int n_SF = 0;
|
|---|
| 137 | int n_SF_offd = 0;
|
|---|
| 138 |
|
|---|
| 139 | int *fine_to_coarse = NULL;
|
|---|
| 140 | HYPRE_BigInt *fine_to_coarse_offd = NULL;
|
|---|
| 141 |
|
|---|
| 142 | int *assigned = NULL;
|
|---|
| 143 | int *assigned_offd = NULL;
|
|---|
| 144 |
|
|---|
| 145 | double *Pext_send_data = NULL;
|
|---|
| 146 | double *Pext_data = NULL;
|
|---|
| 147 |
|
|---|
| 148 | double sum_C, sum_N;
|
|---|
| 149 | double sum_C_pos, sum_C_neg;
|
|---|
| 150 | double sum_N_pos, sum_N_neg;
|
|---|
| 151 | double diagonal;
|
|---|
| 152 | double alfa = 1.0;
|
|---|
| 153 | double beta = 1.0;
|
|---|
| 154 | int j_start;
|
|---|
| 155 | int j_end;
|
|---|
| 156 |
|
|---|
| 157 | int i,i1;
|
|---|
| 158 | int j,j1;
|
|---|
| 159 | int k,k1,k2,k3;
|
|---|
| 160 | int pass_array_size;
|
|---|
| 161 | HYPRE_BigInt global_pass_array_size;
|
|---|
| 162 | HYPRE_BigInt local_pass_array_size;
|
|---|
| 163 | int my_id, num_procs;
|
|---|
| 164 | int index, start;
|
|---|
| 165 | HYPRE_BigInt my_first_cpt;
|
|---|
| 166 | HYPRE_BigInt total_global_cpts;
|
|---|
| 167 | HYPRE_BigInt big_value;
|
|---|
| 168 | int p_cnt;
|
|---|
| 169 | int total_nz_offd;
|
|---|
| 170 | int cnt_nz_offd;
|
|---|
| 171 | int cnt_offd, cnt_new;
|
|---|
| 172 | int no_break;
|
|---|
| 173 | int not_found;
|
|---|
| 174 | int Pext_send_size;
|
|---|
| 175 | int Pext_recv_size;
|
|---|
| 176 | int old_Pext_send_size;
|
|---|
| 177 | int old_Pext_recv_size;
|
|---|
| 178 | int P_offd_size = 0;
|
|---|
| 179 | int local_index = -1;
|
|---|
| 180 | int new_num_cols_offd = 0;
|
|---|
| 181 | int num_cols_offd_P;
|
|---|
| 182 | int num_threads;
|
|---|
| 183 | int ns, ne, size, rest, pass_length;
|
|---|
| 184 | int *tmp_marker, *tmp_marker_offd;
|
|---|
| 185 | int *tmp_array, *tmp_array_offd;
|
|---|
| 186 | /*-----------------------------------------------------------------------
|
|---|
| 187 | * Access the CSR vectors for A and S. Also get size of fine grid.
|
|---|
| 188 | *-----------------------------------------------------------------------*/
|
|---|
| 189 |
|
|---|
| 190 | MPI_Comm_size(comm,&num_procs);
|
|---|
| 191 | MPI_Comm_rank(comm,&my_id);
|
|---|
| 192 | num_threads = hypre_NumThreads();
|
|---|
| 193 |
|
|---|
| 194 | #ifdef HYPRE_NO_GLOBAL_PARTITION
|
|---|
| 195 | my_first_cpt = num_cpts_global[0];
|
|---|
| 196 | /* total_global_cpts = 0; */
|
|---|
| 197 | if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
|
|---|
| 198 | MPI_Bcast(&total_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
|
|---|
| 199 | #else
|
|---|
| 200 | my_first_cpt = num_cpts_global[my_id];
|
|---|
| 201 | total_global_cpts = num_cpts_global[num_procs];
|
|---|
| 202 | #endif
|
|---|
| 203 |
|
|---|
| 204 | if (!comm_pkg)
|
|---|
| 205 | {
|
|---|
| 206 | comm_pkg = hypre_ParCSRMatrixCommPkg(A);
|
|---|
| 207 | if (!comm_pkg)
|
|---|
| 208 | {
|
|---|
| 209 | #ifdef HYPRE_NO_GLOBAL_PARTITION
|
|---|
| 210 | hypre_NewCommPkgCreate(A);
|
|---|
| 211 | #else
|
|---|
| 212 | hypre_MatvecCommPkgCreate(A);
|
|---|
| 213 | #endif
|
|---|
| 214 | comm_pkg = hypre_ParCSRMatrixCommPkg(A);
|
|---|
| 215 | }
|
|---|
| 216 | col_offd_S_to_A = NULL;
|
|---|
| 217 | }
|
|---|
| 218 |
|
|---|
| 219 | if (col_offd_S_to_A)
|
|---|
| 220 | {
|
|---|
| 221 | col_map_offd = col_map_offd_S;
|
|---|
| 222 | num_cols_offd = num_cols_offd_S;
|
|---|
| 223 | }
|
|---|
| 224 | else
|
|---|
| 225 | {
|
|---|
| 226 | col_map_offd = col_map_offd_A;
|
|---|
| 227 | num_cols_offd = num_cols_offd_A;
|
|---|
| 228 | }
|
|---|
| 229 |
|
|---|
| 230 | if (num_cols_offd_A)
|
|---|
| 231 | {
|
|---|
| 232 | A_offd_data = hypre_CSRMatrixData(A_offd);
|
|---|
| 233 | A_offd_j = hypre_CSRMatrixJ(A_offd);
|
|---|
| 234 | }
|
|---|
| 235 |
|
|---|
| 236 | if (num_cols_offd)
|
|---|
| 237 | S_offd_j = hypre_CSRMatrixJ(S_offd);
|
|---|
| 238 |
|
|---|
| 239 | n_fine = hypre_CSRMatrixNumRows(A_diag);
|
|---|
| 240 |
|
|---|
| 241 | /*-----------------------------------------------------------------------
|
|---|
| 242 | * Intialize counters and allocate mapping vector.
|
|---|
| 243 | *-----------------------------------------------------------------------*/
|
|---|
| 244 |
|
|---|
| 245 | if (n_fine) fine_to_coarse = hypre_CTAlloc(int, n_fine);
|
|---|
| 246 |
|
|---|
| 247 | n_coarse = 0;
|
|---|
| 248 | n_SF = 0;
|
|---|
| 249 | #ifdef HYPRE_USING_OPENMP
|
|---|
| 250 | #pragma omp parallel for private(i) reduction(+:n_coarse,n_SF ) schedule(static)
|
|---|
| 251 | #endif
|
|---|
| 252 | for (i=0; i < n_fine; i++)
|
|---|
| 253 | if (CF_marker[i] == 1) n_coarse++;
|
|---|
| 254 | else if (CF_marker[i] == -3) n_SF++;
|
|---|
| 255 |
|
|---|
| 256 | pass_array_size = n_fine-n_coarse-n_SF;
|
|---|
| 257 | if (pass_array_size) pass_array = hypre_CTAlloc(int, pass_array_size);
|
|---|
| 258 | pass_pointer = hypre_CTAlloc(int, max_num_passes+1);
|
|---|
| 259 | if (n_fine) assigned = hypre_CTAlloc(int, n_fine);
|
|---|
| 260 |
|
|---|
| 261 | {
|
|---|
| 262 | P_diag_i = hypre_CTAlloc(int, n_fine+1);
|
|---|
| 263 | P_offd_i = hypre_CTAlloc(int, n_fine+1);
|
|---|
| 264 | }
|
|---|
| 265 | if (n_coarse) C_array = hypre_CTAlloc(int, n_coarse);
|
|---|
| 266 |
|
|---|
| 267 | if (num_cols_offd)
|
|---|
| 268 | {
|
|---|
| 269 | CF_marker_offd = hypre_CTAlloc(int, num_cols_offd);
|
|---|
| 270 | if (num_functions > 1) dof_func_offd = hypre_CTAlloc(int, num_cols_offd);
|
|---|
| 271 | }
|
|---|
| 272 |
|
|---|
| 273 | if (num_procs > 1)
|
|---|
| 274 | {
|
|---|
| 275 | num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
|
|---|
| 276 | send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
|
|---|
| 277 | send_map_start = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
|
|---|
| 278 | send_map_elmt = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
|
|---|
| 279 | num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
|
|---|
| 280 | recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
|
|---|
| 281 | recv_vec_start = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
|
|---|
| 282 | if (send_map_start[num_sends])
|
|---|
| 283 | {
|
|---|
| 284 | int_buf_data = hypre_CTAlloc(int, send_map_start[num_sends]);
|
|---|
| 285 | big_buf_data = hypre_CTAlloc(HYPRE_BigInt, send_map_start[num_sends]);
|
|---|
| 286 | }
|
|---|
| 287 | }
|
|---|
| 288 |
|
|---|
| 289 | index = 0;
|
|---|
| 290 | for (i=0; i < num_sends; i++)
|
|---|
| 291 | {
|
|---|
| 292 | start = send_map_start[i];
|
|---|
| 293 | for (j = start; j < send_map_start[i+1]; j++)
|
|---|
| 294 | int_buf_data[index++] = CF_marker[send_map_elmt[j]];
|
|---|
| 295 | }
|
|---|
| 296 | if (num_procs > 1)
|
|---|
| 297 | {
|
|---|
| 298 | comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
|
|---|
| 299 | CF_marker_offd);
|
|---|
| 300 | hypre_ParCSRCommHandleDestroy(comm_handle);
|
|---|
| 301 | }
|
|---|
| 302 |
|
|---|
| 303 | if (num_functions > 1)
|
|---|
| 304 | {
|
|---|
| 305 | index = 0;
|
|---|
| 306 | for (i=0; i < num_sends; i++)
|
|---|
| 307 | {
|
|---|
| 308 | start = send_map_start[i];
|
|---|
| 309 | for (j = start; j < send_map_start[i+1]; j++)
|
|---|
| 310 | int_buf_data[index++] = dof_func[send_map_elmt[j]];
|
|---|
| 311 | }
|
|---|
| 312 | if (num_procs > 1)
|
|---|
| 313 | {
|
|---|
| 314 | comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
|
|---|
| 315 | dof_func_offd);
|
|---|
| 316 | hypre_ParCSRCommHandleDestroy(comm_handle);
|
|---|
| 317 | }
|
|---|
| 318 | }
|
|---|
| 319 |
|
|---|
| 320 | n_coarse_offd = 0;
|
|---|
| 321 | n_SF_offd = 0;
|
|---|
| 322 | #ifdef HYPRE_USING_OPENMP
|
|---|
| 323 | #pragma omp parallel for private(i) reduction(+:n_coarse_offd,n_SF_offd) schedule(static)
|
|---|
| 324 | #endif
|
|---|
| 325 | for (i=0; i < num_cols_offd; i++)
|
|---|
| 326 | if (CF_marker_offd[i] == 1) n_coarse_offd++;
|
|---|
| 327 | else if (CF_marker_offd[i] == -3) n_SF_offd++;
|
|---|
| 328 |
|
|---|
| 329 | if (num_cols_offd)
|
|---|
| 330 | {
|
|---|
| 331 | assigned_offd = hypre_CTAlloc(int, num_cols_offd);
|
|---|
| 332 | map_S_to_new = hypre_CTAlloc(int, num_cols_offd);
|
|---|
| 333 | fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_offd);
|
|---|
| 334 | new_col_map_offd = hypre_CTAlloc(HYPRE_BigInt, n_coarse_offd);
|
|---|
| 335 | }
|
|---|
| 336 |
|
|---|
| 337 | /*-----------------------------------------------------------------------
|
|---|
| 338 | * First Pass: determine the maximal size of P, and elementsPerRow[i].
|
|---|
| 339 | *-----------------------------------------------------------------------*/
|
|---|
| 340 |
|
|---|
| 341 | /*-----------------------------------------------------------------------
|
|---|
| 342 | * Assigned points are points for which we know an interpolation
|
|---|
| 343 | * formula already, and which are thus available to interpolate from.
|
|---|
| 344 | * assigned[i]=0 for C points, and 1, 2, 3, ... for F points, depending
|
|---|
| 345 | * in which pass their interpolation formula is determined.
|
|---|
| 346 | *
|
|---|
| 347 | * pass_array contains the points ordered according to its pass, i.e.
|
|---|
| 348 | * | C-points | points of pass 1 | points of pass 2 | ....
|
|---|
| 349 | * C_points are points 0 through pass_pointer[1]-1,
|
|---|
| 350 | * points of pass k (0 < k < num_passes) are contained in points
|
|---|
| 351 | * pass_pointer[k] through pass_pointer[k+1]-1 of pass_array .
|
|---|
| 352 | *
|
|---|
| 353 | * pass_array is also used to avoid going through all points for each pass,
|
|---|
| 354 | * i,e. at the bginning it contains all points in descending order starting
|
|---|
| 355 | * with n_fine-1. Then starting from the last point, we evaluate whether
|
|---|
| 356 | * it is a C_point (pass 0). If it is the point is brought to the front
|
|---|
| 357 | * and the length of the points to be searched is shortened. This is
|
|---|
| 358 | * done until the parameter cnt (which determines the first point of
|
|---|
| 359 | * pass_array to be searched) becomes n_fine. Then all points have been
|
|---|
| 360 | * assigned a pass number.
|
|---|
| 361 | *-----------------------------------------------------------------------*/
|
|---|
| 362 |
|
|---|
| 363 |
|
|---|
| 364 | cnt = 0;
|
|---|
| 365 | p_cnt = pass_array_size-1;
|
|---|
| 366 | P_diag_i[0] = 0;
|
|---|
| 367 | P_offd_i[0] = 0;
|
|---|
| 368 | for (i = 0; i < n_fine; i++)
|
|---|
| 369 | {
|
|---|
| 370 | if (CF_marker[i] == 1)
|
|---|
| 371 | {
|
|---|
| 372 | fine_to_coarse[i] = cnt; /* this C point is assigned index
|
|---|
| 373 | coarse_counter on coarse grid,
|
|---|
| 374 | and in column of P */
|
|---|
| 375 | C_array[cnt++] = i;
|
|---|
| 376 | assigned[i] = 0;
|
|---|
| 377 | P_diag_i[i+1] = 1; /* one element in row i1 of P */
|
|---|
| 378 | P_offd_i[i+1] = 0;
|
|---|
| 379 | }
|
|---|
| 380 | else if (CF_marker[i] == -1)
|
|---|
| 381 | {
|
|---|
| 382 | pass_array[p_cnt--] = i;
|
|---|
| 383 | P_diag_i[i+1] = 0;
|
|---|
| 384 | P_offd_i[i+1] = 0;
|
|---|
| 385 | assigned[i] = -1;
|
|---|
| 386 | fine_to_coarse[i] = -1;
|
|---|
| 387 | }
|
|---|
| 388 | else
|
|---|
| 389 | {
|
|---|
| 390 | P_diag_i[i+1] = 0;
|
|---|
| 391 | P_offd_i[i+1] = 0;
|
|---|
| 392 | assigned[i] = -1;
|
|---|
| 393 | fine_to_coarse[i] = -1;
|
|---|
| 394 | }
|
|---|
| 395 | }
|
|---|
| 396 |
|
|---|
| 397 | index = 0;
|
|---|
| 398 | for (i=0; i < num_sends; i++)
|
|---|
| 399 | {
|
|---|
| 400 | start = send_map_start[i];
|
|---|
| 401 | for (j = start; j < send_map_start[i+1]; j++)
|
|---|
| 402 | {
|
|---|
| 403 | big_buf_data[index] = (HYPRE_BigInt) fine_to_coarse[send_map_elmt[j]];
|
|---|
| 404 | if (big_buf_data[index] > -1)
|
|---|
| 405 | big_buf_data[index] += my_first_cpt;
|
|---|
| 406 | index++;
|
|---|
| 407 | }
|
|---|
| 408 | }
|
|---|
| 409 | if (num_procs > 1)
|
|---|
| 410 | {
|
|---|
| 411 | comm_handle = hypre_ParCSRCommHandleCreate(21, comm_pkg, big_buf_data,
|
|---|
| 412 | fine_to_coarse_offd);
|
|---|
| 413 | hypre_ParCSRCommHandleDestroy(comm_handle);
|
|---|
| 414 |
|
|---|
| 415 | hypre_TFree(big_buf_data);
|
|---|
| 416 |
|
|---|
| 417 | }
|
|---|
| 418 |
|
|---|
| 419 | new_recv_vec_start = hypre_CTAlloc(int,num_recvs+1);
|
|---|
| 420 |
|
|---|
| 421 | if (n_coarse_offd)
|
|---|
| 422 | C_array_offd = hypre_CTAlloc(int,n_coarse_offd);
|
|---|
| 423 |
|
|---|
| 424 | cnt = 0;
|
|---|
| 425 | new_recv_vec_start[0] = 0;
|
|---|
| 426 | for (j = 0; j < num_recvs; j++)
|
|---|
| 427 | {
|
|---|
| 428 | for (i = recv_vec_start[j]; i < recv_vec_start[j+1]; i++)
|
|---|
| 429 | {
|
|---|
| 430 | if (CF_marker_offd[i] == 1)
|
|---|
| 431 | {
|
|---|
| 432 | map_S_to_new[i] = cnt;
|
|---|
| 433 | C_array_offd[cnt] = i;
|
|---|
| 434 | new_col_map_offd[cnt++] = fine_to_coarse_offd[i];
|
|---|
| 435 | assigned_offd[i] = 0;
|
|---|
| 436 | }
|
|---|
| 437 | else
|
|---|
| 438 | {
|
|---|
| 439 | assigned_offd[i] = -1;
|
|---|
| 440 | map_S_to_new[i] = -1;
|
|---|
| 441 | }
|
|---|
| 442 | }
|
|---|
| 443 | new_recv_vec_start[j+1] = cnt;
|
|---|
| 444 | }
|
|---|
| 445 |
|
|---|
| 446 | cnt = 0;
|
|---|
| 447 | hypre_TFree(fine_to_coarse_offd);
|
|---|
| 448 |
|
|---|
| 449 | if (col_offd_S_to_A)
|
|---|
| 450 | {
|
|---|
| 451 | map_A_to_S = hypre_CTAlloc(int,num_cols_offd_A);
|
|---|
| 452 | for (i=0; i < num_cols_offd_A; i++)
|
|---|
| 453 | {
|
|---|
| 454 | if (cnt < num_cols_offd && col_map_offd_A[i] == col_map_offd[cnt])
|
|---|
| 455 | map_A_to_S[i] = cnt++;
|
|---|
| 456 | else
|
|---|
| 457 | map_A_to_S[i] = -1;
|
|---|
| 458 | }
|
|---|
| 459 | }
|
|---|
| 460 |
|
|---|
| 461 | /*-----------------------------------------------------------------------
|
|---|
| 462 | * Mark all local neighbors of C points as 'assigned'.
|
|---|
| 463 | *-----------------------------------------------------------------------*/
|
|---|
| 464 |
|
|---|
| 465 | pass_pointer[0] = 0;
|
|---|
| 466 | pass_pointer[1] = 0;
|
|---|
| 467 | total_nz = n_coarse; /* accumulates total number of nonzeros in P_diag */
|
|---|
| 468 | total_nz_offd = 0; /* accumulates total number of nonzeros in P_offd */
|
|---|
| 469 |
|
|---|
| 470 | cnt = 0;
|
|---|
| 471 | cnt_offd = 0;
|
|---|
| 472 | cnt_nz = 0;
|
|---|
| 473 | cnt_nz_offd = 0;
|
|---|
| 474 | for (i = pass_array_size-1; i > cnt-1; i--)
|
|---|
| 475 | {
|
|---|
| 476 | i1 = pass_array[i];
|
|---|
| 477 | for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
|
|---|
| 478 | {
|
|---|
| 479 | j1 = S_diag_j[j];
|
|---|
| 480 | if (CF_marker[j1] == 1)
|
|---|
| 481 | {
|
|---|
| 482 | P_diag_i[i1+1]++;
|
|---|
| 483 | cnt_nz++;
|
|---|
| 484 | assigned[i1] = 1;
|
|---|
| 485 | }
|
|---|
| 486 | }
|
|---|
| 487 | for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
|
|---|
| 488 | {
|
|---|
| 489 | j1 = S_offd_j[j];
|
|---|
| 490 | if (CF_marker_offd[j1] == 1)
|
|---|
| 491 | {
|
|---|
| 492 | P_offd_i[i1+1]++;
|
|---|
| 493 | cnt_nz_offd++;
|
|---|
| 494 | assigned[i1] = 1;
|
|---|
| 495 | }
|
|---|
| 496 | }
|
|---|
| 497 | if (assigned[i1] == 1)
|
|---|
| 498 | {
|
|---|
| 499 | pass_array[i++] = pass_array[cnt];
|
|---|
| 500 | pass_array[cnt++] = i1;
|
|---|
| 501 | }
|
|---|
| 502 | }
|
|---|
| 503 |
|
|---|
| 504 | pass_pointer[2] = cnt;
|
|---|
| 505 |
|
|---|
| 506 | /*-----------------------------------------------------------------------
|
|---|
| 507 | * All local neighbors are assigned, now need to exchange the boundary
|
|---|
| 508 | * info for assigned strong neighbors.
|
|---|
| 509 | *-----------------------------------------------------------------------*/
|
|---|
| 510 |
|
|---|
| 511 | index = 0;
|
|---|
| 512 | for (i=0; i < num_sends; i++)
|
|---|
| 513 | {
|
|---|
| 514 | start = send_map_start[i];
|
|---|
| 515 | for (j = start; j < send_map_start[i+1]; j++)
|
|---|
| 516 | int_buf_data[index++] = assigned[send_map_elmt[j]];
|
|---|
| 517 | }
|
|---|
| 518 | if (num_procs > 1)
|
|---|
| 519 | {
|
|---|
| 520 | comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
|
|---|
| 521 | assigned_offd);
|
|---|
| 522 | hypre_ParCSRCommHandleDestroy(comm_handle);
|
|---|
| 523 | }
|
|---|
| 524 |
|
|---|
| 525 | /*-----------------------------------------------------------------------
|
|---|
| 526 | * Now we need to determine strong neighbors of points of pass 1, etc.
|
|---|
| 527 | * we need to update assigned_offd after each pass
|
|---|
| 528 | *-----------------------------------------------------------------------*/
|
|---|
| 529 |
|
|---|
| 530 | pass = 2;
|
|---|
| 531 | local_pass_array_size = (HYPRE_BigInt)(pass_array_size - cnt);
|
|---|
| 532 | MPI_Allreduce(&local_pass_array_size, &global_pass_array_size, 1,
|
|---|
| 533 | MPI_HYPRE_BIG_INT, MPI_SUM, comm);
|
|---|
| 534 |
|
|---|
| 535 | while (global_pass_array_size && pass < max_num_passes)
|
|---|
| 536 | {
|
|---|
| 537 | for (i = pass_array_size-1; i > cnt-1; i--)
|
|---|
| 538 | {
|
|---|
| 539 | i1 = pass_array[i];
|
|---|
| 540 | no_break = 1;
|
|---|
| 541 | for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
|
|---|
| 542 | {
|
|---|
| 543 | j1 = S_diag_j[j];
|
|---|
| 544 | if (assigned[j1] == pass-1)
|
|---|
| 545 | {
|
|---|
| 546 | pass_array[i++] = pass_array[cnt];
|
|---|
| 547 | pass_array[cnt++] = i1;
|
|---|
| 548 | assigned[i1] = pass;
|
|---|
| 549 | no_break = 0;
|
|---|
| 550 | break;
|
|---|
| 551 | }
|
|---|
| 552 | }
|
|---|
| 553 | if (no_break)
|
|---|
| 554 | {
|
|---|
| 555 | for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
|
|---|
| 556 | {
|
|---|
| 557 | j1 = S_offd_j[j];
|
|---|
| 558 | if (assigned_offd[j1] == pass-1)
|
|---|
| 559 | {
|
|---|
| 560 | pass_array[i++] = pass_array[cnt];
|
|---|
| 561 | pass_array[cnt++] = i1;
|
|---|
| 562 | assigned[i1] = pass;
|
|---|
| 563 | break;
|
|---|
| 564 | }
|
|---|
| 565 | }
|
|---|
| 566 | }
|
|---|
| 567 | }
|
|---|
| 568 | /*printf("pass %d remaining points %d \n", pass, local_pass_array_size);*/
|
|---|
| 569 |
|
|---|
| 570 | pass++;
|
|---|
| 571 | pass_pointer[pass] = cnt;
|
|---|
| 572 |
|
|---|
| 573 | local_pass_array_size = (HYPRE_BigInt) (pass_array_size - cnt);
|
|---|
| 574 | MPI_Allreduce(&local_pass_array_size, &global_pass_array_size, 1,
|
|---|
| 575 | MPI_HYPRE_BIG_INT, MPI_SUM, comm);
|
|---|
| 576 | index = 0;
|
|---|
| 577 | for (i=0; i < num_sends; i++)
|
|---|
| 578 | {
|
|---|
| 579 | start = send_map_start[i];
|
|---|
| 580 | for (j = start; j < send_map_start[i+1]; j++)
|
|---|
| 581 | int_buf_data[index++] = assigned[send_map_elmt[j]];
|
|---|
| 582 | }
|
|---|
| 583 | if (num_procs > 1)
|
|---|
| 584 | {
|
|---|
| 585 | comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
|
|---|
| 586 | assigned_offd);
|
|---|
| 587 | hypre_ParCSRCommHandleDestroy(comm_handle);
|
|---|
| 588 | }
|
|---|
| 589 | }
|
|---|
| 590 |
|
|---|
| 591 | hypre_TFree(int_buf_data);
|
|---|
| 592 |
|
|---|
| 593 | num_passes = pass;
|
|---|
| 594 |
|
|---|
| 595 | P_marker = hypre_CTAlloc(int, n_coarse); /* marks points to see if they have
|
|---|
| 596 | been counted */
|
|---|
| 597 | for (i=0; i < n_coarse; i++)
|
|---|
| 598 | P_marker[i] = -1;
|
|---|
| 599 |
|
|---|
| 600 | if (n_coarse_offd)
|
|---|
| 601 | {
|
|---|
| 602 | P_marker_offd = hypre_CTAlloc(int, n_coarse_offd);
|
|---|
| 603 | for (i=0; i < n_coarse_offd; i++)
|
|---|
| 604 | P_marker_offd[i] = -1;
|
|---|
| 605 | }
|
|---|
| 606 |
|
|---|
| 607 | P_diag_pass = hypre_CTAlloc(int*,num_passes); /* P_diag_pass[i] will contain
|
|---|
| 608 | all column numbers for points of pass i */
|
|---|
| 609 |
|
|---|
| 610 | P_diag_pass[1] = hypre_CTAlloc(int,cnt_nz);
|
|---|
| 611 |
|
|---|
| 612 | P_diag_start = hypre_CTAlloc(int, n_fine); /* P_diag_start[i] contains
|
|---|
| 613 | pointer to begin of column numbers in P_pass for point i,
|
|---|
| 614 | P_diag_i[i+1] contains number of columns for point i */
|
|---|
| 615 |
|
|---|
| 616 | P_offd_start = hypre_CTAlloc(int, n_fine);
|
|---|
| 617 |
|
|---|
| 618 | if (num_procs > 1)
|
|---|
| 619 | {
|
|---|
| 620 | P_offd_pass = hypre_CTAlloc(int*,num_passes);
|
|---|
| 621 |
|
|---|
| 622 | if (cnt_nz_offd)
|
|---|
| 623 | P_offd_pass[1] = hypre_CTAlloc(int,cnt_nz_offd);
|
|---|
| 624 | else
|
|---|
| 625 | P_offd_pass[1] = NULL;
|
|---|
| 626 |
|
|---|
| 627 | new_elmts = hypre_CTAlloc(HYPRE_BigInt*,num_passes);
|
|---|
| 628 |
|
|---|
| 629 | new_counter = hypre_CTAlloc(int, num_passes+1);
|
|---|
| 630 |
|
|---|
| 631 | new_counter[0] = 0;
|
|---|
| 632 | new_counter[1] = n_coarse_offd;
|
|---|
| 633 | new_num_cols_offd = n_coarse_offd;
|
|---|
| 634 |
|
|---|
| 635 | new_elmts[0] = new_col_map_offd;
|
|---|
| 636 | }
|
|---|
| 637 |
|
|---|
| 638 | /*-----------------------------------------------------------------------
|
|---|
| 639 | * Pass 1: now we consider points of pass 1, with strong C_neighbors,
|
|---|
| 640 | *-----------------------------------------------------------------------*/
|
|---|
| 641 |
|
|---|
| 642 | cnt_nz = 0;
|
|---|
| 643 | cnt_nz_offd = 0;
|
|---|
| 644 | for (i=pass_pointer[1]; i < pass_pointer[2]; i++)
|
|---|
| 645 | {
|
|---|
| 646 | i1 = pass_array[i];
|
|---|
| 647 | P_diag_start[i1] = cnt_nz;
|
|---|
| 648 | P_offd_start[i1] = cnt_nz_offd;
|
|---|
| 649 | for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
|
|---|
| 650 | {
|
|---|
| 651 | j1 = S_diag_j[j];
|
|---|
| 652 | if (CF_marker[j1] == 1)
|
|---|
| 653 | P_diag_pass[1][cnt_nz++] = fine_to_coarse[j1];
|
|---|
| 654 | }
|
|---|
| 655 | for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
|
|---|
| 656 | {
|
|---|
| 657 | j1 = S_offd_j[j];
|
|---|
| 658 | if (CF_marker_offd[j1] == 1)
|
|---|
| 659 | P_offd_pass[1][cnt_nz_offd++] = map_S_to_new[j1];
|
|---|
| 660 | }
|
|---|
| 661 | }
|
|---|
| 662 |
|
|---|
| 663 |
|
|---|
| 664 | total_nz += cnt_nz;
|
|---|
| 665 | total_nz_offd += cnt_nz_offd;
|
|---|
| 666 |
|
|---|
| 667 | if (num_procs > 1)
|
|---|
| 668 | {
|
|---|
| 669 | tmp_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
|
|---|
| 670 | Pext_send_map_start = hypre_CTAlloc(int*,num_passes);
|
|---|
| 671 | Pext_recv_vec_start = hypre_CTAlloc(int*,num_passes);
|
|---|
| 672 | Pext_pass = hypre_CTAlloc(int*,num_passes);
|
|---|
| 673 | Pext_i = hypre_CTAlloc(int, num_cols_offd+1);
|
|---|
| 674 | if (num_cols_offd) Pext_start = hypre_CTAlloc(int, num_cols_offd);
|
|---|
| 675 | if (send_map_start[num_sends])
|
|---|
| 676 | P_ncols = hypre_CTAlloc(int,send_map_start[num_sends]);
|
|---|
| 677 | /*for (i=0; i < num_cols_offd+1; i++)
|
|---|
| 678 | Pext_i[i] = 0;
|
|---|
| 679 | for (i=0; i < send_map_start[num_sends]; i++)
|
|---|
| 680 | P_ncols[i] = 0;*/
|
|---|
| 681 | }
|
|---|
| 682 |
|
|---|
| 683 | old_Pext_send_size = 0;
|
|---|
| 684 | old_Pext_recv_size = 0;
|
|---|
| 685 | for (pass=2; pass < num_passes; pass++)
|
|---|
| 686 | {
|
|---|
| 687 | if (num_procs > 1)
|
|---|
| 688 | {
|
|---|
| 689 | Pext_send_map_start[pass] = hypre_CTAlloc(int, num_sends+1);
|
|---|
| 690 | Pext_recv_vec_start[pass] = hypre_CTAlloc(int, num_recvs+1);
|
|---|
| 691 | Pext_send_size = 0;
|
|---|
| 692 | Pext_send_map_start[pass][0] = 0;
|
|---|
| 693 |
|
|---|
| 694 | for (i=0; i < num_sends; i++)
|
|---|
| 695 | {
|
|---|
| 696 | #ifdef HYPRE_USING_OPENMP
|
|---|
| 697 | #pragma omp parallel for private(j,j1) reduction(+:Pext_send_size) schedule(static)
|
|---|
| 698 | #endif
|
|---|
| 699 |
|
|---|
| 700 | for (j=send_map_start[i]; j < send_map_start[i+1]; j++)
|
|---|
| 701 | {
|
|---|
| 702 | j1 = send_map_elmt[j];
|
|---|
| 703 | if (assigned[j1] == pass-1)
|
|---|
| 704 | {
|
|---|
| 705 | P_ncols[j] = P_diag_i[j1+1] + P_offd_i[j1+1];
|
|---|
| 706 | Pext_send_size += P_ncols[j];
|
|---|
| 707 | }
|
|---|
| 708 | }
|
|---|
| 709 | Pext_send_map_start[pass][i+1] = Pext_send_size;
|
|---|
| 710 | }
|
|---|
| 711 |
|
|---|
| 712 | comm_handle = hypre_ParCSRCommHandleCreate (11, comm_pkg,
|
|---|
| 713 | P_ncols, &Pext_i[1]);
|
|---|
| 714 | hypre_ParCSRCommHandleDestroy(comm_handle);
|
|---|
| 715 |
|
|---|
| 716 | if (Pext_send_size > old_Pext_send_size)
|
|---|
| 717 | {
|
|---|
| 718 | hypre_TFree(Pext_send_buffer);
|
|---|
| 719 | Pext_send_buffer = hypre_CTAlloc(HYPRE_BigInt, Pext_send_size);
|
|---|
| 720 | }
|
|---|
| 721 | old_Pext_send_size = Pext_send_size;
|
|---|
| 722 | }
|
|---|
| 723 |
|
|---|
| 724 | cnt_offd = 0;
|
|---|
| 725 | for (i=0; i < num_sends; i++)
|
|---|
| 726 | {
|
|---|
| 727 | for (j=send_map_start[i]; j < send_map_start[i+1]; j++)
|
|---|
| 728 | {
|
|---|
| 729 | j1 = send_map_elmt[j];
|
|---|
| 730 | if (assigned[j1] == pass-1)
|
|---|
| 731 | {
|
|---|
| 732 | j_start = P_diag_start[j1];
|
|---|
| 733 | j_end = j_start+P_diag_i[j1+1];
|
|---|
| 734 | for (k=j_start; k < j_end; k++)
|
|---|
| 735 | {
|
|---|
| 736 | Pext_send_buffer[cnt_offd++] = my_first_cpt
|
|---|
| 737 | + (HYPRE_BigInt) P_diag_pass[pass-1][k];
|
|---|
| 738 | }
|
|---|
| 739 | j_start = P_offd_start[j1];
|
|---|
| 740 | j_end = j_start+P_offd_i[j1+1];
|
|---|
| 741 | for (k=j_start; k < j_end; k++)
|
|---|
| 742 | {
|
|---|
| 743 | k1 = P_offd_pass[pass-1][k];
|
|---|
| 744 | k3 = 0;
|
|---|
| 745 | while (k3 < pass-1)
|
|---|
| 746 | {
|
|---|
| 747 | if (k1 < new_counter[k3+1])
|
|---|
| 748 | {
|
|---|
| 749 | k2 = k1-new_counter[k3];
|
|---|
| 750 | Pext_send_buffer[cnt_offd++] = new_elmts[k3][k2];
|
|---|
| 751 | break;
|
|---|
| 752 | }
|
|---|
| 753 | k3++;
|
|---|
| 754 | }
|
|---|
| 755 | }
|
|---|
| 756 | }
|
|---|
| 757 | }
|
|---|
| 758 | }
|
|---|
| 759 |
|
|---|
| 760 | if (num_procs > 1)
|
|---|
| 761 | {
|
|---|
| 762 | Pext_recv_size = 0;
|
|---|
| 763 | Pext_recv_vec_start[pass][0] = 0;
|
|---|
| 764 | cnt_offd = 0;
|
|---|
| 765 | for (i=0; i < num_recvs; i++)
|
|---|
| 766 | {
|
|---|
| 767 | for (j=recv_vec_start[i]; j<recv_vec_start[i+1]; j++)
|
|---|
| 768 | {
|
|---|
| 769 | if (assigned_offd[j] == pass-1)
|
|---|
| 770 | {
|
|---|
| 771 | Pext_start[j] = cnt_offd;
|
|---|
| 772 | cnt_offd += Pext_i[j+1];
|
|---|
| 773 | }
|
|---|
| 774 | }
|
|---|
| 775 | Pext_recv_size = cnt_offd;
|
|---|
| 776 | Pext_recv_vec_start[pass][i+1] = Pext_recv_size;
|
|---|
| 777 | }
|
|---|
| 778 |
|
|---|
| 779 | hypre_ParCSRCommPkgComm(tmp_comm_pkg) = comm;
|
|---|
| 780 | hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_sends;
|
|---|
| 781 | hypre_ParCSRCommPkgSendProcs(tmp_comm_pkg) = send_procs;
|
|---|
| 782 | hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) =
|
|---|
| 783 | Pext_send_map_start[pass];
|
|---|
| 784 | hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_recvs;
|
|---|
| 785 | hypre_ParCSRCommPkgRecvProcs(tmp_comm_pkg) = recv_procs;
|
|---|
| 786 | hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) =
|
|---|
| 787 | Pext_recv_vec_start[pass];
|
|---|
| 788 |
|
|---|
| 789 | if (Pext_recv_size)
|
|---|
| 790 | {
|
|---|
| 791 | Pext_pass[pass] = hypre_CTAlloc(int, Pext_recv_size);
|
|---|
| 792 | new_elmts[pass-1] = hypre_CTAlloc(HYPRE_BigInt,Pext_recv_size);
|
|---|
| 793 | }
|
|---|
| 794 | else
|
|---|
| 795 | {
|
|---|
| 796 | Pext_pass[pass] = NULL;
|
|---|
| 797 | new_elmts[pass-1] = NULL;
|
|---|
| 798 | }
|
|---|
| 799 |
|
|---|
| 800 | if (Pext_recv_size > old_Pext_recv_size)
|
|---|
| 801 | {
|
|---|
| 802 | hypre_TFree(loc);
|
|---|
| 803 | hypre_TFree(big_temp_pass);
|
|---|
| 804 | loc = hypre_CTAlloc(int,Pext_recv_size);
|
|---|
| 805 | big_temp_pass = hypre_CTAlloc(HYPRE_BigInt, Pext_recv_size);
|
|---|
| 806 | }
|
|---|
| 807 | old_Pext_recv_size = Pext_recv_size;
|
|---|
| 808 |
|
|---|
| 809 | comm_handle = hypre_ParCSRCommHandleCreate (21, tmp_comm_pkg,
|
|---|
| 810 | Pext_send_buffer, big_temp_pass);
|
|---|
| 811 | hypre_ParCSRCommHandleDestroy(comm_handle);
|
|---|
| 812 |
|
|---|
| 813 | }
|
|---|
| 814 |
|
|---|
| 815 | cnt_new = 0;
|
|---|
| 816 | cnt_offd = 0;
|
|---|
| 817 | for (i=0; i < num_recvs; i++)
|
|---|
| 818 | {
|
|---|
| 819 | for (j=recv_vec_start[i]; j < recv_vec_start[i+1]; j++)
|
|---|
| 820 | {
|
|---|
| 821 | if (assigned_offd[j] == pass-1)
|
|---|
| 822 | {
|
|---|
| 823 | for (j1 = cnt_offd; j1 < cnt_offd+Pext_i[j+1]; j1++)
|
|---|
| 824 | {
|
|---|
| 825 | big_value = big_temp_pass[j1];
|
|---|
| 826 | k2 = (int)(big_value - my_first_cpt);
|
|---|
| 827 | if (k2 > -1 && k2 < n_coarse)
|
|---|
| 828 | {
|
|---|
| 829 | Pext_pass[pass][j1] = -k2-1;
|
|---|
| 830 | }
|
|---|
| 831 | else
|
|---|
| 832 | {
|
|---|
| 833 | not_found = 1;
|
|---|
| 834 | k3 = 0;
|
|---|
| 835 | while (k3 < pass-1 && not_found)
|
|---|
| 836 | {
|
|---|
| 837 | k2 = hypre_BigBinarySearch(new_elmts[k3], big_value,
|
|---|
| 838 | (new_counter[k3+1]-new_counter[k3]));
|
|---|
| 839 | if (k2 > -1)
|
|---|
| 840 | {
|
|---|
| 841 | Pext_pass[pass][j1] = k2 + new_counter[k3];
|
|---|
| 842 | not_found = 0;
|
|---|
| 843 | }
|
|---|
| 844 | else
|
|---|
| 845 | {
|
|---|
| 846 | k3++;
|
|---|
| 847 | }
|
|---|
| 848 | }
|
|---|
| 849 | if (not_found)
|
|---|
| 850 | {
|
|---|
| 851 | new_elmts[pass-1][cnt_new] = big_value;
|
|---|
| 852 | loc[cnt_new++] = j1;
|
|---|
| 853 | }
|
|---|
| 854 | }
|
|---|
| 855 | }
|
|---|
| 856 | cnt_offd += Pext_i[j+1];
|
|---|
| 857 | }
|
|---|
| 858 | }
|
|---|
| 859 | }
|
|---|
| 860 |
|
|---|
| 861 | if (cnt_new)
|
|---|
| 862 | {
|
|---|
| 863 | hypre_BigQsortbi(new_elmts[pass-1],loc,0,cnt_new-1);
|
|---|
| 864 | cnt = 0;
|
|---|
| 865 | local_index = new_counter[pass-1];
|
|---|
| 866 | Pext_pass[pass][loc[0]] = local_index;
|
|---|
| 867 |
|
|---|
| 868 | for (i=1; i < cnt_new; i++)
|
|---|
| 869 | {
|
|---|
| 870 | if (new_elmts[pass-1][i] > new_elmts[pass-1][cnt])
|
|---|
| 871 | {
|
|---|
| 872 | new_elmts[pass-1][++cnt] = new_elmts[pass-1][i];
|
|---|
| 873 | local_index++;
|
|---|
| 874 | }
|
|---|
| 875 | Pext_pass[pass][loc[i]] = local_index;
|
|---|
| 876 | }
|
|---|
| 877 | new_counter[pass] = local_index+1;
|
|---|
| 878 | }
|
|---|
| 879 | else if (num_procs > 1)
|
|---|
| 880 | new_counter[pass] = new_counter[pass-1];
|
|---|
| 881 |
|
|---|
| 882 | if (new_num_cols_offd < local_index+1)
|
|---|
| 883 | {
|
|---|
| 884 | new_num_cols_offd = local_index+1;
|
|---|
| 885 |
|
|---|
| 886 | hypre_TFree(P_marker_offd);
|
|---|
| 887 | P_marker_offd = hypre_CTAlloc(int,new_num_cols_offd);
|
|---|
| 888 |
|
|---|
| 889 | for (i=0; i < new_num_cols_offd; i++)
|
|---|
| 890 | P_marker_offd[i] = -1;
|
|---|
| 891 | }
|
|---|
| 892 |
|
|---|
| 893 | cnt_nz = 0;
|
|---|
| 894 | cnt_nz_offd = 0;
|
|---|
| 895 | for (i=pass_pointer[pass]; i < pass_pointer[pass+1]; i++)
|
|---|
| 896 | {
|
|---|
| 897 | i1 = pass_array[i];
|
|---|
| 898 | P_diag_start[i1] = cnt_nz;
|
|---|
| 899 | P_offd_start[i1] = cnt_nz_offd;
|
|---|
| 900 | for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
|
|---|
| 901 | {
|
|---|
| 902 | j1 = S_diag_j[j];
|
|---|
| 903 | if (assigned[j1] == pass-1)
|
|---|
| 904 | {
|
|---|
| 905 | j_start = P_diag_start[j1];
|
|---|
| 906 | j_end = j_start+P_diag_i[j1+1];
|
|---|
| 907 | for (k=j_start; k < j_end; k++)
|
|---|
| 908 | {
|
|---|
| 909 | k1 = P_diag_pass[pass-1][k];
|
|---|
| 910 | if (P_marker[k1] != i1)
|
|---|
| 911 | {
|
|---|
| 912 | cnt_nz++;
|
|---|
| 913 | P_diag_i[i1+1]++;
|
|---|
| 914 | P_marker[k1] = i1;
|
|---|
| 915 | }
|
|---|
| 916 | }
|
|---|
| 917 | j_start = P_offd_start[j1];
|
|---|
| 918 | j_end = j_start+P_offd_i[j1+1];
|
|---|
| 919 | for (k=j_start; k < j_end; k++)
|
|---|
| 920 | {
|
|---|
| 921 | k1 = P_offd_pass[pass-1][k];
|
|---|
| 922 | if (P_marker_offd[k1] != i1)
|
|---|
| 923 | {
|
|---|
| 924 | cnt_nz_offd++;
|
|---|
| 925 | P_offd_i[i1+1]++;
|
|---|
| 926 | P_marker_offd[k1] = i1;
|
|---|
| 927 | }
|
|---|
| 928 | }
|
|---|
| 929 | }
|
|---|
| 930 | }
|
|---|
| 931 | j_start = 0;
|
|---|
| 932 | for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
|
|---|
| 933 | {
|
|---|
| 934 | j1 = S_offd_j[j];
|
|---|
| 935 | if (assigned_offd[j1] == pass-1)
|
|---|
| 936 | {
|
|---|
| 937 | j_start = Pext_start[j1];
|
|---|
| 938 | j_end = j_start+Pext_i[j1+1];
|
|---|
| 939 | for (k=j_start; k < j_end; k++)
|
|---|
| 940 | {
|
|---|
| 941 | k1 = Pext_pass[pass][k];
|
|---|
| 942 | if (k1 < 0)
|
|---|
| 943 | {
|
|---|
| 944 | if (P_marker[-k1-1] != i1)
|
|---|
| 945 | {
|
|---|
| 946 | cnt_nz++;
|
|---|
| 947 | P_diag_i[i1+1]++;
|
|---|
| 948 | P_marker[-k1-1] = i1;
|
|---|
| 949 | }
|
|---|
| 950 | }
|
|---|
| 951 | else if (P_marker_offd[k1] != i1)
|
|---|
| 952 | {
|
|---|
| 953 | cnt_nz_offd++;
|
|---|
| 954 | P_offd_i[i1+1]++;
|
|---|
| 955 | P_marker_offd[k1] = i1;
|
|---|
| 956 | }
|
|---|
| 957 | }
|
|---|
| 958 | }
|
|---|
| 959 | }
|
|---|
| 960 | }
|
|---|
| 961 |
|
|---|
| 962 | total_nz += cnt_nz;
|
|---|
| 963 | total_nz_offd += cnt_nz_offd;
|
|---|
| 964 |
|
|---|
| 965 | P_diag_pass[pass] = hypre_CTAlloc(int, cnt_nz);
|
|---|
| 966 | if (cnt_nz_offd)
|
|---|
| 967 | P_offd_pass[pass] = hypre_CTAlloc(int, cnt_nz_offd);
|
|---|
| 968 | else if (num_procs > 1)
|
|---|
| 969 | P_offd_pass[pass] = NULL;
|
|---|
| 970 |
|
|---|
| 971 | cnt_nz = 0;
|
|---|
| 972 | cnt_nz_offd = 0;
|
|---|
| 973 | for (i=pass_pointer[pass]; i < pass_pointer[pass+1]; i++)
|
|---|
| 974 | {
|
|---|
| 975 | i1 = pass_array[i];
|
|---|
| 976 | for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
|
|---|
| 977 | {
|
|---|
| 978 | j1 = S_diag_j[j];
|
|---|
| 979 | if (assigned[j1] == pass-1)
|
|---|
| 980 | {
|
|---|
| 981 | j_start = P_diag_start[j1];
|
|---|
| 982 | j_end = j_start+P_diag_i[j1+1];
|
|---|
| 983 | for (k=j_start; k < j_end; k++)
|
|---|
| 984 | {
|
|---|
| 985 | k1 = P_diag_pass[pass-1][k];
|
|---|
| 986 | if (P_marker[k1] != -i1-1)
|
|---|
| 987 | {
|
|---|
| 988 | P_diag_pass[pass][cnt_nz++] = k1;
|
|---|
| 989 | P_marker[k1] = -i1-1;
|
|---|
| 990 | }
|
|---|
| 991 | }
|
|---|
| 992 | j_start = P_offd_start[j1];
|
|---|
| 993 | j_end = j_start+P_offd_i[j1+1];
|
|---|
| 994 | for (k=j_start; k < j_end; k++)
|
|---|
| 995 | {
|
|---|
| 996 | k1 = P_offd_pass[pass-1][k];
|
|---|
| 997 | if (P_marker_offd[k1] != -i1-1)
|
|---|
| 998 | {
|
|---|
| 999 | P_offd_pass[pass][cnt_nz_offd++] = k1;
|
|---|
| 1000 | P_marker_offd[k1] = -i1-1;
|
|---|
| 1001 | }
|
|---|
| 1002 | }
|
|---|
| 1003 | }
|
|---|
| 1004 | }
|
|---|
| 1005 | for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
|
|---|
| 1006 | {
|
|---|
| 1007 | j1 = S_offd_j[j];
|
|---|
| 1008 | if (assigned_offd[j1] == pass-1)
|
|---|
| 1009 | {
|
|---|
| 1010 | j_start = Pext_start[j1];
|
|---|
| 1011 | j_end = j_start+Pext_i[j1+1];
|
|---|
| 1012 | for (k=j_start; k < j_end; k++)
|
|---|
| 1013 | {
|
|---|
| 1014 | k1 = Pext_pass[pass][k];
|
|---|
| 1015 | if (k1 < 0)
|
|---|
| 1016 | {
|
|---|
| 1017 | if (P_marker[-k1-1] != -i1-1)
|
|---|
| 1018 | {
|
|---|
| 1019 | P_diag_pass[pass][cnt_nz++] = -k1-1;
|
|---|
| 1020 | P_marker[-k1-1] = -i1-1;
|
|---|
| 1021 | }
|
|---|
| 1022 | }
|
|---|
| 1023 | else if (P_marker_offd[k1] != -i1-1)
|
|---|
| 1024 | {
|
|---|
| 1025 | P_offd_pass[pass][cnt_nz_offd++] = k1;
|
|---|
| 1026 | P_marker_offd[k1] = -i1-1;
|
|---|
| 1027 | }
|
|---|
| 1028 | }
|
|---|
| 1029 | }
|
|---|
| 1030 | }
|
|---|
| 1031 | }
|
|---|
| 1032 | }
|
|---|
| 1033 |
|
|---|
| 1034 | hypre_TFree(loc);
|
|---|
| 1035 | hypre_TFree(P_ncols);
|
|---|
| 1036 | hypre_TFree(Pext_send_buffer);
|
|---|
| 1037 | hypre_TFree(big_temp_pass);
|
|---|
| 1038 | hypre_TFree(new_recv_vec_start);
|
|---|
| 1039 | hypre_TFree(P_marker);
|
|---|
| 1040 | hypre_TFree(P_marker_offd);
|
|---|
| 1041 | P_marker_offd = NULL;
|
|---|
| 1042 |
|
|---|
| 1043 | P_diag_j = NULL;
|
|---|
| 1044 | P_offd_j = NULL;
|
|---|
| 1045 | {
|
|---|
| 1046 |
|
|---|
| 1047 | if (total_nz)
|
|---|
| 1048 | {
|
|---|
| 1049 | P_diag_j = hypre_CTAlloc(int,total_nz);
|
|---|
| 1050 | P_diag_data = hypre_CTAlloc(double,total_nz);
|
|---|
| 1051 | }
|
|---|
| 1052 | if (total_nz_offd)
|
|---|
| 1053 | {
|
|---|
| 1054 | P_offd_j = hypre_CTAlloc(int,total_nz_offd);
|
|---|
| 1055 | P_offd_data = hypre_CTAlloc(double,total_nz_offd);
|
|---|
| 1056 | }
|
|---|
| 1057 | }
|
|---|
| 1058 |
|
|---|
| 1059 | for (i=0; i < n_fine; i++)
|
|---|
| 1060 | {
|
|---|
| 1061 | P_diag_i[i+1] += P_diag_i[i];
|
|---|
| 1062 | P_offd_i[i+1] += P_offd_i[i];
|
|---|
| 1063 | }
|
|---|
| 1064 |
|
|---|
| 1065 | /* determine P for coarse points */
|
|---|
| 1066 |
|
|---|
| 1067 | #ifdef HYPRE_USING_OPENMP
|
|---|
| 1068 | #pragma omp parallel for private(i,i1) schedule(static)
|
|---|
| 1069 | #endif
|
|---|
| 1070 | for (i=0; i < n_coarse; i++)
|
|---|
| 1071 | {
|
|---|
| 1072 | i1 = C_array[i];
|
|---|
| 1073 | P_diag_j[P_diag_i[i1]] = fine_to_coarse[i1];
|
|---|
| 1074 | P_diag_data[P_diag_i[i1]] = 1.0;
|
|---|
| 1075 | }
|
|---|
| 1076 |
|
|---|
| 1077 | P_marker = hypre_CTAlloc(int,n_fine);
|
|---|
| 1078 | for (i=0; i < n_fine; i++)
|
|---|
| 1079 | P_marker[i] = -1;
|
|---|
| 1080 |
|
|---|
| 1081 | if (num_cols_offd)
|
|---|
| 1082 | {
|
|---|
| 1083 | P_marker_offd = hypre_CTAlloc(int,num_cols_offd);
|
|---|
| 1084 | for (i=0; i < num_cols_offd; i++)
|
|---|
| 1085 | P_marker_offd[i] = -1;
|
|---|
| 1086 | }
|
|---|
| 1087 |
|
|---|
| 1088 | if (weight_option) /*if this is set, weights are separated into
|
|---|
| 1089 | negative and positive offdiagonals and accumulated
|
|---|
| 1090 | accordingly */
|
|---|
| 1091 | {
|
|---|
| 1092 | /* determine P for points of pass 1, i.e. neighbors of coarse points */
|
|---|
| 1093 | for (i=pass_pointer[1]; i < pass_pointer[2]; i++)
|
|---|
| 1094 | {
|
|---|
| 1095 | i1 = pass_array[i];
|
|---|
| 1096 | sum_C_pos = 0;
|
|---|
| 1097 | sum_C_neg = 0;
|
|---|
| 1098 | sum_N_pos = 0;
|
|---|
| 1099 | sum_N_neg = 0;
|
|---|
| 1100 | j_start = P_diag_start[i1];
|
|---|
| 1101 | j_end = j_start+P_diag_i[i1+1]-P_diag_i[i1];
|
|---|
| 1102 | for (j=j_start; j < j_end; j++)
|
|---|
| 1103 | {
|
|---|
| 1104 | k1 = P_diag_pass[1][j];
|
|---|
| 1105 | P_marker[C_array[k1]] = i1;
|
|---|
| 1106 | }
|
|---|
| 1107 | cnt = P_diag_i[i1];
|
|---|
| 1108 | for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++)
|
|---|
| 1109 | {
|
|---|
| 1110 | j1 = A_diag_j[j];
|
|---|
| 1111 | if (CF_marker[j1] != -3 &&
|
|---|
| 1112 | (num_functions == 1 || dof_func[i1] == dof_func[j1]))
|
|---|
| 1113 | {
|
|---|
| 1114 | if (A_diag_data[j] < 0)
|
|---|
| 1115 | sum_N_neg += A_diag_data[j];
|
|---|
| 1116 | else
|
|---|
| 1117 | sum_N_pos += A_diag_data[j];
|
|---|
| 1118 | }
|
|---|
| 1119 | if (j1 != -1 && P_marker[j1] == i1)
|
|---|
| 1120 | {
|
|---|
| 1121 | P_diag_data[cnt] = A_diag_data[j];
|
|---|
| 1122 | P_diag_j[cnt++] = fine_to_coarse[j1];
|
|---|
| 1123 | if (A_diag_data[j] < 0)
|
|---|
| 1124 | sum_C_neg += A_diag_data[j];
|
|---|
| 1125 | else
|
|---|
| 1126 | sum_C_pos += A_diag_data[j];
|
|---|
| 1127 | }
|
|---|
| 1128 | }
|
|---|
| 1129 | j_start = P_offd_start[i1];
|
|---|
| 1130 | j_end = j_start+P_offd_i[i1+1]-P_offd_i[i1];
|
|---|
| 1131 | for (j=j_start; j < j_end; j++)
|
|---|
| 1132 | {
|
|---|
| 1133 | k1 = P_offd_pass[1][j];
|
|---|
| 1134 | P_marker_offd[C_array_offd[k1]] = i1;
|
|---|
| 1135 | }
|
|---|
| 1136 | cnt_offd = P_offd_i[i1];
|
|---|
| 1137 | for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++)
|
|---|
| 1138 | {
|
|---|
| 1139 | if (col_offd_S_to_A)
|
|---|
| 1140 | j1 = map_A_to_S[A_offd_j[j]];
|
|---|
| 1141 | else
|
|---|
| 1142 | j1 = A_offd_j[j];
|
|---|
| 1143 | if (CF_marker_offd[j1] != -3 &&
|
|---|
| 1144 | (num_functions == 1 || dof_func[i1] == dof_func_offd[j1]))
|
|---|
| 1145 | {
|
|---|
| 1146 | if (A_offd_data[j] < 0)
|
|---|
| 1147 | sum_N_neg += A_offd_data[j];
|
|---|
| 1148 | else
|
|---|
| 1149 | sum_N_pos += A_offd_data[j];
|
|---|
| 1150 | }
|
|---|
| 1151 | if (j1 != -1 && P_marker_offd[j1] == i1)
|
|---|
| 1152 | {
|
|---|
| 1153 | P_offd_data[cnt_offd] = A_offd_data[j];
|
|---|
| 1154 | P_offd_j[cnt_offd++] = map_S_to_new[j1];
|
|---|
| 1155 | if (A_offd_data[j] < 0)
|
|---|
| 1156 | sum_C_neg += A_offd_data[j];
|
|---|
| 1157 | else
|
|---|
| 1158 | sum_C_pos += A_offd_data[j];
|
|---|
| 1159 | }
|
|---|
| 1160 | }
|
|---|
| 1161 | diagonal = A_diag_data[A_diag_i[i1]];
|
|---|
| 1162 | if (sum_C_neg*diagonal) alfa = -sum_N_neg/(sum_C_neg*diagonal);
|
|---|
| 1163 | if (sum_C_pos*diagonal) beta = -sum_N_pos/(sum_C_pos*diagonal);
|
|---|
| 1164 | for (j=P_diag_i[i1]; j < cnt; j++)
|
|---|
| 1165 | if (P_diag_data[j] < 0)
|
|---|
| 1166 | P_diag_data[j] *= alfa;
|
|---|
| 1167 | else
|
|---|
| 1168 | P_diag_data[j] *= beta;
|
|---|
| 1169 | for (j=P_offd_i[i1]; j < cnt_offd; j++)
|
|---|
| 1170 | if (P_offd_data[j] < 0)
|
|---|
| 1171 | P_offd_data[j] *= alfa;
|
|---|
| 1172 | else
|
|---|
| 1173 | P_offd_data[j] *= beta;
|
|---|
| 1174 | }
|
|---|
| 1175 |
|
|---|
| 1176 | old_Pext_send_size = 0;
|
|---|
| 1177 | old_Pext_recv_size = 0;
|
|---|
| 1178 |
|
|---|
| 1179 | /*if (!col_offd_S_to_A) hypre_TFree(map_A_to_new);*/
|
|---|
| 1180 | hypre_TFree(P_diag_pass[1]);
|
|---|
| 1181 | if (num_procs > 1) hypre_TFree(P_offd_pass[1]);
|
|---|
| 1182 |
|
|---|
| 1183 | if (new_num_cols_offd > n_coarse_offd)
|
|---|
| 1184 | {
|
|---|
| 1185 | hypre_TFree(C_array_offd);
|
|---|
| 1186 | C_array_offd = hypre_CTAlloc(int, new_num_cols_offd);
|
|---|
| 1187 | }
|
|---|
| 1188 |
|
|---|
| 1189 | for (pass = 2; pass < num_passes; pass++)
|
|---|
| 1190 | {
|
|---|
| 1191 |
|
|---|
| 1192 | if (num_procs > 1)
|
|---|
| 1193 | {
|
|---|
| 1194 | Pext_send_size = Pext_send_map_start[pass][num_sends];
|
|---|
| 1195 | if (Pext_send_size > old_Pext_send_size)
|
|---|
| 1196 | {
|
|---|
| 1197 | hypre_TFree(Pext_send_data);
|
|---|
| 1198 | Pext_send_data = hypre_CTAlloc(double, Pext_send_size);
|
|---|
| 1199 | }
|
|---|
| 1200 | old_Pext_send_size = Pext_send_size;
|
|---|
| 1201 |
|
|---|
| 1202 | cnt_offd = 0;
|
|---|
| 1203 | for (i=0; i < num_sends; i++)
|
|---|
| 1204 | {
|
|---|
| 1205 | for (j=send_map_start[i]; j < send_map_start[i+1]; j++)
|
|---|
| 1206 | {
|
|---|
| 1207 | j1 = send_map_elmt[j];
|
|---|
| 1208 | if (assigned[j1] == pass-1)
|
|---|
| 1209 | {
|
|---|
| 1210 | j_start = P_diag_i[j1];
|
|---|
| 1211 | j_end = P_diag_i[j1+1];
|
|---|
| 1212 | for (k=j_start; k < j_end; k++)
|
|---|
| 1213 | {
|
|---|
| 1214 | Pext_send_data[cnt_offd++] = P_diag_data[k];
|
|---|
| 1215 | }
|
|---|
| 1216 | j_start = P_offd_i[j1];
|
|---|
| 1217 | j_end = P_offd_i[j1+1];
|
|---|
| 1218 | for (k=j_start; k < j_end; k++)
|
|---|
| 1219 | {
|
|---|
| 1220 | Pext_send_data[cnt_offd++] = P_offd_data[k];
|
|---|
| 1221 | }
|
|---|
| 1222 | }
|
|---|
| 1223 | }
|
|---|
| 1224 | }
|
|---|
| 1225 |
|
|---|
| 1226 | hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_sends;
|
|---|
| 1227 | hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) =
|
|---|
| 1228 | Pext_send_map_start[pass];
|
|---|
| 1229 | hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_recvs;
|
|---|
| 1230 | hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) =
|
|---|
| 1231 | Pext_recv_vec_start[pass];
|
|---|
| 1232 |
|
|---|
| 1233 | Pext_recv_size = Pext_recv_vec_start[pass][num_recvs];
|
|---|
| 1234 |
|
|---|
| 1235 | if (Pext_recv_size > old_Pext_recv_size)
|
|---|
| 1236 | {
|
|---|
| 1237 | hypre_TFree(Pext_data);
|
|---|
| 1238 | Pext_data = hypre_CTAlloc(double, Pext_recv_size);
|
|---|
| 1239 | }
|
|---|
| 1240 | old_Pext_recv_size = Pext_recv_size;
|
|---|
| 1241 |
|
|---|
| 1242 | comm_handle = hypre_ParCSRCommHandleCreate (1, tmp_comm_pkg,
|
|---|
| 1243 | Pext_send_data, Pext_data);
|
|---|
| 1244 | hypre_ParCSRCommHandleDestroy(comm_handle);
|
|---|
| 1245 |
|
|---|
| 1246 | hypre_TFree(Pext_send_map_start[pass]);
|
|---|
| 1247 | hypre_TFree(Pext_recv_vec_start[pass]);
|
|---|
| 1248 | }
|
|---|
| 1249 |
|
|---|
| 1250 | for (i=pass_pointer[pass]; i < pass_pointer[pass+1]; i++)
|
|---|
| 1251 | {
|
|---|
| 1252 | i1 = pass_array[i];
|
|---|
| 1253 | sum_C_neg = 0;
|
|---|
| 1254 | sum_C_pos = 0;
|
|---|
| 1255 | sum_N_neg = 0;
|
|---|
| 1256 | sum_N_pos = 0;
|
|---|
| 1257 | j_start = P_diag_start[i1];
|
|---|
| 1258 | j_end = j_start+P_diag_i[i1+1]-P_diag_i[i1];
|
|---|
| 1259 | cnt = P_diag_i[i1];
|
|---|
| 1260 | for (j=j_start; j < j_end; j++)
|
|---|
| 1261 | {
|
|---|
| 1262 | k1 = P_diag_pass[pass][j];
|
|---|
| 1263 | C_array[k1] = cnt;
|
|---|
| 1264 | P_diag_data[cnt] = 0;
|
|---|
| 1265 | P_diag_j[cnt++] = k1;
|
|---|
| 1266 | }
|
|---|
| 1267 | j_start = P_offd_start[i1];
|
|---|
| 1268 | j_end = j_start+P_offd_i[i1+1]-P_offd_i[i1];
|
|---|
| 1269 | cnt_offd = P_offd_i[i1];
|
|---|
| 1270 | for (j=j_start; j < j_end; j++)
|
|---|
| 1271 | {
|
|---|
| 1272 | k1 = P_offd_pass[pass][j];
|
|---|
| 1273 | C_array_offd[k1] = cnt_offd;
|
|---|
| 1274 | P_offd_data[cnt_offd] = 0;
|
|---|
| 1275 | P_offd_j[cnt_offd++] = k1;
|
|---|
| 1276 | }
|
|---|
| 1277 | for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
|
|---|
| 1278 | {
|
|---|
| 1279 | j1 = S_diag_j[j];
|
|---|
| 1280 | if (assigned[j1] == pass-1)
|
|---|
| 1281 | P_marker[j1] = i1;
|
|---|
| 1282 | }
|
|---|
| 1283 | for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
|
|---|
| 1284 | {
|
|---|
| 1285 | j1 = S_offd_j[j];
|
|---|
| 1286 | if (assigned_offd[j1] == pass-1)
|
|---|
| 1287 | P_marker_offd[j1] = i1;
|
|---|
| 1288 | }
|
|---|
| 1289 | for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++)
|
|---|
| 1290 | {
|
|---|
| 1291 | j1 = A_diag_j[j];
|
|---|
| 1292 | if (P_marker[j1] == i1)
|
|---|
| 1293 | {
|
|---|
| 1294 | for (k=P_diag_i[j1]; k < P_diag_i[j1+1]; k++)
|
|---|
| 1295 | {
|
|---|
| 1296 | k1 = P_diag_j[k];
|
|---|
| 1297 | alfa = A_diag_data[j]*P_diag_data[k];
|
|---|
| 1298 | P_diag_data[C_array[k1]] += alfa;
|
|---|
| 1299 | if (alfa < 0)
|
|---|
| 1300 | {
|
|---|
| 1301 | sum_C_neg += alfa;
|
|---|
| 1302 | sum_N_neg += alfa;
|
|---|
| 1303 | }
|
|---|
| 1304 | else
|
|---|
| 1305 | {
|
|---|
| 1306 | sum_C_pos += alfa;
|
|---|
| 1307 | sum_N_pos += alfa;
|
|---|
| 1308 | }
|
|---|
| 1309 | }
|
|---|
| 1310 | for (k=P_offd_i[j1]; k < P_offd_i[j1+1]; k++)
|
|---|
| 1311 | {
|
|---|
| 1312 | k1 = P_offd_j[k];
|
|---|
| 1313 | alfa = A_diag_data[j]*P_offd_data[k];
|
|---|
| 1314 | P_offd_data[C_array_offd[k1]] += alfa;
|
|---|
| 1315 | if (alfa < 0)
|
|---|
| 1316 | {
|
|---|
| 1317 | sum_C_neg += alfa;
|
|---|
| 1318 | sum_N_neg += alfa;
|
|---|
| 1319 | }
|
|---|
| 1320 | else
|
|---|
| 1321 | {
|
|---|
| 1322 | sum_C_pos += alfa;
|
|---|
| 1323 | sum_N_pos += alfa;
|
|---|
| 1324 | }
|
|---|
| 1325 | }
|
|---|
| 1326 | }
|
|---|
| 1327 | else
|
|---|
| 1328 | {
|
|---|
| 1329 | if (CF_marker[j1] != -3 &&
|
|---|
| 1330 | (num_functions == 1 || dof_func[i1] == dof_func[j1]))
|
|---|
| 1331 | {
|
|---|
| 1332 | if (A_diag_data[j] < 0)
|
|---|
| 1333 | sum_N_neg += A_diag_data[j];
|
|---|
| 1334 | else
|
|---|
| 1335 | sum_N_pos += A_diag_data[j];
|
|---|
| 1336 | }
|
|---|
| 1337 | }
|
|---|
| 1338 | }
|
|---|
| 1339 | for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++)
|
|---|
| 1340 | {
|
|---|
| 1341 | if (col_offd_S_to_A)
|
|---|
| 1342 | j1 = map_A_to_S[A_offd_j[j]];
|
|---|
| 1343 | else
|
|---|
| 1344 | j1 = A_offd_j[j];
|
|---|
| 1345 |
|
|---|
| 1346 | if (j1 > -1 && P_marker_offd[j1] == i1)
|
|---|
| 1347 | {
|
|---|
| 1348 | j_start = Pext_start[j1];
|
|---|
| 1349 | j_end = j_start+Pext_i[j1+1];
|
|---|
| 1350 | for (k=j_start; k < j_end; k++)
|
|---|
| 1351 | {
|
|---|
| 1352 | k1 = Pext_pass[pass][k];
|
|---|
| 1353 | alfa = A_offd_data[j]*Pext_data[k];
|
|---|
| 1354 | if (k1 < 0)
|
|---|
| 1355 | P_diag_data[C_array[-k1-1]] += alfa;
|
|---|
| 1356 | else
|
|---|
| 1357 | P_offd_data[C_array_offd[k1]] += alfa;
|
|---|
| 1358 | if (alfa < 0)
|
|---|
| 1359 | {
|
|---|
| 1360 | sum_C_neg += alfa;
|
|---|
| 1361 | sum_N_neg += alfa;
|
|---|
| 1362 | }
|
|---|
| 1363 | else
|
|---|
| 1364 | {
|
|---|
| 1365 | sum_C_pos += alfa;
|
|---|
| 1366 | sum_N_pos += alfa;
|
|---|
| 1367 | }
|
|---|
| 1368 | }
|
|---|
| 1369 | }
|
|---|
| 1370 | else
|
|---|
| 1371 | {
|
|---|
| 1372 | if (CF_marker_offd[j1] != -3 &&
|
|---|
| 1373 | (num_functions == 1 || dof_func_offd[j1] == dof_func[i1]))
|
|---|
| 1374 | {
|
|---|
| 1375 | if ( A_offd_data[j] < 0)
|
|---|
| 1376 | sum_N_neg += A_offd_data[j];
|
|---|
| 1377 | else
|
|---|
| 1378 | sum_N_pos += A_offd_data[j];
|
|---|
| 1379 | }
|
|---|
| 1380 | }
|
|---|
| 1381 | }
|
|---|
| 1382 | diagonal = A_diag_data[A_diag_i[i1]];
|
|---|
| 1383 | if (sum_C_neg*diagonal) alfa = -sum_N_neg/(sum_C_neg*diagonal);
|
|---|
| 1384 | if (sum_C_pos*diagonal) beta = -sum_N_pos/(sum_C_pos*diagonal);
|
|---|
| 1385 |
|
|---|
| 1386 | for (j=P_diag_i[i1]; j < P_diag_i[i1+1]; j++)
|
|---|
| 1387 | if (P_diag_data[j] < 0)
|
|---|
| 1388 | P_diag_data[j] *= alfa;
|
|---|
| 1389 | else
|
|---|
| 1390 | P_diag_data[j] *= beta;
|
|---|
| 1391 | for (j=P_offd_i[i1]; j < P_offd_i[i1+1]; j++)
|
|---|
| 1392 | if (P_offd_data[j] < 0)
|
|---|
| 1393 | P_offd_data[j] *= alfa;
|
|---|
| 1394 | else
|
|---|
| 1395 | P_offd_data[j] *= beta;
|
|---|
| 1396 | }
|
|---|
| 1397 | hypre_TFree(P_diag_pass[pass]);
|
|---|
| 1398 | if (num_procs > 1)
|
|---|
| 1399 | {
|
|---|
| 1400 | hypre_TFree(P_offd_pass[pass]);
|
|---|
| 1401 | hypre_TFree(Pext_pass[pass]);
|
|---|
| 1402 | }
|
|---|
| 1403 | }
|
|---|
| 1404 | }
|
|---|
| 1405 | else /* no distinction between positive and negative offdiagonal element */
|
|---|
| 1406 | {
|
|---|
| 1407 | /* determine P for points of pass 1, i.e. neighbors of coarse points */
|
|---|
| 1408 | pass_length = pass_pointer[2]-pass_pointer[1];
|
|---|
| 1409 | #define HYPRE_SMP_PRIVATE k,k1,i,i1,j,j1,ns,ne,rest,size,sum_C,sum_N,j_start,j_end,cnt,tmp_marker,tmp_marker_offd,cnt_offd,diagonal,alfa
|
|---|
| 1410 | #include "../utilities/hypre_smp_forloop.h"
|
|---|
| 1411 | for (k = 0; k < num_threads; k++)
|
|---|
| 1412 | {
|
|---|
| 1413 | size = pass_length/num_threads;
|
|---|
| 1414 | rest = pass_length - size*num_threads;
|
|---|
| 1415 | if (k < rest)
|
|---|
| 1416 | {
|
|---|
| 1417 | ns = k*size+k;
|
|---|
| 1418 | ne = (k+1)*size+k+1;
|
|---|
| 1419 | }
|
|---|
| 1420 | else
|
|---|
| 1421 | {
|
|---|
| 1422 | ns = k*size+rest;
|
|---|
| 1423 | ne = (k+1)*size+rest;
|
|---|
| 1424 | }
|
|---|
| 1425 | ns = pass_pointer[1]+ns;
|
|---|
| 1426 | ne = pass_pointer[1]+ne;
|
|---|
| 1427 |
|
|---|
| 1428 | tmp_marker = NULL;
|
|---|
| 1429 | if (n_fine) tmp_marker = hypre_CTAlloc(int,n_fine);
|
|---|
| 1430 | tmp_marker_offd = NULL;
|
|---|
| 1431 | if (num_cols_offd) tmp_marker_offd = hypre_CTAlloc(int,num_cols_offd);
|
|---|
| 1432 | for (i=0; i < n_fine; i++)
|
|---|
| 1433 | tmp_marker[i] = -1;
|
|---|
| 1434 | for (i=0; i < num_cols_offd; i++)
|
|---|
| 1435 | tmp_marker_offd[i] = -1;
|
|---|
| 1436 |
|
|---|
| 1437 | /*for (i=pass_pointer[1]; i < pass_pointer[2]; i++)
|
|---|
| 1438 | {*/
|
|---|
| 1439 | for (i=ns; i < ne; i++)
|
|---|
| 1440 | {
|
|---|
| 1441 | i1 = pass_array[i];
|
|---|
| 1442 | sum_C = 0;
|
|---|
| 1443 | sum_N = 0;
|
|---|
| 1444 | j_start = P_diag_start[i1];
|
|---|
| 1445 | j_end = j_start+P_diag_i[i1+1]-P_diag_i[i1];
|
|---|
| 1446 | for (j=j_start; j < j_end; j++)
|
|---|
| 1447 | {
|
|---|
| 1448 | k1 = P_diag_pass[1][j];
|
|---|
| 1449 | tmp_marker[C_array[k1]] = i1;
|
|---|
| 1450 | }
|
|---|
| 1451 | cnt = P_diag_i[i1];
|
|---|
| 1452 | for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++)
|
|---|
| 1453 | {
|
|---|
| 1454 | j1 = A_diag_j[j];
|
|---|
| 1455 | if (CF_marker[j1] != -3 &&
|
|---|
| 1456 | (num_functions == 1 || dof_func[i1] == dof_func[j1]))
|
|---|
| 1457 | sum_N += A_diag_data[j];
|
|---|
| 1458 | if (j1 != -1 && tmp_marker[j1] == i1)
|
|---|
| 1459 | {
|
|---|
| 1460 | P_diag_data[cnt] = A_diag_data[j];
|
|---|
| 1461 | P_diag_j[cnt++] = fine_to_coarse[j1];
|
|---|
| 1462 | sum_C += A_diag_data[j];
|
|---|
| 1463 | }
|
|---|
| 1464 | }
|
|---|
| 1465 | j_start = P_offd_start[i1];
|
|---|
| 1466 | j_end = j_start+P_offd_i[i1+1]-P_offd_i[i1];
|
|---|
| 1467 | for (j=j_start; j < j_end; j++)
|
|---|
| 1468 | {
|
|---|
| 1469 | k1 = P_offd_pass[1][j];
|
|---|
| 1470 | tmp_marker_offd[C_array_offd[k1]] = i1;
|
|---|
| 1471 | }
|
|---|
| 1472 | cnt_offd = P_offd_i[i1];
|
|---|
| 1473 | for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++)
|
|---|
| 1474 | {
|
|---|
| 1475 | if (col_offd_S_to_A)
|
|---|
| 1476 | j1 = map_A_to_S[A_offd_j[j]];
|
|---|
| 1477 | else
|
|---|
| 1478 | j1 = A_offd_j[j];
|
|---|
| 1479 | if (CF_marker_offd[j1] != -3 &&
|
|---|
| 1480 | (num_functions == 1 || dof_func[i1] == dof_func_offd[j1]))
|
|---|
| 1481 | sum_N += A_offd_data[j];
|
|---|
| 1482 | if (j1 != -1 && tmp_marker_offd[j1] == i1)
|
|---|
| 1483 | {
|
|---|
| 1484 | P_offd_data[cnt_offd] = A_offd_data[j];
|
|---|
| 1485 | P_offd_j[cnt_offd++] = map_S_to_new[j1];
|
|---|
| 1486 | sum_C += A_offd_data[j];
|
|---|
| 1487 | }
|
|---|
| 1488 | }
|
|---|
| 1489 | diagonal = A_diag_data[A_diag_i[i1]];
|
|---|
| 1490 | if (sum_C*diagonal) alfa = -sum_N/(sum_C*diagonal);
|
|---|
| 1491 | for (j=P_diag_i[i1]; j < cnt; j++)
|
|---|
| 1492 | P_diag_data[j] *= alfa;
|
|---|
| 1493 | for (j=P_offd_i[i1]; j < cnt_offd; j++)
|
|---|
| 1494 | P_offd_data[j] *= alfa;
|
|---|
| 1495 | }
|
|---|
| 1496 | hypre_TFree(tmp_marker);
|
|---|
| 1497 | hypre_TFree(tmp_marker_offd);
|
|---|
| 1498 | }
|
|---|
| 1499 |
|
|---|
| 1500 | old_Pext_send_size = 0;
|
|---|
| 1501 | old_Pext_recv_size = 0;
|
|---|
| 1502 |
|
|---|
| 1503 | hypre_TFree(P_diag_pass[1]);
|
|---|
| 1504 | if (num_procs > 1) hypre_TFree(P_offd_pass[1]);
|
|---|
| 1505 |
|
|---|
| 1506 | /*if (new_num_cols_offd > n_coarse_offd)
|
|---|
| 1507 | {
|
|---|
| 1508 | hypre_TFree(C_array_offd);
|
|---|
| 1509 | C_array_offd = hypre_CTAlloc(int, new_num_cols_offd);
|
|---|
| 1510 | }*/
|
|---|
| 1511 |
|
|---|
| 1512 | for (pass = 2; pass < num_passes; pass++)
|
|---|
| 1513 | {
|
|---|
| 1514 |
|
|---|
| 1515 | if (num_procs > 1)
|
|---|
| 1516 | {
|
|---|
| 1517 | Pext_send_size = Pext_send_map_start[pass][num_sends];
|
|---|
| 1518 | if (Pext_send_size > old_Pext_send_size)
|
|---|
| 1519 | {
|
|---|
| 1520 | hypre_TFree(Pext_send_data);
|
|---|
| 1521 | Pext_send_data = hypre_CTAlloc(double, Pext_send_size);
|
|---|
| 1522 | }
|
|---|
| 1523 | old_Pext_send_size = Pext_send_size;
|
|---|
| 1524 |
|
|---|
| 1525 | cnt_offd = 0;
|
|---|
| 1526 | for (i=0; i < num_sends; i++)
|
|---|
| 1527 | {
|
|---|
| 1528 | for (j=send_map_start[i]; j < send_map_start[i+1]; j++)
|
|---|
| 1529 | {
|
|---|
| 1530 | j1 = send_map_elmt[j];
|
|---|
| 1531 | if (assigned[j1] == pass-1)
|
|---|
| 1532 | {
|
|---|
| 1533 | j_start = P_diag_i[j1];
|
|---|
| 1534 | j_end = P_diag_i[j1+1];
|
|---|
| 1535 | for (k=j_start; k < j_end; k++)
|
|---|
| 1536 | {
|
|---|
| 1537 | Pext_send_data[cnt_offd++] = P_diag_data[k];
|
|---|
| 1538 | }
|
|---|
| 1539 | j_start = P_offd_i[j1];
|
|---|
| 1540 | j_end = P_offd_i[j1+1];
|
|---|
| 1541 | for (k=j_start; k < j_end; k++)
|
|---|
| 1542 | {
|
|---|
| 1543 | Pext_send_data[cnt_offd++] = P_offd_data[k];
|
|---|
| 1544 | }
|
|---|
| 1545 | }
|
|---|
| 1546 | }
|
|---|
| 1547 | }
|
|---|
| 1548 |
|
|---|
| 1549 | hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_sends;
|
|---|
| 1550 | hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) =
|
|---|
| 1551 | Pext_send_map_start[pass];
|
|---|
| 1552 | hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_recvs;
|
|---|
| 1553 | hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) =
|
|---|
| 1554 | Pext_recv_vec_start[pass];
|
|---|
| 1555 |
|
|---|
| 1556 | Pext_recv_size = Pext_recv_vec_start[pass][num_recvs];
|
|---|
| 1557 |
|
|---|
| 1558 | if (Pext_recv_size > old_Pext_recv_size)
|
|---|
| 1559 | {
|
|---|
| 1560 | hypre_TFree(Pext_data);
|
|---|
| 1561 | Pext_data = hypre_CTAlloc(double, Pext_recv_size);
|
|---|
| 1562 | }
|
|---|
| 1563 | old_Pext_recv_size = Pext_recv_size;
|
|---|
| 1564 |
|
|---|
| 1565 | comm_handle = hypre_ParCSRCommHandleCreate (1, tmp_comm_pkg,
|
|---|
| 1566 | Pext_send_data, Pext_data);
|
|---|
| 1567 | hypre_ParCSRCommHandleDestroy(comm_handle);
|
|---|
| 1568 |
|
|---|
| 1569 | hypre_TFree(Pext_send_map_start[pass]);
|
|---|
| 1570 | hypre_TFree(Pext_recv_vec_start[pass]);
|
|---|
| 1571 | }
|
|---|
| 1572 |
|
|---|
| 1573 | pass_length = pass_pointer[pass+1]-pass_pointer[pass];
|
|---|
| 1574 | #define HYPRE_SMP_PRIVATE k,k1,k2,i,i1,j,j1,ns,ne,rest,size,sum_C,sum_N,j_start,j_end,cnt,tmp_marker,tmp_marker_offd,cnt_offd,diagonal,alfa,tmp_array,tmp_array_offd
|
|---|
| 1575 | #include "../utilities/hypre_smp_forloop.h"
|
|---|
| 1576 | for (k = 0; k < num_threads; k++)
|
|---|
| 1577 | {
|
|---|
| 1578 | size = pass_length/num_threads;
|
|---|
| 1579 | rest = pass_length - size*num_threads;
|
|---|
| 1580 | if (k < rest)
|
|---|
| 1581 | {
|
|---|
| 1582 | ns = k*size+k;
|
|---|
| 1583 | ne = (k+1)*size+k+1;
|
|---|
| 1584 | }
|
|---|
| 1585 | else
|
|---|
| 1586 | {
|
|---|
| 1587 | ns = k*size+rest;
|
|---|
| 1588 | ne = (k+1)*size+rest;
|
|---|
| 1589 | }
|
|---|
| 1590 | ns = pass_pointer[pass]+ns;
|
|---|
| 1591 | ne = pass_pointer[pass]+ne;
|
|---|
| 1592 |
|
|---|
| 1593 | tmp_marker = NULL;
|
|---|
| 1594 | if (n_fine) tmp_marker = hypre_CTAlloc(int,n_fine);
|
|---|
| 1595 | tmp_marker_offd = NULL;
|
|---|
| 1596 | if (num_cols_offd) tmp_marker_offd = hypre_CTAlloc(int,num_cols_offd);
|
|---|
| 1597 | tmp_array = NULL;
|
|---|
| 1598 | if (n_coarse) tmp_array = hypre_CTAlloc(int,n_coarse);
|
|---|
| 1599 | /*if (new_num_cols_offd > n_coarse_offd)
|
|---|
| 1600 | {
|
|---|
| 1601 | hypre_TFree(C_array_offd);
|
|---|
| 1602 | C_array_offd = hypre_CTAlloc(int, new_num_cols_offd);
|
|---|
| 1603 | }*/
|
|---|
| 1604 | tmp_array_offd = NULL;
|
|---|
| 1605 | if (new_num_cols_offd > n_coarse_offd)
|
|---|
| 1606 | tmp_array_offd = hypre_CTAlloc(int,new_num_cols_offd);
|
|---|
| 1607 | else
|
|---|
| 1608 | tmp_array_offd = hypre_CTAlloc(int,n_coarse_offd);
|
|---|
| 1609 |
|
|---|
| 1610 | for (i=0; i < n_fine; i++)
|
|---|
| 1611 | tmp_marker[i] = -1;
|
|---|
| 1612 | for (i=0; i < num_cols_offd; i++)
|
|---|
| 1613 | tmp_marker_offd[i] = -1;
|
|---|
| 1614 |
|
|---|
| 1615 | for (i=ns; i < ne; i++)
|
|---|
| 1616 | {
|
|---|
| 1617 | /*for (i=pass_pointer[pass]; i < pass_pointer[pass+1]; i++)
|
|---|
| 1618 | {*/
|
|---|
| 1619 | i1 = pass_array[i];
|
|---|
| 1620 | sum_C = 0;
|
|---|
| 1621 | sum_N = 0;
|
|---|
| 1622 | j_start = P_diag_start[i1];
|
|---|
| 1623 | j_end = j_start+P_diag_i[i1+1]-P_diag_i[i1];
|
|---|
| 1624 | cnt = P_diag_i[i1];
|
|---|
| 1625 | for (j=j_start; j < j_end; j++)
|
|---|
| 1626 | {
|
|---|
| 1627 | k1 = P_diag_pass[pass][j];
|
|---|
| 1628 | tmp_array[k1] = cnt;
|
|---|
| 1629 | P_diag_data[cnt] = 0;
|
|---|
| 1630 | P_diag_j[cnt++] = k1;
|
|---|
| 1631 | }
|
|---|
| 1632 | j_start = P_offd_start[i1];
|
|---|
| 1633 | j_end = j_start+P_offd_i[i1+1]-P_offd_i[i1];
|
|---|
| 1634 | cnt_offd = P_offd_i[i1];
|
|---|
| 1635 | for (j=j_start; j < j_end; j++)
|
|---|
| 1636 | {
|
|---|
| 1637 | k1 = P_offd_pass[pass][j];
|
|---|
| 1638 | tmp_array_offd[k1] = cnt_offd;
|
|---|
| 1639 | P_offd_data[cnt_offd] = 0;
|
|---|
| 1640 | P_offd_j[cnt_offd++] = k1;
|
|---|
| 1641 | }
|
|---|
| 1642 | for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++)
|
|---|
| 1643 | {
|
|---|
| 1644 | j1 = S_diag_j[j];
|
|---|
| 1645 | if (assigned[j1] == pass-1)
|
|---|
| 1646 | /*P_marker[j1] = i1;*/
|
|---|
| 1647 | tmp_marker[j1] = i1;
|
|---|
| 1648 | }
|
|---|
| 1649 | for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++)
|
|---|
| 1650 | {
|
|---|
| 1651 | j1 = S_offd_j[j];
|
|---|
| 1652 | if (assigned_offd[j1] == pass-1)
|
|---|
| 1653 | /*P_marker_offd[j1] = i1; */
|
|---|
| 1654 | tmp_marker_offd[j1] = i1;
|
|---|
| 1655 | }
|
|---|
| 1656 | for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++)
|
|---|
| 1657 | {
|
|---|
| 1658 | j1 = A_diag_j[j];
|
|---|
| 1659 | /*if (P_marker[j1] == i1)*/
|
|---|
| 1660 | if (tmp_marker[j1] == i1)
|
|---|
| 1661 | {
|
|---|
| 1662 | for (k2=P_diag_i[j1]; k2 < P_diag_i[j1+1]; k2++)
|
|---|
| 1663 | {
|
|---|
| 1664 | k1 = P_diag_j[k2];
|
|---|
| 1665 | alfa = A_diag_data[j]*P_diag_data[k2];
|
|---|
| 1666 | P_diag_data[tmp_array[k1]] += alfa;
|
|---|
| 1667 | sum_C += alfa;
|
|---|
| 1668 | sum_N += alfa;
|
|---|
| 1669 | }
|
|---|
| 1670 | for (k2=P_offd_i[j1]; k2 < P_offd_i[j1+1]; k2++)
|
|---|
| 1671 | {
|
|---|
| 1672 | k1 = P_offd_j[k2];
|
|---|
| 1673 | alfa = A_diag_data[j]*P_offd_data[k2];
|
|---|
| 1674 | P_offd_data[tmp_array_offd[k1]] += alfa;
|
|---|
| 1675 | sum_C += alfa;
|
|---|
| 1676 | sum_N += alfa;
|
|---|
| 1677 | }
|
|---|
| 1678 | }
|
|---|
| 1679 | else
|
|---|
| 1680 | {
|
|---|
| 1681 | if (CF_marker[j1] != -3 &&
|
|---|
| 1682 | (num_functions == 1 || dof_func[i1] == dof_func[j1]))
|
|---|
| 1683 | sum_N += A_diag_data[j];
|
|---|
| 1684 | }
|
|---|
| 1685 | }
|
|---|
| 1686 | for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++)
|
|---|
| 1687 | {
|
|---|
| 1688 | if (col_offd_S_to_A)
|
|---|
| 1689 | j1 = map_A_to_S[A_offd_j[j]];
|
|---|
| 1690 | else
|
|---|
| 1691 | j1 = A_offd_j[j];
|
|---|
| 1692 |
|
|---|
| 1693 | /*if (j1 > -1 && P_marker_offd[j1] == i1)*/
|
|---|
| 1694 | if (j1 > -1 && tmp_marker_offd[j1] == i1)
|
|---|
| 1695 | {
|
|---|
| 1696 | j_start = Pext_start[j1];
|
|---|
| 1697 | j_end = j_start+Pext_i[j1+1];
|
|---|
| 1698 | for (k2=j_start; k2 < j_end; k2++)
|
|---|
| 1699 | {
|
|---|
| 1700 | k1 = Pext_pass[pass][k2];
|
|---|
| 1701 | alfa = A_offd_data[j]*Pext_data[k2];
|
|---|
| 1702 | if (k1 < 0)
|
|---|
| 1703 | P_diag_data[tmp_array[-k1-1]] += alfa;
|
|---|
| 1704 | else
|
|---|
| 1705 | P_offd_data[tmp_array_offd[k1]] += alfa;
|
|---|
| 1706 | sum_C += alfa;
|
|---|
| 1707 | sum_N += alfa;
|
|---|
| 1708 | }
|
|---|
| 1709 | }
|
|---|
| 1710 | else
|
|---|
| 1711 | {
|
|---|
| 1712 | if (CF_marker_offd[j1] != -3 &&
|
|---|
| 1713 | (num_functions == 1 || dof_func_offd[j1] == dof_func[i1]))
|
|---|
| 1714 | sum_N += A_offd_data[j];
|
|---|
| 1715 | }
|
|---|
| 1716 | }
|
|---|
| 1717 | diagonal = A_diag_data[A_diag_i[i1]];
|
|---|
| 1718 | if (sum_C*diagonal) alfa = -sum_N/(sum_C*diagonal);
|
|---|
| 1719 |
|
|---|
| 1720 | for (j=P_diag_i[i1]; j < P_diag_i[i1+1]; j++)
|
|---|
| 1721 | P_diag_data[j] *= alfa;
|
|---|
| 1722 | for (j=P_offd_i[i1]; j < P_offd_i[i1+1]; j++)
|
|---|
| 1723 | P_offd_data[j] *= alfa;
|
|---|
| 1724 | }
|
|---|
| 1725 | hypre_TFree(tmp_marker);
|
|---|
| 1726 | hypre_TFree(tmp_marker_offd);
|
|---|
| 1727 | hypre_TFree(tmp_array);
|
|---|
| 1728 | hypre_TFree(tmp_array_offd);
|
|---|
| 1729 | }
|
|---|
| 1730 |
|
|---|
| 1731 | hypre_TFree(P_diag_pass[pass]);
|
|---|
| 1732 | if (num_procs > 1)
|
|---|
| 1733 | {
|
|---|
| 1734 | hypre_TFree(P_offd_pass[pass]);
|
|---|
| 1735 | hypre_TFree(Pext_pass[pass]);
|
|---|
| 1736 | }
|
|---|
| 1737 | }
|
|---|
| 1738 | }
|
|---|
| 1739 |
|
|---|
| 1740 | hypre_TFree(CF_marker_offd);
|
|---|
| 1741 | hypre_TFree(Pext_send_map_start);
|
|---|
| 1742 | hypre_TFree(Pext_recv_vec_start);
|
|---|
| 1743 | if (n_coarse) hypre_TFree(C_array);
|
|---|
| 1744 | hypre_TFree(C_array_offd);
|
|---|
| 1745 | hypre_TFree(dof_func_offd);
|
|---|
| 1746 | hypre_TFree(Pext_send_data);
|
|---|
| 1747 | hypre_TFree(Pext_data);
|
|---|
| 1748 | hypre_TFree(P_diag_pass);
|
|---|
| 1749 | hypre_TFree(P_offd_pass);
|
|---|
| 1750 | hypre_TFree(Pext_pass);
|
|---|
| 1751 | hypre_TFree(P_diag_start);
|
|---|
| 1752 | hypre_TFree(P_offd_start);
|
|---|
| 1753 | hypre_TFree(Pext_start);
|
|---|
| 1754 | hypre_TFree(Pext_i);
|
|---|
| 1755 | hypre_TFree(fine_to_coarse);
|
|---|
| 1756 | hypre_TFree(assigned);
|
|---|
| 1757 | hypre_TFree(assigned_offd);
|
|---|
| 1758 | hypre_TFree(pass_pointer);
|
|---|
| 1759 | hypre_TFree(pass_array);
|
|---|
| 1760 | hypre_TFree(map_S_to_new);
|
|---|
| 1761 | hypre_TFree(map_A_to_S);
|
|---|
| 1762 | hypre_TFree(P_marker);
|
|---|
| 1763 | if (num_procs > 1) hypre_TFree(tmp_comm_pkg);
|
|---|
| 1764 |
|
|---|
| 1765 | P = hypre_ParCSRMatrixCreate(comm,
|
|---|
| 1766 | hypre_ParCSRMatrixGlobalNumRows(A),
|
|---|
| 1767 | total_global_cpts,
|
|---|
| 1768 | hypre_ParCSRMatrixColStarts(A),
|
|---|
| 1769 | num_cpts_global,
|
|---|
| 1770 | 0,
|
|---|
| 1771 | P_diag_i[n_fine],
|
|---|
| 1772 | P_offd_i[n_fine]);
|
|---|
| 1773 | P_diag = hypre_ParCSRMatrixDiag(P);
|
|---|
| 1774 | hypre_CSRMatrixData(P_diag) = P_diag_data;
|
|---|
| 1775 | hypre_CSRMatrixI(P_diag) = P_diag_i;
|
|---|
| 1776 | hypre_CSRMatrixJ(P_diag) = P_diag_j;
|
|---|
| 1777 | P_offd = hypre_ParCSRMatrixOffd(P);
|
|---|
| 1778 | hypre_CSRMatrixData(P_offd) = P_offd_data;
|
|---|
| 1779 | hypre_CSRMatrixI(P_offd) = P_offd_i;
|
|---|
| 1780 | hypre_CSRMatrixJ(P_offd) = P_offd_j;
|
|---|
| 1781 | hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
|
|---|
| 1782 |
|
|---|
| 1783 | /* Compress P, removing coefficients smaller than trunc_factor * Max
|
|---|
| 1784 | and/or keep yat most <P_max_elmts> per row absolutely maximal coefficients */
|
|---|
| 1785 |
|
|---|
| 1786 | if (trunc_factor != 0.0 || P_max_elmts != 0)
|
|---|
| 1787 | {
|
|---|
| 1788 | hypre_BoomerAMGInterpTruncation(P, trunc_factor, P_max_elmts);
|
|---|
| 1789 | P_diag_data = hypre_CSRMatrixData(P_diag);
|
|---|
| 1790 | P_diag_i = hypre_CSRMatrixI(P_diag);
|
|---|
| 1791 | P_diag_j = hypre_CSRMatrixJ(P_diag);
|
|---|
| 1792 | P_offd_data = hypre_CSRMatrixData(P_offd);
|
|---|
| 1793 | P_offd_i = hypre_CSRMatrixI(P_offd);
|
|---|
| 1794 | P_offd_j = hypre_CSRMatrixJ(P_offd);
|
|---|
| 1795 | }
|
|---|
| 1796 | P_offd_size = P_offd_i[n_fine];
|
|---|
| 1797 |
|
|---|
| 1798 | num_cols_offd_P = 0;
|
|---|
| 1799 | if (P_offd_size)
|
|---|
| 1800 | {
|
|---|
| 1801 | if (new_num_cols_offd > num_cols_offd)
|
|---|
| 1802 | {
|
|---|
| 1803 | hypre_TFree(P_marker_offd);
|
|---|
| 1804 | P_marker_offd = hypre_CTAlloc(int,new_num_cols_offd);
|
|---|
| 1805 | }
|
|---|
| 1806 |
|
|---|
| 1807 | #define HYPRE_SMP_PRIVATE i
|
|---|
| 1808 | #include "../utilities/hypre_smp_forloop.h"
|
|---|
| 1809 | for (i=0; i < new_num_cols_offd; i++)
|
|---|
| 1810 | P_marker_offd[i] = 0;
|
|---|
| 1811 |
|
|---|
| 1812 | num_cols_offd_P = 0;
|
|---|
| 1813 | for (i=0; i < P_offd_size; i++)
|
|---|
| 1814 | {
|
|---|
| 1815 | index = P_offd_j[i];
|
|---|
| 1816 | if (!P_marker_offd[index])
|
|---|
| 1817 | {
|
|---|
| 1818 | num_cols_offd_P++;
|
|---|
| 1819 | P_marker_offd[index] = 1;
|
|---|
| 1820 | }
|
|---|
| 1821 | }
|
|---|
| 1822 |
|
|---|
| 1823 | col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt,num_cols_offd_P);
|
|---|
| 1824 | permute = hypre_CTAlloc(int, new_counter[num_passes-1]);
|
|---|
| 1825 | big_permute = hypre_CTAlloc(HYPRE_BigInt, new_counter[num_passes-1]);
|
|---|
| 1826 |
|
|---|
| 1827 | #define HYPRE_SMP_PRIVATE i
|
|---|
| 1828 | #include "../utilities/hypre_smp_forloop.h"
|
|---|
| 1829 | for (i=0; i < new_counter[num_passes-1]; i++)
|
|---|
| 1830 | big_permute[i] = -1;
|
|---|
| 1831 |
|
|---|
| 1832 | cnt = 0;
|
|---|
| 1833 | for (i=0; i < num_passes-1; i++)
|
|---|
| 1834 | {
|
|---|
| 1835 | for (j=new_counter[i]; j < new_counter[i+1]; j++)
|
|---|
| 1836 | {
|
|---|
| 1837 | if (P_marker_offd[j])
|
|---|
| 1838 | {
|
|---|
| 1839 | col_map_offd_P[cnt] = new_elmts[i][j-new_counter[i]];
|
|---|
| 1840 | big_permute[j] = col_map_offd_P[cnt++];
|
|---|
| 1841 | }
|
|---|
| 1842 | }
|
|---|
| 1843 | }
|
|---|
| 1844 |
|
|---|
| 1845 | hypre_BigQsort0(col_map_offd_P,0,num_cols_offd_P-1);
|
|---|
| 1846 |
|
|---|
| 1847 | #define HYPRE_SMP_PRIVATE i, big_value
|
|---|
| 1848 | #include "../utilities/hypre_smp_forloop.h"
|
|---|
| 1849 | for (i=0; i < new_counter[num_passes-1]; i++)
|
|---|
| 1850 | {
|
|---|
| 1851 | big_value = big_permute[i];
|
|---|
| 1852 | if (big_value != -1)
|
|---|
| 1853 | permute[i] = hypre_BigBinarySearch(col_map_offd_P,
|
|---|
| 1854 | big_value,num_cols_offd_P);
|
|---|
| 1855 | }
|
|---|
| 1856 |
|
|---|
| 1857 | #define HYPRE_SMP_PRIVATE i
|
|---|
| 1858 | #include "../utilities/hypre_smp_forloop.h"
|
|---|
| 1859 | for (i=0; i < P_offd_size; i++)
|
|---|
| 1860 | P_offd_j[i] = permute[P_offd_j[i]];
|
|---|
| 1861 | }
|
|---|
| 1862 | if (num_procs > 1)
|
|---|
| 1863 | {
|
|---|
| 1864 | for (i=0; i < num_passes-1; i++)
|
|---|
| 1865 | hypre_TFree(new_elmts[i]);
|
|---|
| 1866 | }
|
|---|
| 1867 | hypre_TFree(P_marker_offd);
|
|---|
| 1868 | hypre_TFree(permute);
|
|---|
| 1869 | hypre_TFree(big_permute);
|
|---|
| 1870 | hypre_TFree(new_elmts);
|
|---|
| 1871 | hypre_TFree(new_counter);
|
|---|
| 1872 |
|
|---|
| 1873 | if (num_cols_offd_P)
|
|---|
| 1874 | {
|
|---|
| 1875 | hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
|
|---|
| 1876 | hypre_CSRMatrixNumCols(P_offd) = num_cols_offd_P;
|
|---|
| 1877 | }
|
|---|
| 1878 |
|
|---|
| 1879 | if (n_SF)
|
|---|
| 1880 | {
|
|---|
| 1881 | for (i=0; i < n_fine; i++)
|
|---|
| 1882 | if (CF_marker[i] == -3) CF_marker[i] = -1;
|
|---|
| 1883 | }
|
|---|
| 1884 |
|
|---|
| 1885 | if (num_procs > 1)
|
|---|
| 1886 | {
|
|---|
| 1887 | #ifdef HYPRE_NO_GLOBAL_PARTITION
|
|---|
| 1888 | hypre_NewCommPkgCreate(P);
|
|---|
| 1889 | #else
|
|---|
| 1890 | hypre_MatvecCommPkgCreate(P);
|
|---|
| 1891 | #endif
|
|---|
| 1892 | }
|
|---|
| 1893 |
|
|---|
| 1894 | *P_ptr = P;
|
|---|
| 1895 |
|
|---|
| 1896 |
|
|---|
| 1897 | /*-----------------------------------------------------------------------
|
|---|
| 1898 | * Build and return dof_func array for coarse grid.
|
|---|
| 1899 | *-----------------------------------------------------------------------*/
|
|---|
| 1900 |
|
|---|
| 1901 | /*-----------------------------------------------------------------------
|
|---|
| 1902 | * Free mapping vector and marker array.
|
|---|
| 1903 | *-----------------------------------------------------------------------*/
|
|---|
| 1904 |
|
|---|
| 1905 |
|
|---|
| 1906 | return(0);
|
|---|
| 1907 | }
|
|---|
| 1908 |
|
|---|