| [2aa6644] | 1 | /*BHEADER**********************************************************************
|
|---|
| 2 | * Copyright (c) 2008, Lawrence Livermore National Security, LLC.
|
|---|
| 3 | * Produced at the Lawrence Livermore National Laboratory.
|
|---|
| 4 | * This file is part of HYPRE. See file COPYRIGHT for details.
|
|---|
| 5 | *
|
|---|
| 6 | * HYPRE is free software; you can redistribute it and/or modify it under the
|
|---|
| 7 | * terms of the GNU Lesser General Public License (as published by the Free
|
|---|
| 8 | * Software Foundation) version 2.1 dated February 1999.
|
|---|
| 9 | *
|
|---|
| 10 | * $Revision: 2.4 $
|
|---|
| 11 | ***********************************************************************EHEADER*/
|
|---|
| 12 |
|
|---|
| 13 |
|
|---|
| 14 |
|
|---|
| 15 | #include "headers.h"
|
|---|
| 16 |
|
|---|
| 17 | /*---------------------------------------------------------------------------
|
|---|
| 18 | * hypre_BoomerAMGBuildInterp
|
|---|
| 19 | *--------------------------------------------------------------------------*/
|
|---|
| 20 |
|
|---|
| 21 | int
|
|---|
| 22 | hypre_BoomerAMGBuildInterp( hypre_ParCSRMatrix *A,
|
|---|
| 23 | int *CF_marker,
|
|---|
| 24 | hypre_ParCSRMatrix *S,
|
|---|
| 25 | HYPRE_BigInt *num_cpts_global,
|
|---|
| 26 | int num_functions,
|
|---|
| 27 | int *dof_func,
|
|---|
| 28 | int debug_flag,
|
|---|
| 29 | double trunc_factor,
|
|---|
| 30 | int max_elmts,
|
|---|
| 31 | int *col_offd_S_to_A,
|
|---|
| 32 | hypre_ParCSRMatrix **P_ptr)
|
|---|
| 33 | {
|
|---|
| 34 |
|
|---|
| 35 | MPI_Comm comm = hypre_ParCSRMatrixComm(A);
|
|---|
| 36 | hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
|
|---|
| 37 | hypre_ParCSRCommHandle *comm_handle;
|
|---|
| 38 |
|
|---|
| 39 | hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
|
|---|
| 40 | double *A_diag_data = hypre_CSRMatrixData(A_diag);
|
|---|
| 41 | int *A_diag_i = hypre_CSRMatrixI(A_diag);
|
|---|
| 42 | int *A_diag_j = hypre_CSRMatrixJ(A_diag);
|
|---|
| 43 |
|
|---|
| 44 | hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
|
|---|
| 45 | double *A_offd_data = hypre_CSRMatrixData(A_offd);
|
|---|
| 46 | int *A_offd_i = hypre_CSRMatrixI(A_offd);
|
|---|
| 47 | int *A_offd_j = hypre_CSRMatrixJ(A_offd);
|
|---|
| 48 | int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);
|
|---|
| 49 |
|
|---|
| 50 | hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
|
|---|
| 51 | int *S_diag_i = hypre_CSRMatrixI(S_diag);
|
|---|
| 52 | int *S_diag_j = hypre_CSRMatrixJ(S_diag);
|
|---|
| 53 |
|
|---|
| 54 | hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
|
|---|
| 55 | int *S_offd_i = hypre_CSRMatrixI(S_offd);
|
|---|
| 56 | int *S_offd_j = hypre_CSRMatrixJ(S_offd);
|
|---|
| 57 |
|
|---|
| 58 | hypre_ParCSRMatrix *P;
|
|---|
| 59 | HYPRE_BigInt *col_map_offd_P = NULL;
|
|---|
| 60 | int *tmp_map_offd = NULL;
|
|---|
| 61 |
|
|---|
| 62 | int *CF_marker_offd = NULL;
|
|---|
| 63 | int *dof_func_offd = NULL;
|
|---|
| 64 |
|
|---|
| 65 | hypre_CSRMatrix *A_ext;
|
|---|
| 66 |
|
|---|
| 67 | double *A_ext_data;
|
|---|
| 68 | int *A_ext_i;
|
|---|
| 69 | int *A_ext_j;
|
|---|
| 70 |
|
|---|
| 71 | hypre_CSRMatrix *P_diag;
|
|---|
| 72 | hypre_CSRMatrix *P_offd;
|
|---|
| 73 |
|
|---|
| 74 | double *P_diag_data = NULL;
|
|---|
| 75 | int *P_diag_i;
|
|---|
| 76 | int *P_diag_j = NULL;
|
|---|
| 77 | double *P_offd_data = NULL;
|
|---|
| 78 | int *P_offd_i;
|
|---|
| 79 | int *P_offd_j = NULL;
|
|---|
| 80 |
|
|---|
| 81 | int P_diag_size, P_offd_size;
|
|---|
| 82 |
|
|---|
| 83 | int *P_marker = NULL;
|
|---|
| 84 | int *P_marker_offd = NULL;
|
|---|
| 85 |
|
|---|
| 86 | int jj_counter,jj_counter_offd;
|
|---|
| 87 | int *jj_count, *jj_count_offd;
|
|---|
| 88 | int jj_begin_row,jj_begin_row_offd;
|
|---|
| 89 | int jj_end_row,jj_end_row_offd;
|
|---|
| 90 |
|
|---|
| 91 | int start_indexing = 0; /* start indexing for P_data at 0 */
|
|---|
| 92 |
|
|---|
| 93 | int n_fine = hypre_CSRMatrixNumRows(A_diag);
|
|---|
| 94 |
|
|---|
| 95 | int strong_f_marker;
|
|---|
| 96 |
|
|---|
| 97 | int *fine_to_coarse;
|
|---|
| 98 | /*int *fine_to_coarse_offd;*/
|
|---|
| 99 | int *coarse_counter;
|
|---|
| 100 | int coarse_shift;
|
|---|
| 101 | HYPRE_BigInt total_global_cpts;
|
|---|
| 102 | HYPRE_BigInt my_first_cpt;
|
|---|
| 103 | int num_cols_P_offd;
|
|---|
| 104 |
|
|---|
| 105 | int i,i1,i2;
|
|---|
| 106 | int j,jl,jj,jj1;
|
|---|
| 107 | int start;
|
|---|
| 108 | int sgn;
|
|---|
| 109 | int c_num;
|
|---|
| 110 |
|
|---|
| 111 | double diagonal;
|
|---|
| 112 | double sum;
|
|---|
| 113 | double distribute;
|
|---|
| 114 |
|
|---|
| 115 | double zero = 0.0;
|
|---|
| 116 | double one = 1.0;
|
|---|
| 117 |
|
|---|
| 118 | int my_id;
|
|---|
| 119 | int num_procs;
|
|---|
| [e0b7443] | 120 | int num_threadsID;
|
|---|
| [2aa6644] | 121 | int num_sends;
|
|---|
| 122 | int index;
|
|---|
| 123 | int ns, ne, size, rest;
|
|---|
| 124 | int *int_buf_data = NULL;
|
|---|
| 125 |
|
|---|
| 126 | double wall_time; /* for debugging instrumentation */
|
|---|
| 127 |
|
|---|
| 128 | MPI_Comm_size(comm, &num_procs);
|
|---|
| 129 | MPI_Comm_rank(comm,&my_id);
|
|---|
| [e0b7443] | 130 | num_threadsID = hypre_NumThreads();
|
|---|
| [2aa6644] | 131 |
|
|---|
| 132 |
|
|---|
| 133 | #ifdef HYPRE_NO_GLOBAL_PARTITION
|
|---|
| 134 | my_first_cpt = num_cpts_global[0];
|
|---|
| 135 | if (my_id == (num_procs -1)) total_global_cpts = num_cpts_global[1];
|
|---|
| 136 | MPI_Bcast(&total_global_cpts, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);
|
|---|
| 137 | #else
|
|---|
| 138 | my_first_cpt = num_cpts_global[my_id];
|
|---|
| 139 | total_global_cpts = num_cpts_global[num_procs];
|
|---|
| 140 | #endif
|
|---|
| 141 |
|
|---|
| 142 | /*-------------------------------------------------------------------
|
|---|
| 143 | * Get the CF_marker data for the off-processor columns
|
|---|
| 144 | *-------------------------------------------------------------------*/
|
|---|
| 145 |
|
|---|
| 146 | if (debug_flag==4) wall_time = time_getWallclockSeconds();
|
|---|
| 147 |
|
|---|
| 148 | if (num_cols_A_offd) CF_marker_offd = hypre_CTAlloc(int, num_cols_A_offd);
|
|---|
| 149 | if (num_functions > 1 && num_cols_A_offd)
|
|---|
| 150 | dof_func_offd = hypre_CTAlloc(int, num_cols_A_offd);
|
|---|
| 151 |
|
|---|
| 152 | if (!comm_pkg)
|
|---|
| 153 | {
|
|---|
| 154 | #ifdef HYPRE_NO_GLOBAL_PARTITION
|
|---|
| 155 | hypre_NewCommPkgCreate(A);
|
|---|
| 156 | #else
|
|---|
| 157 | hypre_MatvecCommPkgCreate(A);
|
|---|
| 158 | #endif
|
|---|
| 159 | comm_pkg = hypre_ParCSRMatrixCommPkg(A);
|
|---|
| 160 | }
|
|---|
| 161 |
|
|---|
| 162 | num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
|
|---|
| 163 | if (hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends))
|
|---|
| 164 | int_buf_data = hypre_CTAlloc(int,
|
|---|
| 165 | hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
|
|---|
| 166 |
|
|---|
| 167 | index = 0;
|
|---|
| 168 | for (i = 0; i < num_sends; i++)
|
|---|
| 169 | {
|
|---|
| 170 | start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
|
|---|
| 171 | for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
|
|---|
| 172 | int_buf_data[index++]
|
|---|
| 173 | = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
|
|---|
| 174 | }
|
|---|
| 175 |
|
|---|
| 176 | comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
|
|---|
| 177 | CF_marker_offd);
|
|---|
| 178 |
|
|---|
| 179 | hypre_ParCSRCommHandleDestroy(comm_handle);
|
|---|
| 180 | if (num_functions > 1)
|
|---|
| 181 | {
|
|---|
| 182 | index = 0;
|
|---|
| 183 | for (i = 0; i < num_sends; i++)
|
|---|
| 184 | {
|
|---|
| 185 | start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
|
|---|
| 186 | for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
|
|---|
| 187 | int_buf_data[index++]
|
|---|
| 188 | = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
|
|---|
| 189 | }
|
|---|
| 190 |
|
|---|
| 191 | comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data,
|
|---|
| 192 | dof_func_offd);
|
|---|
| 193 |
|
|---|
| 194 | hypre_ParCSRCommHandleDestroy(comm_handle);
|
|---|
| 195 | }
|
|---|
| 196 |
|
|---|
| 197 | if (debug_flag==4)
|
|---|
| 198 | {
|
|---|
| 199 | wall_time = time_getWallclockSeconds() - wall_time;
|
|---|
| 200 | printf("Proc = %d Interp: Comm 1 CF_marker = %f\n",
|
|---|
| 201 | my_id, wall_time);
|
|---|
| 202 | fflush(NULL);
|
|---|
| 203 | }
|
|---|
| 204 |
|
|---|
| 205 | /*----------------------------------------------------------------------
|
|---|
| 206 | * Get the ghost rows of A
|
|---|
| 207 | *---------------------------------------------------------------------*/
|
|---|
| 208 |
|
|---|
| 209 | if (debug_flag==4) wall_time = time_getWallclockSeconds();
|
|---|
| 210 |
|
|---|
| 211 | if (num_procs > 1)
|
|---|
| 212 | {
|
|---|
| 213 | A_ext = hypre_ParCSRMatrixExtractConvBExt(A,A,1);
|
|---|
| 214 | A_ext_i = hypre_CSRMatrixI(A_ext);
|
|---|
| 215 | A_ext_j = hypre_CSRMatrixJ(A_ext);
|
|---|
| 216 | A_ext_data = hypre_CSRMatrixData(A_ext);
|
|---|
| 217 | }
|
|---|
| 218 |
|
|---|
| 219 | /*index = 0;
|
|---|
| 220 | for (i=0; i < num_cols_A_offd; i++)
|
|---|
| 221 | {
|
|---|
| 222 | for (j=A_ext_i[i]; j < A_ext_i[i+1]; j++)
|
|---|
| 223 | {
|
|---|
| 224 | k = A_ext_j[j];
|
|---|
| 225 | if (k >= col_1 && k < col_n)
|
|---|
| 226 | {
|
|---|
| 227 | A_ext_j[index] = k - col_1;
|
|---|
| 228 | A_ext_data[index++] = A_ext_data[j];
|
|---|
| 229 | }
|
|---|
| 230 | else
|
|---|
| 231 | {
|
|---|
| 232 | kc = hypre_BinarySearch(col_map_offd,k,num_cols_A_offd);
|
|---|
| 233 | if (kc > -1)
|
|---|
| 234 | {
|
|---|
| 235 | A_ext_j[index] = -kc-1;
|
|---|
| 236 | A_ext_data[index++] = A_ext_data[j];
|
|---|
| 237 | }
|
|---|
| 238 | }
|
|---|
| 239 | }
|
|---|
| 240 | A_ext_i[i] = index;
|
|---|
| 241 | }
|
|---|
| 242 | for (i = num_cols_A_offd; i > 0; i--)
|
|---|
| 243 | A_ext_i[i] = A_ext_i[i-1];
|
|---|
| 244 | if (num_procs > 1) A_ext_i[0] = 0;*/
|
|---|
| 245 |
|
|---|
| 246 | if (debug_flag==4)
|
|---|
| 247 | {
|
|---|
| 248 | wall_time = time_getWallclockSeconds() - wall_time;
|
|---|
| 249 | printf("Proc = %d Interp: Comm 2 Get A_ext = %f\n",
|
|---|
| 250 | my_id, wall_time);
|
|---|
| 251 | fflush(NULL);
|
|---|
| 252 | }
|
|---|
| 253 |
|
|---|
| 254 |
|
|---|
| 255 | /*-----------------------------------------------------------------------
|
|---|
| 256 | * First Pass: Determine size of P and fill in fine_to_coarse mapping.
|
|---|
| 257 | *-----------------------------------------------------------------------*/
|
|---|
| 258 |
|
|---|
| 259 | /*-----------------------------------------------------------------------
|
|---|
| 260 | * Intialize counters and allocate mapping vector.
|
|---|
| 261 | *-----------------------------------------------------------------------*/
|
|---|
| 262 |
|
|---|
| [e0b7443] | 263 | coarse_counter = hypre_CTAlloc(int, num_threadsID);
|
|---|
| 264 | jj_count = hypre_CTAlloc(int, num_threadsID);
|
|---|
| 265 | jj_count_offd = hypre_CTAlloc(int, num_threadsID);
|
|---|
| [2aa6644] | 266 |
|
|---|
| 267 | fine_to_coarse = hypre_CTAlloc(int, n_fine);
|
|---|
| 268 | /*#define HYPRE_SMP_PRIVATE i
|
|---|
| 269 | #include "../utilities/hypre_smp_forloop.h"*/
|
|---|
| 270 | for (i = 0; i < n_fine; i++) fine_to_coarse[i] = -1;
|
|---|
| 271 |
|
|---|
| 272 | jj_counter = start_indexing;
|
|---|
| 273 | jj_counter_offd = start_indexing;
|
|---|
| 274 |
|
|---|
| 275 | /*-----------------------------------------------------------------------
|
|---|
| 276 | * Loop over fine grid.
|
|---|
| 277 | *-----------------------------------------------------------------------*/
|
|---|
| 278 |
|
|---|
| 279 | /* RDF: this looks a little tricky, but doable */
|
|---|
| 280 | #define HYPRE_SMP_PRIVATE i,j,i1,jj,ns,ne,size,rest
|
|---|
| 281 | #include "../utilities/hypre_smp_forloop.h"
|
|---|
| [e0b7443] | 282 | for (j = 0; j < num_threadsID; j++)
|
|---|
| [2aa6644] | 283 | {
|
|---|
| [e0b7443] | 284 | size = n_fine/num_threadsID;
|
|---|
| 285 | rest = n_fine - size*num_threadsID;
|
|---|
| [2aa6644] | 286 | if (j < rest)
|
|---|
| 287 | {
|
|---|
| 288 | ns = j*size+j;
|
|---|
| 289 | ne = (j+1)*size+j+1;
|
|---|
| 290 | }
|
|---|
| 291 | else
|
|---|
| 292 | {
|
|---|
| 293 | ns = j*size+rest;
|
|---|
| 294 | ne = (j+1)*size+rest;
|
|---|
| 295 | }
|
|---|
| 296 | for (i = ns; i < ne; i++)
|
|---|
| 297 | {
|
|---|
| 298 |
|
|---|
| 299 | /*--------------------------------------------------------------------
|
|---|
| 300 | * If i is a C-point, interpolation is the identity. Also set up
|
|---|
| 301 | * mapping vector.
|
|---|
| 302 | *--------------------------------------------------------------------*/
|
|---|
| 303 |
|
|---|
| 304 | if (CF_marker[i] >= 0)
|
|---|
| 305 | {
|
|---|
| 306 | jj_count[j]++;
|
|---|
| 307 | fine_to_coarse[i] = coarse_counter[j];
|
|---|
| 308 | coarse_counter[j]++;
|
|---|
| 309 | }
|
|---|
| 310 |
|
|---|
| 311 | /*--------------------------------------------------------------------
|
|---|
| 312 | * If i is an F-point, interpolation is from the C-points that
|
|---|
| 313 | * strongly influence i.
|
|---|
| 314 | *--------------------------------------------------------------------*/
|
|---|
| 315 |
|
|---|
| 316 | else
|
|---|
| 317 | {
|
|---|
| 318 | for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
|
|---|
| 319 | {
|
|---|
| 320 | i1 = S_diag_j[jj];
|
|---|
| 321 | if (CF_marker[i1] >= 0)
|
|---|
| 322 | {
|
|---|
| 323 | jj_count[j]++;
|
|---|
| 324 | }
|
|---|
| 325 | }
|
|---|
| 326 |
|
|---|
| 327 | if (num_procs > 1)
|
|---|
| 328 | {
|
|---|
| 329 | if (col_offd_S_to_A)
|
|---|
| 330 | {
|
|---|
| 331 | for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
|
|---|
| 332 | {
|
|---|
| 333 | i1 = col_offd_S_to_A[S_offd_j[jj]];
|
|---|
| 334 | if (CF_marker_offd[i1] >= 0)
|
|---|
| 335 | {
|
|---|
| 336 | jj_count_offd[j]++;
|
|---|
| 337 | }
|
|---|
| 338 | }
|
|---|
| 339 | }
|
|---|
| 340 | else
|
|---|
| 341 | {
|
|---|
| 342 | for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
|
|---|
| 343 | {
|
|---|
| 344 | i1 = S_offd_j[jj];
|
|---|
| 345 | if (CF_marker_offd[i1] >= 0)
|
|---|
| 346 | {
|
|---|
| 347 | jj_count_offd[j]++;
|
|---|
| 348 | }
|
|---|
| 349 | }
|
|---|
| 350 | }
|
|---|
| 351 | }
|
|---|
| 352 | }
|
|---|
| 353 | }
|
|---|
| 354 | }
|
|---|
| 355 |
|
|---|
| 356 | /*-----------------------------------------------------------------------
|
|---|
| 357 | * Allocate arrays.
|
|---|
| 358 | *-----------------------------------------------------------------------*/
|
|---|
| 359 |
|
|---|
| [e0b7443] | 360 | for (i=0; i < num_threadsID-1; i++)
|
|---|
| [2aa6644] | 361 | {
|
|---|
| 362 | coarse_counter[i+1] += coarse_counter[i];
|
|---|
| 363 | jj_count[i+1] += jj_count[i];
|
|---|
| 364 | jj_count_offd[i+1] += jj_count_offd[i];
|
|---|
| 365 | }
|
|---|
| [e0b7443] | 366 | i = num_threadsID-1;
|
|---|
| [2aa6644] | 367 | jj_counter = jj_count[i];
|
|---|
| 368 | jj_counter_offd = jj_count_offd[i];
|
|---|
| 369 |
|
|---|
| 370 | P_diag_size = jj_counter;
|
|---|
| 371 |
|
|---|
| 372 | P_diag_i = hypre_CTAlloc(int, n_fine+1);
|
|---|
| 373 | if (P_diag_size)
|
|---|
| 374 | {
|
|---|
| 375 | P_diag_j = hypre_CTAlloc(int, P_diag_size);
|
|---|
| 376 | P_diag_data = hypre_CTAlloc(double, P_diag_size);
|
|---|
| 377 | }
|
|---|
| 378 |
|
|---|
| 379 | P_diag_i[n_fine] = jj_counter;
|
|---|
| 380 |
|
|---|
| 381 |
|
|---|
| 382 | P_offd_size = jj_counter_offd;
|
|---|
| 383 |
|
|---|
| 384 | P_offd_i = hypre_CTAlloc(int, n_fine+1);
|
|---|
| 385 | if (P_offd_size)
|
|---|
| 386 | {
|
|---|
| 387 | P_offd_j = hypre_CTAlloc(int, P_offd_size);
|
|---|
| 388 | P_offd_data = hypre_CTAlloc(double, P_offd_size);
|
|---|
| 389 | }
|
|---|
| 390 | /*-----------------------------------------------------------------------
|
|---|
| 391 | * Intialize some stuff.
|
|---|
| 392 | *-----------------------------------------------------------------------*/
|
|---|
| 393 |
|
|---|
| 394 | jj_counter = start_indexing;
|
|---|
| 395 | jj_counter_offd = start_indexing;
|
|---|
| 396 |
|
|---|
| 397 | if (debug_flag==4)
|
|---|
| 398 | {
|
|---|
| 399 | wall_time = time_getWallclockSeconds() - wall_time;
|
|---|
| 400 | printf("Proc = %d Interp: Internal work 1 = %f\n",
|
|---|
| 401 | my_id, wall_time);
|
|---|
| 402 | fflush(NULL);
|
|---|
| 403 | }
|
|---|
| 404 |
|
|---|
| 405 | /*-----------------------------------------------------------------------
|
|---|
| 406 | * Send and receive fine_to_coarse info.
|
|---|
| 407 | *-----------------------------------------------------------------------*/
|
|---|
| 408 |
|
|---|
| 409 | if (debug_flag==4) wall_time = time_getWallclockSeconds();
|
|---|
| 410 |
|
|---|
| 411 | /*fine_to_coarse_offd = hypre_CTAlloc(HYPRE_BigInt, num_cols_A_offd); */
|
|---|
| 412 |
|
|---|
| 413 | #define HYPRE_SMP_PRIVATE i,j,ns,ne,size,rest,coarse_shift
|
|---|
| 414 | #include "../utilities/hypre_smp_forloop.h"
|
|---|
| [e0b7443] | 415 | for (j = 0; j < num_threadsID; j++)
|
|---|
| [2aa6644] | 416 | {
|
|---|
| 417 | coarse_shift = 0;
|
|---|
| 418 | if (j > 0) coarse_shift = coarse_counter[j-1];
|
|---|
| [e0b7443] | 419 | size = n_fine/num_threadsID;
|
|---|
| 420 | rest = n_fine - size*num_threadsID;
|
|---|
| [2aa6644] | 421 | if (j < rest)
|
|---|
| 422 | {
|
|---|
| 423 | ns = j*size+j;
|
|---|
| 424 | ne = (j+1)*size+j+1;
|
|---|
| 425 | }
|
|---|
| 426 | else
|
|---|
| 427 | {
|
|---|
| 428 | ns = j*size+rest;
|
|---|
| 429 | ne = (j+1)*size+rest;
|
|---|
| 430 | }
|
|---|
| 431 | for (i = ns; i < ne; i++)
|
|---|
| 432 | fine_to_coarse[i] += coarse_shift;
|
|---|
| 433 | }
|
|---|
| 434 |
|
|---|
| 435 | /* index = 0;
|
|---|
| 436 | for (i = 0; i < num_sends; i++)
|
|---|
| 437 | {
|
|---|
| 438 | start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
|
|---|
| 439 | for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
|
|---|
| 440 | big_buf_data[index++] = my_first_cpt+
|
|---|
| 441 | (HYPRE_BigInt)fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
|
|---|
| 442 | }
|
|---|
| 443 |
|
|---|
| 444 | comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, big_buf_data,
|
|---|
| 445 | fine_to_coarse_offd);
|
|---|
| 446 |
|
|---|
| 447 | hypre_ParCSRCommHandleDestroy(comm_handle);
|
|---|
| 448 |
|
|---|
| 449 | if (debug_flag==4)
|
|---|
| 450 | {
|
|---|
| 451 | wall_time = time_getWallclockSeconds() - wall_time;
|
|---|
| 452 | printf("Proc = %d Interp: Comm 4 FineToCoarse = %f\n",
|
|---|
| 453 | my_id, wall_time);
|
|---|
| 454 | fflush(NULL);
|
|---|
| 455 | }
|
|---|
| 456 | */
|
|---|
| 457 | if (debug_flag==4) wall_time = time_getWallclockSeconds();
|
|---|
| 458 |
|
|---|
| 459 | /*-----------------------------------------------------------------------
|
|---|
| 460 | * Loop over fine grid points.
|
|---|
| 461 | *-----------------------------------------------------------------------*/
|
|---|
| 462 |
|
|---|
| 463 | #define HYPRE_SMP_PRIVATE i,j,jl,i1,i2,jj,jj1,ns,ne,size,rest,sum,diagonal,distribute,P_marker,P_marker_offd,strong_f_marker,jj_counter,jj_counter_offd,sgn,c_num,jj_begin_row,jj_end_row,jj_begin_row_offd,jj_end_row_offd
|
|---|
| 464 | #include "../utilities/hypre_smp_forloop.h"
|
|---|
| [e0b7443] | 465 | for (jl = 0; jl < num_threadsID; jl++)
|
|---|
| [2aa6644] | 466 | {
|
|---|
| [e0b7443] | 467 | size = n_fine/num_threadsID;
|
|---|
| 468 | rest = n_fine - size*num_threadsID;
|
|---|
| [2aa6644] | 469 | if (jl < rest)
|
|---|
| 470 | {
|
|---|
| 471 | ns = jl*size+jl;
|
|---|
| 472 | ne = (jl+1)*size+jl+1;
|
|---|
| 473 | }
|
|---|
| 474 | else
|
|---|
| 475 | {
|
|---|
| 476 | ns = jl*size+rest;
|
|---|
| 477 | ne = (jl+1)*size+rest;
|
|---|
| 478 | }
|
|---|
| 479 | jj_counter = 0;
|
|---|
| 480 | if (jl > 0) jj_counter = jj_count[jl-1];
|
|---|
| 481 | jj_counter_offd = 0;
|
|---|
| 482 | if (jl > 0) jj_counter_offd = jj_count_offd[jl-1];
|
|---|
| 483 |
|
|---|
| 484 | if (n_fine) P_marker = hypre_CTAlloc(int, n_fine);
|
|---|
| 485 | else P_marker = NULL;
|
|---|
| 486 | if (num_cols_A_offd) P_marker_offd = hypre_CTAlloc(int, num_cols_A_offd);
|
|---|
| 487 | else P_marker_offd = NULL;
|
|---|
| 488 |
|
|---|
| 489 | for (i = 0; i < n_fine; i++)
|
|---|
| 490 | {
|
|---|
| 491 | P_marker[i] = -1;
|
|---|
| 492 | }
|
|---|
| 493 | for (i = 0; i < num_cols_A_offd; i++)
|
|---|
| 494 | {
|
|---|
| 495 | P_marker_offd[i] = -1;
|
|---|
| 496 | }
|
|---|
| 497 | strong_f_marker = -2;
|
|---|
| 498 |
|
|---|
| 499 | for (i = ns; i < ne; i++)
|
|---|
| 500 | {
|
|---|
| 501 |
|
|---|
| 502 | /*--------------------------------------------------------------------
|
|---|
| 503 | * If i is a c-point, interpolation is the identity.
|
|---|
| 504 | *--------------------------------------------------------------------*/
|
|---|
| 505 |
|
|---|
| 506 | if (CF_marker[i] >= 0)
|
|---|
| 507 | {
|
|---|
| 508 | P_diag_i[i] = jj_counter;
|
|---|
| 509 | P_diag_j[jj_counter] = fine_to_coarse[i];
|
|---|
| 510 | P_diag_data[jj_counter] = one;
|
|---|
| 511 | jj_counter++;
|
|---|
| 512 | }
|
|---|
| 513 |
|
|---|
| 514 | /*--------------------------------------------------------------------
|
|---|
| 515 | * If i is an F-point, build interpolation.
|
|---|
| 516 | *--------------------------------------------------------------------*/
|
|---|
| 517 |
|
|---|
| 518 | else
|
|---|
| 519 | {
|
|---|
| 520 | /* Diagonal part of P */
|
|---|
| 521 | P_diag_i[i] = jj_counter;
|
|---|
| 522 | jj_begin_row = jj_counter;
|
|---|
| 523 |
|
|---|
| 524 | for (jj = S_diag_i[i]; jj < S_diag_i[i+1]; jj++)
|
|---|
| 525 | {
|
|---|
| 526 | i1 = S_diag_j[jj];
|
|---|
| 527 |
|
|---|
| 528 | /*--------------------------------------------------------------
|
|---|
| 529 | * If neighbor i1 is a C-point, set column number in P_diag_j
|
|---|
| 530 | * and initialize interpolation weight to zero.
|
|---|
| 531 | *--------------------------------------------------------------*/
|
|---|
| 532 |
|
|---|
| 533 | if (CF_marker[i1] >= 0)
|
|---|
| 534 | {
|
|---|
| 535 | P_marker[i1] = jj_counter;
|
|---|
| 536 | P_diag_j[jj_counter] = fine_to_coarse[i1];
|
|---|
| 537 | P_diag_data[jj_counter] = zero;
|
|---|
| 538 | jj_counter++;
|
|---|
| 539 | }
|
|---|
| 540 |
|
|---|
| 541 | /*--------------------------------------------------------------
|
|---|
| 542 | * If neighbor i1 is an F-point, mark it as a strong F-point
|
|---|
| 543 | * whose connection needs to be distributed.
|
|---|
| 544 | *--------------------------------------------------------------*/
|
|---|
| 545 |
|
|---|
| 546 | else if (CF_marker[i1] != -3)
|
|---|
| 547 | {
|
|---|
| 548 | P_marker[i1] = strong_f_marker;
|
|---|
| 549 | }
|
|---|
| 550 | }
|
|---|
| 551 | jj_end_row = jj_counter;
|
|---|
| 552 |
|
|---|
| 553 | /* Off-Diagonal part of P */
|
|---|
| 554 | P_offd_i[i] = jj_counter_offd;
|
|---|
| 555 | jj_begin_row_offd = jj_counter_offd;
|
|---|
| 556 |
|
|---|
| 557 |
|
|---|
| 558 | if (num_procs > 1)
|
|---|
| 559 | {
|
|---|
| 560 | if (col_offd_S_to_A)
|
|---|
| 561 | {
|
|---|
| 562 | for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
|
|---|
| 563 | {
|
|---|
| 564 | i1 = col_offd_S_to_A[S_offd_j[jj]];
|
|---|
| 565 |
|
|---|
| 566 | /*-----------------------------------------------------------
|
|---|
| 567 | * If neighbor i1 is a C-point, set column number in P_offd_j
|
|---|
| 568 | * and initialize interpolation weight to zero.
|
|---|
| 569 | *-----------------------------------------------------------*/
|
|---|
| 570 |
|
|---|
| 571 | if (CF_marker_offd[i1] >= 0)
|
|---|
| 572 | {
|
|---|
| 573 | P_marker_offd[i1] = jj_counter_offd;
|
|---|
| 574 | /*P_offd_j[jj_counter_offd] = fine_to_coarse_offd[i1];*/
|
|---|
| 575 | P_offd_j[jj_counter_offd] = i1;
|
|---|
| 576 | P_offd_data[jj_counter_offd] = zero;
|
|---|
| 577 | jj_counter_offd++;
|
|---|
| 578 | }
|
|---|
| 579 |
|
|---|
| 580 | /*-----------------------------------------------------------
|
|---|
| 581 | * If neighbor i1 is an F-point, mark it as a strong F-point
|
|---|
| 582 | * whose connection needs to be distributed.
|
|---|
| 583 | *-----------------------------------------------------------*/
|
|---|
| 584 |
|
|---|
| 585 | else if (CF_marker_offd[i1] != -3)
|
|---|
| 586 | {
|
|---|
| 587 | P_marker_offd[i1] = strong_f_marker;
|
|---|
| 588 | }
|
|---|
| 589 | }
|
|---|
| 590 | }
|
|---|
| 591 | else
|
|---|
| 592 | {
|
|---|
| 593 | for (jj = S_offd_i[i]; jj < S_offd_i[i+1]; jj++)
|
|---|
| 594 | {
|
|---|
| 595 | i1 = S_offd_j[jj];
|
|---|
| 596 |
|
|---|
| 597 | /*-----------------------------------------------------------
|
|---|
| 598 | * If neighbor i1 is a C-point, set column number in P_offd_j
|
|---|
| 599 | * and initialize interpolation weight to zero.
|
|---|
| 600 | *-----------------------------------------------------------*/
|
|---|
| 601 |
|
|---|
| 602 | if (CF_marker_offd[i1] >= 0)
|
|---|
| 603 | {
|
|---|
| 604 | P_marker_offd[i1] = jj_counter_offd;
|
|---|
| 605 | /*P_offd_j[jj_counter_offd] = fine_to_coarse_offd[i1];*/
|
|---|
| 606 | P_offd_j[jj_counter_offd] = i1;
|
|---|
| 607 | P_offd_data[jj_counter_offd] = zero;
|
|---|
| 608 | jj_counter_offd++;
|
|---|
| 609 | }
|
|---|
| 610 |
|
|---|
| 611 | /*-----------------------------------------------------------
|
|---|
| 612 | * If neighbor i1 is an F-point, mark it as a strong F-point
|
|---|
| 613 | * whose connection needs to be distributed.
|
|---|
| 614 | *-----------------------------------------------------------*/
|
|---|
| 615 |
|
|---|
| 616 | else if (CF_marker_offd[i1] != -3)
|
|---|
| 617 | {
|
|---|
| 618 | P_marker_offd[i1] = strong_f_marker;
|
|---|
| 619 | }
|
|---|
| 620 | }
|
|---|
| 621 | }
|
|---|
| 622 | }
|
|---|
| 623 |
|
|---|
| 624 | jj_end_row_offd = jj_counter_offd;
|
|---|
| 625 |
|
|---|
| 626 | diagonal = A_diag_data[A_diag_i[i]];
|
|---|
| 627 |
|
|---|
| 628 |
|
|---|
| 629 | /* Loop over ith row of A. First, the diagonal part of A */
|
|---|
| 630 |
|
|---|
| 631 | for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++)
|
|---|
| 632 | {
|
|---|
| 633 | i1 = A_diag_j[jj];
|
|---|
| 634 |
|
|---|
| 635 | /*--------------------------------------------------------------
|
|---|
| 636 | * Case 1: neighbor i1 is a C-point and strongly influences i,
|
|---|
| 637 | * accumulate a_{i,i1} into the interpolation weight.
|
|---|
| 638 | *--------------------------------------------------------------*/
|
|---|
| 639 |
|
|---|
| 640 | if (P_marker[i1] >= jj_begin_row)
|
|---|
| 641 | {
|
|---|
| 642 | P_diag_data[P_marker[i1]] += A_diag_data[jj];
|
|---|
| 643 | }
|
|---|
| 644 |
|
|---|
| 645 | /*--------------------------------------------------------------
|
|---|
| 646 | * Case 2: neighbor i1 is an F-point and strongly influences i,
|
|---|
| 647 | * distribute a_{i,i1} to C-points that strongly infuence i.
|
|---|
| 648 | * Note: currently no distribution to the diagonal in this case.
|
|---|
| 649 | *--------------------------------------------------------------*/
|
|---|
| 650 |
|
|---|
| 651 | else if (P_marker[i1] == strong_f_marker)
|
|---|
| 652 | {
|
|---|
| 653 | sum = zero;
|
|---|
| 654 |
|
|---|
| 655 | /*-----------------------------------------------------------
|
|---|
| 656 | * Loop over row of A for point i1 and calculate the sum
|
|---|
| 657 | * of the connections to c-points that strongly influence i.
|
|---|
| 658 | *-----------------------------------------------------------*/
|
|---|
| 659 | sgn = 1;
|
|---|
| 660 | if (A_diag_data[A_diag_i[i1]] < 0) sgn = -1;
|
|---|
| 661 | /* Diagonal block part of row i1 */
|
|---|
| 662 | for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
|
|---|
| 663 | {
|
|---|
| 664 | i2 = A_diag_j[jj1];
|
|---|
| 665 | if (P_marker[i2] >= jj_begin_row &&
|
|---|
| 666 | (sgn*A_diag_data[jj1]) < 0)
|
|---|
| 667 | {
|
|---|
| 668 | sum += A_diag_data[jj1];
|
|---|
| 669 | }
|
|---|
| 670 | }
|
|---|
| 671 |
|
|---|
| 672 | /* Off-Diagonal block part of row i1 */
|
|---|
| 673 | if (num_procs > 1)
|
|---|
| 674 | {
|
|---|
| 675 | for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
|
|---|
| 676 | {
|
|---|
| 677 | i2 = A_offd_j[jj1];
|
|---|
| 678 | if (P_marker_offd[i2] >= jj_begin_row_offd
|
|---|
| 679 | && (sgn*A_offd_data[jj1]) < 0)
|
|---|
| 680 | {
|
|---|
| 681 | sum += A_offd_data[jj1];
|
|---|
| 682 | }
|
|---|
| 683 | }
|
|---|
| 684 | }
|
|---|
| 685 |
|
|---|
| 686 | if (sum != 0)
|
|---|
| 687 | {
|
|---|
| 688 | distribute = A_diag_data[jj] / sum;
|
|---|
| 689 |
|
|---|
| 690 | /*-----------------------------------------------------------
|
|---|
| 691 | * Loop over row of A for point i1 and do the distribution.
|
|---|
| 692 | *-----------------------------------------------------------*/
|
|---|
| 693 |
|
|---|
| 694 | /* Diagonal block part of row i1 */
|
|---|
| 695 | for (jj1 = A_diag_i[i1]; jj1 < A_diag_i[i1+1]; jj1++)
|
|---|
| 696 | {
|
|---|
| 697 | i2 = A_diag_j[jj1];
|
|---|
| 698 | if (P_marker[i2] >= jj_begin_row
|
|---|
| 699 | && (sgn*A_diag_data[jj1]) < 0)
|
|---|
| 700 | {
|
|---|
| 701 | P_diag_data[P_marker[i2]]
|
|---|
| 702 | += distribute * A_diag_data[jj1];
|
|---|
| 703 | }
|
|---|
| 704 | }
|
|---|
| 705 |
|
|---|
| 706 | /* Off-Diagonal block part of row i1 */
|
|---|
| 707 | if (num_procs > 1)
|
|---|
| 708 | {
|
|---|
| 709 | for (jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++)
|
|---|
| 710 | {
|
|---|
| 711 | i2 = A_offd_j[jj1];
|
|---|
| 712 | if (P_marker_offd[i2] >= jj_begin_row_offd
|
|---|
| 713 | && (sgn*A_offd_data[jj1]) < 0)
|
|---|
| 714 | {
|
|---|
| 715 | P_offd_data[P_marker_offd[i2]]
|
|---|
| 716 | += distribute * A_offd_data[jj1];
|
|---|
| 717 | }
|
|---|
| 718 | }
|
|---|
| 719 | }
|
|---|
| 720 | }
|
|---|
| 721 | else
|
|---|
| 722 | {
|
|---|
| 723 | if (num_functions == 1 || dof_func[i] == dof_func[i1])
|
|---|
| 724 | diagonal += A_diag_data[jj];
|
|---|
| 725 | }
|
|---|
| 726 | }
|
|---|
| 727 |
|
|---|
| 728 | /*--------------------------------------------------------------
|
|---|
| 729 | * Case 3: neighbor i1 weakly influences i, accumulate a_{i,i1}
|
|---|
| 730 | * into the diagonal.
|
|---|
| 731 | *--------------------------------------------------------------*/
|
|---|
| 732 |
|
|---|
| 733 | else if (CF_marker[i1] != -3)
|
|---|
| 734 | {
|
|---|
| 735 | if (num_functions == 1 || dof_func[i] == dof_func[i1])
|
|---|
| 736 | diagonal += A_diag_data[jj];
|
|---|
| 737 | }
|
|---|
| 738 |
|
|---|
| 739 | }
|
|---|
| 740 |
|
|---|
| 741 |
|
|---|
| 742 | /*----------------------------------------------------------------
|
|---|
| 743 | * Still looping over ith row of A. Next, loop over the
|
|---|
| 744 | * off-diagonal part of A
|
|---|
| 745 | *---------------------------------------------------------------*/
|
|---|
| 746 |
|
|---|
| 747 | if (num_procs > 1)
|
|---|
| 748 | {
|
|---|
| 749 | for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
|
|---|
| 750 | {
|
|---|
| 751 | i1 = A_offd_j[jj];
|
|---|
| 752 |
|
|---|
| 753 | /*--------------------------------------------------------------
|
|---|
| 754 | * Case 1: neighbor i1 is a C-point and strongly influences i,
|
|---|
| 755 | * accumulate a_{i,i1} into the interpolation weight.
|
|---|
| 756 | *--------------------------------------------------------------*/
|
|---|
| 757 |
|
|---|
| 758 | if (P_marker_offd[i1] >= jj_begin_row_offd)
|
|---|
| 759 | {
|
|---|
| 760 | P_offd_data[P_marker_offd[i1]] += A_offd_data[jj];
|
|---|
| 761 | }
|
|---|
| 762 |
|
|---|
| 763 | /*------------------------------------------------------------
|
|---|
| 764 | * Case 2: neighbor i1 is an F-point and strongly influences i,
|
|---|
| 765 | * distribute a_{i,i1} to C-points that strongly infuence i.
|
|---|
| 766 | * Note: currently no distribution to the diagonal in this case.
|
|---|
| 767 | *-----------------------------------------------------------*/
|
|---|
| 768 |
|
|---|
| 769 | else if (P_marker_offd[i1] == strong_f_marker)
|
|---|
| 770 | {
|
|---|
| 771 | sum = zero;
|
|---|
| 772 |
|
|---|
| 773 | /*---------------------------------------------------------
|
|---|
| 774 | * Loop over row of A_ext for point i1 and calculate the sum
|
|---|
| 775 | * of the connections to c-points that strongly influence i.
|
|---|
| 776 | *---------------------------------------------------------*/
|
|---|
| 777 |
|
|---|
| 778 | /* find row number */
|
|---|
| 779 | c_num = A_offd_j[jj];
|
|---|
| 780 |
|
|---|
| 781 | sgn = 1;
|
|---|
| 782 | if (A_ext_data[A_ext_i[c_num]] < 0) sgn = -1;
|
|---|
| 783 | for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
|
|---|
| 784 | {
|
|---|
| 785 | i2 = A_ext_j[jj1];
|
|---|
| 786 |
|
|---|
| 787 | if (i2 > -1)
|
|---|
| 788 | {
|
|---|
| 789 | /* in the diagonal block */
|
|---|
| 790 | if (P_marker[i2] >= jj_begin_row
|
|---|
| 791 | && (sgn*A_ext_data[jj1]) < 0)
|
|---|
| 792 | {
|
|---|
| 793 | sum += A_ext_data[jj1];
|
|---|
| 794 | }
|
|---|
| 795 | }
|
|---|
| 796 | else
|
|---|
| 797 | {
|
|---|
| 798 | /* in the off_diagonal block */
|
|---|
| 799 | if (P_marker_offd[-i2-1] >= jj_begin_row_offd
|
|---|
| 800 | && (sgn*A_ext_data[jj1]) < 0)
|
|---|
| 801 | {
|
|---|
| 802 | sum += A_ext_data[jj1];
|
|---|
| 803 | }
|
|---|
| 804 |
|
|---|
| 805 | }
|
|---|
| 806 |
|
|---|
| 807 | }
|
|---|
| 808 |
|
|---|
| 809 | if (sum != 0)
|
|---|
| 810 | {
|
|---|
| 811 | distribute = A_offd_data[jj] / sum;
|
|---|
| 812 | /*---------------------------------------------------------
|
|---|
| 813 | * Loop over row of A_ext for point i1 and do
|
|---|
| 814 | * the distribution.
|
|---|
| 815 | *--------------------------------------------------------*/
|
|---|
| 816 |
|
|---|
| 817 | /* Diagonal block part of row i1 */
|
|---|
| 818 |
|
|---|
| 819 | for (jj1 = A_ext_i[c_num]; jj1 < A_ext_i[c_num+1]; jj1++)
|
|---|
| 820 | {
|
|---|
| 821 | i2 = A_ext_j[jj1];
|
|---|
| 822 |
|
|---|
| 823 | if (i2 > -1) /* in the diagonal block */
|
|---|
| 824 | {
|
|---|
| 825 | if (P_marker[i2] >= jj_begin_row
|
|---|
| 826 | && (sgn*A_ext_data[jj1]) < 0)
|
|---|
| 827 | {
|
|---|
| 828 | P_diag_data[P_marker[i2]]
|
|---|
| 829 | += distribute * A_ext_data[jj1];
|
|---|
| 830 | }
|
|---|
| 831 | }
|
|---|
| 832 | else
|
|---|
| 833 | {
|
|---|
| 834 | /* in the off_diagonal block */
|
|---|
| 835 | if (P_marker_offd[-i2-1] >= jj_begin_row_offd
|
|---|
| 836 | && (sgn*A_ext_data[jj1]) < 0)
|
|---|
| 837 | P_offd_data[P_marker_offd[-i2-1]]
|
|---|
| 838 | += distribute * A_ext_data[jj1];
|
|---|
| 839 | }
|
|---|
| 840 | }
|
|---|
| 841 | }
|
|---|
| 842 | else
|
|---|
| 843 | {
|
|---|
| 844 | if (num_functions == 1 || dof_func[i] == dof_func_offd[i1])
|
|---|
| 845 | diagonal += A_offd_data[jj];
|
|---|
| 846 | }
|
|---|
| 847 | }
|
|---|
| 848 |
|
|---|
| 849 | /*-----------------------------------------------------------
|
|---|
| 850 | * Case 3: neighbor i1 weakly influences i, accumulate a_{i,i1}
|
|---|
| 851 | * into the diagonal.
|
|---|
| 852 | *-----------------------------------------------------------*/
|
|---|
| 853 |
|
|---|
| 854 | else if (CF_marker_offd[i1] != -3)
|
|---|
| 855 | {
|
|---|
| 856 | if (num_functions == 1 || dof_func[i] == dof_func_offd[i1])
|
|---|
| 857 | diagonal += A_offd_data[jj];
|
|---|
| 858 | }
|
|---|
| 859 |
|
|---|
| 860 | }
|
|---|
| 861 | }
|
|---|
| 862 |
|
|---|
| 863 | /*-----------------------------------------------------------------
|
|---|
| 864 | * Set interpolation weight by dividing by the diagonal.
|
|---|
| 865 | *-----------------------------------------------------------------*/
|
|---|
| 866 |
|
|---|
| 867 | if (diagonal == 0.0)
|
|---|
| 868 | {
|
|---|
| 869 | printf(" Warning! zero diagonal! Proc id %d row %d\n", my_id,i);
|
|---|
| 870 | diagonal = A_diag_data[A_diag_i[i]];
|
|---|
| 871 | }
|
|---|
| 872 |
|
|---|
| 873 | for (jj = jj_begin_row; jj < jj_end_row; jj++)
|
|---|
| 874 | {
|
|---|
| 875 | P_diag_data[jj] /= -diagonal;
|
|---|
| 876 | }
|
|---|
| 877 |
|
|---|
| 878 | for (jj = jj_begin_row_offd; jj < jj_end_row_offd; jj++)
|
|---|
| 879 | {
|
|---|
| 880 | P_offd_data[jj] /= -diagonal;
|
|---|
| 881 | }
|
|---|
| 882 |
|
|---|
| 883 | }
|
|---|
| 884 |
|
|---|
| 885 | strong_f_marker--;
|
|---|
| 886 |
|
|---|
| 887 | P_offd_i[i+1] = jj_counter_offd;
|
|---|
| 888 | }
|
|---|
| 889 | hypre_TFree(P_marker);
|
|---|
| 890 | hypre_TFree(P_marker_offd);
|
|---|
| 891 | }
|
|---|
| 892 |
|
|---|
| 893 | P = hypre_ParCSRMatrixCreate(comm,
|
|---|
| 894 | hypre_ParCSRMatrixGlobalNumRows(A),
|
|---|
| 895 | total_global_cpts,
|
|---|
| 896 | hypre_ParCSRMatrixColStarts(A),
|
|---|
| 897 | num_cpts_global,
|
|---|
| 898 | 0,
|
|---|
| 899 | P_diag_i[n_fine],
|
|---|
| 900 | P_offd_i[n_fine]);
|
|---|
| 901 |
|
|---|
| 902 | P_diag = hypre_ParCSRMatrixDiag(P);
|
|---|
| 903 | hypre_CSRMatrixData(P_diag) = P_diag_data;
|
|---|
| 904 | hypre_CSRMatrixI(P_diag) = P_diag_i;
|
|---|
| 905 | hypre_CSRMatrixJ(P_diag) = P_diag_j;
|
|---|
| 906 | P_offd = hypre_ParCSRMatrixOffd(P);
|
|---|
| 907 | hypre_CSRMatrixData(P_offd) = P_offd_data;
|
|---|
| 908 | hypre_CSRMatrixI(P_offd) = P_offd_i;
|
|---|
| 909 | hypre_CSRMatrixJ(P_offd) = P_offd_j;
|
|---|
| 910 | hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
|
|---|
| 911 |
|
|---|
| 912 | /* Compress P, removing coefficients smaller than trunc_factor * Max */
|
|---|
| 913 |
|
|---|
| 914 | if (trunc_factor != 0.0 || max_elmts > 0)
|
|---|
| 915 | {
|
|---|
| 916 | hypre_BoomerAMGInterpTruncation(P, trunc_factor, max_elmts);
|
|---|
| 917 | P_diag_data = hypre_CSRMatrixData(P_diag);
|
|---|
| 918 | P_diag_i = hypre_CSRMatrixI(P_diag);
|
|---|
| 919 | P_diag_j = hypre_CSRMatrixJ(P_diag);
|
|---|
| 920 | P_offd_data = hypre_CSRMatrixData(P_offd);
|
|---|
| 921 | P_offd_i = hypre_CSRMatrixI(P_offd);
|
|---|
| 922 | P_offd_j = hypre_CSRMatrixJ(P_offd);
|
|---|
| 923 | P_diag_size = P_diag_i[n_fine];
|
|---|
| 924 | P_offd_size = P_offd_i[n_fine];
|
|---|
| 925 | }
|
|---|
| 926 |
|
|---|
| 927 | num_cols_P_offd = 0;
|
|---|
| 928 | if (P_offd_size)
|
|---|
| 929 | {
|
|---|
| 930 | if (num_cols_A_offd) P_marker = hypre_CTAlloc(int, num_cols_A_offd);
|
|---|
| 931 |
|
|---|
| 932 | /*#define HYPRE_SMP_PRIVATE i
|
|---|
| 933 | #include "../utilities/hypre_smp_forloop.h"*/
|
|---|
| 934 | for (i=0; i < num_cols_A_offd; i++)
|
|---|
| 935 | P_marker[i] = 0;
|
|---|
| 936 |
|
|---|
| 937 | num_cols_P_offd = 0;
|
|---|
| 938 | for (i=0; i < P_offd_size; i++)
|
|---|
| 939 | {
|
|---|
| 940 | index = P_offd_j[i];
|
|---|
| 941 | if (!P_marker[index])
|
|---|
| 942 | {
|
|---|
| 943 | num_cols_P_offd++;
|
|---|
| 944 | P_marker[index] = 1;
|
|---|
| 945 | }
|
|---|
| 946 | }
|
|---|
| 947 |
|
|---|
| 948 | if (num_cols_P_offd)
|
|---|
| 949 | {
|
|---|
| 950 | col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt,num_cols_P_offd);
|
|---|
| 951 | tmp_map_offd = hypre_CTAlloc(int,num_cols_P_offd);
|
|---|
| 952 | }
|
|---|
| 953 |
|
|---|
| 954 | index = 0;
|
|---|
| 955 | for (i=0; i < num_cols_P_offd; i++)
|
|---|
| 956 | {
|
|---|
| 957 | while (P_marker[index]==0) index++;
|
|---|
| 958 | tmp_map_offd[i] = index++;
|
|---|
| 959 | }
|
|---|
| 960 |
|
|---|
| 961 | /*#define HYPRE_SMP_PRIVATE i
|
|---|
| 962 | #include "../utilities/hypre_smp_forloop.h"*/
|
|---|
| 963 | for (i=0; i < P_offd_size; i++)
|
|---|
| 964 | P_offd_j[i] = hypre_BinarySearch(tmp_map_offd,
|
|---|
| 965 | P_offd_j[i],
|
|---|
| 966 | num_cols_P_offd);
|
|---|
| 967 | hypre_TFree(P_marker);
|
|---|
| 968 | }
|
|---|
| 969 |
|
|---|
| 970 | for (i=0; i < n_fine; i++)
|
|---|
| 971 | if (CF_marker[i] == -3) CF_marker[i] = -1;
|
|---|
| 972 |
|
|---|
| 973 | if (num_cols_P_offd)
|
|---|
| 974 | {
|
|---|
| 975 | hypre_ParCSRMatrixColMapOffd(P) = col_map_offd_P;
|
|---|
| 976 | hypre_CSRMatrixNumCols(P_offd) = num_cols_P_offd;
|
|---|
| 977 | }
|
|---|
| 978 |
|
|---|
| 979 | if (num_procs > 1)
|
|---|
| 980 | hypre_GetCommPkgRTFromCommPkgA(P,A, fine_to_coarse, tmp_map_offd);
|
|---|
| 981 |
|
|---|
| 982 |
|
|---|
| 983 | *P_ptr = P;
|
|---|
| 984 |
|
|---|
| 985 | hypre_TFree(tmp_map_offd);
|
|---|
| 986 | hypre_TFree(CF_marker_offd);
|
|---|
| 987 | hypre_TFree(dof_func_offd);
|
|---|
| 988 | hypre_TFree(int_buf_data);
|
|---|
| 989 | hypre_TFree(fine_to_coarse);
|
|---|
| 990 | /*hypre_TFree(fine_to_coarse_offd);*/
|
|---|
| 991 | hypre_TFree(coarse_counter);
|
|---|
| 992 | hypre_TFree(jj_count);
|
|---|
| 993 | hypre_TFree(jj_count_offd);
|
|---|
| 994 |
|
|---|
| 995 | if (num_procs > 1) hypre_CSRMatrixDestroy(A_ext);
|
|---|
| 996 |
|
|---|
| 997 | return(0);
|
|---|
| 998 |
|
|---|
| 999 | }
|
|---|
| 1000 |
|
|---|
| 1001 | int
|
|---|
| 1002 | hypre_BoomerAMGInterpTruncation( hypre_ParCSRMatrix *P,
|
|---|
| 1003 | double trunc_factor,
|
|---|
| 1004 | int max_elmts)
|
|---|
| 1005 | {
|
|---|
| 1006 | hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
|
|---|
| 1007 | int *P_diag_i = hypre_CSRMatrixI(P_diag);
|
|---|
| 1008 | int *P_diag_j = hypre_CSRMatrixJ(P_diag);
|
|---|
| 1009 | double *P_diag_data = hypre_CSRMatrixData(P_diag);
|
|---|
| 1010 | int *P_diag_j_new;
|
|---|
| 1011 | double *P_diag_data_new;
|
|---|
| 1012 |
|
|---|
| 1013 | hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
|
|---|
| 1014 | int *P_offd_i = hypre_CSRMatrixI(P_offd);
|
|---|
| 1015 | int *P_offd_j = hypre_CSRMatrixJ(P_offd);
|
|---|
| 1016 | double *P_offd_data = hypre_CSRMatrixData(P_offd);
|
|---|
| 1017 | int *P_offd_j_new;
|
|---|
| 1018 | double *P_offd_data_new;
|
|---|
| 1019 |
|
|---|
| 1020 | int n_fine = hypre_CSRMatrixNumRows(P_diag);
|
|---|
| 1021 | int num_cols = hypre_CSRMatrixNumCols(P_diag);
|
|---|
| 1022 | int i, j, start_j;
|
|---|
| 1023 | int ierr = 0;
|
|---|
| 1024 | double max_coef;
|
|---|
| 1025 | int next_open = 0;
|
|---|
| 1026 | int now_checking = 0;
|
|---|
| 1027 | int next_open_offd = 0;
|
|---|
| 1028 | int now_checking_offd = 0;
|
|---|
| 1029 | int num_lost = 0;
|
|---|
| 1030 | int num_lost_offd = 0;
|
|---|
| 1031 | int num_lost_global = 0;
|
|---|
| 1032 | int num_lost_global_offd = 0;
|
|---|
| 1033 | int P_diag_size;
|
|---|
| 1034 | int P_offd_size;
|
|---|
| 1035 | int num_elmts;
|
|---|
| 1036 | int cnt, cnt_diag, cnt_offd;
|
|---|
| 1037 | double row_sum;
|
|---|
| 1038 | double scale;
|
|---|
| 1039 |
|
|---|
| 1040 | /* Threading variables. Entry i of num_lost_(offd_)per_thread holds the
|
|---|
| 1041 | * number of dropped entries over thread i's row range. Cum_lost_per_thread
|
|---|
| 1042 | * will temporarily store the cumulative number of dropped entries up to
|
|---|
| 1043 | * each thread. */
|
|---|
| [e0b7443] | 1044 | int my_thread_num, num_threadsID, start, stop;
|
|---|
| [2aa6644] | 1045 | int * max_num_threads = hypre_CTAlloc(int, 1);
|
|---|
| 1046 | int * cum_lost_per_thread;
|
|---|
| 1047 | int * num_lost_per_thread;
|
|---|
| 1048 | int * num_lost_offd_per_thread;
|
|---|
| 1049 |
|
|---|
| 1050 | /* Initialize threading variables */
|
|---|
| 1051 | max_num_threads[0] = hypre_NumThreads();
|
|---|
| 1052 | cum_lost_per_thread = hypre_CTAlloc(int, max_num_threads[0]);
|
|---|
| 1053 | num_lost_per_thread = hypre_CTAlloc(int, max_num_threads[0]);
|
|---|
| 1054 | num_lost_offd_per_thread = hypre_CTAlloc(int, max_num_threads[0]);
|
|---|
| 1055 | for(i=0; i < max_num_threads[0]; i++)
|
|---|
| 1056 | {
|
|---|
| 1057 | num_lost_per_thread[i] = 0;
|
|---|
| 1058 | num_lost_offd_per_thread[i] = 0;
|
|---|
| 1059 | }
|
|---|
| 1060 |
|
|---|
| 1061 | #ifdef HYPRE_USING_OPENMP
|
|---|
| [e0b7443] | 1062 | #pragma omp parallel private(i,my_thread_num,num_threadsID,max_coef,j,start_j,row_sum,scale,num_lost,now_checking,next_open,num_lost_offd,now_checking_offd,next_open_offd,start,stop,cnt_diag,cnt_offd,num_elmts,cnt)
|
|---|
| [2aa6644] | 1063 | #endif
|
|---|
| 1064 | {
|
|---|
| 1065 | my_thread_num = hypre_GetThreadNum();
|
|---|
| [e0b7443] | 1066 | num_threadsID = hypre_NumActiveThreads();
|
|---|
| [2aa6644] | 1067 |
|
|---|
| 1068 | /* Compute each thread's range of rows to truncate and compress. Note,
|
|---|
| 1069 | * that i, j and data are all compressed as entries are dropped, but
|
|---|
| 1070 | * that the compression only occurs locally over each thread's row
|
|---|
| 1071 | * range. P_diag_i is only made globally consistent at the end of this
|
|---|
| 1072 | * routine. During the dropping phases, P_diag_i[stop] will point to
|
|---|
| 1073 | * the start of the next thread's row range. */
|
|---|
| 1074 |
|
|---|
| 1075 | /* my row range */
|
|---|
| [e0b7443] | 1076 | start = (n_fine/num_threadsID)*my_thread_num;
|
|---|
| 1077 | if (my_thread_num == num_threadsID-1)
|
|---|
| [2aa6644] | 1078 | { stop = n_fine; }
|
|---|
| 1079 | else
|
|---|
| [e0b7443] | 1080 | { stop = (n_fine/num_threadsID)*(my_thread_num+1); }
|
|---|
| [2aa6644] | 1081 |
|
|---|
| 1082 | /*
|
|---|
| 1083 | * Truncate based on truncation tolerance
|
|---|
| 1084 | */
|
|---|
| 1085 | if (trunc_factor > 0)
|
|---|
| 1086 | {
|
|---|
| 1087 | num_lost_offd = 0;
|
|---|
| 1088 |
|
|---|
| 1089 | next_open = P_diag_i[start];
|
|---|
| 1090 | now_checking = P_diag_i[start];
|
|---|
| 1091 | next_open_offd = P_offd_i[start];;
|
|---|
| 1092 | now_checking_offd = P_offd_i[start];;
|
|---|
| 1093 |
|
|---|
| 1094 | for (i = start; i < stop; i++)
|
|---|
| 1095 | {
|
|---|
| 1096 | max_coef = 0;
|
|---|
| 1097 | for (j = P_diag_i[i]; j < P_diag_i[i+1]; j++)
|
|---|
| 1098 | max_coef = (max_coef < fabs(P_diag_data[j])) ?
|
|---|
| 1099 | fabs(P_diag_data[j]) : max_coef;
|
|---|
| 1100 | for (j = P_offd_i[i]; j < P_offd_i[i+1]; j++)
|
|---|
| 1101 | max_coef = (max_coef < fabs(P_offd_data[j])) ?
|
|---|
| 1102 | fabs(P_offd_data[j]) : max_coef;
|
|---|
| 1103 | max_coef *= trunc_factor;
|
|---|
| 1104 |
|
|---|
| 1105 | start_j = P_diag_i[i];
|
|---|
| 1106 | P_diag_i[i] -= num_lost;
|
|---|
| 1107 | row_sum = 0;
|
|---|
| 1108 | scale = 0;
|
|---|
| 1109 | for (j = start_j; j < P_diag_i[i+1]; j++)
|
|---|
| 1110 | {
|
|---|
| 1111 | row_sum += P_diag_data[now_checking];
|
|---|
| 1112 | if (fabs(P_diag_data[now_checking]) < max_coef)
|
|---|
| 1113 | {
|
|---|
| 1114 | num_lost++;
|
|---|
| 1115 | now_checking++;
|
|---|
| 1116 | }
|
|---|
| 1117 | else
|
|---|
| 1118 | {
|
|---|
| 1119 | scale += P_diag_data[now_checking];
|
|---|
| 1120 | P_diag_data[next_open] = P_diag_data[now_checking];
|
|---|
| 1121 | P_diag_j[next_open] = P_diag_j[now_checking];
|
|---|
| 1122 | now_checking++;
|
|---|
| 1123 | next_open++;
|
|---|
| 1124 | }
|
|---|
| 1125 | }
|
|---|
| 1126 |
|
|---|
| 1127 | start_j = P_offd_i[i];
|
|---|
| 1128 | P_offd_i[i] -= num_lost_offd;
|
|---|
| 1129 |
|
|---|
| 1130 | for (j = start_j; j < P_offd_i[i+1]; j++)
|
|---|
| 1131 | {
|
|---|
| 1132 | row_sum += P_offd_data[now_checking_offd];
|
|---|
| 1133 | if (fabs(P_offd_data[now_checking_offd]) < max_coef)
|
|---|
| 1134 | {
|
|---|
| 1135 | num_lost_offd++;
|
|---|
| 1136 | now_checking_offd++;
|
|---|
| 1137 | }
|
|---|
| 1138 | else
|
|---|
| 1139 | {
|
|---|
| 1140 | scale += P_offd_data[now_checking_offd];
|
|---|
| 1141 | P_offd_data[next_open_offd] = P_offd_data[now_checking_offd];
|
|---|
| 1142 | P_offd_j[next_open_offd] = P_offd_j[now_checking_offd];
|
|---|
| 1143 | now_checking_offd++;
|
|---|
| 1144 | next_open_offd++;
|
|---|
| 1145 | }
|
|---|
| 1146 | }
|
|---|
| 1147 | /* normalize row of P */
|
|---|
| 1148 |
|
|---|
| 1149 | if (scale != 0.)
|
|---|
| 1150 | {
|
|---|
| 1151 | if (scale != row_sum)
|
|---|
| 1152 | {
|
|---|
| 1153 | scale = row_sum/scale;
|
|---|
| 1154 | for (j = P_diag_i[i]; j < (P_diag_i[i+1]-num_lost); j++)
|
|---|
| 1155 | P_diag_data[j] *= scale;
|
|---|
| 1156 | for (j = P_offd_i[i]; j < (P_offd_i[i+1]-num_lost_offd); j++)
|
|---|
| 1157 | P_offd_data[j] *= scale;
|
|---|
| 1158 | }
|
|---|
| 1159 | }
|
|---|
| 1160 | }
|
|---|
| 1161 | /* store number of dropped elements and number of threads */
|
|---|
| 1162 | if(my_thread_num == 0)
|
|---|
| [e0b7443] | 1163 | { max_num_threads[0] = num_threadsID; }
|
|---|
| [2aa6644] | 1164 | num_lost_per_thread[my_thread_num] = num_lost;
|
|---|
| 1165 | num_lost_offd_per_thread[my_thread_num] = num_lost_offd;
|
|---|
| 1166 |
|
|---|
| 1167 | } /* end if (trunc_factor > 0) */
|
|---|
| 1168 |
|
|---|
| 1169 | /*P_diag_i[n_fine] -= num_lost;
|
|---|
| 1170 | P_offd_i[n_fine] -= num_lost_offd;
|
|---|
| 1171 | } */
|
|---|
| 1172 | if (max_elmts > 0)
|
|---|
| 1173 | {
|
|---|
| 1174 | int P_mxnum, cnt1, last_index, last_index_offd;
|
|---|
| 1175 | int *P_aux_j;
|
|---|
| 1176 | double *P_aux_data;
|
|---|
| 1177 | /* find maximum row length locally over this row range */
|
|---|
| 1178 | P_mxnum = 0;
|
|---|
| 1179 | for (i=start; i<stop; i++)
|
|---|
| 1180 | {
|
|---|
| 1181 | /* Note P_diag_i[stop] is the starting point for the next thread
|
|---|
| 1182 | * in j and data, not the stop point for this thread */
|
|---|
| 1183 | last_index = P_diag_i[i+1];
|
|---|
| 1184 | last_index_offd = P_offd_i[i+1];
|
|---|
| 1185 | if(i == stop-1)
|
|---|
| 1186 | {
|
|---|
| 1187 | last_index -= num_lost_per_thread[my_thread_num];
|
|---|
| 1188 | last_index_offd -= num_lost_offd_per_thread[my_thread_num];
|
|---|
| 1189 | }
|
|---|
| 1190 | cnt1 = last_index-P_diag_i[i] + last_index_offd-P_offd_i[i];
|
|---|
| 1191 | if (cnt1 > P_mxnum) P_mxnum = cnt1;
|
|---|
| 1192 | }
|
|---|
| 1193 | /*rowlength = 0;
|
|---|
| 1194 | if (n_fine)
|
|---|
| 1195 | rowlength = P_diag_i[1]+P_offd_i[1];
|
|---|
| 1196 | P_mxnum = rowlength;
|
|---|
| 1197 | for (i=1; i<n_fine; i++)
|
|---|
| 1198 | {
|
|---|
| 1199 | rowlength = P_diag_i[i+1]-P_diag_i[i]+P_offd_i[i+1]-P_offd_i[i];
|
|---|
| 1200 | if (rowlength > P_mxnum) P_mxnum = rowlength;
|
|---|
| 1201 | }*/
|
|---|
| 1202 | if (P_mxnum > max_elmts)
|
|---|
| 1203 | {
|
|---|
| 1204 | num_lost = 0;
|
|---|
| 1205 | num_lost_offd = 0;
|
|---|
| 1206 |
|
|---|
| 1207 | /* two temporary arrays to hold row i for temporary operations */
|
|---|
| 1208 | P_aux_j = hypre_CTAlloc(int, P_mxnum);
|
|---|
| 1209 | P_aux_data = hypre_CTAlloc(double, P_mxnum);
|
|---|
| 1210 | cnt_diag = P_diag_i[start];
|
|---|
| 1211 | cnt_offd = P_offd_i[start];
|
|---|
| 1212 |
|
|---|
| 1213 | for (i = start; i < stop; i++)
|
|---|
| 1214 | {
|
|---|
| 1215 | /* Note P_diag_i[stop] is the starting point for the next thread
|
|---|
| 1216 | * in j and data, not the stop point for this thread */
|
|---|
| 1217 | last_index = P_diag_i[i+1];
|
|---|
| 1218 | last_index_offd = P_offd_i[i+1];
|
|---|
| 1219 | if(i == stop-1)
|
|---|
| 1220 | {
|
|---|
| 1221 | last_index -= num_lost_per_thread[my_thread_num];
|
|---|
| 1222 | last_index_offd -= num_lost_offd_per_thread[my_thread_num];
|
|---|
| 1223 | }
|
|---|
| 1224 |
|
|---|
| 1225 | row_sum = 0;
|
|---|
| 1226 | num_elmts = P_diag_i[i+1]-P_diag_i[i]+P_offd_i[i+1]-P_offd_i[i];
|
|---|
| 1227 | if (max_elmts < num_elmts)
|
|---|
| 1228 | {
|
|---|
| 1229 | cnt = 0;
|
|---|
| 1230 | for (j = P_diag_i[i]; j < P_diag_i[i+1]; j++)
|
|---|
| 1231 | {
|
|---|
| 1232 | P_aux_j[cnt] = P_diag_j[j];
|
|---|
| 1233 | P_aux_data[cnt++] = P_diag_data[j];
|
|---|
| 1234 | row_sum += P_diag_data[j];
|
|---|
| 1235 | }
|
|---|
| 1236 | num_lost += cnt;
|
|---|
| 1237 | cnt1 = cnt;
|
|---|
| 1238 | for (j = P_offd_i[i]; j < P_offd_i[i+1]; j++)
|
|---|
| 1239 | {
|
|---|
| 1240 | P_aux_j[cnt] = P_offd_j[j]+num_cols;
|
|---|
| 1241 | P_aux_data[cnt++] = P_offd_data[j];
|
|---|
| 1242 | row_sum += P_offd_data[j];
|
|---|
| 1243 | }
|
|---|
| 1244 | num_lost_offd += cnt-cnt1;
|
|---|
| 1245 | /* sort data */
|
|---|
| 1246 | hypre_qsort2abs(P_aux_j,P_aux_data,0,cnt-1);
|
|---|
| 1247 | scale = 0;
|
|---|
| 1248 | P_diag_i[i] = cnt_diag;
|
|---|
| 1249 | P_offd_i[i] = cnt_offd;
|
|---|
| 1250 | for (j = 0; j < max_elmts; j++)
|
|---|
| 1251 | {
|
|---|
| 1252 | scale += P_aux_data[j];
|
|---|
| 1253 | if (P_aux_j[j] < num_cols)
|
|---|
| 1254 | {
|
|---|
| 1255 | P_diag_j[cnt_diag] = P_aux_j[j];
|
|---|
| 1256 | P_diag_data[cnt_diag++] = P_aux_data[j];
|
|---|
| 1257 | }
|
|---|
| 1258 | else
|
|---|
| 1259 | {
|
|---|
| 1260 | P_offd_j[cnt_offd] = P_aux_j[j]-num_cols;
|
|---|
| 1261 | P_offd_data[cnt_offd++] = P_aux_data[j];
|
|---|
| 1262 | }
|
|---|
| 1263 | }
|
|---|
| 1264 | num_lost -= cnt_diag-P_diag_i[i];
|
|---|
| 1265 | num_lost_offd -= cnt_offd-P_offd_i[i];
|
|---|
| 1266 | /* normalize row of P */
|
|---|
| 1267 |
|
|---|
| 1268 | if (scale != 0.)
|
|---|
| 1269 | {
|
|---|
| 1270 | if (scale != row_sum)
|
|---|
| 1271 | {
|
|---|
| 1272 | scale = row_sum/scale;
|
|---|
| 1273 | for (j = P_diag_i[i]; j < cnt_diag; j++)
|
|---|
| 1274 | P_diag_data[j] *= scale;
|
|---|
| 1275 | for (j = P_offd_i[i]; j < cnt_offd; j++)
|
|---|
| 1276 | P_offd_data[j] *= scale;
|
|---|
| 1277 | }
|
|---|
| 1278 | }
|
|---|
| 1279 | }
|
|---|
| 1280 | else
|
|---|
| 1281 | {
|
|---|
| 1282 | if (P_diag_i[i] != cnt_diag)
|
|---|
| 1283 | {
|
|---|
| 1284 | start_j = P_diag_i[i];
|
|---|
| 1285 | P_diag_i[i] = cnt_diag;
|
|---|
| 1286 | for (j = start_j; j < P_diag_i[i+1]; j++)
|
|---|
| 1287 | {
|
|---|
| 1288 | P_diag_j[cnt_diag] = P_diag_j[j];
|
|---|
| 1289 | P_diag_data[cnt_diag++] = P_diag_data[j];
|
|---|
| 1290 | }
|
|---|
| 1291 | }
|
|---|
| 1292 | else
|
|---|
| 1293 | cnt_diag += P_diag_i[i+1]-P_diag_i[i];
|
|---|
| 1294 | if (P_offd_i[i] != cnt_offd)
|
|---|
| 1295 | {
|
|---|
| 1296 | start_j = P_offd_i[i];
|
|---|
| 1297 | P_offd_i[i] = cnt_offd;
|
|---|
| 1298 | for (j = start_j; j < P_offd_i[i+1]; j++)
|
|---|
| 1299 | {
|
|---|
| 1300 | P_offd_j[cnt_offd] = P_offd_j[j];
|
|---|
| 1301 | P_offd_data[cnt_offd++] = P_offd_data[j];
|
|---|
| 1302 | }
|
|---|
| 1303 | }
|
|---|
| 1304 | else
|
|---|
| 1305 | cnt_offd += P_offd_i[i+1]-P_offd_i[i];
|
|---|
| 1306 | }
|
|---|
| 1307 | }
|
|---|
| 1308 |
|
|---|
| 1309 | num_lost_per_thread[my_thread_num] += num_lost;
|
|---|
| 1310 | num_lost_offd_per_thread[my_thread_num] += num_lost_offd;
|
|---|
| 1311 | hypre_TFree(P_aux_j);
|
|---|
| 1312 | hypre_TFree(P_aux_data);
|
|---|
| 1313 |
|
|---|
| 1314 | } /* end if (P_mxnum > max_elmts) */
|
|---|
| 1315 | } /* end if (max_elmts > 0) */
|
|---|
| 1316 |
|
|---|
| 1317 | /* Sum up num_lost_global */
|
|---|
| 1318 | #ifdef HYPRE_USING_OPENMP
|
|---|
| 1319 | #pragma omp barrier
|
|---|
| 1320 | #endif
|
|---|
| 1321 | if(my_thread_num == 0)
|
|---|
| 1322 | {
|
|---|
| 1323 | num_lost_global = 0;
|
|---|
| 1324 | num_lost_global_offd = 0;
|
|---|
| 1325 | for(i = 0; i < max_num_threads[0]; i++)
|
|---|
| 1326 | {
|
|---|
| 1327 | num_lost_global += num_lost_per_thread[i];
|
|---|
| 1328 | num_lost_global_offd += num_lost_offd_per_thread[i];
|
|---|
| 1329 | }
|
|---|
| 1330 | }
|
|---|
| 1331 | #ifdef HYPRE_USING_OPENMP
|
|---|
| 1332 | #pragma omp barrier
|
|---|
| 1333 | #endif
|
|---|
| 1334 |
|
|---|
| 1335 | /*
|
|---|
| 1336 | * Synchronize and create new diag data structures
|
|---|
| 1337 | */
|
|---|
| 1338 | if (num_lost_global)
|
|---|
| 1339 | {
|
|---|
| 1340 | /* Each thread has it's own locally compressed CSR matrix from rows start
|
|---|
| 1341 | * to stop. Now, we have to copy each thread's chunk into the new
|
|---|
| 1342 | * process-wide CSR data structures
|
|---|
| 1343 | *
|
|---|
| 1344 | * First, we compute the new process-wide number of nonzeros (i.e.,
|
|---|
| 1345 | * P_diag_size), and compute cum_lost_per_thread[k] so that this
|
|---|
| 1346 | * entry holds the cumulative sum of entries dropped up to and
|
|---|
| 1347 | * including thread k. */
|
|---|
| 1348 | if(my_thread_num == 0)
|
|---|
| 1349 | {
|
|---|
| 1350 | P_diag_size = P_diag_i[n_fine];
|
|---|
| 1351 |
|
|---|
| 1352 | for(i = 0; i < max_num_threads[0]; i++)
|
|---|
| 1353 | {
|
|---|
| 1354 | P_diag_size -= num_lost_per_thread[i];
|
|---|
| 1355 | if(i > 0)
|
|---|
| 1356 | { cum_lost_per_thread[i] = num_lost_per_thread[i] + cum_lost_per_thread[i-1]; }
|
|---|
| 1357 | else
|
|---|
| 1358 | { cum_lost_per_thread[i] = num_lost_per_thread[i]; }
|
|---|
| 1359 | }
|
|---|
| 1360 |
|
|---|
| 1361 | P_diag_j_new = hypre_CTAlloc(int,P_diag_size);
|
|---|
| 1362 | P_diag_data_new = hypre_CTAlloc(double,P_diag_size);
|
|---|
| 1363 | }
|
|---|
| 1364 | #ifdef HYPRE_USING_OPENMP
|
|---|
| 1365 | #pragma omp barrier
|
|---|
| 1366 | #endif
|
|---|
| 1367 |
|
|---|
| 1368 |
|
|---|
| 1369 | /* points to next open spot in new data structures for this thread */
|
|---|
| 1370 | if(my_thread_num == 0)
|
|---|
| 1371 | { next_open = 0; }
|
|---|
| 1372 | else
|
|---|
| 1373 | {
|
|---|
| 1374 | /* remember, cum_lost_per_thread[k] stores the num dropped up to and
|
|---|
| 1375 | * including thread k */
|
|---|
| 1376 | next_open = P_diag_i[start] - cum_lost_per_thread[my_thread_num-1];
|
|---|
| 1377 | }
|
|---|
| 1378 | /* copy the j and data arrays over */
|
|---|
| 1379 | for(i = P_diag_i[start]; i < P_diag_i[stop] - num_lost_per_thread[my_thread_num]; i++)
|
|---|
| 1380 | {
|
|---|
| 1381 | P_diag_j_new[next_open] = P_diag_j[i];
|
|---|
| 1382 | P_diag_data_new[next_open] = P_diag_data[i];
|
|---|
| 1383 | next_open += 1;
|
|---|
| 1384 | }
|
|---|
| 1385 |
|
|---|
| 1386 | #ifdef HYPRE_USING_OPENMP
|
|---|
| 1387 | #pragma omp barrier
|
|---|
| 1388 | #endif
|
|---|
| 1389 | /* update P_diag_i with number of dropped entries by all lower ranked
|
|---|
| 1390 | * threads */
|
|---|
| 1391 | if(my_thread_num > 0)
|
|---|
| 1392 | {
|
|---|
| 1393 | for(i=start; i<stop; i++)
|
|---|
| 1394 | {
|
|---|
| 1395 | P_diag_i[i] -= cum_lost_per_thread[my_thread_num-1];
|
|---|
| 1396 | }
|
|---|
| 1397 | }
|
|---|
| 1398 |
|
|---|
| 1399 | if(my_thread_num == 0)
|
|---|
| 1400 | {
|
|---|
| 1401 | /* Set last entry */
|
|---|
| 1402 | P_diag_i[n_fine] = P_diag_size ;
|
|---|
| 1403 |
|
|---|
| 1404 | hypre_TFree(P_diag_j);
|
|---|
| 1405 | hypre_TFree(P_diag_data);
|
|---|
| 1406 | hypre_CSRMatrixJ(P_diag) = P_diag_j_new;
|
|---|
| 1407 | hypre_CSRMatrixData(P_diag) = P_diag_data_new;
|
|---|
| 1408 | hypre_CSRMatrixNumNonzeros(P_diag) = P_diag_size;
|
|---|
| 1409 | }
|
|---|
| 1410 | }
|
|---|
| 1411 |
|
|---|
| 1412 |
|
|---|
| 1413 | /*
|
|---|
| 1414 | * Synchronize and create new offd data structures
|
|---|
| 1415 | */
|
|---|
| 1416 | #ifdef HYPRE_USING_OPENMP
|
|---|
| 1417 | #pragma omp barrier
|
|---|
| 1418 | #endif
|
|---|
| 1419 | if (num_lost_global_offd)
|
|---|
| 1420 | {
|
|---|
| 1421 | /* Repeat process for off-diagonal */
|
|---|
| 1422 | if(my_thread_num == 0)
|
|---|
| 1423 | {
|
|---|
| 1424 | P_offd_size = P_offd_i[n_fine];
|
|---|
| 1425 | for(i = 0; i < max_num_threads[0]; i++)
|
|---|
| 1426 | {
|
|---|
| 1427 | P_offd_size -= num_lost_offd_per_thread[i];
|
|---|
| 1428 | if(i > 0)
|
|---|
| 1429 | { cum_lost_per_thread[i] = num_lost_offd_per_thread[i] + cum_lost_per_thread[i-1]; }
|
|---|
| 1430 | else
|
|---|
| 1431 | { cum_lost_per_thread[i] = num_lost_offd_per_thread[i]; }
|
|---|
| 1432 | }
|
|---|
| 1433 |
|
|---|
| 1434 | P_offd_j_new = hypre_CTAlloc(int,P_offd_size);
|
|---|
| 1435 | P_offd_data_new = hypre_CTAlloc(double,P_offd_size);
|
|---|
| 1436 | }
|
|---|
| 1437 | #ifdef HYPRE_USING_OPENMP
|
|---|
| 1438 | #pragma omp barrier
|
|---|
| 1439 | #endif
|
|---|
| 1440 | /* points to next open spot in new data structures for this thread */
|
|---|
| 1441 | if(my_thread_num == 0)
|
|---|
| 1442 | { next_open = 0; }
|
|---|
| 1443 | else
|
|---|
| 1444 | {
|
|---|
| 1445 | /* remember, cum_lost_per_thread[k] stores the num dropped up to and
|
|---|
| 1446 | * including thread k */
|
|---|
| 1447 | next_open = P_offd_i[start] - cum_lost_per_thread[my_thread_num-1];
|
|---|
| 1448 | }
|
|---|
| 1449 |
|
|---|
| 1450 | /* copy the j and data arrays over */
|
|---|
| 1451 | for(i = P_offd_i[start]; i < P_offd_i[stop] - num_lost_offd_per_thread[my_thread_num]; i++)
|
|---|
| 1452 | {
|
|---|
| 1453 | P_offd_j_new[next_open] = P_offd_j[i];
|
|---|
| 1454 | P_offd_data_new[next_open] = P_offd_data[i];
|
|---|
| 1455 | next_open += 1;
|
|---|
| 1456 | }
|
|---|
| 1457 |
|
|---|
| 1458 | #ifdef HYPRE_USING_OPENMP
|
|---|
| 1459 | #pragma omp barrier
|
|---|
| 1460 | #endif
|
|---|
| 1461 | /* update P_offd_i with number of dropped entries by all lower ranked
|
|---|
| 1462 | * threads */
|
|---|
| 1463 | if(my_thread_num > 0)
|
|---|
| 1464 | {
|
|---|
| 1465 | for(i=start; i<stop; i++)
|
|---|
| 1466 | {
|
|---|
| 1467 | P_offd_i[i] -= cum_lost_per_thread[my_thread_num-1];
|
|---|
| 1468 | }
|
|---|
| 1469 | }
|
|---|
| 1470 |
|
|---|
| 1471 | if(my_thread_num == 0)
|
|---|
| 1472 | {
|
|---|
| 1473 | /* Set last entry */
|
|---|
| 1474 | P_offd_i[n_fine] = P_offd_size ;
|
|---|
| 1475 |
|
|---|
| 1476 | hypre_TFree(P_offd_j);
|
|---|
| 1477 | hypre_TFree(P_offd_data);
|
|---|
| 1478 | hypre_CSRMatrixJ(P_offd) = P_offd_j_new;
|
|---|
| 1479 | hypre_CSRMatrixData(P_offd) = P_offd_data_new;
|
|---|
| 1480 | hypre_CSRMatrixNumNonzeros(P_offd) = P_offd_size;
|
|---|
| 1481 | }
|
|---|
| 1482 | }
|
|---|
| 1483 |
|
|---|
| 1484 | } /* end parallel region */
|
|---|
| 1485 |
|
|---|
| 1486 | hypre_TFree(max_num_threads);
|
|---|
| 1487 | hypre_TFree(cum_lost_per_thread);
|
|---|
| 1488 | hypre_TFree(num_lost_per_thread);
|
|---|
| 1489 | hypre_TFree(num_lost_offd_per_thread);
|
|---|
| 1490 |
|
|---|
| 1491 | return ierr;
|
|---|
| 1492 | }
|
|---|
| 1493 |
|
|---|
| 1494 | void hypre_qsort2abs( int *v,
|
|---|
| 1495 | double *w,
|
|---|
| 1496 | int left,
|
|---|
| 1497 | int right )
|
|---|
| 1498 | {
|
|---|
| 1499 | int i, last;
|
|---|
| 1500 | if (left >= right)
|
|---|
| 1501 | return;
|
|---|
| 1502 | swap2( v, w, left, (left+right)/2);
|
|---|
| 1503 | last = left;
|
|---|
| 1504 | for (i = left+1; i <= right; i++)
|
|---|
| 1505 | if (fabs(w[i]) > fabs(w[left]))
|
|---|
| 1506 | {
|
|---|
| 1507 | swap2(v, w, ++last, i);
|
|---|
| 1508 | }
|
|---|
| 1509 | swap2(v, w, left, last);
|
|---|
| 1510 | hypre_qsort2abs(v, w, left, last-1);
|
|---|
| 1511 | hypre_qsort2abs(v, w, last+1, right);
|
|---|
| 1512 | }
|
|---|