/*BHEADER**********************************************************************
 * Copyright (c) 2008, Lawrence Livermore National Security, LLC.
 * Produced at the Lawrence Livermore National Laboratory.
 * This file is part of HYPRE. See file COPYRIGHT for details.
 *
 * HYPRE is free software; you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License (as published by the Free
 * Software Foundation) version 2.1 dated February 1999.
 *
 * $Revision: 2.4 $
 ***********************************************************************EHEADER*/
#include "headers.h"

|---|
/*--------------------------------------------------------------------------
 * hypre_BoomerAMGInterpTruncation
 *
 * Truncates the interpolation matrix P in place: entries smaller (relative
 * to the row maximum) than trunc_factor are intended to be dropped, and rows
 * are intended to be limited to max_elmts entries.  Works on both the diag
 * and offd parts of the ParCSR matrix, in parallel over threads, with each
 * thread compressing its own contiguous row range.
 *
 * Parameters:
 *   P            - interpolation matrix to truncate (modified in place)
 *   trunc_factor - relative drop tolerance for small entries
 *   max_elmts    - maximum number of entries retained per row
 *
 * Returns: ierr (0 on success).
 *
 * NOTE(review): as shown here the routine appears incomplete/elided.  The
 * actual truncation and row-compression loops are missing (only a
 * commented-out fragment remains below), so P_diag_j_new, P_diag_data_new
 * and P_diag_size are read without any visible initialization, and
 * num_lost_global is never set — making the "create new diag structures"
 * branch dead as written.  Confirm against the full upstream source before
 * relying on this body.
 *--------------------------------------------------------------------------*/
int
hypre_BoomerAMGInterpTruncation( hypre_ParCSRMatrix *P,
                                 double trunc_factor,
                                 int max_elmts)
{
   /* Diagonal (on-processor) part of P in CSR form. */
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   int *P_diag_i = hypre_CSRMatrixI(P_diag);
   int *P_diag_j = hypre_CSRMatrixJ(P_diag);
   double *P_diag_data = hypre_CSRMatrixData(P_diag);
   /* Replacement arrays for the compressed diag part.
    * NOTE(review): never initialized in this view — see header note. */
   int *P_diag_j_new;
   double *P_diag_data_new;

   /* Off-diagonal (off-processor) part of P in CSR form. */
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   int *P_offd_i = hypre_CSRMatrixI(P_offd);
   int *P_offd_j = hypre_CSRMatrixJ(P_offd);
   double *P_offd_data = hypre_CSRMatrixData(P_offd);
   /* Replacement arrays for the compressed offd part.
    * NOTE(review): never initialized in this view. */
   int *P_offd_j_new;
   double *P_offd_data_new;

   int n_fine = hypre_CSRMatrixNumRows(P_diag);   /* number of fine rows  */
   int num_cols = hypre_CSRMatrixNumCols(P_diag); /* coarse columns       */
   int i, j, start_j;
   int ierr = 0;                                  /* return/error code    */
   double max_coef;                               /* row max |entry|      */
   /* In-place compression cursors: entries are copied from now_checking
    * down to next_open as survivors are kept and dropped ones skipped. */
   int next_open = 0;
   int now_checking = 0;
   int next_open_offd = 0;
   int now_checking_offd = 0;
   /* Per-thread and global counts of dropped entries. */
   int num_lost = 0;
   int num_lost_offd = 0;
   int num_lost_global = 0;
   int num_lost_global_offd = 0;
   /* New nonzero counts after truncation.
    * NOTE(review): P_diag_size is read below but never assigned here. */
   int P_diag_size;
   int P_offd_size;
   int num_elmts;
   int cnt, cnt_diag, cnt_offd;
   double row_sum;                                /* original row sum     */
   double scale;                                  /* rescale factor       */

   /* Threading variables. Entry i of num_lost_(offd_)per_thread holds the
    * number of dropped entries over thread i's row range. Cum_lost_per_thread
    * will temporarily store the cumulative number of dropped entries up to
    * each thread. */
   int my_thread_num, num_threadsID, start, stop;
   int * max_num_threads = hypre_CTAlloc(int, 1);
   int * cum_lost_per_thread;
   int * num_lost_per_thread;
   int * num_lost_offd_per_thread;

   /* Initialize threading variables */
   max_num_threads[0] = hypre_NumThreads();
   cum_lost_per_thread = hypre_CTAlloc(int, max_num_threads[0]);
   num_lost_per_thread = hypre_CTAlloc(int, max_num_threads[0]);
   num_lost_offd_per_thread = hypre_CTAlloc(int, max_num_threads[0]);
   for(i=0; i < max_num_threads[0]; i++)
   {
      num_lost_per_thread[i] = 0;
      num_lost_offd_per_thread[i] = 0;
   }

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel private(i,my_thread_num,num_threadsID,max_coef,j,start_j,row_sum,scale,num_lost,now_checking,next_open,num_lost_offd,now_checking_offd,next_open_offd,start,stop,cnt_diag,cnt_offd,num_elmts,cnt)
#endif
   {
      my_thread_num = hypre_GetThreadNum();
      num_threadsID = hypre_NumActiveThreads();

      /* Compute each thread's range of rows to truncate and compress. Note,
       * that i, j and data are all compressed as entries are dropped, but
       * that the compression only occurs locally over each thread's row
       * range. P_diag_i is only made globally consistent at the end of this
       * routine. During the dropping phases, P_diag_i[stop] will point to
       * the start of the next thread's row range. */

      /* my row range: an even split of n_fine rows; the last thread
       * absorbs the remainder. */
      start = (n_fine/num_threadsID)*my_thread_num;
      if (my_thread_num == num_threadsID-1)
      { stop = n_fine; }
      else
      { stop = (n_fine/num_threadsID)*(my_thread_num+1); }

      /*
       * Truncate based on truncation tolerance
       */
      /* NOTE(review): the truncation/compression loops that belong here
       * are missing from this copy; only this commented-out fragment of
       * their tail remains. */

      /*P_diag_i[n_fine] -= num_lost;
      P_offd_i[n_fine] -= num_lost_offd;
      } */

#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif

      /*
       * Synchronize and create new diag data structures
       */
      /* NOTE(review): num_lost_global is never assigned above, so this
       * branch is unreachable as written — confirm against upstream. */
      if (num_lost_global)
      {

#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif
         /* update P_diag_i with number of dropped entries by all lower ranked
          * threads */
         if(my_thread_num > 0)
         {
            for(i=start; i<stop; i++)
            {
               P_diag_i[i] -= cum_lost_per_thread[my_thread_num-1];
            }
         }

         /* Thread 0 swaps the old j/data arrays for the compressed ones. */
         if(my_thread_num == 0)
         {
            hypre_TFree(P_diag_j);
            hypre_TFree(P_diag_data);
            hypre_CSRMatrixJ(P_diag) = P_diag_j_new;
            hypre_CSRMatrixData(P_diag) = P_diag_data_new;
            hypre_CSRMatrixNumNonzeros(P_diag) = P_diag_size;
         }
      }

   } /* end parallel region */

   /* Release thread bookkeeping storage. */
   hypre_TFree(max_num_threads);
   hypre_TFree(cum_lost_per_thread);
   hypre_TFree(num_lost_per_thread);
   hypre_TFree(num_lost_offd_per_thread);

   return ierr;
}
|
|---|
| 155 |
|
|---|
/*--------------------------------------------------------------------------
 * hypre_qsort2abs
 *
 * Quicksort on the inclusive index range [left, right] that orders w by
 * DECREASING absolute value, permuting the companion array v in tandem so
 * that v[k] stays paired with w[k].
 *--------------------------------------------------------------------------*/
void hypre_qsort2abs( int *v,
                      double *w,
                      int left,
                      int right )
{
   int idx;
   int pivot_pos;

   /* Ranges of length 0 or 1 are already sorted. */
   if (right <= left)
   {
      return;
   }

   /* Choose the middle element as pivot and park it at the left end. */
   swap2(v, w, left, (left+right)/2);

   /* Partition: every entry with |w| larger than the pivot's is moved
    * into the prefix ending at pivot_pos. */
   pivot_pos = left;
   for (idx = left+1; idx <= right; idx++)
   {
      if (fabs(w[idx]) > fabs(w[left]))
      {
         pivot_pos++;
         swap2(v, w, pivot_pos, idx);
      }
   }

   /* Place the pivot in its final slot, then sort both sides. */
   swap2(v, w, left, pivot_pos);
   hypre_qsort2abs(v, w, left, pivot_pos-1);
   hypre_qsort2abs(v, w, pivot_pos+1, right);
}
|
|---|