| 1 | /*BHEADER**********************************************************************
|
|---|
| 2 | * Copyright (c) 2008, Lawrence Livermore National Security, LLC.
|
|---|
| 3 | * Produced at the Lawrence Livermore National Laboratory.
|
|---|
| 4 | * This file is part of HYPRE. See file COPYRIGHT for details.
|
|---|
| 5 | *
|
|---|
| 6 | * HYPRE is free software; you can redistribute it and/or modify it under the
|
|---|
| 7 | * terms of the GNU Lesser General Public License (as published by the Free
|
|---|
| 8 | * Software Foundation) version 2.1 dated February 1999.
|
|---|
| 9 | *
|
|---|
| 10 | * $Revision: 2.4 $
|
|---|
| 11 | ***********************************************************************EHEADER*/
|
|---|
| 12 |
|
|---|
| 13 |
|
|---|
| 14 | /******************************************************************************
|
|---|
| 15 | *
|
|---|
| 16 | * Structured inner product routine for overlapped grids. Computes the
|
|---|
| 17 | * inner product of two vectors over an overlapped grid.
|
|---|
| 18 | *
|
|---|
| 19 | *****************************************************************************/
|
|---|
| 20 |
|
|---|
| 21 | #include "headers.h"
|
|---|
| 22 |
|
|---|
| 23 |
|
|---|
| 24 | /*--------------------------------------------------------------------------
|
|---|
| 25 | * hypre_StructOverlapInnerProd
|
|---|
| 26 | *--------------------------------------------------------------------------*/
|
|---|
| 27 |
|
|---|
| 28 | #ifdef HYPRE_USE_PTHREADS
|
|---|
| 29 | double *local_result_ref[hypre_MAX_THREADS];
|
|---|
| 30 | #endif
|
|---|
| 31 |
|
|---|
| 32 | double final_innerprod_result_overlap;
|
|---|
| 33 |
|
|---|
| 34 |
|
|---|
| 35 | double
|
|---|
| 36 | hypre_StructOverlapInnerProd( hypre_StructVector *x,
|
|---|
| 37 | hypre_StructVector *y )
|
|---|
| 38 | {
|
|---|
| 39 | double local_result, overlap_result;
|
|---|
| 40 | double process_result;
|
|---|
| 41 |
|
|---|
| 42 | hypre_Box *x_data_box;
|
|---|
| 43 | hypre_Box *y_data_box;
|
|---|
| 44 |
|
|---|
| 45 | hypre_BoxArray *overlap_boxes;
|
|---|
| 46 |
|
|---|
| 47 | int xi;
|
|---|
| 48 | int yi;
|
|---|
| 49 |
|
|---|
| 50 | double *xp;
|
|---|
| 51 | double *yp;
|
|---|
| 52 |
|
|---|
| 53 | hypre_BoxArray *boxes;
|
|---|
| 54 | hypre_Box *boxi, *boxj, intersect_box;
|
|---|
| 55 | hypre_BoxNeighbors *neighbors= hypre_StructGridNeighbors(hypre_StructVectorGrid(y));
|
|---|
| 56 | int *neighbors_procs= hypre_BoxNeighborsProcs(neighbors);
|
|---|
| 57 | hypre_BoxArray *selected_nboxes;
|
|---|
| 58 | hypre_BoxArray *tmp_box_array, *tmp2_box_array;
|
|---|
| 59 |
|
|---|
| 60 | hypre_Index loop_size;
|
|---|
| 61 | hypre_IndexRef start;
|
|---|
| 62 | hypre_Index unit_stride;
|
|---|
| 63 |
|
|---|
| 64 | int i, j;
|
|---|
| 65 | int myid;
|
|---|
| 66 | int boxarray_size;
|
|---|
| 67 | int loopi, loopj, loopk;
|
|---|
| 68 | #ifdef HYPRE_USE_PTHREADS
|
|---|
| 69 | int threadid = hypre_GetThreadID();
|
|---|
| 70 | #endif
|
|---|
| 71 |
|
|---|
| 72 |
|
|---|
| 73 | local_result = 0.0;
|
|---|
| 74 | process_result = 0.0;
|
|---|
| 75 | hypre_SetIndex(unit_stride, 1, 1, 1);
|
|---|
| 76 |
|
|---|
| 77 | MPI_Comm_rank(hypre_StructVectorComm(y), &myid);
|
|---|
| 78 |
|
|---|
| 79 | /*-----------------------------------------------------------------------
|
|---|
| 80 | * Determine the overlapped boxes on this local processor.
|
|---|
| 81 | *-----------------------------------------------------------------------*/
|
|---|
| 82 | boxes = hypre_StructGridBoxes(hypre_StructVectorGrid(y));
|
|---|
| 83 | boxarray_size= hypre_BoxArraySize(boxes);
|
|---|
| 84 |
|
|---|
| 85 | /*-----------------------------------------------------------------------
|
|---|
| 86 | * To compute the inner product over this local processor, given a box,
|
|---|
| 87 | * the inner product between x & y is computed over the whole box and
|
|---|
| 88 | * over any overlapping between this box and overlap_boxes. The latter
|
|---|
| 89 | * result is subtracted from the former. Overlapping between more than
|
|---|
| 90 | * two boxes are handled.
|
|---|
| 91 | *-----------------------------------------------------------------------*/
|
|---|
| 92 | hypre_ForBoxI(i, boxes)
|
|---|
| 93 | {
|
|---|
| 94 | boxi = hypre_BoxArrayBox(boxes, i);
|
|---|
| 95 | start = hypre_BoxIMin(boxi);
|
|---|
| 96 |
|
|---|
| 97 | x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
|
|---|
| 98 | y_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(y), i);
|
|---|
| 99 |
|
|---|
| 100 | xp = hypre_StructVectorBoxData(x, i);
|
|---|
| 101 | yp = hypre_StructVectorBoxData(y, i);
|
|---|
| 102 |
|
|---|
| 103 | hypre_BoxGetSize(boxi, loop_size);
|
|---|
| 104 |
|
|---|
| 105 | #ifdef HYPRE_USE_PTHREADS
|
|---|
| 106 | local_result_ref[threadid] = &local_result;
|
|---|
| 107 | #endif
|
|---|
| 108 |
|
|---|
| 109 | hypre_BoxLoop2Begin(loop_size,
|
|---|
| 110 | x_data_box, start, unit_stride, xi,
|
|---|
| 111 | y_data_box, start, unit_stride, yi);
|
|---|
| 112 |
|
|---|
| 113 | hypre_BoxLoop2For(loopi, loopj, loopk, xi, yi)
|
|---|
| 114 | {
|
|---|
| 115 | local_result += xp[xi] * yp[yi];
|
|---|
| 116 | }
|
|---|
| 117 | hypre_BoxLoop2End(xi, yi);
|
|---|
| 118 |
|
|---|
| 119 | /*--------------------------------------------------------------------
|
|---|
| 120 | * intersect all boxes from (i+1) to boxarray_size
|
|---|
| 121 | *--------------------------------------------------------------------*/
|
|---|
| 122 | overlap_boxes= hypre_BoxArrayCreate(0);
|
|---|
| 123 | for (j= (i+1); j< boxarray_size; j++)
|
|---|
| 124 | {
|
|---|
| 125 | boxj= hypre_BoxArrayBox(boxes, j);
|
|---|
| 126 | hypre_IntersectBoxes(boxi, boxj, &intersect_box);
|
|---|
| 127 |
|
|---|
| 128 | if (hypre_BoxVolume(&intersect_box))
|
|---|
| 129 | {
|
|---|
| 130 | hypre_AppendBox(&intersect_box, overlap_boxes);
|
|---|
| 131 | }
|
|---|
| 132 | }
|
|---|
| 133 |
|
|---|
| 134 | if (hypre_BoxArraySize(overlap_boxes) > 1)
|
|---|
| 135 | {
|
|---|
| 136 | hypre_UnionBoxes(overlap_boxes);
|
|---|
| 137 | }
|
|---|
| 138 |
|
|---|
| 139 | /*--------------------------------------------------------------------
|
|---|
| 140 | * compute inner product over overlap
|
|---|
| 141 | *--------------------------------------------------------------------*/
|
|---|
| 142 | if (hypre_BoxArraySize(overlap_boxes))
|
|---|
| 143 | {
|
|---|
| 144 | overlap_result= 0.0;
|
|---|
| 145 | hypre_ForBoxI(j, overlap_boxes)
|
|---|
| 146 | {
|
|---|
| 147 | boxj = hypre_BoxArrayBox(overlap_boxes, j);
|
|---|
| 148 | start = hypre_BoxIMin(boxj);
|
|---|
| 149 |
|
|---|
| 150 | hypre_BoxGetSize(boxj, loop_size);
|
|---|
| 151 | hypre_BoxLoop2Begin(loop_size,
|
|---|
| 152 | x_data_box, start, unit_stride, xi,
|
|---|
| 153 | y_data_box, start, unit_stride, yi);
|
|---|
| 154 |
|
|---|
| 155 | hypre_BoxLoop2For(loopi, loopj, loopk, xi, yi)
|
|---|
| 156 | {
|
|---|
| 157 | overlap_result += xp[xi] * yp[yi];
|
|---|
| 158 | }
|
|---|
| 159 | hypre_BoxLoop2End(xi, yi);
|
|---|
| 160 | }
|
|---|
| 161 | local_result-= overlap_result;
|
|---|
| 162 | }
|
|---|
| 163 | hypre_BoxArrayDestroy(overlap_boxes);
|
|---|
| 164 | }
|
|---|
| 165 |
|
|---|
| 166 | /*-----------------------------------------------------------------------
|
|---|
| 167 | * Determine the across processor overlap. The inner product is computed
|
|---|
| 168 | * and subtracted on processors that share the overlap except the one
|
|---|
| 169 | * with the lowest processor id. Therefore, on this processor, we need
|
|---|
| 170 | * to subtract only overlaps with boxes on processors with id < myid.
|
|---|
| 171 | *-----------------------------------------------------------------------*/
|
|---|
| 172 | boxes= hypre_BoxNeighborsBoxes(neighbors);
|
|---|
| 173 | selected_nboxes= hypre_BoxArrayCreate(0);
|
|---|
| 174 |
|
|---|
| 175 | /* extract only boxes on processors with ids < myid. */
|
|---|
| 176 | hypre_ForBoxI(i, boxes)
|
|---|
| 177 | {
|
|---|
| 178 | if (neighbors_procs[i] < myid)
|
|---|
| 179 | {
|
|---|
| 180 | hypre_AppendBox(hypre_BoxArrayBox(boxes, i), selected_nboxes);
|
|---|
| 181 | }
|
|---|
| 182 | }
|
|---|
| 183 |
|
|---|
| 184 | boxes= hypre_StructGridBoxes(hypre_StructVectorGrid(y));
|
|---|
| 185 | overlap_boxes= hypre_BoxArrayCreate(0);
|
|---|
| 186 | hypre_ForBoxI(i, boxes)
|
|---|
| 187 | {
|
|---|
| 188 | boxi= hypre_BoxArrayBox(boxes, i);
|
|---|
| 189 | hypre_ForBoxI(j, selected_nboxes)
|
|---|
| 190 | {
|
|---|
| 191 | boxj= hypre_BoxArrayBox(selected_nboxes, j);
|
|---|
| 192 |
|
|---|
| 193 | hypre_IntersectBoxes(boxi, boxj, &intersect_box);
|
|---|
| 194 | if (hypre_BoxVolume(&intersect_box))
|
|---|
| 195 | {
|
|---|
| 196 | hypre_AppendBox(&intersect_box, overlap_boxes);
|
|---|
| 197 | }
|
|---|
| 198 | }
|
|---|
| 199 | }
|
|---|
| 200 | hypre_BoxArrayDestroy(selected_nboxes);
|
|---|
| 201 |
|
|---|
| 202 | /*-----------------------------------------------------------------------
|
|---|
| 203 | * Union the overlap_boxes and then begin to compute and subtract chunks
|
|---|
| 204 | * and norms.
|
|---|
| 205 | *-----------------------------------------------------------------------*/
|
|---|
| 206 | if (hypre_BoxArraySize(overlap_boxes) > 1)
|
|---|
| 207 | {
|
|---|
| 208 | hypre_UnionBoxes(overlap_boxes);
|
|---|
| 209 | }
|
|---|
| 210 |
|
|---|
| 211 | if (hypre_BoxArraySize(overlap_boxes))
|
|---|
| 212 | {
|
|---|
| 213 | overlap_result= 0.0;
|
|---|
| 214 | hypre_ForBoxI(i, boxes)
|
|---|
| 215 | {
|
|---|
| 216 | boxi = hypre_BoxArrayBox(boxes, i);
|
|---|
| 217 |
|
|---|
| 218 | x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
|
|---|
| 219 | y_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(y), i);
|
|---|
| 220 | xp = hypre_StructVectorBoxData(x, i);
|
|---|
| 221 | yp = hypre_StructVectorBoxData(y, i);
|
|---|
| 222 |
|
|---|
| 223 | tmp_box_array= hypre_BoxArrayCreate(0);
|
|---|
| 224 | hypre_ForBoxI(j, overlap_boxes)
|
|---|
| 225 | {
|
|---|
| 226 | boxj= hypre_BoxArrayBox(overlap_boxes, j);
|
|---|
| 227 |
|
|---|
| 228 | hypre_IntersectBoxes(boxi, boxj, &intersect_box);
|
|---|
| 229 | if (hypre_BoxVolume(&intersect_box))
|
|---|
| 230 | {
|
|---|
| 231 | start= hypre_BoxIMin(&intersect_box);
|
|---|
| 232 | hypre_BoxGetSize(&intersect_box, loop_size);
|
|---|
| 233 |
|
|---|
| 234 | hypre_BoxLoop2Begin(loop_size,
|
|---|
| 235 | x_data_box, start, unit_stride, xi,
|
|---|
| 236 | y_data_box, start, unit_stride, yi);
|
|---|
| 237 |
|
|---|
| 238 | hypre_BoxLoop2For(loopi, loopj, loopk, xi, yi)
|
|---|
| 239 | {
|
|---|
| 240 | overlap_result += xp[xi] * yp[yi];
|
|---|
| 241 | }
|
|---|
| 242 | hypre_BoxLoop2End(xi, yi);
|
|---|
| 243 |
|
|---|
| 244 | hypre_AppendBox(&intersect_box, tmp_box_array);
|
|---|
| 245 |
|
|---|
| 246 | } /* if (hypre_BoxVolume(&intersect_box)) */
|
|---|
| 247 | } /* hypre_ForBoxI(j, overlap_boxes) */
|
|---|
| 248 |
|
|---|
| 249 | /*-------------------------------------------------------------------------
|
|---|
| 250 | * Subtract the intersection boxes so that norm on higher degree overlaps
|
|---|
| 251 | * on this processor are computed only once.
|
|---|
| 252 | *-------------------------------------------------------------------------*/
|
|---|
| 253 | tmp2_box_array= hypre_BoxArrayCreate(0);
|
|---|
| 254 | hypre_SubtractBoxArrays(overlap_boxes, tmp_box_array, tmp2_box_array);
|
|---|
| 255 | hypre_BoxArrayDestroy(tmp_box_array);
|
|---|
| 256 | hypre_BoxArrayDestroy(tmp2_box_array);
|
|---|
| 257 |
|
|---|
| 258 | } /* hypre_ForBoxI(i, boxes) */
|
|---|
| 259 |
|
|---|
| 260 | local_result-= overlap_result;
|
|---|
| 261 | } /* if (hypre_BoxArraySize(overlap_boxes)) */
|
|---|
| 262 |
|
|---|
| 263 | hypre_BoxArrayDestroy(overlap_boxes);
|
|---|
| 264 |
|
|---|
| 265 | #ifdef HYPRE_USE_PTHREADS
|
|---|
| 266 | if (threadid != hypre_NumThreads)
|
|---|
| 267 | {
|
|---|
| 268 | for (i = 0; i < hypre_NumThreads; i++)
|
|---|
| 269 | process_result += *local_result_ref[i];
|
|---|
| 270 | }
|
|---|
| 271 | else
|
|---|
| 272 | process_result = *local_result_ref[threadid];
|
|---|
| 273 | #else
|
|---|
| 274 | process_result = local_result;
|
|---|
| 275 | #endif
|
|---|
| 276 |
|
|---|
| 277 |
|
|---|
| 278 | MPI_Allreduce(&process_result, &final_innerprod_result_overlap, 1,
|
|---|
| 279 | MPI_DOUBLE, MPI_SUM, hypre_StructVectorComm(x));
|
|---|
| 280 |
|
|---|
| 281 |
|
|---|
| 282 | #ifdef HYPRE_USE_PTHREADS
|
|---|
| 283 | if (threadid == 0 || threadid == hypre_NumThreads)
|
|---|
| 284 | #endif
|
|---|
| 285 | hypre_IncFLOPCount(2*hypre_StructVectorGlobalSize(x));
|
|---|
| 286 |
|
|---|
| 287 | return final_innerprod_result_overlap;
|
|---|
| 288 | }
|
|---|