source: CIVL/mods/dev.civl.abc/examples/fevs/gausselim/gausselim_rowdist.c

main
Last change on this file was aad342c, checked in by Stephen Siegel <siegel@…>, 3 years ago

Performing huge refactor to incorporate ABC, GMC, and SARL into CIVL repo and use Java modules.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5664 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 9.4 KB
Line 
1/* FEVS: A Functional Equivalence Verification Suite for High-Performance
2 * Scientific Computing
3 *
4 * Copyright (C) 2004-2010, Stephen F. Siegel, Timothy K. Zirkel,
5 * University of Delaware, University of Massachusetts
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 3 of the
10 * License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 * 02110-1301 USA.
21 */
22
23
24/*
25 * Name: gausselim_par.c
26 * Date: 21 Jul 2004
27 * Revised: 24 Jul 2004
28 * Author: Anastasia V. Mironova <mironova@laser.cs.umass.edu>
29 * Author: Stephen Siegel <siegel@cs.umass.edu>
30 * Maintainer: Anastasia V. Mironova <mironova@laser.cs.umass.edu>
31 * Reader:
32 *
33 * Compile: mpicc gausselim_par.c
34 * Run: mpirun -np m a.out n m A[0,0] A[0,1] ... A[n-1,m-1]
35 *
36 * n : number of rows in matrix
37 * m : number of columns in matrix
38 * A[0,0] .. A[n-1,m-1] : entries of matrix (doubles)
39 *
40 * Description: This is a parallel implementation of the Gauss-Jordan
41 * elimination algorithm. The input is an n x m matrix A of double
42 * precision floating point numbers. At termination, A has been
43 * placed in reduced row-echelon form, where the rows are stored on
44 * different processes. This implementation is based on the sequential
45 * algorithm described in many places; see for example Howard Anton,
46 * Elementary Linear Algebra, Wiley, 1977, Section 1.2.In this
47 * implementation a modification to this algorithm has been made to
48 * perform backward subsitution together with the process of reduction
49 * to row-echelon form.
50 *
51 * The rows of the matrix are distributed among the processes so that
52 * the process of rank i has the i-th row. For example, if A is the
53 * 2x3 matrix
54 *
55 * A[0,0] A[0,1] A[0,2]
56 * A[1,0] A[1,1] A[1,2]
57 *
58 * process 0 has A[0,0], A[0,1], A[0,2], and process 1 has A[1,0],
59 * A[1,1], A[1,2]. Naturally, the number of processes must equal the
60 * number of rows.
61 *
62 * Once the computation has taken place, the matrix is in the reduced
63 * row-echelon form and distributed among the processes so that the
64 * process with the lowest rank has the row with the left-most pivot
65 * entry.
66 */
67
68#include "mpi.h"
69#include <stdio.h>
70#include <string.h>
71#include <stdlib.h>
72
73/* Prints the given matrix of doubles to stdout. The parameter numRows
74 * gives the number of rows in the matrix, and numCols the number of
75 * columns. This function uses MPI_Send and MPI_Recv to send all the
76 * rows to process 0, which receives them in the proper order and
77 * prints them out. The string message is printed out by process 0
78 * first.
79 */
80void printMatrix(char* message, double* matrix, int numRows, int numCols) {
81 int i,j;
82 int rank;
83 MPI_Status status;
84
85 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
86 if (rank == 0) {
87 double* row = (double*)malloc(numCols * sizeof(double));
88
89 printf("%s",message);
90 for (i = 0; i < numRows; i++) {
91 if (i == 0) {
92 for (j = 0; j < numCols; j++) {
93 row[j] = matrix[j];
94 }
95 }
96 else {
97 MPI_Recv(row, numCols, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &status);
98 }
99 for (j = 0; j < numCols; j++) {
100 printf("%lf ", row[j]);
101 }
102 printf("\n");
103 }
104 fprintf(stdout, "\n");
105 fflush(stdout);
106 free(row);
107 }
108 else {
109 MPI_Send(matrix, numCols, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
110 }
111}
112
113
114/* Performs Gaussian elimination on the given matrix of doubles. The
115 * parameter numRows gives the number of rows in the matrix, and
116 * numCols the number of columns. Upon return, the matrix will be in
117 * reduced row-echelon form.
118 */
119int gausselim(double* matrix, int numRows, int numCols, int debug) {
120 int top = 0; // the current top row of the matrix
121 int col = 0; // column index of the current pivot
122 int pivotRow = 0; // row index of current pivot
123 double pivot = 0.0; // the value of the current pivot
124 int j = 0; // loop variable over columns of matrix
125 double tmp = 0.0; // temporary double variable
126 MPI_Status status; // status object needed for receives
127 int rank; // rank of this process
128 int nprocs; // number of processes
129 double* toprow = (double*)malloc(numCols * sizeof(double));
130
131 MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
132 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
133
134 for (top=col=0; top<numRows && col< numCols; top++, col++) {
135
136 /* At this point we know that the submatrix consisting of the
137 * first top rows of A is in reduced row-echelon form. We will now
138 * consider the submatrix B consisting of the remaining rows. We
139 * know, additionally, that the first col columns of B are
140 * all zero.
141 */
142
143 if (debug && rank == 0) {
144 printf("Top: %d\n", top);
145 }
146
147 /* Step 1: Locate the leftmost column of B that does not consist
148 * of all zeros, if one exists. The top nonzero entry of this
149 * column is the pivot. */
150
151 for (; col < numCols; col++) {
152 if (matrix[col] != 0.0 && rank >= top) {
153 MPI_Allreduce(&rank, &pivotRow, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
154 }
155 else {
156 MPI_Allreduce(&nprocs, &pivotRow, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
157 }
158 if (pivotRow < nprocs){
159 break;
160 }
161 }
162
163 if (col >= numCols) {
164 break;
165 }
166
167 if (debug) {
168 if (rank == 0) {
169 printf("Step 1 result: col=%d, pivotRow=%d\n\n", col, pivotRow);
170 }
171 }
172
173 /* At this point we are guaranteed that pivot = A[pivotRow,col] is
174 * nonzero. We also know that all the columns of B to the left of
175 * col consist entirely of zeros. */
176
177 /* Step 2: Interchange the top row with the pivot row, if
178 * necessary, so that the entry at the top of the column found in
179 * Step 1 is nonzero. */
180
181 if (pivotRow != top) {
182 if (rank == top) {
183 MPI_Sendrecv_replace(matrix, numCols, MPI_DOUBLE, pivotRow, 0,
184 pivotRow, 0, MPI_COMM_WORLD, &status);
185 }
186 else if (rank == pivotRow) {
187 MPI_Sendrecv_replace(matrix, numCols, MPI_DOUBLE, top, 0,
188 top, 0, MPI_COMM_WORLD, &status);
189 }
190 }
191
192 if (rank == top) {
193 pivot = matrix[col];
194 }
195
196 if (debug) {
197 printMatrix("Step 2 result: \n", matrix, numRows, numCols);
198 }
199
200 /* At this point we are guaranteed that A[top,col] = pivot is
201 * nonzero. Also, we know that (i>=top and j<col) implies
202 * A[i,j] = 0. */
203
204 /* Step 3: Divide the top row by pivot in order to introduce a
205 * leading 1. */
206
207 if (rank == top) {
208 for (j = col; j < numCols; j++) {
209 matrix[j] /= pivot;
210 toprow[j] = matrix[j];
211 }
212 }
213
214 if (debug) {
215 printMatrix("Step 3 result:\n", matrix, numRows, numCols);
216 }
217
218 /* At this point we are guaranteed that A[top,col] is 1.0,
219 * assuming that floating point arithmetic guarantees that a/a
220 * equals 1.0 for any nonzero double a. */
221
222 MPI_Bcast(toprow, numCols, MPI_DOUBLE, top, MPI_COMM_WORLD);
223
224 /* Step 4: Add suitable multiples of the top row to rows below so
225 * that all entries below the leading 1 become zero. */
226
227 if (rank != top) {
228 tmp = matrix[col];
229 for (j = col; j < numCols; j++) {
230 matrix[j] -= toprow[j]*tmp;
231 }
232 }
233
234 if (debug) {
235 printMatrix("Step 4 result: \n", matrix, numRows, numCols);
236 }
237 }
238 free(toprow);
239}
240
241
242
243/*
244 * Usage: mpirun -np m a.out debug n m A[0,0] A[0,1] ... A[n-1,m-1]
245 *
246 * debug : debug flag, triggers output to screen at every step if non-zero
247 * n : number of rows in matrix
248 * m : number of columns in matrix
249 * A[0,0] .. A[n-1,m-1] : entries of matrix (doubles)
250 *
251 * Computes reduced row-echelon form and prints intermediate steps.
252 */
253int main(int argc, char** argv) {
254 int numRows;
255 int numCols;
256 int j;
257 double* matrix;
258 int rank, nprocs;
259 int debug = 0;
260
261 MPI_Init(&argc, &argv);
262 MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
263 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
264 if (argc < 3) {
265 if (rank == 0) {
266 printf("Too few arguments.\n");
267 printf("Usage: ges [-debug] n m A[0,0] A[0,1] ... A[n-1,m-1]\n");
268 printf(" n : number of rows in matrix\n");
269 printf(" m : number of columns in matrix\n");
270 printf(" A[0,0] .. A[n-1,m-1] : entries of matrix (doubles)\n");
271 }
272 exit(1);
273 }
274 if (!strcmp("-debug", argv[1])) {
275 debug = 1;
276 }
277 sscanf(argv[1+debug], "%d", &numRows);
278 sscanf(argv[2+debug], "%d", &numCols);
279 if (argc != 3 + debug + numRows*numCols) {
280 if (rank == 0) {
281 printf("Incorrect number of matrix entries: %d expected, %d given.\n",
282 numRows*numCols, argc-3-debug);
283 }
284 exit(1);
285 }
286 if (nprocs != numRows) {
287 if (rank == 0) {
288 printf("Number of processes must equal the number of rows: %d != %d \n",
289 nprocs, numRows);
290 }
291 exit(1);
292 }
293 matrix = (double *) malloc(numCols * sizeof(double));
294 for (j = 0; j < numCols; j++) {
295 sscanf(argv[rank*numCols+j+3+debug], "%lf", &matrix[j]);
296 }
297 printMatrix("Original matrix:\n", matrix, numRows, numCols);
298 gausselim(matrix, numRows, numCols, debug);
299 printMatrix("Reduced row-echelon form:\n", matrix, numRows, numCols);
300 MPI_Finalize();
301}
Note: See TracBrowser for help on using the repository browser.