| 1 | /* FEVS: A Functional Equivalence Verification Suite for High-Performance
|
|---|
| 2 | * Scientific Computing
|
|---|
| 3 | *
|
|---|
| 4 | * Copyright (C) 2004-2010, Stephen F. Siegel, Timothy K. Zirkel,
|
|---|
| 5 | * University of Delaware, University of Massachusetts
|
|---|
| 6 | *
|
|---|
| 7 | * This program is free software; you can redistribute it and/or
|
|---|
| 8 | * modify it under the terms of the GNU General Public License as
|
|---|
| 9 | * published by the Free Software Foundation; either version 3 of the
|
|---|
| 10 | * License, or (at your option) any later version.
|
|---|
| 11 | *
|
|---|
| 12 | * This program is distributed in the hope that it will be useful, but
|
|---|
| 13 | * WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|---|
| 15 | * General Public License for more details.
|
|---|
| 16 | *
|
|---|
| 17 | * You should have received a copy of the GNU General Public License
|
|---|
| 18 | * along with this program; if not, write to the Free Software
|
|---|
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|---|
| 20 | * 02110-1301 USA.
|
|---|
| 21 | */
|
|---|
| 22 |
|
|---|
| 23 |
|
|---|
| 24 | /*
|
|---|
| 25 | * Name: gausselim_par.c
|
|---|
| 26 | * Date: 21 Jul 2004
|
|---|
| 27 | * Revised: 24 Jul 2004
|
|---|
| 28 | * Author: Anastasia V. Mironova <mironova@laser.cs.umass.edu>
|
|---|
| 29 | * Author: Stephen Siegel <siegel@cs.umass.edu>
|
|---|
| 30 | * Maintainer: Anastasia V. Mironova <mironova@laser.cs.umass.edu>
|
|---|
| 31 | * Reader:
|
|---|
| 32 | *
|
|---|
| 33 | * Compile: mpicc gausselim_par.c
|
|---|
| 34 | * Run: mpirun -np m a.out n m A[0,0] A[0,1] ... A[n-1,m-1]
|
|---|
| 35 | *
|
|---|
| 36 | * n : number of rows in matrix
|
|---|
| 37 | * m : number of columns in matrix
|
|---|
| 38 | * A[0,0] .. A[n-1,m-1] : entries of matrix (doubles)
|
|---|
| 39 | *
|
|---|
| 40 | * Description: This is a parallel implementation of the Gauss-Jordan
|
|---|
| 41 | * elimination algorithm. The input is an n x m matrix A of double
|
|---|
| 42 | * precision floating point numbers. At termination, A has been
|
|---|
| 43 | * placed in reduced row-echelon form, where the rows are stored on
|
|---|
| 44 | * different processes. This implementation is based on the sequential
|
|---|
| 45 | * algorithm described in many places; see for example Howard Anton,
|
|---|
| 46 | * Elementary Linear Algebra, Wiley, 1977, Section 1.2.In this
|
|---|
| 47 | * implementation a modification to this algorithm has been made to
|
|---|
| 48 | * perform backward subsitution together with the process of reduction
|
|---|
| 49 | * to row-echelon form.
|
|---|
| 50 | *
|
|---|
| 51 | * The rows of the matrix are distributed among the processes so that
|
|---|
| 52 | * the process of rank i has the i-th row. For example, if A is the
|
|---|
| 53 | * 2x3 matrix
|
|---|
| 54 | *
|
|---|
| 55 | * A[0,0] A[0,1] A[0,2]
|
|---|
| 56 | * A[1,0] A[1,1] A[1,2]
|
|---|
| 57 | *
|
|---|
| 58 | * process 0 has A[0,0], A[0,1], A[0,2], and process 1 has A[1,0],
|
|---|
| 59 | * A[1,1], A[1,2]. Naturally, the number of processes must equal the
|
|---|
| 60 | * number of rows.
|
|---|
| 61 | *
|
|---|
| 62 | * Once the computation has taken place, the matrix is in the reduced
|
|---|
| 63 | * row-echelon form and distributed among the processes so that the
|
|---|
| 64 | * process with the lowest rank has the row with the left-most pivot
|
|---|
| 65 | * entry.
|
|---|
| 66 | */
|
|---|
| 67 |
|
|---|
| 68 | #include "mpi.h"
|
|---|
| 69 | #include <stdio.h>
|
|---|
| 70 | #include <string.h>
|
|---|
| 71 | #include <stdlib.h>
|
|---|
| 72 |
|
|---|
| 73 | /* Prints the given matrix of doubles to stdout. The parameter numRows
|
|---|
| 74 | * gives the number of rows in the matrix, and numCols the number of
|
|---|
| 75 | * columns. This function uses MPI_Send and MPI_Recv to send all the
|
|---|
| 76 | * rows to process 0, which receives them in the proper order and
|
|---|
| 77 | * prints them out. The string message is printed out by process 0
|
|---|
| 78 | * first.
|
|---|
| 79 | */
|
|---|
| 80 | void printMatrix(char* message, double* matrix, int numRows, int numCols) {
|
|---|
| 81 | int i,j;
|
|---|
| 82 | int rank;
|
|---|
| 83 | MPI_Status status;
|
|---|
| 84 |
|
|---|
| 85 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
|---|
| 86 | if (rank == 0) {
|
|---|
| 87 | double* row = (double*)malloc(numCols * sizeof(double));
|
|---|
| 88 |
|
|---|
| 89 | printf("%s",message);
|
|---|
| 90 | for (i = 0; i < numRows; i++) {
|
|---|
| 91 | if (i == 0) {
|
|---|
| 92 | for (j = 0; j < numCols; j++) {
|
|---|
| 93 | row[j] = matrix[j];
|
|---|
| 94 | }
|
|---|
| 95 | }
|
|---|
| 96 | else {
|
|---|
| 97 | MPI_Recv(row, numCols, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &status);
|
|---|
| 98 | }
|
|---|
| 99 | for (j = 0; j < numCols; j++) {
|
|---|
| 100 | printf("%lf ", row[j]);
|
|---|
| 101 | }
|
|---|
| 102 | printf("\n");
|
|---|
| 103 | }
|
|---|
| 104 | fprintf(stdout, "\n");
|
|---|
| 105 | fflush(stdout);
|
|---|
| 106 | free(row);
|
|---|
| 107 | }
|
|---|
| 108 | else {
|
|---|
| 109 | MPI_Send(matrix, numCols, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
|
|---|
| 110 | }
|
|---|
| 111 | }
|
|---|
| 112 |
|
|---|
| 113 |
|
|---|
| 114 | /* Performs Gaussian elimination on the given matrix of doubles. The
|
|---|
| 115 | * parameter numRows gives the number of rows in the matrix, and
|
|---|
| 116 | * numCols the number of columns. Upon return, the matrix will be in
|
|---|
| 117 | * reduced row-echelon form.
|
|---|
| 118 | */
|
|---|
| 119 | int gausselim(double* matrix, int numRows, int numCols, int debug) {
|
|---|
| 120 | int top = 0; // the current top row of the matrix
|
|---|
| 121 | int col = 0; // column index of the current pivot
|
|---|
| 122 | int pivotRow = 0; // row index of current pivot
|
|---|
| 123 | double pivot = 0.0; // the value of the current pivot
|
|---|
| 124 | int j = 0; // loop variable over columns of matrix
|
|---|
| 125 | double tmp = 0.0; // temporary double variable
|
|---|
| 126 | MPI_Status status; // status object needed for receives
|
|---|
| 127 | int rank; // rank of this process
|
|---|
| 128 | int nprocs; // number of processes
|
|---|
| 129 | double* toprow = (double*)malloc(numCols * sizeof(double));
|
|---|
| 130 |
|
|---|
| 131 | MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
|---|
| 132 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
|---|
| 133 |
|
|---|
| 134 | for (top=col=0; top<numRows && col< numCols; top++, col++) {
|
|---|
| 135 |
|
|---|
| 136 | /* At this point we know that the submatrix consisting of the
|
|---|
| 137 | * first top rows of A is in reduced row-echelon form. We will now
|
|---|
| 138 | * consider the submatrix B consisting of the remaining rows. We
|
|---|
| 139 | * know, additionally, that the first col columns of B are
|
|---|
| 140 | * all zero.
|
|---|
| 141 | */
|
|---|
| 142 |
|
|---|
| 143 | if (debug && rank == 0) {
|
|---|
| 144 | printf("Top: %d\n", top);
|
|---|
| 145 | }
|
|---|
| 146 |
|
|---|
| 147 | /* Step 1: Locate the leftmost column of B that does not consist
|
|---|
| 148 | * of all zeros, if one exists. The top nonzero entry of this
|
|---|
| 149 | * column is the pivot. */
|
|---|
| 150 |
|
|---|
| 151 | for (; col < numCols; col++) {
|
|---|
| 152 | if (matrix[col] != 0.0 && rank >= top) {
|
|---|
| 153 | MPI_Allreduce(&rank, &pivotRow, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
|
|---|
| 154 | }
|
|---|
| 155 | else {
|
|---|
| 156 | MPI_Allreduce(&nprocs, &pivotRow, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
|
|---|
| 157 | }
|
|---|
| 158 | if (pivotRow < nprocs){
|
|---|
| 159 | break;
|
|---|
| 160 | }
|
|---|
| 161 | }
|
|---|
| 162 |
|
|---|
| 163 | if (col >= numCols) {
|
|---|
| 164 | break;
|
|---|
| 165 | }
|
|---|
| 166 |
|
|---|
| 167 | if (debug) {
|
|---|
| 168 | if (rank == 0) {
|
|---|
| 169 | printf("Step 1 result: col=%d, pivotRow=%d\n\n", col, pivotRow);
|
|---|
| 170 | }
|
|---|
| 171 | }
|
|---|
| 172 |
|
|---|
| 173 | /* At this point we are guaranteed that pivot = A[pivotRow,col] is
|
|---|
| 174 | * nonzero. We also know that all the columns of B to the left of
|
|---|
| 175 | * col consist entirely of zeros. */
|
|---|
| 176 |
|
|---|
| 177 | /* Step 2: Interchange the top row with the pivot row, if
|
|---|
| 178 | * necessary, so that the entry at the top of the column found in
|
|---|
| 179 | * Step 1 is nonzero. */
|
|---|
| 180 |
|
|---|
| 181 | if (pivotRow != top) {
|
|---|
| 182 | if (rank == top) {
|
|---|
| 183 | MPI_Sendrecv_replace(matrix, numCols, MPI_DOUBLE, pivotRow, 0,
|
|---|
| 184 | pivotRow, 0, MPI_COMM_WORLD, &status);
|
|---|
| 185 | }
|
|---|
| 186 | else if (rank == pivotRow) {
|
|---|
| 187 | MPI_Sendrecv_replace(matrix, numCols, MPI_DOUBLE, top, 0,
|
|---|
| 188 | top, 0, MPI_COMM_WORLD, &status);
|
|---|
| 189 | }
|
|---|
| 190 | }
|
|---|
| 191 |
|
|---|
| 192 | if (rank == top) {
|
|---|
| 193 | pivot = matrix[col];
|
|---|
| 194 | }
|
|---|
| 195 |
|
|---|
| 196 | if (debug) {
|
|---|
| 197 | printMatrix("Step 2 result: \n", matrix, numRows, numCols);
|
|---|
| 198 | }
|
|---|
| 199 |
|
|---|
| 200 | /* At this point we are guaranteed that A[top,col] = pivot is
|
|---|
| 201 | * nonzero. Also, we know that (i>=top and j<col) implies
|
|---|
| 202 | * A[i,j] = 0. */
|
|---|
| 203 |
|
|---|
| 204 | /* Step 3: Divide the top row by pivot in order to introduce a
|
|---|
| 205 | * leading 1. */
|
|---|
| 206 |
|
|---|
| 207 | if (rank == top) {
|
|---|
| 208 | for (j = col; j < numCols; j++) {
|
|---|
| 209 | matrix[j] /= pivot;
|
|---|
| 210 | toprow[j] = matrix[j];
|
|---|
| 211 | }
|
|---|
| 212 | }
|
|---|
| 213 |
|
|---|
| 214 | if (debug) {
|
|---|
| 215 | printMatrix("Step 3 result:\n", matrix, numRows, numCols);
|
|---|
| 216 | }
|
|---|
| 217 |
|
|---|
| 218 | /* At this point we are guaranteed that A[top,col] is 1.0,
|
|---|
| 219 | * assuming that floating point arithmetic guarantees that a/a
|
|---|
| 220 | * equals 1.0 for any nonzero double a. */
|
|---|
| 221 |
|
|---|
| 222 | MPI_Bcast(toprow, numCols, MPI_DOUBLE, top, MPI_COMM_WORLD);
|
|---|
| 223 |
|
|---|
| 224 | /* Step 4: Add suitable multiples of the top row to rows below so
|
|---|
| 225 | * that all entries below the leading 1 become zero. */
|
|---|
| 226 |
|
|---|
| 227 | if (rank != top) {
|
|---|
| 228 | tmp = matrix[col];
|
|---|
| 229 | for (j = col; j < numCols; j++) {
|
|---|
| 230 | matrix[j] -= toprow[j]*tmp;
|
|---|
| 231 | }
|
|---|
| 232 | }
|
|---|
| 233 |
|
|---|
| 234 | if (debug) {
|
|---|
| 235 | printMatrix("Step 4 result: \n", matrix, numRows, numCols);
|
|---|
| 236 | }
|
|---|
| 237 | }
|
|---|
| 238 | free(toprow);
|
|---|
| 239 | }
|
|---|
| 240 |
|
|---|
| 241 |
|
|---|
| 242 |
|
|---|
| 243 | /*
|
|---|
| 244 | * Usage: mpirun -np m a.out debug n m A[0,0] A[0,1] ... A[n-1,m-1]
|
|---|
| 245 | *
|
|---|
| 246 | * debug : debug flag, triggers output to screen at every step if non-zero
|
|---|
| 247 | * n : number of rows in matrix
|
|---|
| 248 | * m : number of columns in matrix
|
|---|
| 249 | * A[0,0] .. A[n-1,m-1] : entries of matrix (doubles)
|
|---|
| 250 | *
|
|---|
| 251 | * Computes reduced row-echelon form and prints intermediate steps.
|
|---|
| 252 | */
|
|---|
| 253 | int main(int argc, char** argv) {
|
|---|
| 254 | int numRows;
|
|---|
| 255 | int numCols;
|
|---|
| 256 | int j;
|
|---|
| 257 | double* matrix;
|
|---|
| 258 | int rank, nprocs;
|
|---|
| 259 | int debug = 0;
|
|---|
| 260 |
|
|---|
| 261 | MPI_Init(&argc, &argv);
|
|---|
| 262 | MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
|---|
| 263 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
|---|
| 264 | if (argc < 3) {
|
|---|
| 265 | if (rank == 0) {
|
|---|
| 266 | printf("Too few arguments.\n");
|
|---|
| 267 | printf("Usage: ges [-debug] n m A[0,0] A[0,1] ... A[n-1,m-1]\n");
|
|---|
| 268 | printf(" n : number of rows in matrix\n");
|
|---|
| 269 | printf(" m : number of columns in matrix\n");
|
|---|
| 270 | printf(" A[0,0] .. A[n-1,m-1] : entries of matrix (doubles)\n");
|
|---|
| 271 | }
|
|---|
| 272 | exit(1);
|
|---|
| 273 | }
|
|---|
| 274 | if (!strcmp("-debug", argv[1])) {
|
|---|
| 275 | debug = 1;
|
|---|
| 276 | }
|
|---|
| 277 | sscanf(argv[1+debug], "%d", &numRows);
|
|---|
| 278 | sscanf(argv[2+debug], "%d", &numCols);
|
|---|
| 279 | if (argc != 3 + debug + numRows*numCols) {
|
|---|
| 280 | if (rank == 0) {
|
|---|
| 281 | printf("Incorrect number of matrix entries: %d expected, %d given.\n",
|
|---|
| 282 | numRows*numCols, argc-3-debug);
|
|---|
| 283 | }
|
|---|
| 284 | exit(1);
|
|---|
| 285 | }
|
|---|
| 286 | if (nprocs != numRows) {
|
|---|
| 287 | if (rank == 0) {
|
|---|
| 288 | printf("Number of processes must equal the number of rows: %d != %d \n",
|
|---|
| 289 | nprocs, numRows);
|
|---|
| 290 | }
|
|---|
| 291 | exit(1);
|
|---|
| 292 | }
|
|---|
| 293 | matrix = (double *) malloc(numCols * sizeof(double));
|
|---|
| 294 | for (j = 0; j < numCols; j++) {
|
|---|
| 295 | sscanf(argv[rank*numCols+j+3+debug], "%lf", &matrix[j]);
|
|---|
| 296 | }
|
|---|
| 297 | printMatrix("Original matrix:\n", matrix, numRows, numCols);
|
|---|
| 298 | gausselim(matrix, numRows, numCols, debug);
|
|---|
| 299 | printMatrix("Reduced row-echelon form:\n", matrix, numRows, numCols);
|
|---|
| 300 | MPI_Finalize();
|
|---|
| 301 | }
|
|---|