Laplace Equation Solver

This example is a Laplace equation solver written in MiniMP. We present a sequential version and a parallel version, and TASS successfully verifies that the two versions are functionally equivalent. A transcript of running TASS with 3 processes for the implementation is given below.
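
Both versions perform the same computation, which can be read off the loop bodies below: each interior grid point is replaced by the average of its four neighbors (a Jacobi sweep for the discrete Laplace equation), and the iteration stops once the sum of squared changes over a sweep drops below epsilon, or after TIME_BOUND sweeps. In LaTeX notation,

    u^{k+1}_{i,j} = \tfrac{1}{4}\left( u^{k}_{i-1,j} + u^{k}_{i+1,j} + u^{k}_{i,j-1} + u^{k}_{i,j+1} \right),
    \qquad
    e_k = \sum_{\text{interior } i,j} \left( u^{k+1}_{i,j} - u^{k}_{i,j} \right)^2,

and the loop continues while e_k >= epsilon and fewer than TIME_BOUND sweeps have been performed.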

Sequential Version

#pragma TASS input 
int NX_BOUND;
#pragma TASS input 
int NY_BOUND;
#pragma TASS input 
int TIME_BOUND;
#pragma TASS input {nx > 2 && nx < NX_BOUND} 
int nx;  // number of x coordinates (including boundary)
#pragma TASS input {ny > 2 && ny < NY_BOUND} 
int ny;  // number of rows including boundary
#pragma TASS input {epsilon > 0 && epsilon < 1} 
double epsilon; // total error tolerance
#pragma TASS input 
double initialValues[ny][nx]; // initial values
#pragma TASS output 
int t;
#pragma TASS output 
double out[ny][nx];

double grid[ny][nx];	// holds values of current iteration

double square(double x) { return x * x; }

void init() {
	int row;
	int col;
	
	for (row=0; row<ny; row++)
		for (col=0; col<nx; col++)
			grid[row][col] = initialValues[row][col];
}

void write_result(int time, double data[][]) {
	int row;
	int col;
	
	t = time;
	for (row=ny-1; row>=0; row--)
		for (col=0; col<nx; col++)
			out[row][col] = data[row][col];
}

void main() {
	double error = epsilon;
	int row;
	int col;
	int time = 0;
	double result;
	double tmp[ny][nx];
	
	init();
	while (error >= epsilon && time < TIME_BOUND) {
		error = 0.0;
		for (row=1; row<ny-1; row++) {
			for (col=1; col<nx-1; col++) {
				tmp[row][col] = (grid[row-1][col]+grid[row+1][col]+
                                        grid[row][col-1]+grid[row][col+1])/4.0;
				result = square(grid[row][col] - tmp[row][col]);
				error += result;
			}
		}
		for (row=1; row<ny-1; row++)
			for (col=1; col<nx-1; col++)
				grid[row][col] = tmp[row][col];
		time++;
	}
	write_result(time, grid);
}       
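
As a concrete illustration (not part of the example itself), consider the smallest admissible grid, nx = ny = 3, which has a single interior point (1,1). If the boundary values are all 1.0 and the interior value starts at 0.0, the first sweep computes tmp[1][1] = (1+1+1+1)/4 = 1.0 and error = (0.0 - 1.0)^2 = 1.0, which exceeds epsilon (recall epsilon < 1), so a second sweep runs; it leaves the value unchanged, error becomes 0.0, and the loop exits with time = 2 (assuming TIME_BOUND is at least 2).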

Parallel Version

#include<stdlib.h>
#include<mpi.h>

#pragma TASS input 
int NX_BOUND;
#pragma TASS input 
int NY_BOUND;
#pragma TASS input 
int TIME_BOUND;
#pragma TASS input {nx > 2 && nx < NX_BOUND} 
int nx;  // number of x coordinates (including boundary)
#pragma TASS input {ny > 2 && ny < NY_BOUND} 
int ny;  // number of rows including boundary
#pragma TASS input {epsilon > 0 && epsilon < 1} 
double epsilon; // total error tolerance
#pragma TASS input 
double initialValues[ny][nx]; // initial values
#pragma TASS output 
int t;
#pragma TASS output 
double out[ny][nx];

int myrank;
int nprocs;
int upper;
int lower;
int firstRow;
int nyl;
int start;
int stop;
int new = 0;   // index (0 or 1) selecting which of the two grids is the new one
/* Layout of the local grid, illustrated for nx = 3 and nyl = 1; the table
 * entries are the offsets into grid_data:
 *
 *    row \ col |  0  1  2    (col nx-1 = 2)
 *   -----------+------------
 *    nyl+1 = 2 |  6  7  8    <- ghost row received from the upper neighbor
 *            1 |  3  4  5    <- interior row owned by this process
 *            0 |  0  1  2    <- ghost row received from the lower neighbor
 *
 * grid_data = (double*)malloc(nx*(nyl+2)*sizeof(double));
 * grid = (double**)malloc((nyl+2)*sizeof(double*));
 * i-th row, j-th column:
 *   grid[i][j] = grid_data[i*nx+j]
 *   grid[i]    = grid_data + i*nx
 *
 * There are actually two grids (one old, one new), so both of the above
 * become arrays of length 2, declared below as grid_data[2] and grid[2].
 */
double** grid[2];
double* grid_data[2];
double* buf;

double square(double x){
	return x * x;
}

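/*
 * Block distribution of the ny global rows over nprocs processes:
 * process rank owns global rows firstForProc(rank) through
 * firstForProc(rank+1)-1, and owner(row) inverts this mapping.
 * For example, with ny = 5 and nprocs = 3, rank 0 owns row 0,
 * rank 1 owns rows 1-2, and rank 2 owns rows 3-4.
 */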
int owner(int row) { return (nprocs*(row+1)-1)/ny; }

int firstForProc(int rank) { return (rank*ny)/nprocs; }

int countForProc(int rank) {
	int a;
	int b;
	
	a = firstForProc(rank+1);
	b = firstForProc(rank);
	return a-b;
}

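/*
 * Determines this process's share of the work and sets up its local data:
 * queries rank and size, computes the owned row range (firstRow, nyl),
 * allocates the two local grids with nyl interior rows plus two ghost rows
 * (local rows 0 and nyl+1), records the neighbor ranks (lower and upper,
 * or MPI_PROC_NULL at the ends), sets start/stop so that the global
 * boundary rows 0 and ny-1 are never updated, and copies this process's
 * slice of initialValues into local rows 1..nyl of both grids.
 */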
void init() {
	int tmp;
	int row;
	int col;
	int s;
	int i;
	
	MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
	firstRow = firstForProc(myrank);
	nyl = countForProc(myrank);
	for(s=0; s<2; s++) {
		grid_data[s] = (double *) malloc((nx*(nyl+2)) * sizeof(double));
		grid[s] = (double **) malloc((nyl+2) * sizeof(double *));
		for (i=0; i<nyl+2; i++) grid[s][i]=grid_data[s]+i*nx;
	}
	if (firstRow == 0 || nyl == 0)
		lower = MPI_PROC_NULL;
	else
		lower = owner(firstRow-1);
	if (firstRow+nyl >= ny || nyl == 0)
		upper = MPI_PROC_NULL;
	else
		upper = owner(firstRow+nyl);
	tmp = owner(0);
	if (myrank == tmp) start = 2; else start = 1;
	tmp = owner(ny-1);
	if (myrank == tmp) stop = nyl-1; else stop=nyl;
	for (row=1; row<=nyl; row++) {
		for (col=0; col<nx; col++) {
			grid[0][row][col] = initialValues[firstRow+row-1][col];
			grid[1][row][col] = grid[0][row][col];
		}
	}
	buf = (double *) malloc(nx * sizeof(double));
}

void cleanup() {
	int s;
	
	free(buf);
	for (s=0; s<2; s++) {
		free(grid[s]);
		free(grid_data[s]);
	}
}

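/*
 * Gathers the final grid on process 0: every other process sends its local
 * rows (highest global row first) to process 0, which receives each global
 * row from its owner, or copies its own rows directly, into out.  Note that
 * the data parameter is not used; grid[new] is read instead.
 */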
void write_result(int time, double** data) {
	int row;
	int col;
	int proc;
	
	t = time;
	if (myrank != 0) {
		for (row=nyl; row>=1; row--)
			MPI_Send(grid[new][row], nx, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
	}
	else {
		for (row=ny-1; row>=0; row--) {
			proc = owner(row);
			if (proc != 0) {
				MPI_Recv(buf, nx, MPI_DOUBLE, proc, 0, MPI_COMM_WORLD, 
                                        MPI_STATUS_IGNORE);
				for (col=0; col<nx; col++) out[row][col]=buf[col];
			} else {
				for (col=0; col<nx; col++) out[row][col]=grid[new][row+1][col];
			}
		}
	}
}

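/*
 * Exchanges ghost rows with the neighboring processes: local row 1 is sent
 * to the lower neighbor while local row nyl+1 is received from the upper
 * neighbor, then local row nyl is sent to the upper neighbor while local
 * row 0 is received from the lower neighbor.  A neighbor equal to
 * MPI_PROC_NULL turns the corresponding send or receive into a no-op at
 * the global boundaries.
 */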
void exchange() {
	MPI_Sendrecv(grid[new][1], nx, MPI_DOUBLE, lower, 0, 
                grid[new][nyl+1], nx, MPI_DOUBLE, upper, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	MPI_Sendrecv(grid[new][nyl], nx, MPI_DOUBLE, upper, 0, 
                grid[new][0], nx, MPI_DOUBLE, lower, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}

/* returns local error */
double local_update() {
	int row;
	int col;
	int old=1-new;
	double error = 0.0;
	double result;
	
	for (row=start; row<=stop; row++) {
		for (col=1; col<nx-1; col++) {
			grid[new][row][col] = (grid[old][row-1][col]+grid[old][row+1][col]+
                                grid[old][row][col-1]+grid[old][row][col+1])/4.0;
			result = square(grid[old][row][col] - grid[new][row][col]);
			error += result;
		}
	}
	return error;
}

void main() {
	int argc;
	char **argv;
	double global_error = epsilon;
	double error;
	int time = 0;
	
	MPI_Init(&argc, &argv);
	init();
	while (global_error >= epsilon && time < TIME_BOUND) {
		exchange();
		new = 1-new;
		error = local_update();
		MPI_Allreduce(&error, &global_error, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
		time++;
	}
	write_result(time, grid[new]);
	MPI_Finalize();
}       
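
To see how the parallel version assigns rows to processes, the following standalone sketch (not part of the TASS example; plain C, with ny and nprocs fixed to arbitrary illustrative values) reproduces firstForProc, countForProc, and owner from the listing above and prints the resulting mapping.

#include <stdio.h>

/* Illustrative values only; in the real program these come from the TASS
 * input and from MPI_Comm_size. */
static int ny = 5;
static int nprocs = 3;

/* Same formulas as in the parallel version above. */
int firstForProc(int rank) { return (rank * ny) / nprocs; }
int countForProc(int rank) { return firstForProc(rank + 1) - firstForProc(rank); }
int owner(int row) { return (nprocs * (row + 1) - 1) / ny; }

int main(void) {
	int rank;
	int row;

	for (rank = 0; rank < nprocs; rank++)
		printf("rank %d: firstRow = %d, nyl = %d\n",
		       rank, firstForProc(rank), countForProc(rank));
	for (row = 0; row < ny; row++)
		printf("row %d is owned by rank %d\n", row, owner(row));
	return 0;
}

With ny = 5 and nprocs = 3 this prints firstRow/nyl pairs (0,1), (1,2), and (3,2) for ranks 0, 1, and 2, and owners 0, 1, 1, 2, 2 for rows 0 through 4, consistent with the block distribution described above.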

TASS Interaction

	$ ./tass compare -reduce=urgent -inputNX_BOUND=4 -inputNY_BOUND=6 -inputTIME_BOUND=3 -np2=3 examples/laplace/laplaceSpec.c examples/laplace/laplaceImpl.c
+----------------------------------------------------------------------+
|           TASS: Toolkit for Accurate Scientific Software             |
|  version 1.0_r1777 (2010-07-12)        http://vsl.cis.udel.edu/tass  |
+----------------------------------------------------------------------+
       specification : laplaceSpec (numProcs = 1)
      specSourceFile : examples/laplace/laplaceSpec.c
      implementation : laplaceImpl (numProcs = 3)
      implSourceFile : examples/laplace/laplaceImpl.c
                mode : COMPARE
              prover : CVC3
            deadlock : ABSOLUTE
           reduction : URGENT
            simplify : true
      simplifyProver : false
         bufferBound : 10
             verbose : false
         loop method : false
          repository : ./TASSREP
            frontend : MINIMP
          errorBound : 1
            NX_BOUND = 4
            NY_BOUND = 6
          TIME_BOUND = 3

Comparing laplaceSpec and laplaceImpl...

STATS:
   statesSeen    : 904 (spec) + 11093 (impl) = 11997
   statesMatched :   0 (spec) +     0 (impl) =     0
   statesSaved   : 327 (spec) +  3432 (impl) =  3759
   transitions   : 903 (spec) + 11085 (impl) = 11988
   numValues        : 1916
   numQueries       : 135
   numMessages      : 76
   numValidCalls    : 0
   numSimplifyCalls : 0
   time             : 7.303s.

RESULT: laplaceSpec and laplaceImpl are functionally equivalent.