1-dimensional Diffusion Equation Solver

This example is a 1-dimensional diffusion solver written in MiniMP. We present a sequential version and a parallel version. TASS can successfully verify that the two versions are functionally equivalent. A transcript of a TASS run, using 4 processes for the parallel implementation, is given below.

Sequential Version

#include<stdlib.h>

/* Symbolic inputs: TASS explores all values satisfying the constraints. */
#pragma TASS input 
int NX_BOUND;                 /* upper bound on the number of cells */
#pragma TASS input 
int NSTEPS_BOUND;             /* upper bound on the number of time steps */
#pragma TASS input {nx>=2 && nx<=NX_BOUND} 
int nx;                       /* number of spatial cells */
#pragma TASS input 
double u0[nx];                /* initial values of the field u */
#pragma TASS input {nsteps>=0 && nsteps<=NSTEPS_BOUND} 
int nsteps;                   /* number of time steps to simulate */
#pragma TASS input  {dt>0}
double dt;                    /* time-step size (declared but not used by the stencil in update()) */
#pragma TASS input 
double kappa;                 /* stencil coefficient (weight of the diffusion term) */

/* Output compared against the parallel version: data[t] holds the field
 * after t time steps. */
#pragma TASS output 
double data[nsteps+1][nx];

double u[nx];                 /* current temperature field */

/* init: copies the initial field u0 into the working array u. */
void init() {
	int j;
	
	for (j = 0; j < nx; j++) {
		u[j] = u0[j];
	}
}

/* write: records the current field u as row `time` of the output array. */
void write(int time) {
	int j;
	
	for (j = 0; j < nx; j++) {
		data[time][j] = u[j];
	}
}

/* update: advances u one time step with an explicit finite-difference
 * stencil.  The boundary cells u[0] and u[nx-1] are never written. */
void update() {
	double u_next[nx];
	int j;
	
	/* Compute the new interior values into a scratch array first, so
	 * every stencil read sees only old values. */
	for (j = 1; j < nx - 1; j++) {
		u_next[j] = u[j] + kappa * (u[j+1] + u[j-1] - 2 * u[j]);
	}
	for (j = 1; j < nx - 1; j++) {
		u[j] = u_next[j];
	}
}

void main() {
	int iter;
	
	init();
	write(0);
	for (iter = 1; iter <= nsteps; iter++) {
		update();
		write(iter);
	}
}  	

Parallel Version

#include<stdlib.h>
#include<mpi.h>

/* Same symbolic inputs as the sequential version; TASS compares the two
 * programs on identical input values. */
#pragma TASS input 
int NX_BOUND;
#pragma TASS input 
int NSTEPS_BOUND;
#pragma TASS input {nx>=2 && nx<=NX_BOUND} 
int nx;
#pragma TASS input 
double u0[nx];
#pragma TASS input {nsteps>=0 && nsteps<=NSTEPS_BOUND} 
int nsteps;
#pragma TASS input  {dt>0}
double dt;
#pragma TASS input 
double kappa;

#pragma TASS output 
double data[nsteps+1][nx];

int myrank;    /* rank of this process in MPI_COMM_WORLD */
int nprocs;    /* number of MPI processes */
int left;      /* rank of left neighbor; MPI_PROC_NULL at the left edge */
int right;     /* rank of right neighbor; MPI_PROC_NULL at the right edge */
int nxl;       /* number cells on this process (excluding ghost cells) */
int first;     /* global index for local index 0 */
int start;     /* first local index to update */
int stop;      /* last local index to update */
double* buf;     /* temp. buffer used on proc 0 only */ 
double* v;       /* temperature function */
double* v_new;   /* temp. used to update v */

/* owner: rank of the process that owns global cell `index` under the
 * block distribution defined by firstForProc. */
int owner(int index) {
	return (nprocs * (index + 1) - 1) / nx;
}

/* firstForProc: global index of the first cell assigned to `rank`. */
int firstForProc(int rank) {
	return (rank * nx) / nprocs;
}

/* countForProc: number of cells assigned to `rank`, i.e. the distance
 * between the starts of two consecutive blocks. */
int countForProc(int rank) {
	int hi;
	int lo;
	
	hi = firstForProc(rank+1);
	lo = firstForProc(rank);
	return hi - lo;
}

/* init: initializes global variables — computes this process's block of
 * the domain, its neighbor ranks, and allocates the local arrays
 * (including two ghost cells). */
void init() { 
	int i;
	int ownerRank;  /* renamed from `owner`: a local named `owner` would
	                 * shadow the owner() function, making the calls below
	                 * invalid in C */
	int bufSize;
	
	first = firstForProc(myrank);  
	nxl = countForProc(myrank);
	/* No left neighbor if this block starts at the global left edge or
	 * this process owns no cells. */
	if (first == 0 || nxl == 0)
		left = MPI_PROC_NULL;
	else
		left = owner(first-1);
	/* No right neighbor if this block ends at the global right edge. */
	if (first+nxl >= nx || nxl == 0)
		right = MPI_PROC_NULL;
	else
		right = owner(first+nxl);  
	/* Gather buffer on rank 0 must hold the largest block, i.e.
	 * ceil(nx/nprocs); the +1 is applied unconditionally, which merely
	 * over-allocates by one when nx%nprocs == 0. */
	bufSize = nx/nprocs;
	bufSize++;
	buf = (double *) malloc(bufSize * sizeof(double));
	/* Local field with a ghost cell at each end: v[0] and v[nxl+1]. */
	v = (double *) malloc((nxl + 2) * sizeof(double));
	v_new = (double *) malloc((nxl + 2) * sizeof(double));
	for (i=1; i<=nxl; i++) v[i] = u0[first+i-1];
	/* The constant global boundary cells are never updated: skip local
	 * index 1 on the owner of cell 0, and local index nxl on the owner
	 * of cell nx-1. */
	ownerRank = owner(0);
	if (myrank == ownerRank) start=2; else start=1;
	ownerRank = owner(nx-1);
	if (myrank == ownerRank) stop=nxl-1; else stop=nxl;
}

/* cleanup: releases the arrays allocated in init(). */
void cleanup() {
	free(buf);
	free(v);
	free(v_new);
	/* Null the global pointers so a stray later use, or a second call to
	 * cleanup(), fails predictably instead of touching freed memory. */
	buf = NULL;
	v = NULL;
	v_new = NULL;
}

/* write: gathers the full field onto rank 0 and records it as row `time`
 * of data.  Each non-root rank sends its nxl owned cells; rank 0 visits
 * the blocks in rank order and copies each into place. */
void write(int time) {
	int source;
	int i;
	int j;
	int count;
	
	if (myrank != 0) {
		/* Owned cells start at v[1]; v[0] is the left ghost cell. */
		MPI_Send(v+1, nxl, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
	} else {
		j = 0;
		for (source = 0; source < nprocs; source++) {
			count = countForProc(source);
			if (source == 0) {
				/* Rank 0's own block: copy directly, no message. */
				for (i = 0; i < count; i++) buf[i] = v[i+1];
			} else {
				MPI_Recv(buf, count, MPI_DOUBLE, source, 0,
				         MPI_COMM_WORLD, MPI_STATUS_IGNORE);
			}
			for (i = 0; i < count; i++) {
				data[time][j] = buf[i];
				j++;
			}
		}
	}
}

/* exchange_ghost_cells: updates ghost cells using MPI communication.
 * Each rank sends its first owned cell (v[1]) to the left neighbor and
 * its last owned cell (v[nxl]) to the right, filling v[nxl+1] from the
 * right neighbor and v[0] from the left.  MPI_PROC_NULL neighbors make
 * the calls no-ops at the domain edges.
 * NOTE(review): blocking MPI_Send before MPI_Recv relies on the
 * single-double messages being buffered; with a synchronous/unbuffered
 * send this ordering could deadlock.  TASS's bounded-buffer analysis
 * covers this model, but confirm before porting to a production MPI. */
void exchange_ghost_cells() {
    MPI_Send(v+1, 1, MPI_DOUBLE, left, 0, MPI_COMM_WORLD);
    MPI_Recv(v+(nxl+1), 1, MPI_DOUBLE, right, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    MPI_Send(v+nxl, 1, MPI_DOUBLE, right, 0, MPI_COMM_WORLD);
    MPI_Recv(v, 1, MPI_DOUBLE, left, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}

/* update: advances the local block one time step with the stencil on v,
 * reading the ghost cells.  Purely local operation.  The constant first
 * and last global cells are excluded via the precomputed [start, stop]
 * range. */
void update() {
	int j;
	
	/* Write into v_new first so every stencil read sees old values. */
	for (j = start; j <= stop; j++) {
		v_new[j] = v[j] + kappa * (v[j+1] + v[j-1] - 2 * v[j]);
	}
	for (j = start; j <= stop; j++) {
		v[j] = v_new[j];
	}
}

/* main: executes the simulation — initializes MPI, records the initial
 * state as step 0, then runs nsteps exchange/update/record cycles before
 * releasing resources.  (Results go into the `data` output array, not
 * into per-step files.)
 * NOTE(review): argc/argv are uninitialized locals handed to MPI_Init;
 * the MiniMP front end accepts this, but in standard C/MPI the real
 * argc/argv (or NULL, NULL) should be passed — confirm before porting. */
void main() {
	int argc;
	char **argv;
	int iter;
	
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
	init();    /* set up distribution, neighbor ranks, local arrays */
	write(0);  /* record initial state */
	for (iter = 1; iter <= nsteps; iter++) {
		exchange_ghost_cells();
		update();
		write(iter);
	}
	cleanup();
	MPI_Finalize();
}

TASS Interaction

 $ ./tass compare -np2=4 -reduce=urgent -inputNX_BOUND=10 -inputNSTEPS_BOUND=4 
   examples/diffusion/diffusion_seq.c examples/diffusion/diffusion_par.c
+----------------------------------------------------------------------+
|           TASS: Toolkit for Accurate Scientific Software             |
|  version 1.0_r1777 (2010-07-12)        http://vsl.cis.udel.edu/tass  |
+----------------------------------------------------------------------+
       specification : diffusion_seq (numProcs = 1)
      specSourceFile : examples/diffusion/diffusion_seq.c
      implementation : diffusion_par (numProcs = 4)
      implSourceFile : examples/diffusion/diffusion_par.c
                mode : COMPARE
              prover : CVC3
            deadlock : ABSOLUTE
           reduction : URGENT
            simplify : true
      simplifyProver : false
         bufferBound : 10
             verbose : false
         loop method : false
          repository : ./TASSREP
            frontend : MINIMP
          errorBound : 1
            NX_BOUND = 10
        NSTEPS_BOUND = 4

Comparing diffusion_seq and diffusion_par...

STATS:
   statesSeen    : 2292 (spec) + 62970 (impl) = 65262
   statesMatched :    0 (spec) +     0 (impl) =     0
   statesSaved   :  732 (spec) + 18570 (impl) = 19302
   transitions   : 2291 (spec) + 62925 (impl) = 65216
   numValues        : 6788
   numQueries       : 81
   numMessages      : 268
   numValidCalls    : 0
   numSimplifyCalls : 0
   time             : 11.156s.

RESULT: diffusion_seq and diffusion_par are functionally equivalent.