#pragma TASS input
int NX_BOUND;
#pragma TASS input
int NY_BOUND;
#pragma TASS input
int TIME_BOUND;
#pragma TASS input {nx > 2 && nx < NX_BOUND}
int nx; // number of x coordinates (including boundary)
#pragma TASS input {ny > 2 && ny < NY_BOUND}
int ny; // number of rows including boundary
#pragma TASS input {epsilon > 0 && epsilon < 1}
double epsilon; // total error tolerance
#pragma TASS input
double initialValues[ny][nx]; // initial values
#pragma TASS output
int t;
#pragma TASS output
double out[ny][nx];
double grid[ny][nx]; // holds values of current iteration
double square(double x) { return x * x; }
void init() {
int row;
int col;
for (row=0; row<ny; row++)
for (col=0; col<nx; col++)
grid[row][col] = initialValues[row][col];
}
void write_result(int time, double data[][]) {
int row;
int col;
t = time;
for (row=ny-1; row>=0; row--)
for (col=0; col<nx; col++)
out[row][col] = data[row][col];
}
void main() {
double error = epsilon;
int row;
int col;
int time = 0;
double result;
double tmp[ny][nx];
init();
while (error >= epsilon && time < TIME_BOUND) {
error = 0.0;
for (row=1; row<ny-1; row++) {
for (col=1; col<nx-1; col++) {
tmp[row][col] = (grid[row-1][col]+grid[row+1][col]+
grid[row][col-1]+grid[row][col+1])/4.0;
result = square(grid[row][col] - tmp[row][col]);
error += result;
}
}
for (row=1; row<ny-1; row++)
for (col=1; col<nx-1; col++)
grid[row][col] = tmp[row][col];
time++;
}
write_result(time, grid);
}
#include<stdlib.h>
#include<mpi.h>
#pragma TASS input
int NX_BOUND;
#pragma TASS input
int NY_BOUND;
#pragma TASS input
int TIME_BOUND;
#pragma TASS input {nx > 2 && nx < NX_BOUND}
int nx; // number of x coordinates (including boundary)
#pragma TASS input {ny > 2 && ny < NY_BOUND}
int ny; // number of rows including boundary
#pragma TASS input {epsilon > 0 && epsilon < 1}
double epsilon; // total error tolerance
#pragma TASS input
double initialValues[ny][nx]; // initial values
#pragma TASS output
int t;
#pragma TASS output
double out[ny][nx];
int myrank;
int nprocs;
int upper;
int lower;
int firstRow;
int nyl;
int start;
int stop;
int new = 0; // flag to select which dimension of grid is the new grid
/* layout of local grid:
* row\col | 012=nx-1
* ---------+----
* nyl+1=2 | 678
* 1 | 345
* 0 | 012
* grid_data = (double*)malloc(nx*(nyl+2)*sizeof(double));
* grid = (double**)malloc((nyl+2)*sizeof(double*));
* i-th row, j-th column:
* grid[i][j] = grid_data[i*nx+j]
* grid[i] = grid_data + i*nx
*
* Now there are actually two grids (one old, one new), so
* need arrays of length 2 for both of above:
*
* (double*)[2] grid_data;
* (double**)[2] grid;
*/
double** grid[2];
double* grid_data[2];
double* buf;
double square(double x){
return x * x;
}
int owner(int row) { return (nprocs*(row+1)-1)/ny; }
int firstForProc(int rank) { return (rank*ny)/nprocs; }
int countForProc(int rank) {
int a;
int b;
a = firstForProc(rank+1);
b = firstForProc(rank);
return a-b;
}
void init() {
int tmp;
int row;
int col;
int s;
int i;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
for (i=0; i<ny; i++);
firstRow = firstForProc(myrank);
nyl = countForProc(myrank);
for(s=0; s<2; s++) {
grid_data[s] = (double *) malloc((nx*(nyl+2)) * sizeof(double));
grid[s] = (double **) malloc((nyl+2) * sizeof(double *));
for (i=0; i<nyl+2; i++) grid[s][i]=grid_data[s]+i*nx;
}
if (firstRow == 0 || nyl == 0)
lower = MPI_PROC_NULL;
else
lower = owner(firstRow-1);
if (firstRow+nyl >= ny || nyl == 0)
upper = MPI_PROC_NULL;
else
upper = owner(firstRow+nyl);
tmp = owner(0);
if (myrank == tmp) start = 2; else start = 1;
tmp = owner(ny-1);
if (myrank == tmp) stop = nyl-1; else stop=nyl;
for (row=1; row<=nyl; row++) {
for (col=0; col<nx; col++) {
grid[0][row][col] = initialValues[firstRow+row-1][col];
grid[1][row][col] = grid[0][row][col];
}
}
buf = (double *) malloc(nx * sizeof(double));
}
void cleanup() {
int s;
free(buf);
for (s=0; s<2; s++) {
free(grid[s]);
free(grid_data[s]);
}
}
void write_result(int time, double** data) {
int row;
int col;
int proc;
t = time;
if (myrank != 0) {
for (row=nyl; row>=1; row--)
MPI_Send(grid[new][row], nx, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
}
else {
for (row=ny-1; row>=0; row--) {
proc = owner(row);
if (proc != 0) {
MPI_Recv(buf, nx, MPI_DOUBLE, proc, 0, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
for (col=0; col<nx; col++) out[row][col]=buf[col];
} else {
for (col=0; col<nx; col++) out[row][col]=grid[new][row+1][col];
}
}
}
}
void exchange() {
MPI_Sendrecv(grid[new][1], nx, MPI_DOUBLE, lower, 0,
grid[new][nyl+1], nx, MPI_DOUBLE, upper, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Sendrecv(grid[new][nyl], nx, MPI_DOUBLE, upper, 0,
grid[new][0], nx, MPI_DOUBLE, lower, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
/* returns local error */
double local_update() {
int row;
int col;
int old=1-new;
double error = 0.0;
double result;
for (row=start; row<=stop; row++) {
for (col=1; col<nx-1; col++) {
grid[new][row][col] = (grid[old][row-1][col]+grid[old][row+1][col]+
grid[old][row][col-1]+grid[old][row][col+1])/4.0;
result = square(grid[old][row][col] - grid[new][row][col]);
error += result;
}
}
return error;
}
void main() {
int argc;
char **argv;
double global_error = epsilon;
double error;
int time = 0;
MPI_Init(&argc, &argv);
init();
while (global_error >= epsilon && time < TIME_BOUND) {
exchange();
new = 1-new;
error = local_update();
MPI_Allreduce(&error, &global_error, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
time++;
}
write_result(time, grid[new]);
MPI_Finalize();
}