wiki:Examples

Sparse Matrix-Vector Multiplication

In this example, we verify the functional correctness of a matrix-vector multiplication routine where the matrix is represented in a sparse (Compressed Sparse Row) representation.

File csr.h:

#ifndef _CSR_H
#define _CSR_H
/* Compressed Sparse Row (CSR) sparse matrix representation. */

struct csr_matrix {
  double *val; // length NZ (number of non-0s)
  int *col_ind; // length NZ
  int *row_ptr; // length N+1.  row_ptr[N]=NZ.
  int rows, cols; // number of rows (N), number of columns
};

/* Multiplies matrix m and (dense) vector v, storing result in already
   allocated vector p.  If m has dimensions NxM then v has length M
   and p has length N. */
void csr_matrix_vector_multiply(struct csr_matrix *m, double *v, double *p);

#endif

File csr.c:

/* Implementation of CSR matrix-vector multiplication from "LAProof: A
   Library of Formal Proofs of Accuracy and Correctness for Linear
   Algebra Programs" by Kellison, Appel, Tekriwal, and Bindel, IEEE
   30th Symposium on Computer Arithmetic, 2023.  That paper cites
   Barrett et al., "Templates for the Solution of Linear Systems:
   Building Blocks for Iterative Methods", SIAM, 1994, as the source
   of the algorithm. */
#include "csr.h"

#define fma(x,y,s) (x*y+s)

void csr_matrix_vector_multiply(struct csr_matrix *m, double *v, double *p) {
  int i, rows=m->rows;
  double *val = m->val;
  int *col_ind = m->col_ind;
  int *row_ptr = m->row_ptr;
  int next=row_ptr[0];
  for (i=0; i<rows; i++) {
    double s=0.0;
    int h=next;
    next=row_ptr[i+1];
    for (; h<next; h++) {
      double x = val[h];
      int j = col_ind[h];
      double y = v[j];
      s = fma(x,y,s);
    }
    p[i]=s;
  }
}

File csr_driver.cvl:

/* Filename : csr_driver.cvl
   Author   : Stephen F. Siegel
   Created  : 2023-08-03
   Modified : 2023-08-07

   Verification of matrix-vector multiplication routine using
   Compressed Sparse Row representation for the matrix.  Start with an
   arbitrary CSR matrix with N and M under specified bounds.  Take an
   arbitrary vector v of length M.  Call sparse matrix-vector
   multiplication routine to compute "actual" resulting vector.
   Convert the sparse matrix to a dense matrix and compute dense
   matrix-vector product to get "expected" result.  Check the actual
   and expected agree.
*/
#include <stdio.h>
#include <stdlib.h>
#include <pointer.cvh>
#include "csr.h"

typedef struct $mat {
  int n, m; // num rows, columns;
  double data[][];
} $mat;

void $mat_vec_mul($mat mat, double * v, double *p) {
  int n = mat.n, m = mat.m;
  for (int i=0; i<n; i++) {
    double s = 0.0;
    for (int j=0; j<m; j++) s += mat.data[i][j]*v[j];
    p[i] = s;
  }
}

$mat $mat_csr(struct csr_matrix csr) {
  int n = csr.rows, m = csr.cols;
  $mat mat;
  mat.n = n;
  mat.m = m;
  mat.data = (double[n][m])$lambda(int i,j) 0.0;
  for (int i=0; i<n; i++) {
    int r = csr.row_ptr[i], rnxt = csr.row_ptr[i+1];
    for (int k=r; k<rnxt; k++)
      mat.data[i][csr.col_ind[k]] = csr.val[k];
  }
  return mat;
}

// N,M=number of rows,columns.  N_B,M_B=upper bounds on N,M
$input int N_B = 3, M_B = 3, N, M;
$assume (1<=N && N<=N_B && 1<=M && M<=M_B);
// the non-0 entries for the matrix and the (dense) vector
$input double A[N*M], V[M];

/* Fills in p[0],...,p[len-1] with a strictly increasing sequence
   of integers in [0,max].  Precondition: 0 <= len <= max+1 */
void strict_inc(int * p, int len, int max) {
  //$assert(0<=len && len <= max+1);
  for (int i=0; i<len; i++) {
    int a = (i == 0 ? 0 : p[i-1]+1), b = max - len + i + 1;
    p[i] = a + $choose_int(b-a+1); // choose in a..b
  }
}

/* Nondeterministically create a CSR matrix with n rows and
   m columns */
struct csr_matrix make_csr(int n, int m) {
  int * row_ptr = malloc((n+1)*sizeof(int));
  row_ptr[0] = 0;
  for (int i=1; i<=n; i++)
    row_ptr[i] = row_ptr[i-1]+$choose_int(m+1);
  int NZ = row_ptr[n];
  $assert(0<=NZ && NZ<=n*m);
  int * col_ind = malloc(NZ*sizeof(int));
  for (int i=0; i<n; i++)
    strict_inc(col_ind+row_ptr[i], row_ptr[i+1]-row_ptr[i], m-1);
  double * val = malloc(NZ*sizeof(double));
  for (int i=0; i<NZ; i++) val[i] = A[i];
  return (struct csr_matrix){ val, col_ind, row_ptr, n, m };
}

void print_int_array(int * p, int n) {
  printf("{ ");
  for (int i=0; i<n; i++) printf("%d ", p[i]);
  printf("}");
}

void print_dbl_array(double * p, int n) {
  printf("{ ");
  for (int i=0; i<n; i++) printf("%lf ", p[i]);
  printf("}");
}

void print_csr(struct csr_matrix mat) {
  int nz = mat.row_ptr[mat.rows];
  printf("begin CSR[nrow=%d, ncol=%d, nz=%d]",
	 mat.rows, mat.cols, nz);
  printf("\n  row_ptr = ");
  print_int_array(mat.row_ptr, mat.rows+1);
  printf("\n  col_ind = ");
  print_int_array(mat.col_ind, nz);
  printf("\n  val     = ");
  print_dbl_array(mat.val, nz);
  printf("\nend CSR\n");
}

void destroy_csr(struct csr_matrix mat) {
  free(mat.row_ptr);
  free(mat.col_ind);
  free(mat.val);
}

int main(void) {
  double v[M], actual[N], expected[N];
  for (int i=0; i<M; i++) v[i] = V[i];
  struct csr_matrix mat = make_csr(N, M);
  csr_matrix_vector_multiply(&mat, v, actual);
  $mat dense = $mat_csr(mat);
  $mat_vec_mul(dense, v, expected);
#ifdef VERBOSE
  printf("\n");
  print_csr(mat);
  printf("v         = ");
  print_dbl_array(v, M);
  printf("\nactual    = ");
  print_dbl_array(actual, N);
  printf("\nexpected  = ");
  print_dbl_array(expected, N);
  printf("\n");
#endif
  destroy_csr(mat);
  $assert($equals(actual, expected));
}

File Makefile:

small: csr.h csr.c csr_driver.cvl
	civl verify -DVERBOSE csr.c csr_driver.cvl

medium: csr.h csr.c csr_driver.cvl
	civl verify -inputN_B=3 -inputM_B=4 csr.c csr_driver.cvl

medium2: csr.h csr.c csr_driver.cvl
	civl verify -inputN_B=4 -inputM_B=3 csr.c csr_driver.cvl

large: csr.h csr.c csr_driver.cvl
	civl verify -inputN_B=4 -inputM_B=4 csr.c csr_driver.cvl

clean:
	rm -rf CIVLREP *~ *.out *.tmp

Output from make (excerpted):

civl verify -DVERBOSE csr.c csr_driver.cvl
CIVL v1.22+ of 2023-10-09 -- http://vsl.cis.udel.edu/civl

begin CSR[nrow=3, ncol=3, nz=0]
  row_ptr = { 0 0 0 0 }
  col_ind = { }
  val     = { }
end CSR
v         = { X_V[0] X_V[1] X_V[2] }
actual    = { 0 0 0 }
expected  = { 0 0 0 }

begin CSR[nrow=3, ncol=3, nz=1]
  row_ptr = { 0 0 0 1 }
  col_ind = { 0 }
  val     = { X_A[0] }
end CSR
v         = { X_V[0] X_V[1] X_V[2] }
actual    = { 0 0 X_V[0]*X_A[0] }
expected  = { 0 0 X_V[0]*X_A[0] }

begin CSR[nrow=3, ncol=3, nz=1]
  row_ptr = { 0 0 0 1 }
  col_ind = { 1 }
  val     = { X_A[0] }
end CSR
v         = { X_V[0] X_V[1] X_V[2] }
actual    = { 0 0 X_V[1]*X_A[0] }
expected  = { 0 0 X_V[1]*X_A[0] }

begin CSR[nrow=3, ncol=3, nz=1]
  row_ptr = { 0 0 0 1 }
  col_ind = { 2 }
  val     = { X_A[0] }
end CSR
v         = { X_V[0] X_V[1] X_V[2] }
actual    = { 0 0 X_V[2]*X_A[0] }
expected  = { 0 0 X_V[2]*X_A[0] }

begin CSR[nrow=3, ncol=3, nz=2]
  row_ptr = { 0 0 0 2 }
  col_ind = { 0 1 }
  val     = { X_A[0] X_A[1] }
end CSR
v         = { X_V[0] X_V[1] X_V[2] }
actual    = { 0 0 (X_V[1]*X_A[1])+(X_V[0]*X_A[0]) }
expected  = { 0 0 (X_V[1]*X_A[1])+(X_V[0]*X_A[0]) }

begin CSR[nrow=3, ncol=3, nz=2]
  row_ptr = { 0 0 0 2 }
  col_ind = { 0 2 }
  val     = { X_A[0] X_A[1] }
end CSR
v         = { X_V[0] X_V[1] X_V[2] }
actual    = { 0 0 (X_V[2]*X_A[1])+(X_V[0]*X_A[0]) }
expected  = { 0 0 (X_V[2]*X_A[1])+(X_V[0]*X_A[0]) }
.
.
.
begin CSR[nrow=3, ncol=2, nz=6]
  row_ptr = { 0 2 4 6 }
  col_ind = { 0 1 0 1 0 1 }
  val     = { X_A[0] X_A[1] X_A[2] X_A[3] X_A[4] X_A[5] }
end CSR
v         = { X_V[0] X_V[1] }
actual    = { (X_V[1]*X_A[1])+(X_V[0]*X_A[0]) (X_V[1]*X_A[3])+(X_V[0]*X_A[2]) (X_V[1]*X_A[5])+(X_V[0]*X_A[4]) }
expected  = { (X_V[1]*X_A[1])+(X_V[0]*X_A[0]) (X_V[1]*X_A[3])+(X_V[0]*X_A[2]) (X_V[1]*X_A[5])+(X_V[0]*X_A[4]) }
.
.
.
begin CSR[nrow=1, ncol=1, nz=1]
  row_ptr = { 0 1 }
  col_ind = { 0 }
  val     = { X_A[0] }
end CSR
v         = { X_V[0] }
actual    = { X_A[0]*X_V[0] }
expected  = { X_A[0]*X_V[0] }

=== Source files ===
csr.c  (csr.c)
csr.h  (csr.h)
csr_driver.cvl  (csr_driver.cvl)

=== Command ===
civl verify -DVERBOSE csr.c csr_driver.cvl 

=== Stats ===
   time (s)          : 9.22          transitions  : 215091  
   memory (bytes)    : 6.62700032E8  trace steps  : 175869  
   max process count : 1             valid calls  : 661988  
   states            : 175197        provers      : cvc4, z3
   states saved      : 246990        prover calls : 19      
   state matches     : 673           

=== Result ===
All errors marked with '+' are absent on all executions.
 + Dereference errors                  + Functional equivalence violations
 + Internal errors                     + Library loading errors
 + Other errors                        + Assertion violations
 + Communication errors                + Writes to constant variables
 + Absolute deadlocks                  + Division by zero
 + Writes to $input variables          + Invalid casts
 + Malloc errors                       + Memory leaks
 + Memory management errors            + MPI usage errors
 + Out of bounds errors                + Reads from $output variables
 + Pointer errors                      + Process leaks
 + Sequence errors                     - Non-termination
 + Use of undefined values             + Union errors

Times on a 2020 M1 MacBook Pro: for matrices up to size 3x3: 6s. Up to 3x4: 17s. Up to 4x3: 17s. Up to 4x4: 19 minutes.

Last modified 21 months ago Last modified on 08/07/24 13:10:18
Note: See TracWiki for help on using the wiki.