\documentclass[math]{beamer}
%\usepackage{booktabs} 
\usepackage{comment} 
\usepackage{MnSymbol,wasysym, multirow}
\input{preambular}
%\def\blfootnote{\xdef\@thefnmark{}\@footnotetext}
\usetheme[compress]{Singapore}
\title{Contracts For Message-Passing Programs}
\subtitle{Dissertation Defense}
\author{\textbf{Ziqing Luo}}
\institute{University of Delaware} \date{\today}

%\def\insertnavigation#1{\relax}
%\setbeamertemplate{section in toc}{\hspace*{1em}\inserttocsectionnumber.~\inserttocsection\par}
\setbeamersize{text margin left=2mm, text margin right=1mm, %
  sidebar width left=0mm, sidebar width right=3mm}

\usefoottemplate{%
  \hspace{0mm}%
  \raisebox{1mm}[0mm][0mm]{%
    \tiny{\color{black!100} \insertframenumber}}%
  \hspace{4mm}%
  \raisebox{1mm}[0mm][0mm]{\tiny\textcolor{gray}{%
      Ziqing Luo $\diamond$ Dissertation Defense $\diamond$ %
      \emph{Contracts for Message-Passing}}}}



\newenvironment{mybox}[1]{%
  \begin{footnotesize}%
    \color{red}%
    \begin{tabular}{@{}|c|@{}}%
      \hline\\[-2.5mm]%
      \begin{minipage}{#1}%
        \color{black}%
      }{%
      \end{minipage}%
      \\[-2.5mm] \\ \hline%
    \end{tabular}%
  \end{footnotesize}%
}


\begin{document}
 
\frame{\titlepage
  \scriptsize{
    \begin{tabular}{ll}
      Committee: & Dr. Stephen F. Siegel (Advisor)  \\
      &Dr. James Clause  \\
      &Dr. Sunita Chandrasekaran  \\
      &Dr. Matthew B. Dwyer  \\
      &Dr. William D. Gropp  
    \end{tabular}
  }
}

\begin{frame}
\frametitle{Outline}
\footnotesize\tableofcontents
\end{frame}

\section{Motivation}
%% MOTIVATION
\begin{frame}[t]{Motivation}
  \begin{itemize}
  \item MPI (the Message-Passing Interface) is and will continue to be widely used in HPC applications.
    \begin{itemize}
    \item DOE Exascale Project:
    \item[-] 28/28 applications \& 28/49 software technology projects 
      use MPI
    \item[-] 93\% will use MPI for their Exascale version in the future
    \end{itemize} \vspace{.1cm}

  \item MPI scientific applications are critical
    \begin{itemize}
    \item earthquake, climate predication, \& chemical, biological simulation, etc.
    \end{itemize} \vspace{.1cm}

  \item only a few tools for the \emph{program-specific correctness}
    of MPI programs
    \begin{itemize}
    \item \emph{CIVL}, \emph{MPI-Spin}, \emph{ISP}, \emph{ParTypes},
      \emph{HERMES} prove \textbf{deadlock}-free
    \item \emph{MPI-Checker}, \emph{PARCOACH}, \emph{MUST} detect
      \textbf{MPI-Specific} errors
    \end{itemize}\vspace{.1cm}

  \item advantages in procedure contract-based specification and verification
    \begin{itemize} %% abstract means: communication details are hidden
    \item program-specific correctness, rigorous, abstract, divide \&
      conquer, sub-problem independence
    \item \textbf{\emph{but}} most tools are only for sequential programs
    \end{itemize}     
  \end{itemize}
\end{frame}

\section{Background}
%% BACKGROUND
\begin{frame}[t]{The Message-Passing Interface (MPI)}
  \begin{itemize}
  \item  a standard for message-passing programming
  \item  the message-passing parallel programming model
    \includegraphics[scale=.2]{../images/messagepassing}
  \item conceptually: $\forall{(p, q)}.\,$ a message channel from $p$ to $q$ 
  \item  the MPI library provides hundreds of functions, datatypes and constants
    for C/C++ and Fortran
  \end{itemize}
\end{frame}

\begin{frame}[t]{The Message-Passing Interface (MPI): An Example}
  \centering
  \begin{programSuperSmall}
    \#include<mpi.h> \\
    \myccode{int} myrank, nprocs, left, right;\\
    \myccode{void} main(\myccode{int} argc,  \myccode{char} **argv) \lb{} \\
\ \     \myccode{int} i, data = 0; \\
\ \     MPI\U{}Init(\&argc, \&argv); \\
\ \     MPI\U{}Comm\U{}size(MPI\U{}COMM\U{}WORLD, \&nprocs); \\
\ \     MPI\U{}Comm\U{}rank(MPI\U{}COMM\U{}WORLD, \&myrank);\\
\ \     left = (myrank+nprocs-1)\%nprocs; \\
\ \     right = (myrank+nprocs+1)\%nprocs; \\
\ \     i=0; \\
\ \     \myccode{if} (myrank\%2==0) \lb{} \\
\ \ \ \         MPI\U{}Send(\&data, 0, MPI\U{}INT, right, 0, MPI\U{}COMM\U{}WORLD); \\
\ \ \ \         MPI\U{}Recv(\&data, 0, MPI\U{}INT, left, 0, MPI\U{}COMM\U{}WORLD, MPI\U{}STATUS\U{}IGNORE); \\
\ \     \rb{} \myccode{else} \lb{} \\
\ \ \ \         MPI\U{}Recv(\&data, 0, MPI\U{}INT, left, 0, MPI\U{}COMM\U{}WORLD, MPI\U{}STATUS\U{}IGNORE); \\
\ \ \ \         MPI\U{}Send(\&data, 0, MPI\U{}INT, right, 0, MPI\U{}COMM\U{}WORLD); \\
\ \     \rb{} \\
\ \     MPI\U{}Finalize();
\rb{}
  \end{programSuperSmall}
%\begin{flushleft}
\small
\begin{itemize}
\item every process runs a copy 
\item every process sends \texttt{data} to right and receives a message in
  \texttt{data} from left
\end{itemize}
%\end{flushleft}
\end{frame}

\begin{frame}{Hoare Logic}  
  \begin{itemize}
  \item Procedure Contract is based on \emph{Hoare Logic}
    \begin{itemize}
      \item[] a set of inference rules (Hoare Calculus) for reasoning
        about \emph{Hoare triples}
    \end{itemize}
  \item Hoare triple: $\{P\}~S~\{Q\}$
    \begin{itemize}
    \item $P\colon$ a pre-condition
    \item $S\colon$ a statement
    \item $Q\colon$ a post-condition
    \item if $P$ holds before $S$, $Q$ will hold after the completion of $S$
    \end{itemize}
  \item a procedure with a contract is essentially a Hoare triple
    \begin{itemize}
    \item[] a contract comprises a pair of pre- and post-conditions
    \end{itemize}    
  \end{itemize}
\end{frame}

\begin{frame}[t]{Example of Inference Rules}
  \LARGE
  \[
  \begin{array}{rll}      
    \frac{}{ \{Q[e / a]\}~a\texttt{ := }e~\{Q\} }
    & \text{\footnotesize(assignment)} &\vspace{.2cm}\\
      \frac{\{P'\}~S~\{Q'\}}{\{P\}~S~\{Q\}}
         & \text{\footnotesize(consequence)}
         &
         \text{\footnotesize if $P \Rightarrow P' 
                              \wedge{} Q' \Rightarrow{} Q$} \vspace{.2cm}\\
         \frac{ \{P \wedge{} C\}~S_0~\{Q\},\, \{P \wedge{}
           \neg{}C\}~S_1~\{Q\}}%
              {\{P\}~\texttt{if } C \texttt{ then }S_0\texttt{ else }S_1~\{Q\}}
              & \text{\footnotesize(cond)} & 
  \end{array}
  \]
\end{frame}

\begin{frame}[t]{Example of Hoare Calculus Derivation}
\footnotesize
\[\begin{array}{lll}
\hspace{-.2cm}    & \{\texttt{x} = X \wedge{} \texttt{y} = Y\}~~  
    \texttt{if x>=0 then y = x else y = -x} 
    ~~\{\texttt{y} = abs(\texttt{x})\} & \hspace{-.2cm}\text{(cond 1, 2)}\vspace{.22cm}\\
    
\hspace{-.2cm}    1 & \{\texttt{x} = X \wedge{} \texttt{y} = Y \wedge{} X \geq 0\}~~
    \texttt{y = x} ~~\{\texttt{y} = abs(\texttt{x})\} & \hspace{-.2cm}\text{(conseq 1.1)}\vspace{.22cm}\\

    %%% 1.1
\hspace{-.2cm}    1.1 & \{\texttt{x} = \mymath{abs(}\texttt{x})\}~~ \texttt{y = x} ~~\{\texttt{y} =
                        abs(\texttt{x})\} &
                                            \hspace{-.2cm}\text{(assign)}\vspace{.22cm}\\

\hspace{-.2cm}    2 & \{\texttt{x} = X \wedge{} \texttt{y} = Y \wedge{} X < 0\}~~
    \texttt{y = -x} ~~\{\texttt{y} = abs(\texttt{x})\} & \hspace{-.2cm}\text{(conseq 1.2)}\vspace{.22cm}\\

    %%% 1.2
\hspace{-.2cm}    1.2 & \{-\texttt{x} = \mymath{abs(}\texttt{x})\}~~
                        \texttt{y = -x} ~~\{\texttt{y} =
                        abs(\texttt{x})\} &
                                            \hspace{-.2cm}\text{(assign)}
  \end{array}\]
\end{frame}

\begin{frame}[t]{A Contract Language Example}
\begin{itemize}
\item ANSI/ISO C Specification Language (ACSL)
\item function contract
\item contracts as annotations
\item rich primitives for specifying sequential C
\end{itemize}
\centering
\includegraphics[scale=.3]{../images/ACSLImage}
%    \begin{programSuperSmall}
%      \ccode{int}~area;\\
%      \mycomment{/*@~requires~w~$>$~0~\&\&~h~$>$~0;}\\
%      \mycomment{~~@~assigns~area;}\\
%      \mycomment{~~@~ensures~area~==~w~*~h;}\\
%      \mycomment{~~@*/}\\
%      \ccode{void}~compute\civlU{}area(\ccode{int}~w,~\ccode{int}~h)~\lb~\\
%      ~~area~=~w~*~h;~\\
%      \rb\\
%    \end{programSuperSmall}
%\begin{itemize}
%\item \mycomment{requires} specifies a pre-condition
%\item \mycomment{ensures} specifies a post-condition
%\item \mycomment{assigns} specifies a \emph{frame condition}, which essentially is a post-condition
%\end{itemize}
\end{frame}

\begin{frame}{Model Checking \& Symbolic Execution}
  \begin{itemize}
  \item Model Checking
  \begin{itemize}
    \item a system $\rightarrow$ a finite state model
    \item check against a specification (temporal logic)
    \item searching a \emph{state space graph} of a model
      \begin{itemize}
      \item node: a state
      \item edge: an action that alters a state in an atomic step
      \end{itemize}
    \item effective for concurrent system
    \item state explosion problem
  \end{itemize}
  \item Symbolic Execution
  \begin{itemize}
  \item concrete inputs $\rightarrow{}$ unconstrained symbols
  \item a path is associated with a path condition (PC)
  \item PC is updated at branches
  \item unsatisfiable PC means the associated path is infeasible
  \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}{Combining Model Checking and Symbolic Execution}
  \begin{itemize}
  \item values in states are symbolic expressions
  \item a state is associated with a PC
  \end{itemize}
an example: \\

\begin{minipage}{.3\textwidth}
  \includegraphics[scale=.25]{../images/example4MCSE}
\end{minipage}
\begin{minipage}{.55\textwidth}
  \includegraphics[scale=.28]{../images/example4MCSEGraph}
\end{minipage}
\end{frame}

\section{Theory}
%\subsection{A Toy Language: \minimp{}}
\begin{frame}[t]{A Toy Message-Passing Language: \minimp{}}
\begin{itemize}
\item a toy language without irrelevant features (e.g., pointers, static type)
\item focuses on message-passing
\item sequential statements are similar to many imperative languages (e.g.\ C or Java)
\item dynamic type
\item 3 communication statements:
  \begin{itemize}
  \item \send{\textsf{e}}{\textsf{p}}
  \item \recv{\textsf{e}}{\textsf{p}}
  \item \recvany{\textsf{var}$_0$}{\textsf{var}$_1$}
  \begin{itemize}
    non-deterministically receive a message that has been sent to itself
  \end{itemize}
  \end{itemize}
\end{itemize}
\end{frame}

\begin{frame}[t]{A \minimp{} Example}
\only<1>{
\centering
\includegraphics[scale=.5]{../images/worker_manager}
}
\only<2>{
\begin{itemize}
\item if specify procedure contracts:
\item \texttt{manager}: sends messages in a roll to every other process except \texttt{manager}-itself;
  then receives messages in a roll $\ldots$
\item \texttt{worker}: receives a message from \texttt{manager}; then sends a message back
\end{itemize}
\centering
\includegraphics[scale=.4]{../images/worker_manager2}
}
\only<3>{
\begin{itemize}
\item \textcolor{red}{too much communication detail}
\item \textcolor{red}{environmental dependency}
\item \textcolor{red}{complicated}
\item \textcolor{red}{\textbf{not the idea way of dividing the program}}
\end{itemize}
\centering
\includegraphics[scale=.4]{../images/worker_manager2}
}
\end{frame}

\begin{frame}[t]{An Example of The Decomposition Intuition}
  \begin{itemize}
\only<1>{
  \item \texttt{bcast(dat, root)}: process \texttt{root} ``broadcasts'' \texttt{dat} to all processes
  \item \texttt{gather(dat, root)}: process \texttt{root} ``gathers'' \texttt{dat} from all processes
  \item \texttt{compute}: process local computation
  \item communication is invisible from the view of the \texttt{main} function
  \item many realistic MPI applications are programmed in such a style 
}
\only<2>{
\item[] informal verbal reasoning like Hoare-style:
\item \texttt{bcast}: all processes have proper \texttt{dat}
\item \texttt{compute}: all processes have computed result w.r.t.\ the proper \texttt{dat}
\item \texttt{gather}: root process have gathered all computed results 
\item[]
}
  \end{itemize}
\begin{center}
\includegraphics[scale=.6]{../images/CollectiveMiniMPExample}
\end{center}
\end{frame}

\begin{frame}[t]{Collective-Style Procedure}
\begin{itemize}
\item \textbf{pre-state} of $S$: all processes are at the entry of $S$
\item \textbf{post-state} of $S$: all processes are at the exit of $S$ and corresponds to a pre-state
\item[]

\onslide<2->{
\item[Def] a procedure $S$ is \textbf{collective-style}, if
\item from a pre-state of $S$ where message channels are empty,
\item if the execution eventually reaches a post-state of $S$ corresponding to the pre-state,
\item all message channels are still empty $\qed$
\item[] 
\item[] process-local procedures are also collective-style
}

\end{itemize}
\onslide<3->{
\centering
\large{\textbf{To divide a message-passing program to collective-style procedures.}}
}
\end{frame}

\begin{frame}[t]{Why Collective Style?}
  \begin{enumerate}
  \item A \minimp{} (or MPI) program is always able to be decomposed to
    at least such one
  \item MPI programs are normally written in a style that facilitates
    collective style decomposition
    \begin{itemize}
    \item MPI collective functions are used by 80\% of the ECP teams
    \item callers of MPI collective functions are very likely collective style
    \item OpenMC[\textcolor{blue}{1}]\let\thefootnote\relax\footnotetext{\vspace{.2cm}[\textcolor{blue}{1}]
      \scriptsize P.\ K.\ Romano et al., \emph{OpenMC: A state-of-the-art Monte Carlo code for research and development.}} (24 thousand lines of C++ code) has only
      collective style functions
    \item the \texttt{parcsr\U{}ls} module (35 thousand lines of C code) in AMG[\textcolor{blue}{2}]\let\thefootnote\relax\footnotetext{\vspace{.2cm}[\textcolor{blue}{2}]
      \scriptsize U.\ Yang et al., \emph{Algebraic Multigrid Benchmark.}} has only collective style functions
    \end{itemize}
  \item specification can mainly focus on inputs and outputs
  \item potential to enable Hoare style reasoning
  \end{enumerate}
\end{frame}



%\subsection{Collective Contract}
%% COLLECTIVE SPECIFICATION
\begin{frame}[t]{Specifying Collective Procedures with \minispec{}}
  \centering
\only<1>{  
  \includegraphics[scale=.55]{../images/minimoBcast}

  A ``broadcast'' procedure
}
\only<2->{
  \includegraphics[scale=.55]{../images/minimoBcastContract}
  \vspace{.5cm}
} 
\only<2>{
  \begin{itemize}
  \item \ccode{requires} specifies a \textbf{state-requirement} (pre-condition)
  \item \ccode{ensures} specifies a \textbf{state-guarantee} (post-condition)
  \item \ccode{assigns} specifies a \textbf{frame condition}
  \item \acslold($e$) referring to expression $e$ in the pre-state
  \item \acslon($id$,$e$) referring to expression $e$ on process $id$
  \item \acslforall{} specifies a universally quantified condition (also \acslexists)
  \end{itemize}
}
\end{frame}

\begin{frame}[t]{Are State-Requirement \& -Guarantee Good Enough?}
  \centering
  \includegraphics[scale=.55]{../images/minimpGather}

  A ``gather'' procedure
\end{frame}

\begin{frame}[t]{Are State-Requirement \& -Guarantee Good Enough?}
  \centering
  \includegraphics[scale=.55]{../images/minimpInterfere}
  \\
 \onslide<2>{\textbf{The ``gather'' procedure is ``interfered'' by the following code.}}
\end{frame}

\begin{frame}[t]{Interference}
  \begin{itemize}
  \item For a sequence of two collective-style procedures $C_0C_1$,
  \item[] $C_0$ is said to be \textbf{interfered} by $C_1$ if
  \item[]  a process in $C_0$ receives a message sent by a process in $C_1$.

  \item more precisely, for an execution from a pre-state of $C_0C_1$
    to a post-state 
    \begin{itemize}
    \item[] if $\exists(p, q)$ s.t.\
    \item[] \#msg received from $q$ by $p$ before $p$ exits $C_0$
    \item[] is greater than
    \item[] \#msg sent by $q$ to $p$ before $q$ exits $C_0$
    \end{itemize}
  \item if $C_0$ is interfered, the behavior of $C_0$ is \textbf{undefined}
  \end{itemize}
\end{frame}

\begin{frame}[t]{Interference}
  \begin{itemize}
  \item for $C_0C_1$, the \textbf{symmetry case:}
  \item[] a process in $C_1$ receives a message sent by a process in $C_0$ 
  \item[] can be \textbf{ignored}
  \item if the symmetry case happens, there is an ordered pair
    of process $(p, q)$ s.t.\
  \item[] \#msg sent from $p$ to $q$ before $p$ exits $C_0$ and
  \item[] \#msg received by $q$ from $p$ before $q$ exits $C_0$
  \item[] are unbalanced
  \item[] message channels cannot be empty at the post-state of $C_0$
  \end{itemize}
\end{frame}

\begin{frame}[t]{Absence Assertions}
  \begin{itemize}
  \item To prevent interference, the contract has to 
  \item[1] make an assumption to the environment
  \item[2] express some knowledge about communication

  \item intuition:
  \item[] ``\emph{no message sent by a exited process, until another
    process exits}''
  \item minimizing the communication detail exposed in a contract 
\only<2>{
  \item we designed \textbf{absence assertion:}
  \item is associated with a procedure $C$
  \item \acslabsent{$a$}{$b$}{$c$}: consumes an execution $\pi$ returns true/false
  \item[] no event $a$ shall happen after event $b$ until event $c$ happens
  \item essentially LTL ``absence of'' pattern
    formula[\textcolor{blue}{1}]\let\thefootnote\relax\footnotetext{\vspace{.5cm}[\textcolor{blue}{1}]
      \scriptsize M.\ B.\ Dwyer et al., \emph{Patterns in property
        specifications for finite-state verification.}}  }
  \end{itemize}
\end{frame}

\begin{frame}[t]{Absence Assertion}
  \begin{itemize}
  \item we
    designed 3 kinds of \textbf{absence assertions}
  \item[]1. \acslgamma{$p$}{$q$}{$r$} \vspace{.1cm}
  \item[]2. \acslguarantee{$p$}{$q$}{$r$} \vspace{.1cm}
  \item[]3. \acslwaitsfor{$p$}{$q$} 
\item Let $\gamma$ be an absence assertion, 
  \begin{itemize}
  \item to make an assumption to the environment: \texttt{requires} $\gamma$
      \begin{itemize}
      \item such a clause specifies a \textbf{path-requirement}
      \end{itemize}
  \item to express the behavior of the specified procedure: \texttt{ensures} $\gamma$
      \begin{itemize}
      \item such a clause specifies a \textbf{path-guarantee}
      \end{itemize}
  \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}{Restrictions For Simplicity}
\begin{itemize}
  \item[1] \acslgamma{$p$}{$q$}{$r$} \vspace{.1cm}
  \item[2] \acslguarantee{$p$}{$q$}{$r$} \vspace{.1cm}
  \item[3] \acslwaitsfor{$p$}{$q$}
\item[]
\item kind 1 is only used to assume the environment (\texttt{requires} clause)
\item kind 2 \& 3 are only used to express the behavior (\texttt{ensures} clause)
\item for a path-requirement or -guarantee, it can only have the form:
\item[] $e \texttt{ ==> } \gamma_0 \texttt{ \&\& } \ldots{} \gamma_{n}$
\item evaluates to the conjunct of a absence assertion set
\end{itemize}
\end{frame}

\begin{frame}[t]{Collective Triples: An Abstract Representation}
\vspace{.6cm}  
\only<2>{\centering $\cotriplenoloc$  is valid} 

\only<1>{
\centering{$\cotriplenoloc$}
\begin{itemize}
\item[] a collective triple represents a procedure with a contract
\item $C$ is a procedure
\item $\psi$ is a state-requirement
\item $\phi$ is a state-guarantee
\item $\Gamma$ is a path-requirement
\item $\Upsilon$ is a path-guarantee
\end{itemize}
}
\only<2>{
\begin{itemize}
\item from a pre-state of $C$ where
  \begin{itemize}
  \item[] message channels are empty and
  \item[] $\psi$ holds for all processes,
  \end{itemize}
\item[] if the execution $\pi$ reaches a post-state corresponding to the pre-state
  \begin{itemize}
  \item[] message channels stay empty
  \item[] $\phi$ holds for all processes.
  \end{itemize}
\item $\pi$ satisfies $\Upsilon$
%\item[]
\item Let $C'$ be any procedure, for $CC'$, from the same pre-state,
\item[] if the execution $\zeta$ reaches some state, and
\item[] $C$ is interfered by $C'$ along $\zeta$, $\zeta$ fails to satisfy $\Gamma$.
  
\end{itemize}
}
\end{frame}


\begin{frame}[t]{Fixing The Contract of ``gather''}
  \centering
  \includegraphics[scale=.5]{../images/minimpGatherFix}
\end{frame}


%\subsection{Verification}
%% VERIFICATION
\begin{frame}[t]{Inference Rules}
\begin{itemize}
\item to reason about collective triples, 4 inference rules are defined
\item side conditions are assertions over contracts, mainly
  absence assertions
\item[] \textbf{\footnotesize preliminary:}
\item rules are defined for pre- and post-states
\begin{itemize}
  \item executions containing pre- and post-states
  \item e.g.\ $\pi_0 \circ{} \pi_1$ for procedure sequence $C_0C_1$, where
  \item[] $\pi_0$ starts from pre-state of $C_0$ ends with post-state of $C_0$
  \item[] similar for $\pi_1$
  \item for executions where there are unique states for contract evaluation
\end{itemize}

\item ``set-like'' operations:  $\in_\pi$ \& $\subseteq_\pi$, where $\pi$ is an execution
\begin{itemize}
  \item[-] let $\lambda$: an absence assertion
  \item[-] let $\Gamma$ and $\Upsilon$: $e \texttt{ ==>} \lambda_0 \texttt{ \&\& } \ldots{} \texttt{ \&\& }\lambda_{m-1}$
  \item[-] let $\myeval{e}_\pi$: the expression where non-constants are evaluated on proper states in $\pi$
  \item[-] we have
  \item[] $\lambda \in_\pi \Gamma$ iff $\myeval{e}_\pi = T \wedge{} \exists{i.} 0 \leq i < m \wedge{} \lambda = \lambda_i$
  \item[] $\Upsilon \subseteq_\pi \Gamma$ iff $\forall{\myeval{\upsilon}_\pi \in_\pi \Upsilon}. \upsilon \in_\pi \Gamma$ 
\end{itemize}
\end{itemize}
\end{frame}

\begin{frame}[t]{Inference Rules: The Sequence Rule}
  \begin{align}
    & \frac{
      \mycosubtriple{0}{\psi}{\mu}{C_0},\,\,
      \mycosubtriple{1}{\mu}{\phi}{{C_1}}
      }
      {
      \cotriplearg{C_0C_1}
      } ~~~\textit{(sequence)}\nonumber \\
    \text{if } & \text{for every execution } \pi_0 \circ{} \pi_1 \text{ of }
                 C_0C_1 \text{ s.t.\ } \nonumber \\
    \pi_0 &\text{ is an execution of } C_0 \text{
    and } \pi_1 \text{ is an execution of } C_1, \nonumber \\
    \text{1. }& \Upsilon_1~\ruleguarantee_{\pi_0 \circ{} \pi_1}~\Gamma_0, \nonumber \\
    \text{2. }&  \myruleinfer{\Upsilon_0}{\Upsilon_1}{\nosendsubset(\Upsilon)}{\pi_0 \circ{} \pi_1},    \nonumber \\
    \text{3. }& \Upsilon \subseteq_{\pi_0 \circ{} \pi_1} \Upsilon_0 \wedge{}  \Upsilon_1,
                \text{ and}\nonumber \\
    \text{4. }& \{\gamma \in \Gamma_0 \mid \neg{}(\Upsilon_1~\rulecancels_{\pi_0 \circ{} \pi_1}~\gamma)\} \cup \Gamma_1~\subseteq\
_{\pi_0 \circ{} \pi_1}~\Gamma. \nonumber
  \end{align}
\end{frame}

\begin{frame}[t]{Inference Rules: The Sequence Rule}
  \[
    \frac{
      \mycosubtriple{0}{\psi}{\mu}{C_0},\,\,
      \mycosubtriple{1}{\mu}{\phi}{{C_1}}
      }
      {
      \cotriplearg{C_0C_1}
      } ~~~\textit{(sequence)}
  \]
  \begin{itemize}
\only<1>{
  \item[1] $\Upsilon_1~\ruleguarantee_{\pi_0\pi_1}~\Gamma_0$ iff
    for every
    \[
    \texttt{\acslgamma{$p$}{$q$}{$r$}} \in_{\pi_0\pi_1} \Gamma_0
    \]
    there is
    \[
    \texttt{\acslguarantee{$p$}{$q$}{$r$}} \in_{\pi_0\pi_1} \Upsilon_1
    \]
  \item means that the behavior of $C_1$ satisfies the path-requirement of $C_0$    
}
\only<2>{
\item[2] $\myruleinfer{\Upsilon_0}{\Upsilon_1}{\nosendsubset(\Upsilon)}{\pi_0\pi_1}$, where
\item[] $\nosendsubset(\Upsilon)$ is obtained from $\Upsilon$ by only keeping absence assertions of the form
  \texttt{\small\acslguarantee{$p$}{$q$}{$r$}}
\item means that for every
  \[
  \texttt{\small\acslguarantee{$p$}{$q$}{$r$}} \in_{\pi_0\pi_1} \Upsilon
  \]
  $C_0$ guarantees the same condition, and $C_1$ also needs to
  guarantee that there is no send from $p$ to $q$ before some process
  $x$ enter $C_1$ such that $x$ enters $C_1$ implies $r$ enters $C_0$.
}
\only<3>{
  \item[3] $\Upsilon \subseteq_{\pi_0\pi_1} \Upsilon_0 \wedge{} \Upsilon_1$
  \item means that $C_0C_1$ cannot guarantee what $C_0$ or $C_1$ does
    not guarantee 
  \item \texttt{\acslguarantee{$p$}{$q$}{$r$}} guaranteed by $C_0C_1$ must also be guaranteed by $C_0$
  \item \texttt{\acslwaitsfor{$p$}{$q$}}, i.e.,\\
    $p$ waits for $q$, 
    guaranteed by $C_0C_1$ must be the case that $p$ waits for $q$ in either $C_0$ or $C_1$
}
\only<4>{
\item $\{\gamma \in_\pi \Gamma_0 \mid \neg{}(\Upsilon_1~\rulecancels_{\pi_0 \circ{} \pi_1}~\gamma)\} \cup \Gamma_1~\subseteq\
_{\pi_0 \circ{} \pi_1}~\Gamma$

\item means in general that $C_0C_1$ shall have a no weaker 
  path-requirement than $C_0$ or $C_1$
\item but there are cases that $\Upsilon_1$ can cancel a path-requirement of $C_0$ for $C_0C_1$, e.g.
  \[
  \texttt{\acslgamma{$p$}{$q$}{$r$}}, \text{ and}
  \]
  \[
  \texttt{\acslwaitsfor{$p$}{$r$}} \in_{\pi_0\pi_1} \Upsilon_1
  \]  
}
  \end{itemize}
\end{frame}

\begin{frame}[t]{Inference Rules: The Consequence Rule}
  \begin{align}
      &\frac{
      \cocond{\psi', \Gamma'}~C~\cocond{\phi', \Upsilon'}
      }
      {
      \cocond{\psi, \Gamma}~C~\cocond{\phi, \Upsilon}
    } ~~~ \textit{(consequence)}  \nonumber \\
       \text{if for every } & \text{execution } \pi \text{ of } C,~ %\nonumber \\                                                 
       \Gamma' \subseteq_{\pi} \Gamma~\wedge{}~\Upsilon \subseteq_\pi \Upsilon'
        ~\wedge{}~\psi{} \Rightarrow \psi' ~\wedge{}~\phi' \Rightarrow \phi
        \nonumber
  \end{align}
  \begin{itemize}
  \item[] means that one can freely 
  \item strengthen the state- or path-requirement, or
  \item weaken the state- or path-guarantee,
  \item the validity of a collective triple will preserve
  \end{itemize}
\end{frame}

\begin{frame}[t]{Inference Rules: The Loop Rule}
  \begin{align}
    & \frac{
    \cocond{\mymath{inv} \wedge{} c,\, \Gamma}~C~
    \cocond{\mymath{inv},\, \Upsilon}
    }{
    \cocond{\mymath{inv},\,\Gamma{}}~ \texttt{while(}c\texttt{)}
    ~C ~\cocond{\neg{}c \wedge{} \mymath{inv},\, \Upsilon{}}
      } ~~~ \textit{(collective loop)} \nonumber \\
    & \text{if for every execution } \pi \text{ of } C,
      \Upsilon{}~\ruleguarantee_\pi~\Gamma
      \nonumber
  \end{align}
  \begin{itemize}
  \item the loop rule can be considered as a sequence of applications
    of the sequence rule to the unrolled loop $CC\ldots{}$
  \item the side conditions 2-4 hold for this special case that
    \begin{itemize}
    \item[] $\Upsilon_0 = \Upsilon_1 = \Upsilon$ and $\Gamma_0 = \Gamma_1 = \Gamma$ 
    \end{itemize}
  \end{itemize}
\end{frame}


\begin{frame}[t]{Example}
\centering
\vspace{-.2cm}
  \includegraphics[scale=.63]{../images/derivation}
\end{frame}

\begin{frame}{Incompleteness of the Inference Rules}
  \begin{itemize}
  \item The rules are incomplete, i.e., there are collective triples
    whose validity cannot be derived by only using these rules.
  
  \item For a collective triple specifying an ``undividable''
    procedure, we use \textbf{model checking \& symbolic execution} to
    verify its validity
  \end{itemize}
\centering
\Large{
$\frac{\overrightarrow{C}\subseteq{}\overrightarrow{C_0C_1},\, \cocond{\psi, \Gamma}C_0C_1\cocond{\phi, \Upsilon}}
    {\cotriplenoloc}$
}
\vspace{.5cm}
\normalsize
\begin{itemize}
\item $\overrightarrow{C}\subseteq{}\overrightarrow{C_0C_1}$: every execution of $C$, there is an equivalent one of $C_0C_1$
\end{itemize}
\vspace{.5cm}
\begin{programSuperSmall}
  if ($e$) \lb{}\emph{gather}; \emph{bcast};\rb\\
  else \lb{}\emph{gather}; \emph{bcast};\rb{}
\end{programSuperSmall}
\end{frame}

\begin{frame}[t]{Model Checking \& Symbolic Execution}
  For $\cotriplenoloc{}$,
  \begin{itemize}
  \item a model checking \& symbolic execution approach, denoted
    $\mcalgo$ 
  \item[] exhaustively explores all
    possible behaviors of $C$ from an arbitrary pre-state

  \item the modular approach, denoted $\mcalgo^{\Delta}$
  \item[] infers the behavior of a sub-procedure $C'$ of $C$ only from
    its contract, if there is a collective triple for $C'$ assumed valid
  \item[]
  \item there is one \textbf{problem} for $\mcalgo^\Delta$:
    \begin{itemize}
    \item for an execution searched by $\mcalgo^\Delta$, 
      a pre- and post-state of $C'$ may not exist
    \end{itemize}    
  \end{itemize}
\end{frame}

\begin{frame}[t]{Model Checking \& Symbolic Execution}
  \begin{itemize}
  \item the problem was solved with \textbf{collective state}
  \item a collective state is associated with an execution $\pi$ and a
    procedure $C$
  \item[]
  \item[] let $s_p$ be the state in $\pi$ where process $p$ enters $C$
  \item a \textbf{collective pre-state} comprises
      \begin{itemize}
      \item a process state for each process $p$ that is equivalent to the one of $p$ in $s_p$
        
      \item the path condition that is the conjunct of the path
        conditions of states in $\{s_p \mid p \in \text{all processes} \}$
        
      \item the message channels that is computed with an algorithm
      \end{itemize}
    \item similar for \textbf{collective post-state}

    \item for each executed instance of $C'$, collective states are unique,
      so $\in_\pi$ and $\subseteq_\pi$ are still applicable
  \end{itemize}
\end{frame}

\begin{frame}[t]{Model Checking \& Symbolic Execution}
  \only<1>{
    \centering
    \includegraphics[scale=.4]{../images/msgchanintuition}
    \begin{itemize}
    \item the value of the message channel in a collective pre-state
      is same as the state where all processes hit a barrier imposed
      before $C$

    \item a process cannot modify the message channel once it hits the barrier
    \end{itemize}
  }
  \only<2>{
    \centering
    \includegraphics[scale=.4]{../images/msgchanalgo}
  }
  \only<3>{
    \centering
    \includegraphics[scale=.4]{../images/msgchanalgo2}
  }
  \only<2->{
    \begin{itemize}
    \item the case without a barrier is analogous
    \item taking a copy of the message channel from the state where the first process reaches the entry of $C$
    \item a process cannot modify the copy once it reaches the entry
    \item processes that have not reached the entry can modify the copy with the communication statements they execute
    \end{itemize}
  }
\end{frame}

\begin{frame}[t]{Model Checking \& Symbolic Execution: Inference}
  \begin{itemize}
\item \textbf{context:} When verifying $\cotriplenoloc$ with $n$ processes, 
\item[] $C'$ is a sub-procedure of $C$ s.t.\ $\cocond{\psi',
  \Gamma'}~C'~\cocond{\phi', \Upsilon'}$ is assumed,
\item[] for every explored execution $\pi$:
  \item[] \textbf{state predicate inference:}
  \item check that $\psi'$ holds for all processes at the collective
    pre-state of $C'$
  \item assume that $\phi'$ holds for all processes at the collective
    post-state of $C'$\vspace{.1cm}

  \item[] \textbf{path predicate inference:} 
    \item synchronization for the ``waits-for'' assertion

  \item[] \textbf{path predicate verification:} 
  \item search for defined
    cases representing path predicate violations and interferences
  \item assuming the worst case for $C'$ if an assertion is not guaranteed
  \end{itemize}
\end{frame}

\begin{frame}[t]{Model Checking \& Symbolic Execution}
\small{
\begin{itemize}
\item \textbf{context:} When verifying $\cotriplenoloc$ with $n$ processes, 
\item[] $C'$ is a sub-procedure of $C$ s.t.\ $\cocond{\psi',
  \Gamma'}~C'~\cocond{\phi', \Upsilon'}$ is assumed,
\item[] for every explored execution $\pi$:
\end{itemize}
}
\only<1>{
\begin{itemize}
\item[1] a \textbf{defined case} for a path containing $\Gamma'$ violation:
\begin{align}
&\exists{p, q, x, y \in \nprocs{n}}. \nonumber \\
&\exists{}\texttt{\acslgamma{$p$}{$q$}{$x$}} \in_\pi \Gamma'. \nonumber\\
&\neg\exists{}\texttt{\acslgamma{$p$}{$q$}{$y$}} \in_\pi \Gamma. \nonumber\\
&\text{$p$ exits $C$ before $x$ exits $C'$ on $\pi$} \wedge
\text{$x$ exits $C'$ before $y$ exits $C$ on $\pi$} \nonumber
\end{align}
\textbf{intuition:} $C'$ can be interfered in an ``actual'' execution
due to the weakness of $\Gamma$ } 
\only<2>{
\begin{itemize}
\item[2] a \textbf{defined case} for a vulnerable state $s$ to an
  interference:
\begin{align}
  & \exists{p, q, r \in \nprocs{n}}.\ \nonumber \\
  & \text{channel($p$, $q$) is empty} \wedge{} \nonumber\\
  & \text{recv($p$, $q$) is enabled if channel($p$, $q$) is nonempty} \wedge{} \nonumber\\
  & \text{process $p$ has completed $C$} \nonumber \\
  & \neg{}\exists{} \texttt{\acslgamma{$p$}{$q$}{$r$}} \in_\pi \Gamma.\,\nonumber\\
  & \text{$r$ has completed $C$} \nonumber
\end{align}
\textbf{intuition:} if $p$ sends a message to $q$ after $s$, which is allowed by $\Gamma$, an interference is enabled.
}

\end{itemize}
\end{frame}

\begin{frame}[t]{Concluding Theory Part}
  \begin{itemize}
  \item collective triple
    \item an \textbf{inference system} comprising 4 rules
  \begin{itemize}
    \item soundness proof is given
  \end{itemize}
    \item developed a \textbf{Model Checking \& Symbolic Execution}
      based verification approach for \minimp \& \minispec{}
  \begin{itemize}
  \item the approach is composite and modular
    \item the soundness of $\mcalgo^{\Delta}$ is proved
      w.r.t.\ to the inference system.      
  \end{itemize}
  \end{itemize}
\end{frame}

\begin{comment}
\begin{frame}[t]{Relating $\mcalgo^{\Delta}$ with The Inference System}
  \small{ 
    \begin{itemize}
    \item For verifying $\cotriplenoloc{}$ with $\mcalgo^{\Delta}$
    \item[] Let $\{C_0, C_1, \ldots{}\}$ be a set of
        procedures specified by assumed collective triples
    \item[] Let $\{S_0, S_1, \ldots{}\}$ be a set of procedures that
        disjoint $\{C_0, C_1, \ldots{}\}$.  No sub-procedure of any
        $S_i, i \geq 0$ is in $\{C_0, C_1, \ldots{}\}$.

      \item[] Since collective-style procedures must be
        called collectively, we have either 
        \[C =
        S_0C_1S_1C_1\ldots{}S_{m-1}C_{m-1}, m \geq 0
        \], or
        \[\overrightarrow{C} \subseteq
        \overrightarrow{S_0C_1S_1C_1\ldots{}S_{m-1}C_{m-1}}, m \geq
        0\]
      \item[] In either case, we prove 
        \begin{align}
        &\model \models \cocond{\psi,
          \Gamma}~S_0C_1S_1C_1\ldots{}S_{m-1}C_{m-1}~\cocond{\phi,
            \Upsilon} \text{ iff} \nonumber \\
        &\mcalgo^{\Delta}(\model, \cocond{\psi,
          \Gamma}~S_0C_1S_1C_1\ldots{}S_{m-1}C_{m-1}~\cocond{\phi,
          \Upsilon}, n) \text{ for every positive } n \nonumber 
          \end{align}
    \end{itemize}
      }
\end{frame}

\begin{frame}[t]{Relating $\mcalgo^{\Delta}$ with The Inference System}
\small{ 
\textbf{lemma 1}
\begin{align}
  & \mcalgo^\Delta(\model, \cocond{\psi, \Gamma}~XC_i~\cocond{\phi,
    \Upsilon}, n), \text{ iff} \nonumber  \\
  & \text{there exists a contract for } X \text{ s.t. } 
\mcalgo^\Delta(\model, \cocond{\psi_x, \Gamma_x}~X~\cocond{\phi_x,
    \Upsilon_x}, n) \nonumber \\
  &  \text{side conditions for } 
\frac{\cocond{\psi_x, \Gamma_x}~X~\cocond{\phi_x,
    \Upsilon_x}, \cocond{\psi_i, \Gamma_i}~C_i~\cocond{\phi_i,
    \Upsilon_i}}{\cocond{\psi, \Gamma}~XC_i~\cocond{\phi,
    \Upsilon}}
     \text{ hold}\nonumber
\end{align}

\textbf{lemma 2}
\begin{align}
  & \mcalgo^\Delta(\model, \cocond{\psi, \Gamma}~XC_iS_{i+1}~\cocond{\phi,
    \Upsilon}, n), \text{ iff} \nonumber  \\
  & \text{there exists a contract for } XC_i \text{ s.t. } \mcalgo^{\Delta}(\model, \cocond{\psi, \Gamma}~XC_i~\cocond{\phi,
    \Upsilon}, n) \nonumber \\
  & \text{there exists a contract for } S_{i+1} \text{ s.t. }  \mcalgo(\model, \cocond{\psi, \Gamma}~S_{i+1}~\cocond{\phi,
    \Upsilon}, n) \nonumber \\
  & \text{side conditions for }
\frac{\cocond{\psi, \Gamma}~XC_i~\cocond{\phi,
    \Upsilon}, \cocond{\psi, \Gamma}~S_{i+1}~\cocond{\phi,
    \Upsilon}}{\cocond{\psi, \Gamma}~XC_iS_{i+1}~\cocond{\phi,
    \Upsilon}}
\text{ hold} \nonumber
\end{align}
Repeatedly use lemma 1 \& 2 to our goal, the proof is converted to
prove the soundness of $\mcalgo$ and the sequence rule.
}
\end{frame}
\end{comment}

\section{Implementation \& Evaluation}
%% MiniMP to MPI
\begin{frame}{Developing Function-Based Contract System For MPI}
\begin{itemize}
\item multiple communication universes
\begin{itemize}
\item  contracts are sensitive to universes
\item   within a single universe the theory is applicable
\end{itemize}  
\item message tags
\begin{itemize}
\item absence assertions need to be extended with message tags
\end{itemize}  
\item API \& language complexity, e.g. void pointer, memory
  allocation, MPI type system
\begin{itemize}
\item CIVL framework[\textcolor{blue}{1}]
    \let\thefootnote\relax\footnotetext{\vspace{.5cm}[\textcolor{blue}{1}]\scriptsize 
          Z.\ Luo et al.,  \emph{Verification of MPI programs using CIVL.}}
 
\item implementation challenges, e.g.,
\begin{itemize}
\item a dereference-able \texttt{void} pointer with a type signature
  \texttt{MPI\U{}INT} and \texttt{count} is initialized by a reference to 
  a fresh new array of type \texttt{char[\textit{extentof}(MPI\U{}INT) * count]}
\end{itemize}
\end{itemize}  
\end{itemize}
\end{frame}

%\subsection{MPI Contract Language}
%% MPI Contract Language
\begin{frame}{The MPI Contract Language}
\begin{itemize}
\item Extended from ACSL
\item MPI specific constructs
\end{itemize}
\centering
\begin{programSmall}
\textcolor{red}{//\textit{local-behavior}}\\
\texttt{requires} $\ldots$\\
\texttt{ensures} $\ldots$\\
\BSL{}mpi\U{}collective(\textit{comm}, P2P|COL):\\
\ \ \textcolor{red}{//\textit{collective-behavior}}\\
\ \ \texttt{requires} $\ldots$\\
\ \ \texttt{ensures} $\ldots$\\
\end{programSmall}
\small{
\begin{itemize}
\item local behavior: pure ACSL construct, specify process-local properties
\item collective behavior: specify collective properties
\item \textit{comm}: MPI communicator
\item P2P: point-to-point communication
\item COL: collective communication
\end{itemize}
}
\end{frame}

\begin{frame}{The MPI Contract Language}
 \textbf{MPI specific:}
\small{
 \begin{itemize}
 \item \texttt{\BSL{}mpi\U{}comm\U{}rank}, \texttt{\BSL{}mpi\U{}comm\U{}size}
 \item \texttt{\BSL{}mpi\U{}extent}: extent of an \texttt{MPI\U{}Datatype}
 \item \texttt{\BSL{}mpi\U{}offset(ptr, count, datatype)}: pointer arithmic
 \item \texttt{\BSL{}mpi\U{}region(ptr, count, datatype)}: memory region 
 \item \texttt{\BSL{}mpi\U{}valid(ptr, count, datatype)}: asserting a pointer dereference-able
 \item \texttt{\BSL{}mpi\U{}equals(region0, region1)}: compares two memory region
 \item \texttt{\BSL{}mpi\U{}reduce(ptr, count, datatype, op)}: reduction over the regions of all processes
 \end{itemize}
}
\begin{minipage}{.65\textwidth}
\textbf{Absence Assertions:}
\small{
\begin{itemize}
\item \texttt{\BSL{}absentof} \textit{event} \texttt{\BSL{}after} \textit{event} \texttt{\BSL{}until} \textit{event}
\item \texttt{\BSL{}sendto(dest, tag)}, \texttt{\BSL{}sendfrom(src, tag)}
\item \texttt{\BSL{}enter(rank)}, \texttt{\BSL{}enter}, \texttt{\BSL{}exit(rank)}, \texttt{\BSL{}exit}
\end{itemize}
}
\end{minipage}
\begin{minipage}{.3\textwidth}
\textbf{Other:}
\begin{itemize}
\item \texttt{\BSL{}on(rank, expr)}
\end{itemize}
\end{minipage}
\end{frame}

%\subsection{Transformation}
%% MPI Contract System Impl
\begin{frame}[t]{Implementation: Source Code Transformation}
  \begin{center}
  \includegraphics[scale=.4]{../images/framework3}
  \end{center}
\small{
\begin{itemize}
\item \textbf{Transformation}: MPI $\rightarrow{}$ MPI
  \begin{itemize}
  \item for a function $f$ with a contract,
    \begin{itemize}
    \item[] $f$\texttt{\U{}origin}: original definition of $f$
    \item[] $f$\texttt{\U{}driver}: verifies $f$ w.r.t.\ the contract
    \item[] $f$:    infers the behavior of $f$ w.r.t.\ the contract
    \end{itemize}
  \end{itemize}
\item  \textbf{Helper Library}: a \texttt{collate} library for collective state construction
  \begin{itemize}
  \item an MPI communicator is associated with a queue of ``collates''
  \end{itemize}
\item \textbf{POR}: make send, recv, enter, exit \textbf{visible} conditionally
\end{itemize}
}
\end{frame}

\begin{frame}{Transformation Template}
  \only<1>{
    \centering
  \begin{programSuperSmall}
    T3 g, ...\ ; \\
    \mycomment{/*@ \ \BSL{}mpi\U{}collective(comm, $\mymath{kind}$):}\\
    \mycomment{\ \ \ \ \ \ requires $\Gamma$;}\\
    \mycomment{\ \ \ \ \ \ requires $\psi$;}\\
    \mycomment{\ \ \ \ \ \ assigns $\Delta$;}\\
    \mycomment{\ \ \ \ \ \ ensures $\phi$;}\\
    \mycomment{\ \ \ \ \ \ ensures $\Upsilon$; */}\\
    T f(T1 a, T2 * b, ...\ ) \lb{} 
     ...\ 
    \rb{}
  \end{programSuperSmall}
  }
\only<2>{  
  \begin{programSuperSmall}
    \ccode{void} f\U{}driver() \lb{} \\
    \ \ \ccode{int} \$mpi\U{}comm\U{}rank, \$mpi\U{}comm\U{}size; MPI\U{}Comm comm; \\
    \ \ \textcolor{red}{// init rank, size, copies MPI\U{}COMM\U{}WORLD to comm} \\
    \ \ T1 a, T2 *b, T2 b\U{}obj ...\ ; \\
    \ \ \$havoc(\&g); \$havoc(\&a); \$havoc(\&b); \$havoc(\&b\U{}obj);
    ...\; \\
     \ \ MPI\U{}Barrier(comm); \\
     \ \ \$assume($\psi$); \\
     \ \ MPI\U{}Barrier(comm); \\
    \ \ \$collate\U{}state \U{}cs\U{}pre = \$mpi\U{}snapshot(comm,\$here,\$true,\$true,$\Gamma$,$\Upsilon$,''f'');\\    \\
    \ \ \$write\U{}set\U{}push(); \\
    \ \ T \$result = f\U{}origin(comm, a, b, ...\ );\\
    \ \ \$mem m = \$write\U{}set\U{}pop();\\
    \ \ \$assert(\$mem\U{}contains($\Delta$, m));\\
    \\
    \ \ \$collate\U{}state \U{}cs\U{}post = \$mpi\U{}snapshot(comm,\$here,\$false,\$true,$\Gamma$,$\Upsilon$,''f''); \\
    \ \ MPI\U{}Barrier(comm);\\
    \ \ \$state* \U{}s\U{}pre = \$collate\U{}get\U{}state(\U{}cs\U{}pre);\\
    \ \ \$assert($\phi$ \&\& \&\& \$comm\U{}empty); \\
    \ \ MPI\U{}Barrier(comm);\\
    \ \ $\ldots{}$ \\
    \rb{}
  \end{programSuperSmall}
}
\only<3>{
  \begin{programSuperSmall}
    T3 g, ...\ ;\\
    T f(MPI\U{}Comm comm, T1 a, T2 *b, ...\ ) \lb{} \\
    \ \ \ccode{int} \$mpi\U{}comm\U{}rank, \$mpi\U{}comm\U{}size;\\
    \ \ \textcolor{red}{// init rank, size}\\
  \ \ \$collate\U{}state \U{}cs\U{}pre = \$mpi\U{}snapshot(comm,\$here,\$true,\$false,$\Gamma$,$\Upsilon$,''f''); \\
  \ \ \$state* \U{}s\U{}pre =
  \$collate\U{}get\U{}state(\U{}cs\U{}pre); \\
  \ \ \$run \\
  \ \ \ \ \$when (\$collate\U{}complete(\U{}cs\U{}pre))  \\
  \ \ \ \ \ \  \$with (*\U{}s\U{}pre) \$assert($\psi$ \&\& \$comm\U{}empty); \\
  \\
  \ \ T \$result; \\
  \ \  $\mymath{\forall{d \in \Delta}.\, }$
  \$havoc(\&$d$);\\
  \ \ \$havoc(\&\$result);\\
\\
  \ \ \$collate\U{}state \U{}cs\U{}post = \$mpi\U{}snapshot(comm,\$here,\$false,\$false,$\Gamma$, $\Upsilon$,''f'');\\
  \ \ \$state* \U{}s\U{}post =
  \$collate\U{}get\U{}state(\U{}cs\U{}post); \\
  \ \ \$run \\
  \ \ \ \ \$when (\$collate\U{}complete(\U{}cs\U{}post)) \\
  \ \ \ \ \ \  \$with (*\U{}s\U{}post) \$assume($\phi$);\\
  \ \ \$when(\$collate\U{}arrived(\U{}cs\U{}pre, $\mymath{p}$)); \\
  \ \ $\ldots$\\
\rb{}
\end{programSuperSmall}
}
\end{frame}

%\subsection{Evaluation}
%% Evaluation
\begin{frame}{Evaluation}
  Experiments are done in two parts:
  \begin{itemize}
  \item Part I: assuming interference-free
    \begin{itemize}
    \item stay with CIVL's POR algorithm
    \item better scalability (more than 6 processes)
    \item examples include CIVL's MPI collective impl, advanced MPI collective algorithm impl and
      MPI applications (e.g., diffusion, odd-even parallel sorting)
    \end{itemize}
  \item Part II: no specific assumption
    \begin{itemize}
    \item enable the simple POR algorithm for contracts
    \item absence assertions are verified 
    \item state explosion (up to 3 processes)
    \item examples using ``wildcard gather''
    \end{itemize}
  \end{itemize}
  Every example has multiple negative versions that expose specific contract violations.
\end{frame}

\begin{frame}{Evaluation}
% wildcard example

\only<1>{
\begin{programSuperSmall}
\mycomment{/*@ \BSL{}mpi\U{}collective(comm, P2P):}\\
\mycomment{  @\ \  requires \BSL{}mpi\U{}agree(sendcount * \BSL{}mpi\U{}extent(sendtype)) \&\& $\ldots$;}\\
\mycomment{  @\ \  ensures \underline{\BSL{}absentof \BSL{}exit \BSL{}after \BSL{}enter \BSL{}until \BSL{}enter(0 .. NPROCS-1);}} \\
\mycomment{  @\ \  behavior not\U{}inplace:}\\
\mycomment{  @\ \ \ \  assumes sendbuf != MPI\U{}IN\U{}PLACE;}\\
\mycomment{  @\ \ \ \  requires \BSL{}mpi\U{}valid(sendbuf, sendcount, sendtype);}\\
\mycomment{  @\ \ \ \  ensures \BSL{}mpi\U{}equals(\BSL{}mpi\U{}region(sendbuf, sendcount, sendtype),}\\
\mycomment{  @\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \      \BSL{}mpi\U{}region(}\\
\mycomment{  @\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \  \BSL{}mpi\U{}offset(recvbuf, \BSL{}mpi\U{}comm\U{}rank * recvcount, recvtype),}\\
\mycomment{  @\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \  recvcount, recvtype));}\\
\mycomment{  @\ \    behavior inplace:}\\
\mycomment{  @\ \ \ \   assumes sendbuf == MPI\U{}IN\U{}PLACE;}\\
\mycomment{  @\ \ \ \   requires $\ldots$  @*/} \\
\ccode{int} allgather(\ccode{void} *sendbuf, \ccode{int} sendcount, MPI\U{}Datatype sendtype,\\
\ \ \ \ \ \ \ \ \     \ccode{void} *recvbuf, \ccode{int} recvcount, MPI\U{}Datatype recvtype,\\
\ \ \ \ \ \ \ \ \      MPI\U{}Comm comm) \lb{}\\
\mycomment{// gather($\ldots$)} \\
\mycomment{// bcast($\ldots$)}\\
\rb{}
\end{programSuperSmall}
}

\only<2>{
\begin{programSuperSmall}
\mycomment{/*@ \BSL{}mpi\U{}collective(comm, P2P) :}\\
\mycomment{\ \  requires \BSL{}mpi\U{}agree(root) \&\& $\ldots$;}\\
\mycomment{\ \  behavior imroot\U{}not\U{}inplace:} \\
\mycomment{\ \ \ \        assumes  \BSL{}mpi\U{}comm\U{}rank == root \&\& sendbuf != MPI\U{}IN\U{}PLACE;}\\
\mycomment{\ \ \ \        requires $\ldots{}$} \\
\mycomment{\ \ \ \        requires \underline{\BSL{}absentof \BSL{}sendfrom(0 ..\ NPROCS-1, GATHER\U{}TAG) \BSL{}after}} \\
\mycomment{\ \ \ \ \ \ \ \ \ \ \ \ \ \underline{\BSL{}exit(0 ..\ NPROCS-1) \BSL{}until \BSL{}exit;}}\\
\mycomment{\ \ \ \        ensures  $\ldots$}\\
\mycomment{\ \ \ \        ensures  \underline{\BSL{}absentof \BSL{}exit \BSL{}after \BSL{}enter \BSL{}until \BSL{}enter(0 ..\ NPROCS-1);}}\\
\mycomment{\ \       behavior imroot\U{}inplace:}\\
\mycomment{\ \ \ \        assumes  \BSL{}mpi\U{}comm\U{}rank == root \&\& sendbuf == MPI\U{}IN\U{}PLACE;}\\
\mycomment{\ \ \ \        requires $\ldots$}\\ 
\mycomment{\ \ \ \        requires \underline{\BSL{}absentof \BSL{}sendfrom(0 ..\ NPROCS-1, GATHER\U{}TAG) \BSL{}after}}\\
\mycomment{\ \ \ \ \ \ \ \ \ \ \ \ \  \underline{\BSL{}exit(0 ..\ NPROCS-1) \BSL{}until \BSL{}exit;}}\\
\mycomment{\ \ \ \        ensures  $\ldots$}\\
\mycomment{\ \ \ \        ensures  \underline{\BSL{}absentof \BSL{}exit \BSL{}after \BSL{}enter \BSL{}until \BSL{}enter(0 ..\ NPROCS-1);}}\\
\mycomment{\ \      behavior not\U{}root:} \\
\mycomment{\ \ \ \       assumes  \BSL{}mpi\U{}comm\U{}rank != root;}\\
\mycomment{\ \ \ \       requires $\ldots$}\\
\mycomment{\ \ \ \       ensures $\ldots$; */}\\
\ccode{int} gather(\ccode{void}* sendbuf, \ccode{int} sendcount, MPI\U{}Datatype sendtype, \\
\ \ \ \ \ \ \ \ \ \ \ \ccode{void}* recvbuf, \ccode{int} recvcount, MPI\U{}Datatype recvtype,\\
\ \ \ \ \ \ \ \ \ \ \ \ccode{int} root, MPI\U{}Comm comm);\\
\end{programSuperSmall}
}

\only<3>{
\begin{programSuperSmall}
\mycomment{/*@ \BSL{}mpi\U{}collective(comm, P2P):}\\
\mycomment{ @ \ \   requires 0 <= root \&\& root < NPROCS \&\& \BSL{}mpi\U{}agree(root)\&\&$\ldots$}\\
\mycomment{ @ \ \   ensures  \BSL{}mpi\U{}agree(\BSL{}mpi\U{}region(buf, count, datatype));}\\
\mycomment{ @ \ \   ensures  \underline{\BSL{}absentof \BSL{}sendto(root, BCAST\U{}TAG) \BSL{}after \BSL{}enter \BSL{}until \BSL{}enter(root);}}\\
\mycomment{ @ \ \   behavior nonroot:}\\
\mycomment{ @ \ \ \ \  assumes \BSL{}mpi\U{}comm\U{}rank != root;} \\
\mycomment{ @ \ \ \ \  assigns \BSL{}mpi\U{}region(buf, count, datatype);}\\
\mycomment{ @ \ \ \ \  ensures \underline{\BSL{}absentof \BSL{}exit \BSL{}after \BSL{}enter \BSL{}until \BSL{}enter(root);}}\\
\mycomment{ @ \ \   behavior root:}\\
\mycomment{ @ \ \ \ \   assumes \BSL{}mpi\U{}comm\U{}rank == root;}\\
\mycomment{ @ \ \ \ \   assigns \BSL{}nothing;*/}\\
\ccode{int} bcast(\ccode{void} * buf, \ccode{int} count, MPI\U{}Datatype datatype, \ccode{int} root, MPI\U{}Comm comm);
\end{programSuperSmall}
}
\end{frame}

\begin{frame}{\texttt{dotProd} from hypre[\textcolor{blue}{1}]
    \let\thefootnote\relax\footnotetext{\vspace{.5cm}[\textcolor{blue}{1}]\scriptsize 
      R.\ D.\ Falgout and U.\ M.\ Yang. \emph{hypre: A Library of High Performance Preconditioners.}}}
\only<1>{
\begin{programSuperSmall}
    \ccode{\#define} PAR\U{}SIZE
    ((x->local\U{}vector->size)*(x->local\U{}vector->num\U{}vectors)) \\
\mycomment{/*@ \BSL{}mpi\U{}collective(hypre\U{}ParVectorComm(x), COL):}\\
\mycomment{\ \ \ \   requires \BSL{}valid(x) \&\& \BSL{}valid(y) \&\& 0 < PAR\U{}SIZE  \&\&}\\
\mycomment{\ \ \ \ \ \ \ \ \ \ \ \ \   \BSL{}valid(x->local\U{}vector) \&\& \BSL{}valid(y->local\U{}vector) \&\&}\\
\mycomment{\ \ \ \ \ \ \ \ \ \ \ \ \ \BSL{}valid(x->local\U{}vector->data + (0 .. PAR\U{}SIZE-1))  \&\&}\\
\mycomment{\ \ \ \ \ \ \ \ \ \ \ \ \  \BSL{}valid(y->local\U{}vector->data + (0 .. PAR\U{}SIZE-1));}\\
\mycomment{\ \ \ \    assigns \BSL{}nothing;}\\
\mycomment{\ \ \ \    ensures \BSL{}result == \BSL{}sum(0,  NPROCS-1, \BSL{}lambda int k;}\\
\mycomment{\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \BSL{}on(k,  \BSL{}sum(0,  PAR\U{}SIZE-1, \BSL{}lambda int t;}\\
\mycomment{\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \   x->local\U{}vector->data[t] *
  y->local\U{}vector->data[t]))); */}\\
HYPRE\U{}Real hypre\U{}ParVectorInnerProd(hypre\U{}ParVector *x,
				     hypre\U{}ParVector *y) \lb{} \\
\ \  MPI\U{}Comm      comm    = hypre\U{}ParVectorComm(x);\\
\ \   hypre\U{}Vector *x\U{}local = hypre\U{}ParVectorLocalVector(x);\\
\ \   hypre\U{}Vector *y\U{}local = hypre\U{}ParVectorLocalVector(y);\\
\ \   HYPRE\U{}Real result = 0.0;\\
\ \   HYPRE\U{}Real local\U{}result = hypre\U{}SeqVectorInnerProd(x\U{}local, y\U{}local);\\    
\ \   hypre\U{}MPI\U{}Allreduce(\&local\U{}result, \&result, 1, HYPRE\U{}MPI\U{}REAL,\\
\ \ \ \ \ \ \ \ 		      hypre\U{}MPI\U{}SUM, comm);   \\
\ \  \ccode{return} result;\\
\rb{}
\end{programSuperSmall}
}
\only<2>{
\begin{programSuperSmall}
\ccode{\#define} SIZE ((x->size)*(x->num\U{}vectors))\\
\mycomment{/*@ requires \BSL{}valid(x) \&\& \BSL{}valid(y) \&\& 0 < SIZE}\\
\mycomment{\ \ \ \ \ \ \ \ \ \ \&\& \BSL{}valid(x->data + (0 .. SIZE-1))}\\
\mycomment{\ \ \ \ \ \ \ \ \ \ \&\& \BSL{}valid(y->data + (0 .. SIZE-1));}\\
\mycomment{\ \ \ \  assigns  \BSL{}nothing;}\\
\mycomment{\ \ \ \  ensures  \BSL{}result == \BSL{}sum(0, SIZE-1, \BSL{}lambda int j;
  y->data[j] * x->data[j]);}\\
\mycomment{*/}\\
HYPRE\U{}Real  hypre\U{}SeqVectorInnerProd(hypre\U{}Vector *x,
				      hypre\U{}Vector *y) \lb{}\\
\ \   HYPRE\U{}Complex *x\U{}data = hypre\U{}VectorData(x);\\
\ \   HYPRE\U{}Complex *y\U{}data = hypre\U{}VectorData(y);\\
\ \   HYPRE\U{}Int    i, size   = hypre\U{}VectorSize(x);  \\         
\ \   HYPRE\U{}Real     result = 0.0;\\
\ \   size *=hypre\U{}VectorNumVectors(x);\\
\ \ \mycomment{/*@ loop invariant 0 <= i <= size}\\
\ \ \mycomment{\ \ \ \ \ \ \ \ \ \ \ \ \&\& result == \BSL{}sum(0, i-1, \BSL{}lambda int j; y\U{}data[j] * x\U{}data[j]);}\\
\ \  \mycomment{\ \ \ \ loop assigns result, i; */}\\
\ \  \ccode{for} (i = 0; i < size; i++) \\
\ \ \ \  result += hypre\U{}conj(y\U{}data[i]) * x\U{}data[i]; \\
\ \  \ccode{return} result;\\
\rb{}
\end{programSuperSmall}
} 
\only<3>{
\begin{itemize}
\item realistic MPI program with complicated data structures
\item only MPI collective function is used, non-interference is guaranteed
\item nothing but \#procs is bounded 
\item verified with 6 processes in 213s, 174 prover
  calls, and 7, 973 states
\end{itemize}
}
\end{frame}

\begin{frame}{Odd-Even Parallel Sort[\textcolor{blue}{1}]
    \let\thefootnote\relax\footnotetext{\vspace{.5cm}[\textcolor{blue}{1}]\tiny
      Z.\ Luo and S.\ F.\ Siegel. \emph{Symbolic Execution and
        Deductive Verification Approaches to VerifyThis 2017
        Challenges.}}}
\only<2>{
\begin{itemize}
\item take one iteration of the sorting algorithm  
\item proved sortedness and permutation
\item by induction, the whole loop has same properties
\item focus on functional correctness, hide communication detail
\item nothing but \#procs is bounded 
\item verified with 6 processes in 19s, 12 prover calls and 34, 757 states
\end{itemize}
}

\only<1>{
\begin{programSuperSmall}
\mycomment{/*@ \BSL{}mpi\U{}collective(comm, P2P):}\\
\mycomment{\ \ requires \BSL{}mpi\U{}agree(i) \&\& 0 <= i < n \&\& id == RANK \&\& n == NPROCS;}\\
\mycomment{\ \ assigns  myvalue;}\\
\mycomment{\ \ ensures  (id \% 2 == 0 \&\& id < n-1 ==> myvalue <= \BSL{}on(id + 1, myvalue)) ||}\\
\mycomment{\ \ \ \  (id \% 2 == 1 \&\& id < n-1 ==> myvalue <= \BSL{}on(id + 1, myvalue));}\\
\mycomment{\ \ ensures  \BSL{}sum(0, n-1, \BSL{}lambda int t; \BSL{}on(t, myvalue) == myvalue ? 1 : 0) ==}\\
\mycomment{\ \ \ \  \BSL{}sum(0, n-1, \BSL{}lambda int t; \BSL{}old(\BSL{}on(t, myvalue)) == myvalue ? 1 : 0);}\\
\mycomment{ */}\\
\ccode{void} oddEvenParIter(\ccode{int} i) \lb{}\\
\ \  \ccode{T} othervalue; \\  
\ \  \ccode{if} ((i+id)\%2 == 0) \lb{}\\
\ \ \ \ \ccode{if} (id<n-1) \lb{} \\
\ \ \ \ \ \   MPI\U{}Send(\&myvalue, 1, MPI\U{}T, id+1, 0, comm);\\
\ \ \ \ \ \   MPI\U{}Recv(\&othervalue, 1, MPI\U{}T, id+1, 0, comm, MPI\U{}STATUS\U{}IGNORE);\\
\ \ \ \ \ \   \ccode{if} (othervalue < myvalue) myvalue = othervalue;\\
\ \ \ \  \rb{}\\
\ \  \rb{} \ccode{else if} (id>0) \lb{} \mycomment{// odd}\\
\ \ \ \      MPI\U{}Recv(\&othervalue, 1, MPI\U{}T, id-1, 0, comm, MPI\U{}STATUS\U{}IGNORE);\\
\ \ \ \      MPI\U{}Send(\&myvalue, 1, MPI\U{}T, id-1, 0, comm);\\
\ \ \ \      \ccode{if} (othervalue > myvalue)  myvalue = othervalue;\\
\ \ \rb{}\\
\rb{}
\end{programSuperSmall}
}
\end{frame}

\section{Conclusion \& Future Work}
\begin{frame}[t]{Conclusion}
\begin{itemize}
\item a theory of contracts for message-passing programs
\item a method for verifying contracts in a collective way
\item an MPI contract language
\item an prototype for contract-based MPI verification
\end{itemize}
\end{frame}

\begin{frame}{Future Work}
\small
\begin{itemize}
\item \textbf{one observation:} the verification of state predicates and path predicates can be separate
  \begin{itemize}
  \item assuming interference-free, the verification can be carried out by imposing barriers at collective procedures
  \item verifying absence assertions without checking post-conditions
  \end{itemize}
\item \textbf{other approaches for checking absence assertions:}
  \begin{itemize}
  \item POR is too simple; CIVL is not designed for checking path predicates
  \item use model checkers (e.g., Spin) that are good at verifying
    temporal logic for absence assertion verification
  \item analyze ``happens-before'' relation for every path explored by
    the symbolic execution engine (like MOPPER)
  \end{itemize}
\item \textbf{towards completeness:}
  \begin{itemize}
  \item implementing the loop rule
  \item using \emph{global invariants} (a preliminary work was done
    in [\textcolor{blue}{1}]
    \let\thefootnote\relax\footnotetext{\vspace{.5cm}[\textcolor{blue}{1}]\tiny
      Z.\ Luo and S.\ F.\ Siegel. \emph{Towards Deductive Verification
        of Message-Passing Parallel Programs.}})
  \end{itemize}
\end{itemize}
\end{frame}

\begin{frame}
\centering
\Large{Thank you!}
\end{frame}

\begin{frame}[t]{Sequentialization \& Global Invariant}
  \begin{itemize}
  \item a global invariant $\Phi$
    \begin{itemize}
    \item an assertion over global states
    \item provided by the user
    \end{itemize}
  \item a sequential program
    \begin{itemize}
    \item corresponds to a single generic process
    \item a \emph{reduction} (Lipton 1975) of the original message-passing program
    \begin{itemize}
    \item group statements into atomic blocks
    \end{itemize}
    \item with inserted calls to \texttt{interleave()}
      \begin{itemize}
      \item models the behavior of other processes
      \item changes global state arbitrarily
      \item preserves the global invariant
      \item partial correctness
      \end{itemize}
    \end{itemize}
  \end{itemize}
  {\emph{\textbf{Invariant-Preserving Projection (IPP):}\\}}
  {\ \
    IPP preserves $\Phi$ iff the original program preserves $\Phi$
  }
  {\\\emph{\textbf{To prove:}}\\
   \ \  If $\Phi$ holds initially, it will hold after each atomic block.
  }
\end{frame}

\begin{frame}{Experiment Part I-1}
\begin{scriptsize}
  \begin{tabular}{| p{5cm} | r | r | r | r | r |}
    \hline 
    name \& description & \#procs & \#states & \#trans & \#prover  & time(s)\\
    \hline
        \multirow{5}{5cm}{\texttt{bcast:}
    root sends in a roll and the rests do a receive, implemented in
    CIVL's MPI library
    } & 2 & 1, 121 & 3, 500 & 7 & 6 \\
                        & 3 & 3, 057 & 8, 235 & 14 & 8 \\
                        & 4 & 5, 805 & 15, 632 & 23 & 12 \\
                        & 5 & 9, 797 & 26, 608 & 34 & 17 \\
                        & 6 & 15, 545 & 42, 832 & 47 & 25 \\
    \hline
    \multirow{5}{5cm}{\texttt{gather:}
    root receives in a roll and the rests do one send, implemented
    in CIVL's MPI library
    } & 2 & 2, 608 & 7, 246 & 47 & 9 \\
                        & 3 & 6, 791 & 18, 226 & 102 & 16 \\
                        & 4 & 14, 788 & 38, 659 & 187 & 28  \\
                        & 5 & 31, 541 & 79, 997 & 297 & 54  \\
                        & 6 & 71, 850 & 175, 925 & 428 & 123  \\
    \hline
    \multirow{5}{5cm}{\texttt{gatherv:}
    root receives in a roll and the rests do one send, implemented
    in CIVL's MPI library
    } & 2 & 1, 266 & 3, 692 & 41 & 8\\
                        & 3 & 3, 053 & 8, 640 & 85 & 13\\
                        & 4 & 5, 852 & 16, 398 & 140 & 27\\
                        & 5 & 10, 121 & 28, 029 & 215 & 78\\
                        & 6 & 16, 612 & 45, 512 & 310 & 236\\
    \hline
    \multirow{5}{5cm}{\texttt{scatter:}
    root sends in a roll and the rests do one receive, implemented
    in CIVL's MPI library
    } & 2 & 2, 466 & 7, 007 & 43 & 8 \\
                        & 3 & 6, 332 & 17, 438 & 101 & 14 \\
                        & 4 & 13, 509 & 36, 428 & 188 & 27 \\
                        & 5 & 28, 067 & 73, 801 & 275 & 46 \\
                        & 6 & 62, 483 & 158, 481 & 386 & 108 \\
    \hline
  \end{tabular}
\end{scriptsize}
\end{frame}

\begin{frame}{Experiment Part I-2}
\begin{scriptsize}
  \begin{tabular}{| p{5cm} | r | r | r | r | r |}
    \hline 
    name \& description & \#procs & \#states & \#trans & \#prover  & time(s)\\
    \hline
    \multirow{5}{5cm}{\texttt{alltoall:}
    each proc performs \#procs sends and receives
    } & 2 & 786 & 2, 038 & 17 & 6\\
                        & 3 & 1, 382 & 3, 424 & 45 & 7\\
                        & 4 & 2, 153 & 5, 111 & 67 & 10\\
                        & 5 & 3, 114 & 7, 135 & 89 & 14 \\
                        & 6 & 4, 287 & 9, 532 & 115 & 19\\
    \hline
    \multirow{5}{5cm}{\texttt{alltoall2}
    each proc collectively calls \texttt{scatter} \#procs times,
    implemented in CIVL's MPI library
    } & 2 & 933 & 2, 986 & 19 & 7\\
                        & 3 & 1, 769 & 5, 905 & 32 & 10\\
                        & 4 & 3, 011 & 10, 351 & 66 & 15\\
                        & 5 & 4, 779 & 16, 972 & 84 & 31 \\
                        & 6 & 7, 174 & 26, 560 & 150 & 69\\
    \hline
    \multirow{5}{5cm}{\texttt{allgather:}
    each proc collectively calls \texttt{gather} then \texttt{bcast},
    implemented in CIVL's MPI library
    } & 2 & 2, 119 & 6, 235 & 23 & 11 \\
                        & 3 & 3, 690 & 10, 699 & 34 & 15 \\
                        & 4 & 5, 794 & 16, 512 & 44 & 21 \\
                        & 5 & 8, 944 & 24, 820 & 55 & 31 \\
                        & 6 & 14, 176 & 37, 915 & 65 & 50 \\
    \hline
    \multirow{5}{5cm}{\texttt{allreduce:}
    each proc collectively calls \texttt{reduce} then \texttt{bcast},
    implemented in CIVL's MPI library
    } & 2 & 841 & 2, 958 & 3 & 6 \\
                        & 3 & 1, 368 & 4, 803 & 3 & 6 \\
                        & 4 & 1, 903 & 6, 763 & 3 & 7 \\
                        & 5 & 2, 446 & 8, 838 & 3 & 9 \\
                        & 6 & 2, 997 & 11, 028 & 3 & 10 \\
    \hline
\end{tabular}
\end{scriptsize}
\end{frame}

\begin{frame}{Experiment Part I-3}
\begin{scriptsize}
  \begin{tabular}{| p{5cm} | r | r | r | r | r |}
    \hline 
    name \& description & \#procs & \#states & \#trans & \#prover  & time(s)\\
        \hline
        \multirow{5}{5cm}{\texttt{allreduce\U{}dr:}
        an implementation of the recursive doubling algorithm given by a
        pseudo code in \cite{Ruefenacht:2017:GRD:3163938.3164013}
        } & 2 & 648 & 1, 904 & 4 & 7\\
                            & 3 & 1, 060 & 3, 006 & 4 & 7\\
                            & 4 & 1, 562 & 4, 349 & 4 & 8\\
                            & 5 & 2, 017 & 5, 615 & 4 & 10\\
                            & 6 & 2, 486 & 6, 942 & 4 & 11\\
        \hline
        \multirow{5}{5cm}{\texttt{reduceScatter:}
        an implementation of an algorithm optimized for
        non-commutative reduction operations \cite{Bernaschi:2002:EIR:1895489.1895529}
        } & 2 & 713 & 1, 905 & 13 & 7\\
                            & 3 & 1, 378 & 3, 163 & 42 & 11\\
                            & 4 & 2, 147 & 4, 679 & 70 & 16\\
                            & 5 & 3, 102 & 6, 489 & 111 & 25\\
                            & 6 & 4, 248 & 8, 629 & 166 & 50\\
        \hline
        \multirow{5}{5cm}{\texttt{diff1dExchange:}
        the exchange ghost cells function in a 1d-diffusion implementation
        } & 2 & 1, 771 & 5, 083 & 19 & 7\\
                            & 3 & 6, 979 & 19, 164 & 40 & 12\\
                            & 4 & 25, 546 & 68, 328 & 69 & 25\\
                            & 5 & 93, 086 & 244, 148 & 102 & 84\\
                            & 6 & 345, 202 & 891, 940 & 135 & 382\\
        \hline 
        \multirow{4}{5cm}{\texttt{diff1dIter:}
        one time step implementation for a 1d-diffusion solver
        } & 2 & 2, 186 & 6, 354 & 58 & 19 \\
                            & 3 & 8, 646 & 23, 677 & 161 & 85 \\
                            & 4 & 31, 492 & 82, 514 & 380 & 275 \\
                            & 5 & 114, 108 & 287, 467 & 829 & 731 \\
        \hline
        \multirow{2}{5cm}{\texttt{diff2dExchange:}
        $n \times{} n$ checkerboard ghost cell exchange 
        } & $2 \times 2$ & 2, 010 & 4, 787 & 52 & 43\\
                            & $3 \times 3$ & 5, 154 & 12, 817 & 135 & 115\\
        & & & & &\\
        \hline
\end{tabular}
\end{scriptsize}
\end{frame}

\begin{frame}{Experiment Part I-4}
\begin{scriptsize}
  \begin{tabular}{| p{5cm} | r | r | r | r | r |}
    \hline 
    name \& description & \#procs & \#states & \#trans & \#prover  & time(s)\\
        \multirow{3}{5cm}{\texttt{diff2dIter:}
        one time step for a 2d-diffusion solver with $n \times{} n$
        checkerboard distribution
        } & $2 \times 2$ & 1, 727 & 5, 126 & 66 & 95\\
                            & & & & & \\
                            & & & & & \\
                            & & & & & \\
%                            & 4 & 1717 & 5110 & 25 & +13\\
%                           & 4 & 1720 & 5110 & 33 & +14\\
%                          & 4 & 1716 & 5110 & 25 & +15\\
%                           & 4 & 1717 & 5110 & 33 & +16\\
        \hline
        \multirow{5}{5cm}{\texttt{dotProd:}
        parallel vector product function extracted from \cite{falgout:2002:hypre}
        } & 2 & 712 & 2, 315 & 26 & 39\\
                            & 3 & 1, 127 & 3, 629 & 51 & 82\\
                            & 4 & 1, 546 & 5, 010 & 84 & 141 \\
                            & 5 & 1, 995 & 6, 458 & 125 & 220 \\
                            & 6 & 2, 434 & 7, 973 & 174 & 213 \\
        \hline
        \multirow{5}{5cm}{\texttt{matmat:}
        collective matrix multiplication, each process computes for
        one row
        } & 2 & 1, 155 & 3, 829 & 37 & 35\\
                            & 3 & 1, 849 & 6, 096 & 40 & 41\\
                            & 4 & 2, 551 & 8, 469 & 46 & 49\\
                            & 5 & 3, 261 & 10, 948 & 52 & 57\\
                            & 6 & 3, 979 & 13, 533 & 56 & 64\\
        \hline
        \multirow{5}{5cm}{\texttt{oddEvenSort:}
        one loop iteration in a parallel odd-even sorting implementation
        } & 2 & 1, 115 & 3, 505 & 4 & 6\\
                            & 3 & 2, 141 & 6, 699 & 6 & 8\\
                            & 4 & 3, 947 & 12, 458 & 8 & 12\\
                            & 5 & 6, 320 & 20, 217 & 10 & 13\\
                            & 6 & 10, 713 & 34, 757 & 12 & 19\\
        \hline
\end{tabular}
\end{scriptsize}
\end{frame}


\begin{frame}{Experiment Part II}
\begin{scriptsize}
  \begin{tabular}{| l | r | r | r | r | r | r |}
    \hline 
    name & \#procs & \#states & \#seen & \#trans & \#prover  & time(s)\\ 
    \hline
    \texttt{gather\U{}w} & 2 & 5, 953 & 5 & 19, 597 & 48 & 12\\
         & 3 & 218, 766  & 302 & 764, 096 & 123 & 208 \\
    \hline
    \texttt{allgather} & 2 & 9, 848 & 6 & 32, 855 & 31 & 22 \\
         & 3 & 164, 155 & 93 & 547, 753 & 92 & 378 \\
    \hline
    \texttt{matmat} & 2 & 3, 116 & 1 & 11, 102 & 39 & 37 \\
         & 3 & 23, 254 & 17 & 83, 359 & 48 & 75 \\
         & 4 & 342, 653 & 285 & 1, 229, 132& 110 & 704\\
    \hline
    \texttt{oddEvenSort}
         & 2 & 1, 824 & 0 & 6, 327 & 4 & 6 \\
         & 3 & 8, 706 & 8 & 30, 164 & 6 & 10 \\
         & 4 & 96, 961 & 115 & 338, 889 & 10 & 66 \\
    \hline
    \texttt{diff1dIter} & 2 & 5, 668 & 5 & 19, 745 & 35 & 10 \\
         & 3 & 492, 163 & 567 & 1, 779, 177 & 182 & 505\\
    \hline
    \texttt{diff1dExchange} & 2 & 3, 485 & 3 &  11, 300 & 21 & 8 \\
         & 3 & 114, 173 & 150 & 388, 751 & 66 & 75\\
    \hline
    \texttt{dotProd} & 2 & 1, 711 & 1 & 6, 058 & 6 & 7\\
         & 3 & 7, 127 & 5 & 25, 431 & 12 & 15\\
         & 4 & 36, 450 & 23 & 130, 936 & 18 & 43 \\
         & 5 & 225, 223 & 119 & 811, 950 & 24 & 389 \\
    \hline
\end{tabular}
\end{scriptsize}
\end{frame}

\end{document}
