c-----------------------------------------------------------------------
      subroutine mxmf2(A,N1,B,N2,C,N3)
c
c     unrolled loop version 
c
      real a(n1,n2),b(n2,n3),c(n1,n3)

      if (n2.le.8) then
         if (n2.eq.1) then
            call mxf1(a,n1,b,n2,c,n3)
         elseif (n2.eq.2) then
            call mxf2(a,n1,b,n2,c,n3)
         elseif (n2.eq.3) then
            call mxf3(a,n1,b,n2,c,n3)
         elseif (n2.eq.4) then
            call mxf4(a,n1,b,n2,c,n3)
         elseif (n2.eq.5) then
            call mxf5(a,n1,b,n2,c,n3)
         elseif (n2.eq.6) then
            call mxf6(a,n1,b,n2,c,n3)
         elseif (n2.eq.7) then
            call mxf7(a,n1,b,n2,c,n3)
         else
            call mxf8(a,n1,b,n2,c,n3)
         endif
      elseif (n2.le.16) then
         if (n2.eq.9) then
            call mxf9(a,n1,b,n2,c,n3)
         elseif (n2.eq.10) then
            call mxf10(a,n1,b,n2,c,n3)
         elseif (n2.eq.11) then
            call mxf11(a,n1,b,n2,c,n3)
         elseif (n2.eq.12) then
            call mxf12(a,n1,b,n2,c,n3)
         elseif (n2.eq.13) then
            call mxf13(a,n1,b,n2,c,n3)
         elseif (n2.eq.14) then
            call mxf14(a,n1,b,n2,c,n3)
         elseif (n2.eq.15) then
            call mxf15(a,n1,b,n2,c,n3)
         else
            call mxf16(a,n1,b,n2,c,n3)
         endif
      elseif (n2.le.24) then
         if (n2.eq.17) then
            call mxf17(a,n1,b,n2,c,n3)
         elseif (n2.eq.18) then
            call mxf18(a,n1,b,n2,c,n3)
         elseif (n2.eq.19) then
            call mxf19(a,n1,b,n2,c,n3)
         elseif (n2.eq.20) then
            call mxf20(a,n1,b,n2,c,n3)
         elseif (n2.eq.21) then
            call mxf21(a,n1,b,n2,c,n3)
         elseif (n2.eq.22) then
            call mxf22(a,n1,b,n2,c,n3)
         elseif (n2.eq.23) then
            call mxf23(a,n1,b,n2,c,n3)
         elseif (n2.eq.24) then
            call mxf24(a,n1,b,n2,c,n3)
         endif
      else
         call mxm44_0(a,n1,b,n2,c,n3)
      endif
c
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf1(a,n1,b,n2,c,n3)
c
      real a(n1,1),b(1,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf2(a,n1,b,n2,c,n3)
c
      real a(n1,2),b(2,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf3(a,n1,b,n2,c,n3)
c
      real a(n1,3),b(3,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf4(a,n1,b,n2,c,n3)
c
      real a(n1,4),b(4,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf5(a,n1,b,n2,c,n3)
c
      real a(n1,5),b(5,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf6(a,n1,b,n2,c,n3)
c
      real a(n1,6),b(6,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf7(a,n1,b,n2,c,n3)
c
      real a(n1,7),b(7,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf8(a,n1,b,n2,c,n3)
c
      real a(n1,8),b(8,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf9(a,n1,b,n2,c,n3)
c
      real a(n1,9),b(9,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf10(a,n1,b,n2,c,n3)
c
      real a(n1,10),b(10,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf11(a,n1,b,n2,c,n3)
c
      real a(n1,11),b(11,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf12(a,n1,b,n2,c,n3)
c
      real a(n1,12),b(12,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf13(a,n1,b,n2,c,n3)
c
      real a(n1,13),b(13,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf14(a,n1,b,n2,c,n3)
c
      real a(n1,14),b(14,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf15(a,n1,b,n2,c,n3)
c
      real a(n1,15),b(15,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf16(a,n1,b,n2,c,n3)
c
      real a(n1,16),b(16,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf17(a,n1,b,n2,c,n3)
c
      real a(n1,17),b(17,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf18(a,n1,b,n2,c,n3)
c
      real a(n1,18),b(18,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf19(a,n1,b,n2,c,n3)
c
      real a(n1,19),b(19,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf20(a,n1,b,n2,c,n3)
c
      real a(n1,20),b(20,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf21(a,n1,b,n2,c,n3)
c
      real a(n1,21),b(21,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf22(a,n1,b,n2,c,n3)
c
      real a(n1,22),b(22,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
     $             + a(i,22)*b(22,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf23(a,n1,b,n2,c,n3)
c
      real a(n1,23),b(23,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
     $             + a(i,22)*b(22,j)
     $             + a(i,23)*b(23,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxf24(a,n1,b,n2,c,n3)
c
      real a(n1,24),b(24,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
     $             + a(i,22)*b(22,j)
     $             + a(i,23)*b(23,j)
     $             + a(i,24)*b(24,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxm44_0(a, m, b, k, c, n)
c
c matrix multiply with a 4x4 pencil 
c
      real a(m,k), b(k,n), c(m,n)
      real s11, s12, s13, s14, s21, s22, s23, s24
      real s31, s32, s33, s34, s41, s42, s43, s44

      mresid = iand(m,3) 
      nresid = iand(n,3) 
      m1 = m - mresid + 1
      n1 = n - nresid + 1

      do i=1,m-mresid,4
        do j=1,n-nresid,4
          s11 = 0.0d0
          s21 = 0.0d0
          s31 = 0.0d0
          s41 = 0.0d0
          s12 = 0.0d0
          s22 = 0.0d0
          s32 = 0.0d0
          s42 = 0.0d0
          s13 = 0.0d0
          s23 = 0.0d0
          s33 = 0.0d0
          s43 = 0.0d0
          s14 = 0.0d0
          s24 = 0.0d0
          s34 = 0.0d0
          s44 = 0.0d0
          do l=1,k
            s11 = s11 + a(i,l)*b(l,j)
            s12 = s12 + a(i,l)*b(l,j+1)
            s13 = s13 + a(i,l)*b(l,j+2)
            s14 = s14 + a(i,l)*b(l,j+3)

            s21 = s21 + a(i+1,l)*b(l,j)
            s22 = s22 + a(i+1,l)*b(l,j+1)
            s23 = s23 + a(i+1,l)*b(l,j+2)
            s24 = s24 + a(i+1,l)*b(l,j+3)

            s31 = s31 + a(i+2,l)*b(l,j)
            s32 = s32 + a(i+2,l)*b(l,j+1)
            s33 = s33 + a(i+2,l)*b(l,j+2)
            s34 = s34 + a(i+2,l)*b(l,j+3)

            s41 = s41 + a(i+3,l)*b(l,j)
            s42 = s42 + a(i+3,l)*b(l,j+1)
            s43 = s43 + a(i+3,l)*b(l,j+2)
            s44 = s44 + a(i+3,l)*b(l,j+3)
          enddo
          c(i,j)     = s11 
          c(i,j+1)   = s12 
          c(i,j+2)   = s13
          c(i,j+3)   = s14

          c(i+1,j)   = s21 
          c(i+2,j)   = s31 
          c(i+3,j)   = s41 

          c(i+1,j+1) = s22
          c(i+2,j+1) = s32
          c(i+3,j+1) = s42

          c(i+1,j+2) = s23
          c(i+2,j+2) = s33
          c(i+3,j+2) = s43

          c(i+1,j+3) = s24
          c(i+2,j+3) = s34
          c(i+3,j+3) = s44
        enddo
* Residual when n is not multiple of 4
        if (nresid .ne. 0) then
          if (nresid .eq. 1) then
            s11 = 0.0d0
            s21 = 0.0d0
            s31 = 0.0d0
            s41 = 0.0d0
            do l=1,k
              s11 = s11 + a(i,l)*b(l,n)
              s21 = s21 + a(i+1,l)*b(l,n)
              s31 = s31 + a(i+2,l)*b(l,n)
              s41 = s41 + a(i+3,l)*b(l,n)
            enddo
            c(i,n)     = s11 
            c(i+1,n)   = s21 
            c(i+2,n)   = s31 
            c(i+3,n)   = s41 
          elseif (nresid .eq. 2) then
            s11 = 0.0d0
            s21 = 0.0d0
            s31 = 0.0d0
            s41 = 0.0d0
            s12 = 0.0d0
            s22 = 0.0d0
            s32 = 0.0d0
            s42 = 0.0d0
            do l=1,k
              s11 = s11 + a(i,l)*b(l,j)
              s12 = s12 + a(i,l)*b(l,j+1)

              s21 = s21 + a(i+1,l)*b(l,j)
              s22 = s22 + a(i+1,l)*b(l,j+1)

              s31 = s31 + a(i+2,l)*b(l,j)
              s32 = s32 + a(i+2,l)*b(l,j+1)

              s41 = s41 + a(i+3,l)*b(l,j)
              s42 = s42 + a(i+3,l)*b(l,j+1)
            enddo
            c(i,j)     = s11 
            c(i,j+1)   = s12

            c(i+1,j)   = s21 
            c(i+2,j)   = s31 
            c(i+3,j)   = s41 

            c(i+1,j+1) = s22
            c(i+2,j+1) = s32
            c(i+3,j+1) = s42
          else
            s11 = 0.0d0
            s21 = 0.0d0
            s31 = 0.0d0
            s41 = 0.0d0
            s12 = 0.0d0
            s22 = 0.0d0
            s32 = 0.0d0
            s42 = 0.0d0
            s13 = 0.0d0
            s23 = 0.0d0
            s33 = 0.0d0
            s43 = 0.0d0
            do l=1,k
              s11 = s11 + a(i,l)*b(l,j)
              s12 = s12 + a(i,l)*b(l,j+1)
              s13 = s13 + a(i,l)*b(l,j+2)

              s21 = s21 + a(i+1,l)*b(l,j)
              s22 = s22 + a(i+1,l)*b(l,j+1)
              s23 = s23 + a(i+1,l)*b(l,j+2)

              s31 = s31 + a(i+2,l)*b(l,j)
              s32 = s32 + a(i+2,l)*b(l,j+1)
              s33 = s33 + a(i+2,l)*b(l,j+2)

              s41 = s41 + a(i+3,l)*b(l,j)
              s42 = s42 + a(i+3,l)*b(l,j+1)
              s43 = s43 + a(i+3,l)*b(l,j+2)
            enddo
            c(i,j)     = s11 
            c(i+1,j)   = s21 
            c(i+2,j)   = s31 
            c(i+3,j)   = s41 
            c(i,j+1)   = s12 
            c(i+1,j+1) = s22
            c(i+2,j+1) = s32
            c(i+3,j+1) = s42
            c(i,j+2)   = s13
            c(i+1,j+2) = s23
            c(i+2,j+2) = s33
            c(i+3,j+2) = s43
          endif
        endif
      enddo

* Residual when m is not multiple of 4
      if (mresid .eq. 0) then
        return
      elseif (mresid .eq. 1) then
        do j=1,n-nresid,4
          s11 = 0.0d0
          s12 = 0.0d0
          s13 = 0.0d0
          s14 = 0.0d0
          do l=1,k
            s11 = s11 + a(m,l)*b(l,j)
            s12 = s12 + a(m,l)*b(l,j+1)
            s13 = s13 + a(m,l)*b(l,j+2)
            s14 = s14 + a(m,l)*b(l,j+3)
          enddo
          c(m,j)     = s11 
          c(m,j+1)   = s12 
          c(m,j+2)   = s13
          c(m,j+3)   = s14
        enddo
* mresid is 1, check nresid
        if (nresid .eq. 0) then
          return
        elseif (nresid .eq. 1) then
          s11 = 0.0d0
          do l=1,k
            s11 = s11 + a(m,l)*b(l,n)
          enddo
          c(m,n) = s11
          return
        elseif (nresid .eq. 2) then
          s11 = 0.0d0
          s12 = 0.0d0
          do l=1,k
            s11 = s11 + a(m,l)*b(l,n-1)
            s12 = s12 + a(m,l)*b(l,n)
          enddo
          c(m,n-1) = s11
          c(m,n) = s12
          return
        else
          s11 = 0.0d0
          s12 = 0.0d0
          s13 = 0.0d0
          do l=1,k
            s11 = s11 + a(m,l)*b(l,n-2)
            s12 = s12 + a(m,l)*b(l,n-1)
            s13 = s13 + a(m,l)*b(l,n)
          enddo
          c(m,n-2) = s11
          c(m,n-1) = s12
          c(m,n) = s13
          return
        endif          
      elseif (mresid .eq. 2) then
        do j=1,n-nresid,4
          s11 = 0.0d0
          s12 = 0.0d0
          s13 = 0.0d0
          s14 = 0.0d0
          s21 = 0.0d0
          s22 = 0.0d0
          s23 = 0.0d0
          s24 = 0.0d0
          do l=1,k
            s11 = s11 + a(m-1,l)*b(l,j)
            s12 = s12 + a(m-1,l)*b(l,j+1)
            s13 = s13 + a(m-1,l)*b(l,j+2)
            s14 = s14 + a(m-1,l)*b(l,j+3)

            s21 = s21 + a(m,l)*b(l,j)
            s22 = s22 + a(m,l)*b(l,j+1)
            s23 = s23 + a(m,l)*b(l,j+2)
            s24 = s24 + a(m,l)*b(l,j+3)
          enddo
          c(m-1,j)   = s11 
          c(m-1,j+1) = s12 
          c(m-1,j+2) = s13
          c(m-1,j+3) = s14
          c(m,j)     = s21
          c(m,j+1)   = s22 
          c(m,j+2)   = s23
          c(m,j+3)   = s24
        enddo
* mresid is 2, check nresid
        if (nresid .eq. 0) then
          return
        elseif (nresid .eq. 1) then
          s11 = 0.0d0
          s21 = 0.0d0
          do l=1,k
            s11 = s11 + a(m-1,l)*b(l,n)
            s21 = s21 + a(m,l)*b(l,n)
          enddo
          c(m-1,n) = s11
          c(m,n) = s21
          return
        elseif (nresid .eq. 2) then
          s11 = 0.0d0
          s21 = 0.0d0
          s12 = 0.0d0
          s22 = 0.0d0
          do l=1,k
            s11 = s11 + a(m-1,l)*b(l,n-1)
            s12 = s12 + a(m-1,l)*b(l,n)
            s21 = s21 + a(m,l)*b(l,n-1)
            s22 = s22 + a(m,l)*b(l,n)
          enddo
          c(m-1,n-1) = s11
          c(m-1,n)   = s12
          c(m,n-1)   = s21
          c(m,n)     = s22
          return
        else
          s11 = 0.0d0
          s21 = 0.0d0
          s12 = 0.0d0
          s22 = 0.0d0
          s13 = 0.0d0
          s23 = 0.0d0
          do l=1,k
            s11 = s11 + a(m-1,l)*b(l,n-2)
            s12 = s12 + a(m-1,l)*b(l,n-1)
            s13 = s13 + a(m-1,l)*b(l,n)
            s21 = s21 + a(m,l)*b(l,n-2)
            s22 = s22 + a(m,l)*b(l,n-1)
            s23 = s23 + a(m,l)*b(l,n)
          enddo
          c(m-1,n-2) = s11
          c(m-1,n-1) = s12
          c(m-1,n)   = s13
          c(m,n-2)   = s21
          c(m,n-1)   = s22
          c(m,n)     = s23
          return
        endif
      else
* mresid is 3
        do j=1,n-nresid,4
          s11 = 0.0d0
          s21 = 0.0d0
          s31 = 0.0d0

          s12 = 0.0d0
          s22 = 0.0d0
          s32 = 0.0d0

          s13 = 0.0d0
          s23 = 0.0d0
          s33 = 0.0d0

          s14 = 0.0d0
          s24 = 0.0d0
          s34 = 0.0d0

          do l=1,k
            s11 = s11 + a(m-2,l)*b(l,j)
            s12 = s12 + a(m-2,l)*b(l,j+1)
            s13 = s13 + a(m-2,l)*b(l,j+2)
            s14 = s14 + a(m-2,l)*b(l,j+3)

            s21 = s21 + a(m-1,l)*b(l,j)
            s22 = s22 + a(m-1,l)*b(l,j+1)
            s23 = s23 + a(m-1,l)*b(l,j+2)
            s24 = s24 + a(m-1,l)*b(l,j+3)

            s31 = s31 + a(m,l)*b(l,j)
            s32 = s32 + a(m,l)*b(l,j+1)
            s33 = s33 + a(m,l)*b(l,j+2)
            s34 = s34 + a(m,l)*b(l,j+3)
          enddo
          c(m-2,j)   = s11 
          c(m-2,j+1) = s12 
          c(m-2,j+2) = s13
          c(m-2,j+3) = s14

          c(m-1,j)   = s21 
          c(m-1,j+1) = s22
          c(m-1,j+2) = s23
          c(m-1,j+3) = s24

          c(m,j)     = s31 
          c(m,j+1)   = s32
          c(m,j+2)   = s33
          c(m,j+3)   = s34
        enddo
* mresid is 3, check nresid
        if (nresid .eq. 0) then
          return
        elseif (nresid .eq. 1) then
          s11 = 0.0d0
          s21 = 0.0d0
          s31 = 0.0d0
          do l=1,k
            s11 = s11 + a(m-2,l)*b(l,n)
            s21 = s21 + a(m-1,l)*b(l,n)
            s31 = s31 + a(m,l)*b(l,n)
          enddo
          c(m-2,n) = s11
          c(m-1,n) = s21
          c(m,n) = s31
          return
        elseif (nresid .eq. 2) then
          s11 = 0.0d0
          s21 = 0.0d0
          s31 = 0.0d0
          s12 = 0.0d0
          s22 = 0.0d0
          s32 = 0.0d0
          do l=1,k
            s11 = s11 + a(m-2,l)*b(l,n-1)
            s12 = s12 + a(m-2,l)*b(l,n)
            s21 = s21 + a(m-1,l)*b(l,n-1)
            s22 = s22 + a(m-1,l)*b(l,n)
            s31 = s31 + a(m,l)*b(l,n-1)
            s32 = s32 + a(m,l)*b(l,n)
          enddo
          c(m-2,n-1) = s11
          c(m-2,n)   = s12
          c(m-1,n-1) = s21
          c(m-1,n)   = s22
          c(m,n-1)   = s31
          c(m,n)     = s32
          return
        else
          s11 = 0.0d0
          s21 = 0.0d0
          s31 = 0.0d0
          s12 = 0.0d0
          s22 = 0.0d0
          s32 = 0.0d0
          s13 = 0.0d0
          s23 = 0.0d0
          s33 = 0.0d0
          do l=1,k
            s11 = s11 + a(m-2,l)*b(l,n-2)
            s12 = s12 + a(m-2,l)*b(l,n-1)
            s13 = s13 + a(m-2,l)*b(l,n)
            s21 = s21 + a(m-1,l)*b(l,n-2)
            s22 = s22 + a(m-1,l)*b(l,n-1)
            s23 = s23 + a(m-1,l)*b(l,n)
            s31 = s31 + a(m,l)*b(l,n-2)
            s32 = s32 + a(m,l)*b(l,n-1)
            s33 = s33 + a(m,l)*b(l,n)
          enddo
          c(m-2,n-2) = s11
          c(m-2,n-1) = s12
          c(m-2,n)   = s13
          c(m-1,n-2) = s21
          c(m-1,n-1) = s22
          c(m-1,n)   = s23
          c(m,n-2)   = s31
          c(m,n-1)   = s32
          c(m,n)     = s33
          return
        endif
      endif

      return
      end
c-----------------------------------------------------------------------
      subroutine mxm44_2(a, m, b, k, c, n)
      real a(m,2), b(2,n), c(m,n)

      nresid = iand(n,3) 
      n1 = n - nresid + 1

      do j=1,n-nresid,4
         do i=1,m
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
            c(i,j+1) = a(i,1)*b(1,j+1)
     $             + a(i,2)*b(2,j+1)
            c(i,j+2) = a(i,1)*b(1,j+2)
     $             + a(i,2)*b(2,j+2)
            c(i,j+3) = a(i,1)*b(1,j+3)
     $             + a(i,2)*b(2,j+3)
         enddo
      enddo
      if (nresid .eq. 0) then
        return
      elseif (nresid .eq. 1) then
         do i=1,m
            c(i,n) = a(i,1)*b(1,n)
     $             + a(i,2)*b(2,n)
         enddo
      elseif (nresid .eq. 2) then
         do i=1,m
            c(i,n-1) = a(i,1)*b(1,n-1)
     $             + a(i,2)*b(2,n-1)
            c(i,n) = a(i,1)*b(1,n)
     $             + a(i,2)*b(2,n)
         enddo
      else
         do i=1,m
            c(i,n-2) = a(i,1)*b(1,n-2)
     $             + a(i,2)*b(2,n-2)
            c(i,n-1) = a(i,1)*b(1,n-1)
     $             + a(i,2)*b(2,n-1)
            c(i,n) = a(i,1)*b(1,n)
     $             + a(i,2)*b(2,n)
         enddo
      endif

      return
      end
c-----------------------------------------------------------------------
      subroutine mxm_test_all(nid,ivb)
c
c     Collect matrix-matrix product statistics
c
      external mxms,mxmur2,mxmur3,mxmd,mxmfb,mxmf3,mxmu4
      external madd,mxm,mxm44
c
      parameter (nn=24)
      parameter (nt=10)
      character*5 c(3,nt)
      real        s(nn,2,nt,3)
      real        a(nn,2,nt,3)

      call nekgsync

      do k=1,3   ! 3 tests:  N^2 x N, NxN, NxN^2
         call mxmtest(s(1,1, 1,k),nn,c(k, 1),mxm44 ,'mxm44',k,ivb)
         call mxmtest(s(1,1, 2,k),nn,c(k, 2),mxms  ,' std ',k,ivb)
         call mxmtest(s(1,1, 3,k),nn,c(k, 3),mxmur2,'mxmu2',k,ivb)
         call mxmtest(s(1,1, 4,k),nn,c(k, 4),mxmur3,'mxmu3',k,ivb)
         call mxmtest(s(1,1, 5,k),nn,c(k, 5),mxmd  ,'mxmd ',k,ivb)
         call mxmtest(s(1,1, 6,k),nn,c(k, 6),mxmfb ,'mxmfb',k,ivb)
         call mxmtest(s(1,1, 7,k),nn,c(k, 7),mxmu4 ,'mxmu4',k,ivb)
         call mxmtest(s(1,1, 8,k),nn,c(k, 8),mxmf3 ,'mxmf3',k,ivb)
         if (k.eq.2) ! Add works only for NxN case
     $   call mxmtest(s(1,1, 9,k),nn,c(k, 9),madd  ,'madd ',k,ivb)
         call mxmtest(s(1,1,10,k),nn,c(k,10),mxm   ,'mxm  ',k,ivb)
      enddo

      call nekgsync
      if (nid.eq.0) call mxm_analyze(s,a,nn,c,nt,ivb)
      call nekgsync

      return
      end
c-----------------------------------------------------------------------
      subroutine initab(a,b,n)
      real a(1),b(1)
      do i=1,n-1
         x  = i
         k = mod(i,19) + 2
         l = mod(i,17) + 5
         m = mod(i,31) + 3
         a(i) = -.25*(a(i)+a(i+1)) + (x*x + k + l)/(x*x+m)
         b(i) = -.25*(b(i)+b(i+1)) + (x*x + k + m)/(x*x+l)
      enddo
      a(n) = -.25*(a(n)+a(n)) + (x*x + k + l)/(x*x+m)
      b(n) = -.25*(b(n)+b(n)) + (x*x + k + m)/(x*x+l)
      return
      end
c-----------------------------------------------------------------------
      subroutine mxms(a,n1,b,n2,c,n3)
C----------------------------------------------------------------------
C
C     Matrix-vector product routine. 
C     NOTE: Use assembly coded routine if available.
C
C---------------------------------------------------------------------
      REAL A(N1,N2),B(N2,N3),C(N1,N3)
C
         N0=N1*N3
         DO 10 I=1,N0
            C(I,1)=0.
 10      CONTINUE
         DO 100 J=1,N3
         DO 100 K=1,N2
         BB=B(K,J)
         DO 100 I=1,N1
            C(I,J)=C(I,J)+A(I,K)*BB
 100     CONTINUE
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmu4(a,n1,b,n2,c,n3)
C----------------------------------------------------------------------
C
C     Matrix-vector product routine. 
C     NOTE: Use assembly coded routine if available.
C
C---------------------------------------------------------------------
      REAL A(N1,N2),B(N2,N3),C(N1,N3)
C
         N0=N1*N3
         DO 10 I=1,N0
            C(I,1)=0.
 10      CONTINUE
         i1 = n1 - mod(n1,4) + 1
            DO 100 J=1,N3
            DO 100 K=1,N2
            BB=B(K,J)
               DO I=1,N1-3,4
                  C(I  ,J)=C(I  ,J)+A(I  ,K)*BB
                  C(I+1,J)=C(I+1,J)+A(I+1,K)*BB
                  C(I+2,J)=C(I+2,J)+A(I+2,K)*BB
                  C(I+3,J)=C(I+3,J)+A(I+3,K)*BB
               enddo
               DO i=i1,N1
                  C(I  ,J)=C(I  ,J)+A(I  ,K)*BB
               enddo
 100        CONTINUE
      return
      end
c-----------------------------------------------------------------------
      subroutine madd (a,n1,b,n2,c,n3)
c
      real a(n1,n2),b(n2,n3),c(n1,n3)
c
      do j=1,n3
      do i=1,n1
         c(i,j) = a(i,j)+b(i,j)
      enddo
      enddo
c
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmUR2(a,n1,b,n2,c,n3)
C----------------------------------------------------------------------
C
C     Matrix-vector product routine. 
C     NOTE: Use assembly coded routine if available.
C
C---------------------------------------------------------------------
      REAL A(N1,N2),B(N2,N3),C(N1,N3)
C
      if (n2.le.8) then
         if (n2.eq.1) then
            call mxmur2_1(a,n1,b,n2,c,n3)
         elseif (n2.eq.2) then
            call mxmur2_2(a,n1,b,n2,c,n3)
         elseif (n2.eq.3) then
            call mxmur2_3(a,n1,b,n2,c,n3)
         elseif (n2.eq.4) then
            call mxmur2_4(a,n1,b,n2,c,n3)
         elseif (n2.eq.5) then
            call mxmur2_5(a,n1,b,n2,c,n3)
         elseif (n2.eq.6) then
            call mxmur2_6(a,n1,b,n2,c,n3)
         elseif (n2.eq.7) then
            call mxmur2_7(a,n1,b,n2,c,n3)
         else
            call mxmur2_8(a,n1,b,n2,c,n3)
         endif
      elseif (n2.le.16) then
         if (n2.eq.9) then
            call mxmur2_9(a,n1,b,n2,c,n3)
         elseif (n2.eq.10) then
            call mxmur2_10(a,n1,b,n2,c,n3)
         elseif (n2.eq.11) then
            call mxmur2_11(a,n1,b,n2,c,n3)
         elseif (n2.eq.12) then
            call mxmur2_12(a,n1,b,n2,c,n3)
         elseif (n2.eq.13) then
            call mxmur2_13(a,n1,b,n2,c,n3)
         elseif (n2.eq.14) then
            call mxmur2_14(a,n1,b,n2,c,n3)
         elseif (n2.eq.15) then
            call mxmur2_15(a,n1,b,n2,c,n3)
         else
            call mxmur2_16(a,n1,b,n2,c,n3)
         endif
      else
         N0=N1*N3
         DO 10 I=1,N0
            C(I,1)=0.
 10      CONTINUE
         DO 100 J=1,N3
         DO 100 K=1,N2
         BB=B(K,J)
         DO 100 I=1,N1
            C(I,J)=C(I,J)+A(I,K)*BB
 100     CONTINUE
      endif
      return
      end
c
      subroutine mxmur2_1(a,n1,b,n2,c,n3)
c
      real a(n1,1),b(1,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_2(a,n1,b,n2,c,n3)
c
      real a(n1,2),b(2,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_3(a,n1,b,n2,c,n3)
c
      real a(n1,3),b(3,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_4(a,n1,b,n2,c,n3)
c
      real a(n1,4),b(4,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_5(a,n1,b,n2,c,n3)
c
      real a(n1,5),b(5,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_6(a,n1,b,n2,c,n3)
c
      real a(n1,6),b(6,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_7(a,n1,b,n2,c,n3)
c
      real a(n1,7),b(7,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_8(a,n1,b,n2,c,n3)
c
      real a(n1,8),b(8,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_9(a,n1,b,n2,c,n3)
c
      real a(n1,9),b(9,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_10(a,n1,b,n2,c,n3)
c
      real a(n1,10),b(10,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_11(a,n1,b,n2,c,n3)
c
      real a(n1,11),b(11,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_12(a,n1,b,n2,c,n3)
c
      real a(n1,12),b(12,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_13(a,n1,b,n2,c,n3)
c
      real a(n1,13),b(13,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_14(a,n1,b,n2,c,n3)
c
      real a(n1,14),b(14,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_15(a,n1,b,n2,c,n3)
c
      real a(n1,15),b(15,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
         enddo
      enddo
      return
      end
      subroutine mxmur2_16(a,n1,b,n2,c,n3)
c
      real a(n1,16),b(16,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmUR3(a,n1,b,n2,c,n3)
C----------------------------------------------------------------------
C
C     Matrix-vector product routine. 
C     NOTE: Use assembly coded routine if available.
C
C---------------------------------------------------------------------
      REAL A(N1,N2),B(N2,N3),C(N1,N3)
C
      N0=N1*N3
      DO 10 I=1,N0
         C(I,1)=0.
 10   CONTINUE
      if (n3.le.8) then
         if (n3.eq.1) then
            call mxmur3_1(a,n1,b,n2,c,n3)
         elseif (n3.eq.2) then
            call mxmur3_2(a,n1,b,n2,c,n3)
         elseif (n3.eq.3) then
            call mxmur3_3(a,n1,b,n2,c,n3)
         elseif (n3.eq.4) then
            call mxmur3_4(a,n1,b,n2,c,n3)
         elseif (n3.eq.5) then
            call mxmur3_5(a,n1,b,n2,c,n3)
         elseif (n3.eq.6) then
            call mxmur3_6(a,n1,b,n2,c,n3)
         elseif (n3.eq.7) then
            call mxmur3_7(a,n1,b,n2,c,n3)
         else
            call mxmur3_8(a,n1,b,n2,c,n3)
         endif
      elseif (n3.le.16) then
         if (n3.eq.9) then
            call mxmur3_9(a,n1,b,n2,c,n3)
         elseif (n3.eq.10) then
            call mxmur3_10(a,n1,b,n2,c,n3)
         elseif (n3.eq.11) then
            call mxmur3_11(a,n1,b,n2,c,n3)
         elseif (n3.eq.12) then
            call mxmur3_12(a,n1,b,n2,c,n3)
         elseif (n3.eq.13) then
            call mxmur3_13(a,n1,b,n2,c,n3)
         elseif (n3.eq.14) then
            call mxmur3_14(a,n1,b,n2,c,n3)
         elseif (n3.eq.15) then
            call mxmur3_15(a,n1,b,n2,c,n3)
         else
            call mxmur3_16(a,n1,b,n2,c,n3)
         endif
      else
         DO 100 J=1,N3
         DO 100 K=1,N2
         BB=B(K,J)
         DO 100 I=1,N1
            C(I,J)=C(I,J)+A(I,K)*BB
 100     CONTINUE
      endif
      return
      end
c
      subroutine mxmur3_16(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,16),c(n1,16)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         tmp3  =  b(k, 3)
         tmp4  =  b(k, 4)
         tmp5  =  b(k, 5)
         tmp6  =  b(k, 6)
         tmp7  =  b(k, 7)
         tmp8  =  b(k, 8)
         tmp9  =  b(k, 9)
         tmp10 =  b(k,10)
         tmp11 =  b(k,11)
         tmp12 =  b(k,12)
         tmp13 =  b(k,13)
         tmp14 =  b(k,14)
         tmp15 =  b(k,15)
         tmp16 =  b(k,16)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
            c(i, 5)  =  c(i, 5) + a(i,k) * tmp5
            c(i, 6)  =  c(i, 6) + a(i,k) * tmp6
            c(i, 7)  =  c(i, 7) + a(i,k) * tmp7
            c(i, 8)  =  c(i, 8) + a(i,k) * tmp8
            c(i, 9)  =  c(i, 9) + a(i,k) * tmp9
            c(i,10)  =  c(i,10) + a(i,k) * tmp10
            c(i,11)  =  c(i,11) + a(i,k) * tmp11
            c(i,12)  =  c(i,12) + a(i,k) * tmp12
            c(i,13)  =  c(i,13) + a(i,k) * tmp13
            c(i,14)  =  c(i,14) + a(i,k) * tmp14
            c(i,15)  =  c(i,15) + a(i,k) * tmp15
            c(i,16)  =  c(i,16) + a(i,k) * tmp16
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_15(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,15),c(n1,15)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         tmp3  =  b(k, 3)
         tmp4  =  b(k, 4)
         tmp5  =  b(k, 5)
         tmp6  =  b(k, 6)
         tmp7  =  b(k, 7)
         tmp8  =  b(k, 8)
         tmp9  =  b(k, 9)
         tmp10 =  b(k,10)
         tmp11 =  b(k,11)
         tmp12 =  b(k,12)
         tmp13 =  b(k,13)
         tmp14 =  b(k,14)
         tmp15 =  b(k,15)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
            c(i, 5)  =  c(i, 5) + a(i,k) * tmp5
            c(i, 6)  =  c(i, 6) + a(i,k) * tmp6
            c(i, 7)  =  c(i, 7) + a(i,k) * tmp7
            c(i, 8)  =  c(i, 8) + a(i,k) * tmp8
            c(i, 9)  =  c(i, 9) + a(i,k) * tmp9
            c(i,10)  =  c(i,10) + a(i,k) * tmp10
            c(i,11)  =  c(i,11) + a(i,k) * tmp11
            c(i,12)  =  c(i,12) + a(i,k) * tmp12
            c(i,13)  =  c(i,13) + a(i,k) * tmp13
            c(i,14)  =  c(i,14) + a(i,k) * tmp14
            c(i,15)  =  c(i,15) + a(i,k) * tmp15
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_14(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,14),c(n1,14)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         tmp3  =  b(k, 3)
         tmp4  =  b(k, 4)
         tmp5  =  b(k, 5)
         tmp6  =  b(k, 6)
         tmp7  =  b(k, 7)
         tmp8  =  b(k, 8)
         tmp9  =  b(k, 9)
         tmp10 =  b(k,10)
         tmp11 =  b(k,11)
         tmp12 =  b(k,12)
         tmp13 =  b(k,13)
         tmp14 =  b(k,14)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
            c(i, 5)  =  c(i, 5) + a(i,k) * tmp5
            c(i, 6)  =  c(i, 6) + a(i,k) * tmp6
            c(i, 7)  =  c(i, 7) + a(i,k) * tmp7
            c(i, 8)  =  c(i, 8) + a(i,k) * tmp8
            c(i, 9)  =  c(i, 9) + a(i,k) * tmp9
            c(i,10)  =  c(i,10) + a(i,k) * tmp10
            c(i,11)  =  c(i,11) + a(i,k) * tmp11
            c(i,12)  =  c(i,12) + a(i,k) * tmp12
            c(i,13)  =  c(i,13) + a(i,k) * tmp13
            c(i,14)  =  c(i,14) + a(i,k) * tmp14
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_13(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,13),c(n1,13)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         tmp3  =  b(k, 3)
         tmp4  =  b(k, 4)
         tmp5  =  b(k, 5)
         tmp6  =  b(k, 6)
         tmp7  =  b(k, 7)
         tmp8  =  b(k, 8)
         tmp9  =  b(k, 9)
         tmp10 =  b(k,10)
         tmp11 =  b(k,11)
         tmp12 =  b(k,12)
         tmp13 =  b(k,13)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
            c(i, 5)  =  c(i, 5) + a(i,k) * tmp5
            c(i, 6)  =  c(i, 6) + a(i,k) * tmp6
            c(i, 7)  =  c(i, 7) + a(i,k) * tmp7
            c(i, 8)  =  c(i, 8) + a(i,k) * tmp8
            c(i, 9)  =  c(i, 9) + a(i,k) * tmp9
            c(i,10)  =  c(i,10) + a(i,k) * tmp10
            c(i,11)  =  c(i,11) + a(i,k) * tmp11
            c(i,12)  =  c(i,12) + a(i,k) * tmp12
            c(i,13)  =  c(i,13) + a(i,k) * tmp13
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_12(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,12),c(n1,12)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         tmp3  =  b(k, 3)
         tmp4  =  b(k, 4)
         tmp5  =  b(k, 5)
         tmp6  =  b(k, 6)
         tmp7  =  b(k, 7)
         tmp8  =  b(k, 8)
         tmp9  =  b(k, 9)
         tmp10 =  b(k,10)
         tmp11 =  b(k,11)
         tmp12 =  b(k,12)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
            c(i, 5)  =  c(i, 5) + a(i,k) * tmp5
            c(i, 6)  =  c(i, 6) + a(i,k) * tmp6
            c(i, 7)  =  c(i, 7) + a(i,k) * tmp7
            c(i, 8)  =  c(i, 8) + a(i,k) * tmp8
            c(i, 9)  =  c(i, 9) + a(i,k) * tmp9
            c(i,10)  =  c(i,10) + a(i,k) * tmp10
            c(i,11)  =  c(i,11) + a(i,k) * tmp11
            c(i,12)  =  c(i,12) + a(i,k) * tmp12
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_11(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,11),c(n1,11)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         tmp3  =  b(k, 3)
         tmp4  =  b(k, 4)
         tmp5  =  b(k, 5)
         tmp6  =  b(k, 6)
         tmp7  =  b(k, 7)
         tmp8  =  b(k, 8)
         tmp9  =  b(k, 9)
         tmp10 =  b(k,10)
         tmp11 =  b(k,11)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
            c(i, 5)  =  c(i, 5) + a(i,k) * tmp5
            c(i, 6)  =  c(i, 6) + a(i,k) * tmp6
            c(i, 7)  =  c(i, 7) + a(i,k) * tmp7
            c(i, 8)  =  c(i, 8) + a(i,k) * tmp8
            c(i, 9)  =  c(i, 9) + a(i,k) * tmp9
            c(i,10)  =  c(i,10) + a(i,k) * tmp10
            c(i,11)  =  c(i,11) + a(i,k) * tmp11
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_10(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,10),c(n1,10)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         tmp3  =  b(k, 3)
         tmp4  =  b(k, 4)
         tmp5  =  b(k, 5)
         tmp6  =  b(k, 6)
         tmp7  =  b(k, 7)
         tmp8  =  b(k, 8)
         tmp9  =  b(k, 9)
         tmp10 =  b(k,10)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
            c(i, 5)  =  c(i, 5) + a(i,k) * tmp5
            c(i, 6)  =  c(i, 6) + a(i,k) * tmp6
            c(i, 7)  =  c(i, 7) + a(i,k) * tmp7
            c(i, 8)  =  c(i, 8) + a(i,k) * tmp8
            c(i, 9)  =  c(i, 9) + a(i,k) * tmp9
            c(i,10)  =  c(i,10) + a(i,k) * tmp10
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_9(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,9),c(n1,9)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         tmp3  =  b(k, 3)
         tmp4  =  b(k, 4)
         tmp5  =  b(k, 5)
         tmp6  =  b(k, 6)
         tmp7  =  b(k, 7)
         tmp8  =  b(k, 8)
         tmp9  =  b(k, 9)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
            c(i, 5)  =  c(i, 5) + a(i,k) * tmp5
            c(i, 6)  =  c(i, 6) + a(i,k) * tmp6
            c(i, 7)  =  c(i, 7) + a(i,k) * tmp7
            c(i, 8)  =  c(i, 8) + a(i,k) * tmp8
            c(i, 9)  =  c(i, 9) + a(i,k) * tmp9
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_8(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,8),c(n1,8)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         tmp3  =  b(k, 3)
         tmp4  =  b(k, 4)
         tmp5  =  b(k, 5)
         tmp6  =  b(k, 6)
         tmp7  =  b(k, 7)
         tmp8  =  b(k, 8)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
            c(i, 5)  =  c(i, 5) + a(i,k) * tmp5
            c(i, 6)  =  c(i, 6) + a(i,k) * tmp6
            c(i, 7)  =  c(i, 7) + a(i,k) * tmp7
            c(i, 8)  =  c(i, 8) + a(i,k) * tmp8
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_7(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,7),c(n1,7)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         tmp3  =  b(k, 3)
         tmp4  =  b(k, 4)
         tmp5  =  b(k, 5)
         tmp6  =  b(k, 6)
         tmp7  =  b(k, 7)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
            c(i, 5)  =  c(i, 5) + a(i,k) * tmp5
            c(i, 6)  =  c(i, 6) + a(i,k) * tmp6
            c(i, 7)  =  c(i, 7) + a(i,k) * tmp7
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_6(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,6),c(n1,6)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         tmp3  =  b(k, 3)
         tmp4  =  b(k, 4)
         tmp5  =  b(k, 5)
         tmp6  =  b(k, 6)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
            c(i, 5)  =  c(i, 5) + a(i,k) * tmp5
            c(i, 6)  =  c(i, 6) + a(i,k) * tmp6
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_5(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,5),c(n1,5)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         tmp3  =  b(k, 3)
         tmp4  =  b(k, 4)
         tmp5  =  b(k, 5)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
            c(i, 5)  =  c(i, 5) + a(i,k) * tmp5
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_4(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,4),c(n1,4)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         tmp3  =  b(k, 3)
         tmp4  =  b(k, 4)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
            c(i, 4)  =  c(i, 4) + a(i,k) * tmp4
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_3(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,3),c(n1,3)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         tmp3  =  b(k, 3)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
            c(i, 3)  =  c(i, 3) + a(i,k) * tmp3
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_2(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,2),c(n1,2)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         tmp2  =  b(k, 2)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
            c(i, 2)  =  c(i, 2) + a(i,k) * tmp2
         enddo
c
      enddo
c
      return
      end
      subroutine mxmur3_1(a,n1,b,n2,c,n3)
      real a(n1,n2),b(n2,1),c(n1,1)
c
      do k=1,n2
         tmp1  =  b(k, 1)
         do i=1,n1
            c(i, 1)  =  c(i, 1) + a(i,k) * tmp1
         enddo
      enddo
c
      return
      end
C----------------------------------------------------------------------
      subroutine mxmd(a,n1,b,n2,c,n3)
C
C     Matrix-vector product routine. 
C     NOTE: Use assembly coded routine if available.
C
C---------------------------------------------------------------------
      REAL A(N1,N2),B(N2,N3),C(N1,N3)
      REAL ONE,ZERO,EPS
C
C
C
      one=1.0
      zero=0.0
      call dgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1)
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb(a,n1,b,n2,c,n3)
C-----------------------------------------------------------------------
C
C     Matrix-vector product routine. 
C     NOTE: Use assembly coded routine if available.
C
C----------------------------------------------------------------------
      REAL A(N1,N2),B(N2,N3),C(N1,N3)
C
      integer wdsize
      save    wdsize
      data    wdsize/0/
c
c     First call: determine word size for dgemm/sgemm discrimination, below.
c
      if (wdsize.eq.0) then
         one = 1.0
         eps = 1.e-12
         wdsize = 8
         if (one+eps.eq.1.0) wdsize = 4
      endif
c
      if (n2.le.8) then
         if (n2.eq.1) then
            call mxmfb_1(a,n1,b,n2,c,n3)
         elseif (n2.eq.2) then
            call mxmfb_2(a,n1,b,n2,c,n3)
         elseif (n2.eq.3) then
            call mxmfb_3(a,n1,b,n2,c,n3)
         elseif (n2.eq.4) then
            call mxmfb_4(a,n1,b,n2,c,n3)
         elseif (n2.eq.5) then
            call mxmfb_5(a,n1,b,n2,c,n3)
         elseif (n2.eq.6) then
            call mxmfb_6(a,n1,b,n2,c,n3)
         elseif (n2.eq.7) then
            call mxmfb_7(a,n1,b,n2,c,n3)
         else
            call mxmfb_8(a,n1,b,n2,c,n3)
         endif
      elseif (n2.le.16) then
         if (n2.eq.9) then
            call mxmfb_9(a,n1,b,n2,c,n3)
         elseif (n2.eq.10) then
            call mxmfb_10(a,n1,b,n2,c,n3)
         elseif (n2.eq.11) then
            call mxmfb_11(a,n1,b,n2,c,n3)
         elseif (n2.eq.12) then
            call mxmfb_12(a,n1,b,n2,c,n3)
         elseif (n2.eq.13) then
            call mxmfb_13(a,n1,b,n2,c,n3)
         elseif (n2.eq.14) then
            call mxmfb_14(a,n1,b,n2,c,n3)
         elseif (n2.eq.15) then
            call mxmfb_15(a,n1,b,n2,c,n3)
         else
            call mxmfb_16(a,n1,b,n2,c,n3)
         endif
      elseif (n2.le.24) then
         if (n2.eq.17) then
            call mxmfb_17(a,n1,b,n2,c,n3)
         elseif (n2.eq.18) then
            call mxmfb_18(a,n1,b,n2,c,n3)
         elseif (n2.eq.19) then
            call mxmfb_19(a,n1,b,n2,c,n3)
         elseif (n2.eq.20) then
            call mxmfb_20(a,n1,b,n2,c,n3)
         elseif (n2.eq.21) then
            call mxmfb_21(a,n1,b,n2,c,n3)
         elseif (n2.eq.22) then
            call mxmfb_22(a,n1,b,n2,c,n3)
         elseif (n2.eq.23) then
            call mxmfb_23(a,n1,b,n2,c,n3)
         elseif (n2.eq.24) then
            call mxmfb_24(a,n1,b,n2,c,n3)
         endif
      else
c
         one=1.0
         zero=0.0
         call dgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1)
 
      endif
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_1(a,n1,b,n2,c,n3)
c
      real a(n1,1),b(1,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_2(a,n1,b,n2,c,n3)
c
      real a(n1,2),b(2,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_3(a,n1,b,n2,c,n3)
c
      real a(n1,3),b(3,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_4(a,n1,b,n2,c,n3)
c
      real a(n1,4),b(4,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_5(a,n1,b,n2,c,n3)
c
      real a(n1,5),b(5,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_6(a,n1,b,n2,c,n3)
c
      real a(n1,6),b(6,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_7(a,n1,b,n2,c,n3)
c
      real a(n1,7),b(7,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_8(a,n1,b,n2,c,n3)
c
      real a(n1,8),b(8,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_9(a,n1,b,n2,c,n3)
c
      real a(n1,9),b(9,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_10(a,n1,b,n2,c,n3)
c
      real a(n1,10),b(10,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_11(a,n1,b,n2,c,n3)
c
      real a(n1,11),b(11,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_12(a,n1,b,n2,c,n3)
c
      real a(n1,12),b(12,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_13(a,n1,b,n2,c,n3)
c
      real a(n1,13),b(13,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_14(a,n1,b,n2,c,n3)
c
      real a(n1,14),b(14,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_15(a,n1,b,n2,c,n3)
c
      real a(n1,15),b(15,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_16(a,n1,b,n2,c,n3)
c
      real a(n1,16),b(16,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_17(a,n1,b,n2,c,n3)
c
      real a(n1,17),b(17,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_18(a,n1,b,n2,c,n3)
c
      real a(n1,18),b(18,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_19(a,n1,b,n2,c,n3)
c
      real a(n1,19),b(19,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_20(a,n1,b,n2,c,n3)
c
      real a(n1,20),b(20,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_21(a,n1,b,n2,c,n3)
c
      real a(n1,21),b(21,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_22(a,n1,b,n2,c,n3)
c
      real a(n1,22),b(22,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
     $             + a(i,22)*b(22,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_23(a,n1,b,n2,c,n3)
c
      real a(n1,23),b(23,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
     $             + a(i,22)*b(22,j)
     $             + a(i,23)*b(23,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmfb_24(a,n1,b,n2,c,n3)
c
      real a(n1,24),b(24,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
     $             + a(i,22)*b(22,j)
     $             + a(i,23)*b(23,j)
     $             + a(i,24)*b(24,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3(a,n1,b,n2,c,n3)
C-----------------------------------------------------------------------
C
C     Matrix-vector product routine. 
C     NOTE: Use assembly coded routine if available.
C
C----------------------------------------------------------------------
      REAL A(N1,N2),B(N2,N3),C(N1,N3)
C
      integer wdsize
      save    wdsize
      data    wdsize/0/
c
c     First call: determine word size for dgemm/sgemm discrimination, below.
c
      if (wdsize.eq.0) then
         one = 1.0
         eps = 1.e-12
         wdsize = 8
         if (one+eps.eq.1.0) wdsize = 4
      endif
c
      if (n2.le.8) then
         if (n2.eq.1) then
            call mxmf3_1(a,n1,b,n2,c,n3)
         elseif (n2.eq.2) then
            call mxmf3_2(a,n1,b,n2,c,n3)
         elseif (n2.eq.3) then
            call mxmf3_3(a,n1,b,n2,c,n3)
         elseif (n2.eq.4) then
            call mxmf3_4(a,n1,b,n2,c,n3)
         elseif (n2.eq.5) then
            call mxmf3_5(a,n1,b,n2,c,n3)
         elseif (n2.eq.6) then
            call mxmf3_6(a,n1,b,n2,c,n3)
         elseif (n2.eq.7) then
            call mxmf3_7(a,n1,b,n2,c,n3)
         else
            call mxmf3_8(a,n1,b,n2,c,n3)
         endif
      elseif (n2.le.16) then
         if (n2.eq.9) then
            call mxmf3_9(a,n1,b,n2,c,n3)
         elseif (n2.eq.10) then
            call mxmf3_10(a,n1,b,n2,c,n3)
         elseif (n2.eq.11) then
            call mxmf3_11(a,n1,b,n2,c,n3)
         elseif (n2.eq.12) then
            call mxmf3_12(a,n1,b,n2,c,n3)
         elseif (n2.eq.13) then
            call mxmf3_13(a,n1,b,n2,c,n3)
         elseif (n2.eq.14) then
            call mxmf3_14(a,n1,b,n2,c,n3)
         elseif (n2.eq.15) then
            call mxmf3_15(a,n1,b,n2,c,n3)
         else
            call mxmf3_16(a,n1,b,n2,c,n3)
         endif
      elseif (n2.le.24) then
         if (n2.eq.17) then
            call mxmf3_17(a,n1,b,n2,c,n3)
         elseif (n2.eq.18) then
            call mxmf3_18(a,n1,b,n2,c,n3)
         elseif (n2.eq.19) then
            call mxmf3_19(a,n1,b,n2,c,n3)
         elseif (n2.eq.20) then
            call mxmf3_20(a,n1,b,n2,c,n3)
         elseif (n2.eq.21) then
            call mxmf3_21(a,n1,b,n2,c,n3)
         elseif (n2.eq.22) then
            call mxmf3_22(a,n1,b,n2,c,n3)
         elseif (n2.eq.23) then
            call mxmf3_23(a,n1,b,n2,c,n3)
         elseif (n2.eq.24) then
            call mxmf3_24(a,n1,b,n2,c,n3)
         endif
      else
c
         one=1.0
         zero=0.0
         call dgemm( 'N','N',n1,n3,n2,ONE,A,N1,B,N2,ZERO,C,N1)
c
c        N0=N1*N3
c        DO 10 I=1,N0
c           C(I,1)=0.
c  10    CONTINUE
c        DO 100 J=1,N3
c        DO 100 K=1,N2
c        BB=B(K,J)
c        DO 100 I=1,N1
c           C(I,J)=C(I,J)+A(I,K)*BB
c 100    CONTINUE
 
      endif
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_1(a,n1,b,n2,c,n3)
c
      real a(n1,1),b(1,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_2(a,n1,b,n2,c,n3)
c
      real a(n1,2),b(2,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_3(a,n1,b,n2,c,n3)
c
      real a(n1,3),b(3,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_4(a,n1,b,n2,c,n3)
c
      real a(n1,4),b(4,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_5(a,n1,b,n2,c,n3)
c
      real a(n1,5),b(5,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_6(a,n1,b,n2,c,n3)
c
      real a(n1,6),b(6,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_7(a,n1,b,n2,c,n3)
c
      real a(n1,7),b(7,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_8(a,n1,b,n2,c,n3)
c
      real a(n1,8),b(8,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_9(a,n1,b,n2,c,n3)
c
      real a(n1,9),b(9,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_10(a,n1,b,n2,c,n3)
c
      real a(n1,10),b(10,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_11(a,n1,b,n2,c,n3)
c
      real a(n1,11),b(11,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_12(a,n1,b,n2,c,n3)
c
      real a(n1,12),b(12,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_13(a,n1,b,n2,c,n3)
c
      real a(n1,13),b(13,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_14(a,n1,b,n2,c,n3)
c
      real a(n1,14),b(14,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_15(a,n1,b,n2,c,n3)
c
      real a(n1,15),b(15,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_16(a,n1,b,n2,c,n3)
c
      real a(n1,16),b(16,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_17(a,n1,b,n2,c,n3)
c
      real a(n1,17),b(17,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_18(a,n1,b,n2,c,n3)
c
      real a(n1,18),b(18,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_19(a,n1,b,n2,c,n3)
c
      real a(n1,19),b(19,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_20(a,n1,b,n2,c,n3)
c
      real a(n1,20),b(20,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_21(a,n1,b,n2,c,n3)
c
      real a(n1,21),b(21,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_22(a,n1,b,n2,c,n3)
c
      real a(n1,22),b(22,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
     $             + a(i,22)*b(22,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_23(a,n1,b,n2,c,n3)
c
      real a(n1,23),b(23,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
     $             + a(i,22)*b(22,j)
     $             + a(i,23)*b(23,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxmf3_24(a,n1,b,n2,c,n3)
c
      real a(n1,24),b(24,n3),c(n1,n3)
c
      do i=1,n1
         do j=1,n3
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
     $             + a(i,22)*b(22,j)
     $             + a(i,23)*b(23,j)
     $             + a(i,24)*b(24,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxm44(a,n1,b,n2,c,n3)
C-----------------------------------------------------------------------
C
C     NOTE -- this code has been set up with the "mxmf3" routine
c             referenced in memtime.f.   On most machines, the f2
c             and f3 versions give the same performance (f2 is the
c             nekton standard).  On the t3e, f3 is noticeably faster.
c             pff  10/5/98
C
C
C     Matrix-vector product routine. 
C     NOTE: Use assembly coded routine if available.
C
C----------------------------------------------------------------------
      REAL A(N1,N2),B(N2,N3),C(N1,N3)
c
      if (n2.eq.1) then
         call mxm44_2_t(a,n1,b,2,c,n3)
      elseif (n2.eq.2) then
         call mxm44_2_t(a,n1,b,n2,c,n3)
      else
         call mxm44_0_t(a,n1,b,n2,c,n3)
      endif
c
      return
      end
c
c-----------------------------------------------------------------------
      subroutine mxm44_0_t(a, m, b, k, c, n)
*      subroutine matmul44(m, n, k, a, lda, b, ldb, c, ldc)
*      real*8 a(lda,k), b(ldb,n), c(ldc,n)
      real a(m,k), b(k,n), c(m,n)
      real s11, s12, s13, s14, s21, s22, s23, s24
      real s31, s32, s33, s34, s41, s42, s43, s44
c
c matrix multiply with a 4x4 pencil 
c

      mresid = iand(m,3) 
      nresid = iand(n,3) 
      m1 = m - mresid + 1
      n1 = n - nresid + 1

      do i=1,m-mresid,4
        do j=1,n-nresid,4
          s11 = 0.0d0
          s21 = 0.0d0
          s31 = 0.0d0
          s41 = 0.0d0
          s12 = 0.0d0
          s22 = 0.0d0
          s32 = 0.0d0
          s42 = 0.0d0
          s13 = 0.0d0
          s23 = 0.0d0
          s33 = 0.0d0
          s43 = 0.0d0
          s14 = 0.0d0
          s24 = 0.0d0
          s34 = 0.0d0
          s44 = 0.0d0
          do l=1,k
            s11 = s11 + a(i,l)*b(l,j)
            s12 = s12 + a(i,l)*b(l,j+1)
            s13 = s13 + a(i,l)*b(l,j+2)
            s14 = s14 + a(i,l)*b(l,j+3)

            s21 = s21 + a(i+1,l)*b(l,j)
            s22 = s22 + a(i+1,l)*b(l,j+1)
            s23 = s23 + a(i+1,l)*b(l,j+2)
            s24 = s24 + a(i+1,l)*b(l,j+3)

            s31 = s31 + a(i+2,l)*b(l,j)
            s32 = s32 + a(i+2,l)*b(l,j+1)
            s33 = s33 + a(i+2,l)*b(l,j+2)
            s34 = s34 + a(i+2,l)*b(l,j+3)

            s41 = s41 + a(i+3,l)*b(l,j)
            s42 = s42 + a(i+3,l)*b(l,j+1)
            s43 = s43 + a(i+3,l)*b(l,j+2)
            s44 = s44 + a(i+3,l)*b(l,j+3)
          enddo
          c(i,j)     = s11 
          c(i,j+1)   = s12 
          c(i,j+2)   = s13
          c(i,j+3)   = s14

          c(i+1,j)   = s21 
          c(i+2,j)   = s31 
          c(i+3,j)   = s41 

          c(i+1,j+1) = s22
          c(i+2,j+1) = s32
          c(i+3,j+1) = s42

          c(i+1,j+2) = s23
          c(i+2,j+2) = s33
          c(i+3,j+2) = s43

          c(i+1,j+3) = s24
          c(i+2,j+3) = s34
          c(i+3,j+3) = s44
        enddo
* Residual when n is not multiple of 4
        if (nresid .ne. 0) then
          if (nresid .eq. 1) then
            s11 = 0.0d0
            s21 = 0.0d0
            s31 = 0.0d0
            s41 = 0.0d0
            do l=1,k
              s11 = s11 + a(i,l)*b(l,n)
              s21 = s21 + a(i+1,l)*b(l,n)
              s31 = s31 + a(i+2,l)*b(l,n)
              s41 = s41 + a(i+3,l)*b(l,n)
            enddo
            c(i,n)     = s11 
            c(i+1,n)   = s21 
            c(i+2,n)   = s31 
            c(i+3,n)   = s41 
          elseif (nresid .eq. 2) then
            s11 = 0.0d0
            s21 = 0.0d0
            s31 = 0.0d0
            s41 = 0.0d0
            s12 = 0.0d0
            s22 = 0.0d0
            s32 = 0.0d0
            s42 = 0.0d0
            do l=1,k
              s11 = s11 + a(i,l)*b(l,j)
              s12 = s12 + a(i,l)*b(l,j+1)

              s21 = s21 + a(i+1,l)*b(l,j)
              s22 = s22 + a(i+1,l)*b(l,j+1)

              s31 = s31 + a(i+2,l)*b(l,j)
              s32 = s32 + a(i+2,l)*b(l,j+1)

              s41 = s41 + a(i+3,l)*b(l,j)
              s42 = s42 + a(i+3,l)*b(l,j+1)
            enddo
            c(i,j)     = s11 
            c(i,j+1)   = s12

            c(i+1,j)   = s21 
            c(i+2,j)   = s31 
            c(i+3,j)   = s41 

            c(i+1,j+1) = s22
            c(i+2,j+1) = s32
            c(i+3,j+1) = s42
          else
            s11 = 0.0d0
            s21 = 0.0d0
            s31 = 0.0d0
            s41 = 0.0d0
            s12 = 0.0d0
            s22 = 0.0d0
            s32 = 0.0d0
            s42 = 0.0d0
            s13 = 0.0d0
            s23 = 0.0d0
            s33 = 0.0d0
            s43 = 0.0d0
            do l=1,k
              s11 = s11 + a(i,l)*b(l,j)
              s12 = s12 + a(i,l)*b(l,j+1)
              s13 = s13 + a(i,l)*b(l,j+2)

              s21 = s21 + a(i+1,l)*b(l,j)
              s22 = s22 + a(i+1,l)*b(l,j+1)
              s23 = s23 + a(i+1,l)*b(l,j+2)

              s31 = s31 + a(i+2,l)*b(l,j)
              s32 = s32 + a(i+2,l)*b(l,j+1)
              s33 = s33 + a(i+2,l)*b(l,j+2)

              s41 = s41 + a(i+3,l)*b(l,j)
              s42 = s42 + a(i+3,l)*b(l,j+1)
              s43 = s43 + a(i+3,l)*b(l,j+2)
            enddo
            c(i,j)     = s11 
            c(i+1,j)   = s21 
            c(i+2,j)   = s31 
            c(i+3,j)   = s41 
            c(i,j+1)   = s12 
            c(i+1,j+1) = s22
            c(i+2,j+1) = s32
            c(i+3,j+1) = s42
            c(i,j+2)   = s13
            c(i+1,j+2) = s23
            c(i+2,j+2) = s33
            c(i+3,j+2) = s43
          endif
        endif
      enddo

* Residual when m is not multiple of 4
      if (mresid .eq. 0) then
        return
      elseif (mresid .eq. 1) then
        do j=1,n-nresid,4
          s11 = 0.0d0
          s12 = 0.0d0
          s13 = 0.0d0
          s14 = 0.0d0
          do l=1,k
            s11 = s11 + a(m,l)*b(l,j)
            s12 = s12 + a(m,l)*b(l,j+1)
            s13 = s13 + a(m,l)*b(l,j+2)
            s14 = s14 + a(m,l)*b(l,j+3)
          enddo
          c(m,j)     = s11 
          c(m,j+1)   = s12 
          c(m,j+2)   = s13
          c(m,j+3)   = s14
        enddo
* mresid is 1, check nresid
        if (nresid .eq. 0) then
          return
        elseif (nresid .eq. 1) then
          s11 = 0.0d0
          do l=1,k
            s11 = s11 + a(m,l)*b(l,n)
          enddo
          c(m,n) = s11
          return
        elseif (nresid .eq. 2) then
          s11 = 0.0d0
          s12 = 0.0d0
          do l=1,k
            s11 = s11 + a(m,l)*b(l,n-1)
            s12 = s12 + a(m,l)*b(l,n)
          enddo
          c(m,n-1) = s11
          c(m,n) = s12
          return
        else
          s11 = 0.0d0
          s12 = 0.0d0
          s13 = 0.0d0
          do l=1,k
            s11 = s11 + a(m,l)*b(l,n-2)
            s12 = s12 + a(m,l)*b(l,n-1)
            s13 = s13 + a(m,l)*b(l,n)
          enddo
          c(m,n-2) = s11
          c(m,n-1) = s12
          c(m,n) = s13
          return
        endif          
      elseif (mresid .eq. 2) then
        do j=1,n-nresid,4
          s11 = 0.0d0
          s12 = 0.0d0
          s13 = 0.0d0
          s14 = 0.0d0
          s21 = 0.0d0
          s22 = 0.0d0
          s23 = 0.0d0
          s24 = 0.0d0
          do l=1,k
            s11 = s11 + a(m-1,l)*b(l,j)
            s12 = s12 + a(m-1,l)*b(l,j+1)
            s13 = s13 + a(m-1,l)*b(l,j+2)
            s14 = s14 + a(m-1,l)*b(l,j+3)

            s21 = s21 + a(m,l)*b(l,j)
            s22 = s22 + a(m,l)*b(l,j+1)
            s23 = s23 + a(m,l)*b(l,j+2)
            s24 = s24 + a(m,l)*b(l,j+3)
          enddo
          c(m-1,j)   = s11 
          c(m-1,j+1) = s12 
          c(m-1,j+2) = s13
          c(m-1,j+3) = s14
          c(m,j)     = s21
          c(m,j+1)   = s22 
          c(m,j+2)   = s23
          c(m,j+3)   = s24
        enddo
* mresid is 2, check nresid
        if (nresid .eq. 0) then
          return
        elseif (nresid .eq. 1) then
          s11 = 0.0d0
          s21 = 0.0d0
          do l=1,k
            s11 = s11 + a(m-1,l)*b(l,n)
            s21 = s21 + a(m,l)*b(l,n)
          enddo
          c(m-1,n) = s11
          c(m,n) = s21
          return
        elseif (nresid .eq. 2) then
          s11 = 0.0d0
          s21 = 0.0d0
          s12 = 0.0d0
          s22 = 0.0d0
          do l=1,k
            s11 = s11 + a(m-1,l)*b(l,n-1)
            s12 = s12 + a(m-1,l)*b(l,n)
            s21 = s21 + a(m,l)*b(l,n-1)
            s22 = s22 + a(m,l)*b(l,n)
          enddo
          c(m-1,n-1) = s11
          c(m-1,n)   = s12
          c(m,n-1)   = s21
          c(m,n)     = s22
          return
        else
          s11 = 0.0d0
          s21 = 0.0d0
          s12 = 0.0d0
          s22 = 0.0d0
          s13 = 0.0d0
          s23 = 0.0d0
          do l=1,k
            s11 = s11 + a(m-1,l)*b(l,n-2)
            s12 = s12 + a(m-1,l)*b(l,n-1)
            s13 = s13 + a(m-1,l)*b(l,n)
            s21 = s21 + a(m,l)*b(l,n-2)
            s22 = s22 + a(m,l)*b(l,n-1)
            s23 = s23 + a(m,l)*b(l,n)
          enddo
          c(m-1,n-2) = s11
          c(m-1,n-1) = s12
          c(m-1,n)   = s13
          c(m,n-2)   = s21
          c(m,n-1)   = s22
          c(m,n)     = s23
          return
        endif
      else
* mresid is 3
        do j=1,n-nresid,4
          s11 = 0.0d0
          s21 = 0.0d0
          s31 = 0.0d0

          s12 = 0.0d0
          s22 = 0.0d0
          s32 = 0.0d0

          s13 = 0.0d0
          s23 = 0.0d0
          s33 = 0.0d0

          s14 = 0.0d0
          s24 = 0.0d0
          s34 = 0.0d0

          do l=1,k
            s11 = s11 + a(m-2,l)*b(l,j)
            s12 = s12 + a(m-2,l)*b(l,j+1)
            s13 = s13 + a(m-2,l)*b(l,j+2)
            s14 = s14 + a(m-2,l)*b(l,j+3)

            s21 = s21 + a(m-1,l)*b(l,j)
            s22 = s22 + a(m-1,l)*b(l,j+1)
            s23 = s23 + a(m-1,l)*b(l,j+2)
            s24 = s24 + a(m-1,l)*b(l,j+3)

            s31 = s31 + a(m,l)*b(l,j)
            s32 = s32 + a(m,l)*b(l,j+1)
            s33 = s33 + a(m,l)*b(l,j+2)
            s34 = s34 + a(m,l)*b(l,j+3)
          enddo
          c(m-2,j)   = s11 
          c(m-2,j+1) = s12 
          c(m-2,j+2) = s13
          c(m-2,j+3) = s14

          c(m-1,j)   = s21 
          c(m-1,j+1) = s22
          c(m-1,j+2) = s23
          c(m-1,j+3) = s24

          c(m,j)     = s31 
          c(m,j+1)   = s32
          c(m,j+2)   = s33
          c(m,j+3)   = s34
        enddo
* mresid is 3, check nresid
        if (nresid .eq. 0) then
          return
        elseif (nresid .eq. 1) then
          s11 = 0.0d0
          s21 = 0.0d0
          s31 = 0.0d0
          do l=1,k
            s11 = s11 + a(m-2,l)*b(l,n)
            s21 = s21 + a(m-1,l)*b(l,n)
            s31 = s31 + a(m,l)*b(l,n)
          enddo
          c(m-2,n) = s11
          c(m-1,n) = s21
          c(m,n) = s31
          return
        elseif (nresid .eq. 2) then
          s11 = 0.0d0
          s21 = 0.0d0
          s31 = 0.0d0
          s12 = 0.0d0
          s22 = 0.0d0
          s32 = 0.0d0
          do l=1,k
            s11 = s11 + a(m-2,l)*b(l,n-1)
            s12 = s12 + a(m-2,l)*b(l,n)
            s21 = s21 + a(m-1,l)*b(l,n-1)
            s22 = s22 + a(m-1,l)*b(l,n)
            s31 = s31 + a(m,l)*b(l,n-1)
            s32 = s32 + a(m,l)*b(l,n)
          enddo
          c(m-2,n-1) = s11
          c(m-2,n)   = s12
          c(m-1,n-1) = s21
          c(m-1,n)   = s22
          c(m,n-1)   = s31
          c(m,n)     = s32
          return
        else
          s11 = 0.0d0
          s21 = 0.0d0
          s31 = 0.0d0
          s12 = 0.0d0
          s22 = 0.0d0
          s32 = 0.0d0
          s13 = 0.0d0
          s23 = 0.0d0
          s33 = 0.0d0
          do l=1,k
            s11 = s11 + a(m-2,l)*b(l,n-2)
            s12 = s12 + a(m-2,l)*b(l,n-1)
            s13 = s13 + a(m-2,l)*b(l,n)
            s21 = s21 + a(m-1,l)*b(l,n-2)
            s22 = s22 + a(m-1,l)*b(l,n-1)
            s23 = s23 + a(m-1,l)*b(l,n)
            s31 = s31 + a(m,l)*b(l,n-2)
            s32 = s32 + a(m,l)*b(l,n-1)
            s33 = s33 + a(m,l)*b(l,n)
          enddo
          c(m-2,n-2) = s11
          c(m-2,n-1) = s12
          c(m-2,n)   = s13
          c(m-1,n-2) = s21
          c(m-1,n-1) = s22
          c(m-1,n)   = s23
          c(m,n-2)   = s31
          c(m,n-1)   = s32
          c(m,n)     = s33
          return
        endif
      endif

      return
      end
c-----------------------------------------------------------------------
      subroutine mxm44_2_t(a, m, b, k, c, n)
      real a(m,2), b(2,n), c(m,n)

      nresid = iand(n,3) 
      n1 = n - nresid + 1

      do j=1,n-nresid,4
         do i=1,m
            c(i,j) = a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
            c(i,j+1) = a(i,1)*b(1,j+1)
     $             + a(i,2)*b(2,j+1)
            c(i,j+2) = a(i,1)*b(1,j+2)
     $             + a(i,2)*b(2,j+2)
            c(i,j+3) = a(i,1)*b(1,j+3)
     $             + a(i,2)*b(2,j+3)
         enddo
      enddo
      if (nresid .eq. 0) then
        return
      elseif (nresid .eq. 1) then
         do i=1,m
            c(i,n) = a(i,1)*b(1,n)
     $             + a(i,2)*b(2,n)
         enddo
      elseif (nresid .eq. 2) then
         do i=1,m
            c(i,n-1) = a(i,1)*b(1,n-1)
     $             + a(i,2)*b(2,n-1)
            c(i,n) = a(i,1)*b(1,n)
     $             + a(i,2)*b(2,n)
         enddo
      else
         do i=1,m
            c(i,n-2) = a(i,1)*b(1,n-2)
     $             + a(i,2)*b(2,n-2)
            c(i,n-1) = a(i,1)*b(1,n-1)
     $             + a(i,2)*b(2,n-1)
            c(i,n) = a(i,1)*b(1,n)
     $             + a(i,2)*b(2,n)
         enddo
      endif

      return
      end
c-----------------------------------------------------------------------
      subroutine mxmtest(s,nn,cn,mxmt,name,k,ivb)

      real        s(nn,2)   ! MFLOPS
      character*5 cn        ! name
      character*5 name
      external mxmt

      include 'SIZE'
      parameter (lt=4*lx1*ly1*lz1*lelt)
      common /scrns/ a(lt)
      common /scruz/ b(lt)
      common /scrmg/ c(lt)

      integer ll,icalld
      save    ll,icalld
      data    ll,icalld  /1,0/

      if (icalld.eq.0) then    !     Initialize matrices:
         icalld = icalld + 1
         time1 = dnekclock()
         call initab(a,b,lt)
         time2 = dnekclock()-time1
         if (nid.eq.0) write(6,*) 'mxm test init:',lt,time2,name
      endif


      cn = name

c     Rectangular matrix tests

      nn0 = 1
      nn1 = nn
      if (ivb.eq.0) then
         nn0 = lx1
         nn1 = lx1
      endif

      m = k
      do n=nn0,nn1
         n1 = n
         n2 = n
         n3 = n
         if (m.eq.1) n1 = n*n
         if (m.eq.3) n3 = n*n
         if (lt.gt.n1*n3) then
          n13 = max(n1,n3)
          loop = 250000/(n1*n2*n3) + 500
          if (name.eq.'madd ') loop = 200000/(n1*n3) + 5000

c-------------------------------------------------------
c         mem test
c-------------------------------------------------------

          t0    = dnekclock()
          overh = dnekclock()-t0
          time1 = dnekclock()
          do l=1,loop
            if (ll.ge.lt-n1*n3) ll = 1
            call mxmt(a(ll),n1,b(ll),n2,c(ll),n3)
            ll = ll+n1*n3
          enddo
          time2 = dnekclock()
          time = time2-time1 - overh
          iops=loop*n1*n3*(2*n2-1)
          if (name.eq.'madd ') iops = loop*n1*n3
c         write(6,*) loop,time,time2,time1,overh
          flops=iops/(1.0e6*time)
          s(n,1) = flops
c
          timel = time/loop
          if (nid.eq.0) write(6,199) n,n1,n2,n3,flops,timel,name
  199     format(i3,'m',1x,3i6,f10.4,e16.5,3x,a5,' mem')
c
c-------------------------------------------------------
c         fast test
c-------------------------------------------------------
c
          call mxmt(a,n1,b,n2,c,n3)
          t0    = dnekclock()
          overh = dnekclock()-t0
          time1 = dnekclock()
          do l=1,loop
            call mxmt(a,n1,b,n2,c,n3)
          enddo
          time2 = dnekclock()
          time = time2-time1 - overh
          iops=loop*n1*n3*(2*n2-1)
          if (name.eq.'madd ') iops = loop*n1*n3
          flops=iops/(1.0e6*time)
          s(n,2) = flops
          timel = time/loop
c
          if (nid.eq.0) write(6,198) n,n1,n2,n3,flops,timel,name
  198     format(i3,'f',1x,3i6,f10.4,e16.5,3x,a5,' fast')
c
        endif
       enddo
c
      return
      end
c-----------------------------------------------------------------------
      subroutine mxm_analyze(s,a,nn,c,nt,ivb)
      include 'SIZE'

      character*5 c(3,nt)
      real        s(nn,2,nt,3)  ! Measured Mflops, 3 cases
      real        a(nn,2,nt,3)
c                   ^  ^ ^  |__ N^2xN, NxN, NxN^2
c  matrix order N __|  | |__________which mxm
c                      |
c                      |__cached vs. noncached data
 

      integer itmax(200)

      nn0 = 1
      nn1 = nn
      if (ivb.eq.0) then
         nn0 = lx1
         nn1 = lx1
      endif

      do n = nn0,nn1
         fmax = 0.   ! Peak mflops
         do it=1,nt
            ai = 0.
            di = 0.
            do k=1,3
               if (s(n,1,it,k).gt.0) then  ! Take harmonic means of
                  ai = ai + 1./s(n,1,it,k) ! case I II and III for 
                  di = di + 1.             ! mem test, s(n,1...).
               endif
            enddo
            if (ai.gt.0) ai = di/ai
            a(n,1,it,1) = di/ai
            if (ai.gt.fmax.and.c(2,it).ne.'madd ') then
               fmax     = ai
               itmax(n) = it
            endif
         enddo
         it = itmax(n)
         if (nid.eq.0) write(6,3) n,it,c(2,it),(s(n,1,it,k),k=1,3),fmax
    3    format(i3,i2,1x,a5,4f12.0,' Peak harmonic')
      enddo
      call out_anal(s,a,nn,c,nt,itmax,'Harmonic',1,ivb)
c
c     Case by case
c
      do k=1,3
         do n = nn0,nn1
            fmax = 0.   ! Peak mflops
            do it=1,nt
               ai = s(n,1,it,k)
               if (ai.gt.fmax.and.c(2,it).ne.'madd ') then
                  fmax     = ai
                  itmax(n) = it
               endif
            enddo
         enddo
         if (k.eq.1) call out_anal(s,a,nn,c,nt,itmax,'Case N2N',k,ivb)
         if (k.eq.2) call out_anal(s,a,nn,c,nt,itmax,'Case NxN',k,ivb)
         if (k.eq.3) call out_anal(s,a,nn,c,nt,itmax,'Case NN2',k,ivb)
      enddo

      return
      end
c-----------------------------------------------------------------------
      subroutine out_anal(s,a,nn,c,nt,itmax,name8,k,ivb)
      include 'SIZE'

      character*5 c(3,nt)
      real        s(nn,2,nt,3)
      real        a(nn,2,nt,3)
      integer itmax(200)
      character*8 name8

      if (nid.ne.0) return

      nn0 = 1
      nn1 = nn
      if (ivb.eq.0) then
         nn0 = lx1
         nn1 = lx1
      endif


      do n=nn0,nn1
         it = itmax(n)
         write(6,1) n,s(n,1,it,k),c(2,it),name8
    1    format(i4,f14.0,4x,a5,4x,a8,'   MxM MFLOPS')
      enddo

      return
      end
c-----------------------------------------------------------------------
      subroutine mxma(a,n1,b,n2,c,n3)
C
C     Compute  C = A*B , for contiguously packed matrices A,B, and C.
C
C
C     Compile with -r8 option for 64-bit arithmetic, or convert real
c     to real*8
c
      real a(n1,n2),b(n2,n3),c(n1,n3)
c
      include 'SIZE'
      include 'OPCTR'
      include 'TOTAL'
c
#ifdef TIMER
      if (isclld.eq.0) then
          isclld=1
          nrout=nrout+1
          myrout=nrout
          rname(myrout) = 'mxma  '
      endif
      isbcnt = n1*n3*(2*n2-1)
      dct(myrout) = dct(myrout) + (isbcnt)
      ncall(myrout) = ncall(myrout) + 1
      dcount      =      dcount + (isbcnt)
#endif
c
c
      call mxma2(a,n1,b,n2,c,n3)  ! In some cases, this is faster.
c
      return
      end
c-----------------------------------------------------------------------
      subroutine mxma2(a,n1,b,n2,c,n3)
c
c     unrolled loop routine written by paul fischer
c     
c     this version computines C = C + A x B
c
      real a(n1,n2),b(n2,n3),c(n1,n3)
C
      integer wdsize
      save    wdsize
      data    wdsize/0/
c
c     First call: determine word size for dgemm/sgemm discrimination, below.
c
      if (wdsize.eq.0) then
         one = 1.0
         eps = 1.e-12
         wdsize = 8
         if (one+eps.eq.1.0) wdsize = 4
      endif
c
      if (n2.le.8) then
         if (n2.eq.1) then
            call mxa1(a,n1,b,n2,c,n3)
         elseif (n2.eq.2) then
            call mxa2(a,n1,b,n2,c,n3)
         elseif (n2.eq.3) then
            call mxa3(a,n1,b,n2,c,n3)
         elseif (n2.eq.4) then
            call mxa4(a,n1,b,n2,c,n3)
         elseif (n2.eq.5) then
            call mxa5(a,n1,b,n2,c,n3)
         elseif (n2.eq.6) then
            call mxa6(a,n1,b,n2,c,n3)
         elseif (n2.eq.7) then
            call mxa7(a,n1,b,n2,c,n3)
         else
            call mxa8(a,n1,b,n2,c,n3)
         endif
      elseif (n2.le.16) then
         if (n2.eq.9) then
            call mxa9(a,n1,b,n2,c,n3)
         elseif (n2.eq.10) then
            call mxa10(a,n1,b,n2,c,n3)
         elseif (n2.eq.11) then
            call mxa11(a,n1,b,n2,c,n3)
         elseif (n2.eq.12) then
            call mxa12(a,n1,b,n2,c,n3)
         elseif (n2.eq.13) then
            call mxa13(a,n1,b,n2,c,n3)
         elseif (n2.eq.14) then
            call mxa14(a,n1,b,n2,c,n3)
         elseif (n2.eq.15) then
            call mxa15(a,n1,b,n2,c,n3)
         else
            call mxa16(a,n1,b,n2,c,n3)
         endif
      elseif (n2.le.24) then
         if (n2.eq.17) then
            call mxa17(a,n1,b,n2,c,n3)
         elseif (n2.eq.18) then
            call mxa18(a,n1,b,n2,c,n3)
         elseif (n2.eq.19) then
            call mxa19(a,n1,b,n2,c,n3)
         elseif (n2.eq.20) then
            call mxa20(a,n1,b,n2,c,n3)
         elseif (n2.eq.21) then
            call mxa21(a,n1,b,n2,c,n3)
         elseif (n2.eq.22) then
            call mxa22(a,n1,b,n2,c,n3)
         elseif (n2.eq.23) then
            call mxa23(a,n1,b,n2,c,n3)
         elseif (n2.eq.24) then
            call mxa24(a,n1,b,n2,c,n3)
         endif
      else
         call mxm44_0a(a,n1,b,n2,c,n3)
      endif
c
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa1(a,n1,b,n2,c,n3)
c
      real a(n1,1),b(1,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa2(a,n1,b,n2,c,n3)
c
      real a(n1,2),b(2,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa3(a,n1,b,n2,c,n3)
c
      real a(n1,3),b(3,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa4(a,n1,b,n2,c,n3)
c
      real a(n1,4),b(4,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa5(a,n1,b,n2,c,n3)
c
      real a(n1,5),b(5,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa6(a,n1,b,n2,c,n3)
c
      real a(n1,6),b(6,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa7(a,n1,b,n2,c,n3)
c
      real a(n1,7),b(7,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa8(a,n1,b,n2,c,n3)
c
      real a(n1,8),b(8,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa9(a,n1,b,n2,c,n3)
c
      real a(n1,9),b(9,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa10(a,n1,b,n2,c,n3)
c
      real a(n1,10),b(10,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa11(a,n1,b,n2,c,n3)
c
      real a(n1,11),b(11,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa12(a,n1,b,n2,c,n3)
c
      real a(n1,12),b(12,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa13(a,n1,b,n2,c,n3)
c
      real a(n1,13),b(13,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa14(a,n1,b,n2,c,n3)
c
      real a(n1,14),b(14,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa15(a,n1,b,n2,c,n3)
c
      real a(n1,15),b(15,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa16(a,n1,b,n2,c,n3)
c
      real a(n1,16),b(16,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa17(a,n1,b,n2,c,n3)
c
      real a(n1,17),b(17,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa18(a,n1,b,n2,c,n3)
c
      real a(n1,18),b(18,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa19(a,n1,b,n2,c,n3)
c
      real a(n1,19),b(19,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa20(a,n1,b,n2,c,n3)
c
      real a(n1,20),b(20,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa21(a,n1,b,n2,c,n3)
c
      real a(n1,21),b(21,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa22(a,n1,b,n2,c,n3)
c
      real a(n1,22),b(22,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
     $             + a(i,22)*b(22,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa23(a,n1,b,n2,c,n3)
c
      real a(n1,23),b(23,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
     $             + a(i,22)*b(22,j)
     $             + a(i,23)*b(23,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxa24(a,n1,b,n2,c,n3)
c
      real a(n1,24),b(24,n3),c(n1,n3)
c
      do j=1,n3
         do i=1,n1
            c(i,j) = c(i,j)
     $             + a(i,1)*b(1,j)
     $             + a(i,2)*b(2,j)
     $             + a(i,3)*b(3,j)
     $             + a(i,4)*b(4,j)
     $             + a(i,5)*b(5,j)
     $             + a(i,6)*b(6,j)
     $             + a(i,7)*b(7,j)
     $             + a(i,8)*b(8,j)
     $             + a(i,9)*b(9,j)
     $             + a(i,10)*b(10,j)
     $             + a(i,11)*b(11,j)
     $             + a(i,12)*b(12,j)
     $             + a(i,13)*b(13,j)
     $             + a(i,14)*b(14,j)
     $             + a(i,15)*b(15,j)
     $             + a(i,16)*b(16,j)
     $             + a(i,17)*b(17,j)
     $             + a(i,18)*b(18,j)
     $             + a(i,19)*b(19,j)
     $             + a(i,20)*b(20,j)
     $             + a(i,21)*b(21,j)
     $             + a(i,22)*b(22,j)
     $             + a(i,23)*b(23,j)
     $             + a(i,24)*b(24,j)
         enddo
      enddo
      return
      end
c-----------------------------------------------------------------------
      subroutine mxm44_0a(a, m, b, k, c, n)
c
c     routine written by Bruce Curtiss
c
c     subroutine matmul44(m, n, k, a, lda, b, ldb, c, ldc)
c     real a(lda,k), b(ldb,n), c(ldc,n)
c
      real a(m,k), b(k,n), c(m,n)
      real s11, s12, s13, s14, s21, s22, s23, s24
      real s31, s32, s33, s34, s41, s42, s43, s44
c
c matrix multiply with a 4x4 pencil 
c

      mresid = mod(m,4) 
      nresid = mod(n,4) 
      m1 = m - mresid + 1
      n1 = n - nresid + 1

      do i=1,m-mresid,4
        do j=1,n-nresid,4
          s11 = c(i,j)
          s12 = c(i,j+1)
          s13 = c(i,j+2)
          s14 = c(i,j+3)

          s21 = c(i+1,j)
          s31 = c(i+2,j)
          s41 = c(i+3,j)

          s22 = c(i+1,j+1)
          s32 = c(i+2,j+1)
          s42 = c(i+3,j+1)

          s23 = c(i+1,j+2)
          s33 = c(i+2,j+2)
          s43 = c(i+3,j+2)

          s24 = c(i+1,j+3)
          s34 = c(i+2,j+3)
          s44 = c(i+3,j+3)
c
          do l=1,k
            s11 = s11 + a(i,l)*b(l,j)
            s12 = s12 + a(i,l)*b(l,j+1)
            s13 = s13 + a(i,l)*b(l,j+2)
            s14 = s14 + a(i,l)*b(l,j+3)

            s21 = s21 + a(i+1,l)*b(l,j)
            s22 = s22 + a(i+1,l)*b(l,j+1)
            s23 = s23 + a(i+1,l)*b(l,j+2)
            s24 = s24 + a(i+1,l)*b(l,j+3)

            s31 = s31 + a(i+2,l)*b(l,j)
            s32 = s32 + a(i+2,l)*b(l,j+1)
            s33 = s33 + a(i+2,l)*b(l,j+2)
            s34 = s34 + a(i+2,l)*b(l,j+3)

            s41 = s41 + a(i+3,l)*b(l,j)
            s42 = s42 + a(i+3,l)*b(l,j+1)
            s43 = s43 + a(i+3,l)*b(l,j+2)
            s44 = s44 + a(i+3,l)*b(l,j+3)
          enddo
          c(i,j)     = s11 
          c(i,j+1)   = s12 
          c(i,j+2)   = s13
          c(i,j+3)   = s14

          c(i+1,j)   = s21 
          c(i+2,j)   = s31 
          c(i+3,j)   = s41 

          c(i+1,j+1) = s22
          c(i+2,j+1) = s32
          c(i+3,j+1) = s42

          c(i+1,j+2) = s23
          c(i+2,j+2) = s33
          c(i+3,j+2) = s43

          c(i+1,j+3) = s24
          c(i+2,j+3) = s34
          c(i+3,j+3) = s44
        enddo
c Residual when n is not multiple of 4
        if (nresid .ne. 0) then
          if (nresid .eq. 1) then
            s11 = c(i  ,n)
            s21 = c(i+1,n)
            s31 = c(i+2,n)
            s41 = c(i+3,n)
            do l=1,k
              s11 = s11 + a(i,l)*b(l,n)
              s21 = s21 + a(i+1,l)*b(l,n)
              s31 = s31 + a(i+2,l)*b(l,n)
              s41 = s41 + a(i+3,l)*b(l,n)
            enddo
            c(i,n)     = s11 
            c(i+1,n)   = s21 
            c(i+2,n)   = s31 
            c(i+3,n)   = s41 
          elseif (nresid .eq. 2) then
            s11 = c(i  ,j  )
            s12 = c(i  ,j+1)
            s21 = c(i+1,j  )
            s31 = c(i+2,j  )
            s41 = c(i+3,j  )
            s22 = c(i+1,j+1)
            s32 = c(i+2,j+1)
            s42 = c(i+3,j+1)
            do l=1,k
              s11 = s11 + a(i,l)*b(l,j)
              s12 = s12 + a(i,l)*b(l,j+1)

              s21 = s21 + a(i+1,l)*b(l,j)
              s22 = s22 + a(i+1,l)*b(l,j+1)

              s31 = s31 + a(i+2,l)*b(l,j)
              s32 = s32 + a(i+2,l)*b(l,j+1)

              s41 = s41 + a(i+3,l)*b(l,j)
              s42 = s42 + a(i+3,l)*b(l,j+1)
            enddo
            c(i  ,j  ) = s11 
            c(i  ,j+1) = s12
            c(i+1,j  ) = s21 
            c(i+2,j  ) = s31 
            c(i+3,j  ) = s41 
            c(i+1,j+1) = s22
            c(i+2,j+1) = s32
            c(i+3,j+1) = s42
          else
            s11 = c(i,j)
            s21 = c(i+1,j)
            s31 = c(i+2,j)
            s41 = c(i+3,j)
            s12 = c(i,j+1)
            s22 = c(i+1,j+1)
            s32 = c(i+2,j+1)
            s42 = c(i+3,j+1)
            s13 = c(i,j+2)
            s23 = c(i+1,j+2)
            s33 = c(i+2,j+2)
            s43 = c(i+3,j+2)
            do l=1,k
              s11 = s11 + a(i,l)*b(l,j)
              s12 = s12 + a(i,l)*b(l,j+1)
              s13 = s13 + a(i,l)*b(l,j+2)

              s21 = s21 + a(i+1,l)*b(l,j)
              s22 = s22 + a(i+1,l)*b(l,j+1)
              s23 = s23 + a(i+1,l)*b(l,j+2)

              s31 = s31 + a(i+2,l)*b(l,j)
              s32 = s32 + a(i+2,l)*b(l,j+1)
              s33 = s33 + a(i+2,l)*b(l,j+2)

              s41 = s41 + a(i+3,l)*b(l,j)
              s42 = s42 + a(i+3,l)*b(l,j+1)
              s43 = s43 + a(i+3,l)*b(l,j+2)
            enddo
            c(i,j)     = s11 
            c(i+1,j)   = s21 
            c(i+2,j)   = s31 
            c(i+3,j)   = s41 
            c(i,j+1)   = s12 
            c(i+1,j+1) = s22
            c(i+2,j+1) = s32
            c(i+3,j+1) = s42
            c(i,j+2)   = s13
            c(i+1,j+2) = s23
            c(i+2,j+2) = s33
            c(i+3,j+2) = s43
          endif
        endif
      enddo

c Residual when m is not multiple of 4
      if (mresid .eq. 0) then
        return
      elseif (mresid .eq. 1) then
        do j=1,n-nresid,4
          s11 = c(m,j)
          s12 = c(m,j+1)
          s13 = c(m,j+2)
          s14 = c(m,j+3)
          do l=1,k
            s11 = s11 + a(m,l)*b(l,j)
            s12 = s12 + a(m,l)*b(l,j+1)
            s13 = s13 + a(m,l)*b(l,j+2)
            s14 = s14 + a(m,l)*b(l,j+3)
          enddo
          c(m,j)     = s11 
          c(m,j+1)   = s12 
          c(m,j+2)   = s13
          c(m,j+3)   = s14
        enddo
c mresid is 1, check nresid
        if (nresid .eq. 0) then
          return
        elseif (nresid .eq. 1) then
          s11 = c(m,n)
          do l=1,k
            s11 = s11 + a(m,l)*b(l,n)
          enddo
          c(m,n) = s11
          return
        elseif (nresid .eq. 2) then
          s11 = c(m,n-1)
          s12 = c(m,n)
          do l=1,k
            s11 = s11 + a(m,l)*b(l,n-1)
            s12 = s12 + a(m,l)*b(l,n)
          enddo
          c(m,n-1) = s11
          c(m,n)   = s12
          return
        else
          s11 = c(m,n-2)
          s12 = c(m,n-1)
          s13 = c(m,n)
          do l=1,k
            s11 = s11 + a(m,l)*b(l,n-2)
            s12 = s12 + a(m,l)*b(l,n-1)
            s13 = s13 + a(m,l)*b(l,n)
          enddo
          c(m,n-2) = s11
          c(m,n-1) = s12
          c(m,n) = s13
          return
        endif          
      elseif (mresid .eq. 2) then
        do j=1,n-nresid,4
          s11 = c(m-1,j)
          s12 = c(m-1,j+1)
          s13 = c(m-1,j+2)
          s14 = c(m-1,j+3)
          s21 = c(m,j)
          s22 = c(m,j+1)
          s23 = c(m,j+2)
          s24 = c(m,j+3)
          do l=1,k
            s11 = s11 + a(m-1,l)*b(l,j)
            s12 = s12 + a(m-1,l)*b(l,j+1)
            s13 = s13 + a(m-1,l)*b(l,j+2)
            s14 = s14 + a(m-1,l)*b(l,j+3)

            s21 = s21 + a(m,l)*b(l,j)
            s22 = s22 + a(m,l)*b(l,j+1)
            s23 = s23 + a(m,l)*b(l,j+2)
            s24 = s24 + a(m,l)*b(l,j+3)
          enddo
          c(m-1,j)   = s11 
          c(m-1,j+1) = s12 
          c(m-1,j+2) = s13
          c(m-1,j+3) = s14
          c(m,j)     = s21
          c(m,j+1)   = s22 
          c(m,j+2)   = s23
          c(m,j+3)   = s24
        enddo
c mresid is 2, check nresid
        if (nresid .eq. 0) then
          return
        elseif (nresid .eq. 1) then
          s11 = c(m-1,n)
          s21 = c(m,n)
          do l=1,k
            s11 = s11 + a(m-1,l)*b(l,n)
            s21 = s21 + a(m,l)*b(l,n)
          enddo
          c(m-1,n) = s11
          c(m,n) = s21
          return
        elseif (nresid .eq. 2) then
          s11 = c(m-1,n-1)
          s12 = c(m-1,n)
          s21 = c(m,n-1)
          s22 = c(m,n)
          do l=1,k
            s11 = s11 + a(m-1,l)*b(l,n-1)
            s12 = s12 + a(m-1,l)*b(l,n)
            s21 = s21 + a(m,l)*b(l,n-1)
            s22 = s22 + a(m,l)*b(l,n)
          enddo
          c(m-1,n-1) = s11
          c(m-1,n)   = s12
          c(m,n-1)   = s21
          c(m,n)     = s22
          return
        else
          s11 = c(m-1,n-2)
          s12 = c(m-1,n-1)
          s13 = c(m-1,n)
          s21 = c(m,n-2)
          s22 = c(m,n-1)
          s23 = c(m,n)
          do l=1,k
            s11 = s11 + a(m-1,l)*b(l,n-2)
            s12 = s12 + a(m-1,l)*b(l,n-1)
            s13 = s13 + a(m-1,l)*b(l,n)
            s21 = s21 + a(m,l)*b(l,n-2)
            s22 = s22 + a(m,l)*b(l,n-1)
            s23 = s23 + a(m,l)*b(l,n)
          enddo
          c(m-1,n-2) = s11
          c(m-1,n-1) = s12
          c(m-1,n)   = s13
          c(m,n-2)   = s21
          c(m,n-1)   = s22
          c(m,n)     = s23
          return
        endif
      else
c mresid is 3
        do j=1,n-nresid,4
          s11 = c(m-2,j)
          s12 = c(m-2,j+1)
          s13 = c(m-2,j+2)
          s14 = c(m-2,j+3)

          s21 = c(m-1,j)
          s22 = c(m-1,j+1)
          s23 = c(m-1,j+2)
          s24 = c(m-1,j+3)

          s31 = c(m,j)
          s32 = c(m,j+1)
          s33 = c(m,j+2)
          s34 = c(m,j+3)

          do l=1,k
            s11 = s11 + a(m-2,l)*b(l,j)
            s12 = s12 + a(m-2,l)*b(l,j+1)
            s13 = s13 + a(m-2,l)*b(l,j+2)
            s14 = s14 + a(m-2,l)*b(l,j+3)

            s21 = s21 + a(m-1,l)*b(l,j)
            s22 = s22 + a(m-1,l)*b(l,j+1)
            s23 = s23 + a(m-1,l)*b(l,j+2)
            s24 = s24 + a(m-1,l)*b(l,j+3)

            s31 = s31 + a(m,l)*b(l,j)
            s32 = s32 + a(m,l)*b(l,j+1)
            s33 = s33 + a(m,l)*b(l,j+2)
            s34 = s34 + a(m,l)*b(l,j+3)
          enddo
          c(m-2,j)   = s11 
          c(m-2,j+1) = s12 
          c(m-2,j+2) = s13
          c(m-2,j+3) = s14

          c(m-1,j)   = s21 
          c(m-1,j+1) = s22
          c(m-1,j+2) = s23
          c(m-1,j+3) = s24

          c(m,j)     = s31 
          c(m,j+1)   = s32
          c(m,j+2)   = s33
          c(m,j+3)   = s34
        enddo
c mresid is 3, check nresid
        if (nresid .eq. 0) then
          return
        elseif (nresid .eq. 1) then
          s11 = c(m-2,n)
          s21 = c(m-1,n)
          s31 = c(m,n)
          do l=1,k
            s11 = s11 + a(m-2,l)*b(l,n)
            s21 = s21 + a(m-1,l)*b(l,n)
            s31 = s31 + a(m,l)*b(l,n)
          enddo
          c(m-2,n) = s11
          c(m-1,n) = s21
          c(m,n) = s31
          return
        elseif (nresid .eq. 2) then
          s11 = c(m-2,n-1)
          s12 = c(m-2,n)
          s21 = c(m-1,n-1)
          s22 = c(m-1,n)
          s31 = c(m,n-1)
          s32 = c(m,n)
          do l=1,k
            s11 = s11 + a(m-2,l)*b(l,n-1)
            s12 = s12 + a(m-2,l)*b(l,n)
            s21 = s21 + a(m-1,l)*b(l,n-1)
            s22 = s22 + a(m-1,l)*b(l,n)
            s31 = s31 + a(m,l)*b(l,n-1)
            s32 = s32 + a(m,l)*b(l,n)
          enddo
          c(m-2,n-1) = s11
          c(m-2,n)   = s12
          c(m-1,n-1) = s21
          c(m-1,n)   = s22
          c(m,n-1)   = s31
          c(m,n)     = s32
          return
        else
          s11 = c(m-2,n-2)
          s12 = c(m-2,n-1)
          s13 = c(m-2,n)
          s21 = c(m-1,n-2)
          s22 = c(m-1,n-1)
          s23 = c(m-1,n)
          s31 = c(m,n-2)
          s32 = c(m,n-1)
          s33 = c(m,n)
          do l=1,k
            s11 = s11 + a(m-2,l)*b(l,n-2)
            s12 = s12 + a(m-2,l)*b(l,n-1)
            s13 = s13 + a(m-2,l)*b(l,n)
            s21 = s21 + a(m-1,l)*b(l,n-2)
            s22 = s22 + a(m-1,l)*b(l,n-1)
            s23 = s23 + a(m-1,l)*b(l,n)
            s31 = s31 + a(m,l)*b(l,n-2)
            s32 = s32 + a(m,l)*b(l,n-1)
            s33 = s33 + a(m,l)*b(l,n)
          enddo
          c(m-2,n-2) = s11
          c(m-2,n-1) = s12
          c(m-2,n)   = s13
          c(m-1,n-2) = s21
          c(m-1,n-1) = s22
          c(m-1,n)   = s23
          c(m,n-2)   = s31
          c(m,n-1)   = s32
          c(m,n)     = s33
          return
        endif
      endif

      return
      end
c-----------------------------------------------------------------------
      subroutine mxm44_2a(a, m, b, k, c, n)
      real a(m,2), b(2,n), c(m,n)

      nresid = iand(n,3) 
      n1 = n - nresid + 1

      do j=1,n-nresid,4
         do i=1,m
            c(i,j  ) = c(i,j)
     $               + a(i,1)*b(1,j)
     $               + a(i,2)*b(2,j)
            c(i,j+1) = c(i,j+1)
     $               + a(i,1)*b(1,j+1)
     $               + a(i,2)*b(2,j+1)
            c(i,j+2) = c(i,j+2)
     $               + a(i,1)*b(1,j+2)
     $               + a(i,2)*b(2,j+2)
            c(i,j+3) = c(i,j+3)
     $               + a(i,1)*b(1,j+3)
     $               + a(i,2)*b(2,j+3)
         enddo
      enddo
      if (nresid .eq. 0) then
        return
      elseif (nresid .eq. 1) then
         do i=1,m
            c(i,n) = c(i,n)
     $             + a(i,1)*b(1,n)
     $             + a(i,2)*b(2,n)
         enddo
      elseif (nresid .eq. 2) then
         do i=1,m
            c(i,n-1) = c(i,n-1)
     $               + a(i,1)*b(1,n-1)
     $               + a(i,2)*b(2,n-1)
            c(i,n  ) = c(i,n  )
     $               + a(i,1)*b(1,n  )
     $               + a(i,2)*b(2,n  )
         enddo
      else
         do i=1,m
            c(i,n-2) = c(i,n-2)
     $               + a(i,1)*b(1,n-2)
     $               + a(i,2)*b(2,n-2)
            c(i,n-1) = c(i,n-1)
     $               + a(i,1)*b(1,n-1)
     $               + a(i,2)*b(2,n-1)
            c(i,n  ) = c(i,n  )
     $               + a(i,1)*b(1,n  )
     $               + a(i,2)*b(2,n  )
         enddo
      endif

      return
      end
c-----------------------------------------------------------------------
