subroutine mxmf2(A,N1,B,N2,C,N3) c c unrolled loop version c real a(n1,n2),b(n2,n3),c(n1,n3) if (n2.le.8) then if (n2.eq.1) then call mxf1(a,n1,b,n2,c,n3) elseif (n2.eq.2) then call mxf2(a,n1,b,n2,c,n3) elseif (n2.eq.3) then call mxf3(a,n1,b,n2,c,n3) elseif (n2.eq.4) then call mxf4(a,n1,b,n2,c,n3) elseif (n2.eq.5) then call mxf5(a,n1,b,n2,c,n3) elseif (n2.eq.6) then call mxf6(a,n1,b,n2,c,n3) elseif (n2.eq.7) then call mxf7(a,n1,b,n2,c,n3) else call mxf8(a,n1,b,n2,c,n3) endif elseif (n2.le.16) then if (n2.eq.9) then call mxf9(a,n1,b,n2,c,n3) elseif (n2.eq.10) then call mxf10(a,n1,b,n2,c,n3) elseif (n2.eq.11) then call mxf11(a,n1,b,n2,c,n3) elseif (n2.eq.12) then call mxf12(a,n1,b,n2,c,n3) elseif (n2.eq.13) then call mxf13(a,n1,b,n2,c,n3) elseif (n2.eq.14) then call mxf14(a,n1,b,n2,c,n3) elseif (n2.eq.15) then call mxf15(a,n1,b,n2,c,n3) else call mxf16(a,n1,b,n2,c,n3) endif elseif (n2.le.24) then if (n2.eq.17) then call mxf17(a,n1,b,n2,c,n3) elseif (n2.eq.18) then call mxf18(a,n1,b,n2,c,n3) elseif (n2.eq.19) then call mxf19(a,n1,b,n2,c,n3) elseif (n2.eq.20) then call mxf20(a,n1,b,n2,c,n3) elseif (n2.eq.21) then call mxf21(a,n1,b,n2,c,n3) elseif (n2.eq.22) then call mxf22(a,n1,b,n2,c,n3) elseif (n2.eq.23) then call mxf23(a,n1,b,n2,c,n3) elseif (n2.eq.24) then call mxf24(a,n1,b,n2,c,n3) endif else call mxm44_0(a,n1,b,n2,c,n3) endif return end c----------------------------------------------------------------------- subroutine mxf1(a,n1,b,n2,c,n3) c real a(n1,1),b(1,n3),c(n1,n3) c do j=1,n3 do i=1,n1 c(i,j) = a(i,1)*b(1,j) enddo enddo return end c----------------------------------------------------------------------- subroutine mxf2(a,n1,b,n2,c,n3) c real a(n1,2),b(2,n3),c(n1,n3) c do j=1,n3 do i=1,n1 c(i,j) = a(i,1)*b(1,j) $ + a(i,2)*b(2,j) enddo enddo return end c----------------------------------------------------------------------- subroutine mxf3(a,n1,b,n2,c,n3) c real a(n1,3),b(3,n3),c(n1,n3) c do j=1,n3 do i=1,n1 c(i,j) = a(i,1)*b(1,j) $ + a(i,2)*b(2,j) $ + a(i,3)*b(3,j) enddo enddo return end c----------------------------------------------------------------------- subroutine mxf4(a,n1,b,n2,c,n3) c real a(n1,4),b(4,n3),c(n1,n3) c do j=1,n3 do i=1,n1 c(i,j) = a(i,1)*b(1,j) $ + a(i,2)*b(2,j) $ + a(i,3)*b(3,j) $ + a(i,4)*b(4,j) enddo enddo return end c----------------------------------------------------------------------- c----------------------------------------------------------------------- c subroutine mxf5 .. mxf24 omitted for brevity c----------------------------------------------------------------------- c----------------------------------------------------------------------- subroutine mxm44_0(a, m, b, k, c, n) c c matrix multiply with a 4x4 pencil c real a(m,k), b(k,n), c(m,n) real s11, s12, s13, s14, s21, s22, s23, s24 real s31, s32, s33, s34, s41, s42, s43, s44 mresid = iand(m,3) nresid = iand(n,3) m1 = m - mresid + 1 n1 = n - nresid + 1 do i=1,m-mresid,4 do j=1,n-nresid,4 s11 = 0.0d0 s21 = 0.0d0 s31 = 0.0d0 s41 = 0.0d0 s12 = 0.0d0 s22 = 0.0d0 s32 = 0.0d0 s42 = 0.0d0 s13 = 0.0d0 s23 = 0.0d0 s33 = 0.0d0 s43 = 0.0d0 s14 = 0.0d0 s24 = 0.0d0 s34 = 0.0d0 s44 = 0.0d0 do l=1,k s11 = s11 + a(i,l)*b(l,j) s12 = s12 + a(i,l)*b(l,j+1) s13 = s13 + a(i,l)*b(l,j+2) s14 = s14 + a(i,l)*b(l,j+3) s21 = s21 + a(i+1,l)*b(l,j) s22 = s22 + a(i+1,l)*b(l,j+1) s23 = s23 + a(i+1,l)*b(l,j+2) s24 = s24 + a(i+1,l)*b(l,j+3) s31 = s31 + a(i+2,l)*b(l,j) s32 = s32 + a(i+2,l)*b(l,j+1) s33 = s33 + a(i+2,l)*b(l,j+2) s34 = s34 + a(i+2,l)*b(l,j+3) s41 = s41 + a(i+3,l)*b(l,j) s42 = s42 + a(i+3,l)*b(l,j+1) s43 = s43 + a(i+3,l)*b(l,j+2) s44 = s44 + a(i+3,l)*b(l,j+3) enddo c(i,j) = s11 c(i,j+1) = s12 c(i,j+2) = s13 c(i,j+3) = s14 c(i+1,j) = s21 c(i+2,j) = s31 c(i+3,j) = s41 c(i+1,j+1) = s22 c(i+2,j+1) = s32 c(i+3,j+1) = s42 c(i+1,j+2) = s23 c(i+2,j+2) = s33 c(i+3,j+2) = s43 c(i+1,j+3) = s24 c(i+2,j+3) = s34 c(i+3,j+3) = s44 enddo * Residual when n is not multiple of 4 if (nresid .ne. 0) then if (nresid .eq. 1) then s11 = 0.0d0 s21 = 0.0d0 s31 = 0.0d0 s41 = 0.0d0 do l=1,k s11 = s11 + a(i,l)*b(l,n) s21 = s21 + a(i+1,l)*b(l,n) s31 = s31 + a(i+2,l)*b(l,n) s41 = s41 + a(i+3,l)*b(l,n) enddo c(i,n) = s11 c(i+1,n) = s21 c(i+2,n) = s31 c(i+3,n) = s41 elseif (nresid .eq. 2) then s11 = 0.0d0 s21 = 0.0d0 s31 = 0.0d0 s41 = 0.0d0 s12 = 0.0d0 s22 = 0.0d0 s32 = 0.0d0 s42 = 0.0d0 do l=1,k s11 = s11 + a(i,l)*b(l,j) s12 = s12 + a(i,l)*b(l,j+1) s21 = s21 + a(i+1,l)*b(l,j) s22 = s22 + a(i+1,l)*b(l,j+1) s31 = s31 + a(i+2,l)*b(l,j) s32 = s32 + a(i+2,l)*b(l,j+1) s41 = s41 + a(i+3,l)*b(l,j) s42 = s42 + a(i+3,l)*b(l,j+1) enddo c(i,j) = s11 c(i,j+1) = s12 c(i+1,j) = s21 c(i+2,j) = s31 c(i+3,j) = s41 c(i+1,j+1) = s22 c(i+2,j+1) = s32 c(i+3,j+1) = s42 else s11 = 0.0d0 s21 = 0.0d0 s31 = 0.0d0 s41 = 0.0d0 s12 = 0.0d0 s22 = 0.0d0 s32 = 0.0d0 s42 = 0.0d0 s13 = 0.0d0 s23 = 0.0d0 s33 = 0.0d0 s43 = 0.0d0 do l=1,k s11 = s11 + a(i,l)*b(l,j) s12 = s12 + a(i,l)*b(l,j+1) s13 = s13 + a(i,l)*b(l,j+2) s21 = s21 + a(i+1,l)*b(l,j) s22 = s22 + a(i+1,l)*b(l,j+1) s23 = s23 + a(i+1,l)*b(l,j+2) s31 = s31 + a(i+2,l)*b(l,j) s32 = s32 + a(i+2,l)*b(l,j+1) s33 = s33 + a(i+2,l)*b(l,j+2) s41 = s41 + a(i+3,l)*b(l,j) s42 = s42 + a(i+3,l)*b(l,j+1) s43 = s43 + a(i+3,l)*b(l,j+2) enddo c(i,j) = s11 c(i+1,j) = s21 c(i+2,j) = s31 c(i+3,j) = s41 c(i,j+1) = s12 c(i+1,j+1) = s22 c(i+2,j+1) = s32 c(i+3,j+1) = s42 c(i,j+2) = s13 c(i+1,j+2) = s23 c(i+2,j+2) = s33 c(i+3,j+2) = s43 endif endif enddo * Residual when m is not multiple of 4 if (mresid .eq. 0) then return elseif (mresid .eq. 1) then do j=1,n-nresid,4 s11 = 0.0d0 s12 = 0.0d0 s13 = 0.0d0 s14 = 0.0d0 do l=1,k s11 = s11 + a(m,l)*b(l,j) s12 = s12 + a(m,l)*b(l,j+1) s13 = s13 + a(m,l)*b(l,j+2) s14 = s14 + a(m,l)*b(l,j+3) enddo c(m,j) = s11 c(m,j+1) = s12 c(m,j+2) = s13 c(m,j+3) = s14 enddo * mresid is 1, check nresid if (nresid .eq. 0) then return elseif (nresid .eq. 1) then s11 = 0.0d0 do l=1,k s11 = s11 + a(m,l)*b(l,n) enddo c(m,n) = s11 return elseif (nresid .eq. 2) then s11 = 0.0d0 s12 = 0.0d0 do l=1,k s11 = s11 + a(m,l)*b(l,n-1) s12 = s12 + a(m,l)*b(l,n) enddo c(m,n-1) = s11 c(m,n) = s12 return else s11 = 0.0d0 s12 = 0.0d0 s13 = 0.0d0 do l=1,k s11 = s11 + a(m,l)*b(l,n-2) s12 = s12 + a(m,l)*b(l,n-1) s13 = s13 + a(m,l)*b(l,n) enddo c(m,n-2) = s11 c(m,n-1) = s12 c(m,n) = s13 return endif elseif (mresid .eq. 2) then do j=1,n-nresid,4 s11 = 0.0d0 s12 = 0.0d0 s13 = 0.0d0 s14 = 0.0d0 s21 = 0.0d0 s22 = 0.0d0 s23 = 0.0d0 s24 = 0.0d0 do l=1,k s11 = s11 + a(m-1,l)*b(l,j) s12 = s12 + a(m-1,l)*b(l,j+1) s13 = s13 + a(m-1,l)*b(l,j+2) s14 = s14 + a(m-1,l)*b(l,j+3) s21 = s21 + a(m,l)*b(l,j) s22 = s22 + a(m,l)*b(l,j+1) s23 = s23 + a(m,l)*b(l,j+2) s24 = s24 + a(m,l)*b(l,j+3) enddo c(m-1,j) = s11 c(m-1,j+1) = s12 c(m-1,j+2) = s13 c(m-1,j+3) = s14 c(m,j) = s21 c(m,j+1) = s22 c(m,j+2) = s23 c(m,j+3) = s24 enddo * mresid is 2, check nresid if (nresid .eq. 0) then return elseif (nresid .eq. 1) then s11 = 0.0d0 s21 = 0.0d0 do l=1,k s11 = s11 + a(m-1,l)*b(l,n) s21 = s21 + a(m,l)*b(l,n) enddo c(m-1,n) = s11 c(m,n) = s21 return elseif (nresid .eq. 2) then s11 = 0.0d0 s21 = 0.0d0 s12 = 0.0d0 s22 = 0.0d0 do l=1,k s11 = s11 + a(m-1,l)*b(l,n-1) s12 = s12 + a(m-1,l)*b(l,n) s21 = s21 + a(m,l)*b(l,n-1) s22 = s22 + a(m,l)*b(l,n) enddo c(m-1,n-1) = s11 c(m-1,n) = s12 c(m,n-1) = s21 c(m,n) = s22 return else s11 = 0.0d0 s21 = 0.0d0 s12 = 0.0d0 s22 = 0.0d0 s13 = 0.0d0 s23 = 0.0d0 do l=1,k s11 = s11 + a(m-1,l)*b(l,n-2) s12 = s12 + a(m-1,l)*b(l,n-1) s13 = s13 + a(m-1,l)*b(l,n) s21 = s21 + a(m,l)*b(l,n-2) s22 = s22 + a(m,l)*b(l,n-1) s23 = s23 + a(m,l)*b(l,n) enddo c(m-1,n-2) = s11 c(m-1,n-1) = s12 c(m-1,n) = s13 c(m,n-2) = s21 c(m,n-1) = s22 c(m,n) = s23 return endif else * mresid is 3 do j=1,n-nresid,4 s11 = 0.0d0 s21 = 0.0d0 s31 = 0.0d0 s12 = 0.0d0 s22 = 0.0d0 s32 = 0.0d0 s13 = 0.0d0 s23 = 0.0d0 s33 = 0.0d0 s14 = 0.0d0 s24 = 0.0d0 s34 = 0.0d0 do l=1,k s11 = s11 + a(m-2,l)*b(l,j) s12 = s12 + a(m-2,l)*b(l,j+1) s13 = s13 + a(m-2,l)*b(l,j+2) s14 = s14 + a(m-2,l)*b(l,j+3) s21 = s21 + a(m-1,l)*b(l,j) s22 = s22 + a(m-1,l)*b(l,j+1) s23 = s23 + a(m-1,l)*b(l,j+2) s24 = s24 + a(m-1,l)*b(l,j+3) s31 = s31 + a(m,l)*b(l,j) s32 = s32 + a(m,l)*b(l,j+1) s33 = s33 + a(m,l)*b(l,j+2) s34 = s34 + a(m,l)*b(l,j+3) enddo c(m-2,j) = s11 c(m-2,j+1) = s12 c(m-2,j+2) = s13 c(m-2,j+3) = s14 c(m-1,j) = s21 c(m-1,j+1) = s22 c(m-1,j+2) = s23 c(m-1,j+3) = s24 c(m,j) = s31 c(m,j+1) = s32 c(m,j+2) = s33 c(m,j+3) = s34 enddo * mresid is 3, check nresid if (nresid .eq. 0) then return elseif (nresid .eq. 1) then s11 = 0.0d0 s21 = 0.0d0 s31 = 0.0d0 do l=1,k s11 = s11 + a(m-2,l)*b(l,n) s21 = s21 + a(m-1,l)*b(l,n) s31 = s31 + a(m,l)*b(l,n) enddo c(m-2,n) = s11 c(m-1,n) = s21 c(m,n) = s31 return elseif (nresid .eq. 2) then s11 = 0.0d0 s21 = 0.0d0 s31 = 0.0d0 s12 = 0.0d0 s22 = 0.0d0 s32 = 0.0d0 do l=1,k s11 = s11 + a(m-2,l)*b(l,n-1) s12 = s12 + a(m-2,l)*b(l,n) s21 = s21 + a(m-1,l)*b(l,n-1) s22 = s22 + a(m-1,l)*b(l,n) s31 = s31 + a(m,l)*b(l,n-1) s32 = s32 + a(m,l)*b(l,n) enddo c(m-2,n-1) = s11 c(m-2,n) = s12 c(m-1,n-1) = s21 c(m-1,n) = s22 c(m,n-1) = s31 c(m,n) = s32 return else s11 = 0.0d0 s21 = 0.0d0 s31 = 0.0d0 s12 = 0.0d0 s22 = 0.0d0 s32 = 0.0d0 s13 = 0.0d0 s23 = 0.0d0 s33 = 0.0d0 do l=1,k s11 = s11 + a(m-2,l)*b(l,n-2) s12 = s12 + a(m-2,l)*b(l,n-1) s13 = s13 + a(m-2,l)*b(l,n) s21 = s21 + a(m-1,l)*b(l,n-2) s22 = s22 + a(m-1,l)*b(l,n-1) s23 = s23 + a(m-1,l)*b(l,n) s31 = s31 + a(m,l)*b(l,n-2) s32 = s32 + a(m,l)*b(l,n-1) s33 = s33 + a(m,l)*b(l,n) enddo c(m-2,n-2) = s11 c(m-2,n-1) = s12 c(m-2,n) = s13 c(m-1,n-2) = s21 c(m-1,n-1) = s22 c(m-1,n) = s23 c(m,n-2) = s31 c(m,n-1) = s32 c(m,n) = s33 return endif endif return end