| 1 | subroutine mxm(a,n1,b,n2,c,n3)
|
|---|
| 2 | c
|
|---|
| 3 | c Compute matrix-matrix product C = A*B
|
|---|
| 4 | c for contiguously packed matrices A,B, and C.
|
|---|
| 5 | c
|
|---|
| 6 | real a(n1,n2),b(n2,n3),c(n1,n3)
|
|---|
| 7 | c
|
|---|
| 8 | include 'SIZE'
|
|---|
| 9 | include 'CTIMER'
|
|---|
| 10 | include 'OPCTR'
|
|---|
| 11 | include 'TOTAL'
|
|---|
| 12 | c
|
|---|
| 13 | integer*8 tt
|
|---|
| 14 | parameter(tt = 32)
|
|---|
| 15 |
|
|---|
| 16 | #ifdef TIMER2
|
|---|
| 17 | if (isclld.eq.0) then
|
|---|
| 18 | isclld=1
|
|---|
| 19 | nrout=nrout+1
|
|---|
| 20 | myrout=nrout
|
|---|
| 21 | rname(myrout) = 'mxm '
|
|---|
| 22 | endif
|
|---|
| 23 | isbcnt = n1*n3*(2*n2-1)
|
|---|
| 24 | dct(myrout) = dct(myrout) + (isbcnt)
|
|---|
| 25 | ncall(myrout) = ncall(myrout) + 1
|
|---|
| 26 | dcount = dcount + (isbcnt)
|
|---|
| 27 | etime1 = dnekclock()
|
|---|
| 28 | #endif
|
|---|
| 29 |
|
|---|
| 30 |
|
|---|
| 31 | #ifdef BGQ
|
|---|
| 32 | if (n2 .eq. 8 .and. mod(n1,4) .eq. 0
|
|---|
| 33 | $ .and. MOD(LOC(a),tt).eq.0
|
|---|
| 34 | $ .and. MOD(LOC(b),tt).eq.0
|
|---|
| 35 | $ .and. MOD(LOC(c),tt).eq.0
|
|---|
| 36 | $ ) then
|
|---|
| 37 | call mxm_bgq_8(a, n1, b, n2, c, n3)
|
|---|
| 38 | goto 111
|
|---|
| 39 | endif
|
|---|
| 40 | if (n2 .eq. 16 .and. mod(n1,4) .eq. 0
|
|---|
| 41 | $ .and. MOD(LOC(a),tt).eq.0
|
|---|
| 42 | $ .and. MOD(LOC(b),tt).eq.0
|
|---|
| 43 | $ .and. MOD(LOC(c),tt).eq.0
|
|---|
| 44 | $ ) then
|
|---|
| 45 | call mxm_bgq_16(a, n1, b, n2, c, n3)
|
|---|
| 46 | goto 111
|
|---|
| 47 | endif
|
|---|
| 48 | if (n2 .eq. 10 .and. mod(n1,4) .eq. 0 .and. mod(n3,2) .eq. 0
|
|---|
| 49 | & .and. MOD(LOC(a),tt).eq.0
|
|---|
| 50 | & .and. MOD(LOC(b),tt).eq.0
|
|---|
| 51 | & .and. MOD(LOC(c),tt).eq.0
|
|---|
| 52 | & ) then
|
|---|
| 53 | call mxm_bgq_10(a, n1, b, n2, c, n3)
|
|---|
| 54 | goto 111
|
|---|
| 55 | endif
|
|---|
| 56 | if (n2 .eq. 6 .and. mod(n1,4) .eq. 0 .and. mod(n3,2) .eq. 0
|
|---|
| 57 | & .and. MOD(LOC(a),tt).eq.0
|
|---|
| 58 | & .and. MOD(LOC(b),tt).eq.0
|
|---|
| 59 | & .and. MOD(LOC(c),tt).eq.0
|
|---|
| 60 | & ) then
|
|---|
| 61 | call mxm_bgq_6(a, n1, b, n2, c, n3)
|
|---|
| 62 | goto 111
|
|---|
| 63 | endif
|
|---|
| 64 | #endif
|
|---|
| 65 |
|
|---|
| 66 | #ifdef XSMM
|
|---|
| 67 | if ((n1*n2*n3)**(1./3) .gt. 6) then
|
|---|
| 68 | call libxsmm_dgemm('N','N',n1,n3,n2,1.0,a,n1,b,n2,0.0,c,n1)
|
|---|
| 69 | goto 111
|
|---|
| 70 | else
|
|---|
| 71 | goto 101
|
|---|
| 72 | endif
|
|---|
| 73 | #endif
|
|---|
| 74 |
|
|---|
| 75 | #ifdef BLAS_MXM
|
|---|
| 76 | call dgemm('N','N',n1,n3,n2,1.0,a,n1,b,n2,0.0,c,n1)
|
|---|
| 77 | goto 111
|
|---|
| 78 | #endif
|
|---|
| 79 |
|
|---|
| 80 | 101 call mxmf2(a,n1,b,n2,c,n3)
|
|---|
| 81 |
|
|---|
| 82 | 111 continue
|
|---|
| 83 | #ifdef TIMER2
|
|---|
| 84 | tmxmf = tmxmf + dnekclock() - etime1
|
|---|
| 85 | #endif
|
|---|
| 86 | return
|
|---|
| 87 | end
|
|---|
| 88 | c-----------------------------------------------------------------------
|
|---|
| 89 | subroutine fgslib_mxm(a,n1,b,n2,c,n3)
|
|---|
| 90 | real a(n1,n2),b(n2,n3),c(n1,n3)
|
|---|
| 91 |
|
|---|
| 92 | call mxmf2(a,n1,b,n2,c,n3)
|
|---|
| 93 |
|
|---|
| 94 | return
|
|---|
| 95 | end
|
|---|