source: CIVL/examples/fortran/nek5000/core/mxm_bgq.f

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100755
File size: 9.4 KB
Line 
1 subroutine mxm_bgq_8(a,n1,b,n2,c,n3)
2 implicit none
3
4 integer n1, n2, n3
5 real(8) a(n1,n2),b(n2,n3)
6 real(8) c(n1,n3)
7
8 integer i, j
9 vector(real(8)) av1, av2, av3, av4, av5, av6, av7, av8
10 vector(real(8)) bv1, bsv1, bsv2, bsv3, bsv4
11 vector(real(8)) bv2, bsv5, bsv6, bsv7, bsv8
12 vector(real(8)) cv
13
14 call alignx(32, a(1,1))
15 call alignx(32, b(1,1))
16 call alignx(32, c(1,1))
17
18 do i = 1, n1, 4
19 av1 = vec_ld(0,a(i,1))
20 av2 = vec_ld(0,a(i,2))
21 av3 = vec_ld(0,a(i,3))
22 av4 = vec_ld(0,a(i,4))
23 av5 = vec_ld(0,a(i,5))
24 av6 = vec_ld(0,a(i,6))
25 av7 = vec_ld(0,a(i,7))
26 av8 = vec_ld(0,a(i,8))
27
28 do j = 1, n3
29 bv1 = vec_ld(0,b(1,j))
30 bv2 = vec_ld(0,b(5,j))
31 bsv1 = vec_splat(bv1, 0)
32 bsv2 = vec_splat(bv1, 1)
33 bsv3 = vec_splat(bv1, 2)
34 bsv4 = vec_splat(bv1, 3)
35 bsv5 = vec_splat(bv2, 0)
36 bsv6 = vec_splat(bv2, 1)
37 bsv7 = vec_splat(bv2, 2)
38 bsv8 = vec_splat(bv2, 3)
39
40 cv = vec_mul(av1, bsv1)
41 cv = vec_madd(av2, bsv2, cv)
42 cv = vec_madd(av3, bsv3, cv)
43 cv = vec_madd(av4, bsv4, cv)
44 cv = vec_madd(av5, bsv5, cv)
45 cv = vec_madd(av6, bsv6, cv)
46 cv = vec_madd(av7, bsv7, cv)
47 cv = vec_madd(av8, bsv8, cv)
48
49 call vec_st(cv, 0, c(i,j))
50 end do
51 end do
52 return
53 end subroutine mxm_bgq_8
54
55 subroutine mxm_bgq_16(a,n1,b,n2,c,n3)
56 implicit none
57
58 integer n1, n2, n3
59 real(8) a(n1,n2),b(n2,n3)
60 real(8) c(n1,n3)
61
62 integer i, j
63
64 vector(real(8)) av1, av2, av3, av4, av5, av6, av7, av8
65 vector(real(8)) av9, av10, av11, av12, av13, av14, av15, av16
66 vector(real(8)) bv1, bsv1, bsv2, bsv3, bsv4
67 vector(real(8)) bv2, bsv5, bsv6, bsv7, bsv8
68 vector(real(8)) bv3, bsv9, bsv10, bsv11, bsv12
69 vector(real(8)) bv4, bsv13, bsv14, bsv15, bsv16
70
71 vector(real(8)) cv
72
73 call alignx(32, a(1,1))
74 call alignx(32, b(1,1))
75 call alignx(32, c(1,1))
76
77 do i = 1, n1, 4
78 av1 = vec_ld(0,a(i,1))
79 av2 = vec_ld(0,a(i,2))
80 av3 = vec_ld(0,a(i,3))
81 av4 = vec_ld(0,a(i,4))
82 av5 = vec_ld(0,a(i,5))
83 av6 = vec_ld(0,a(i,6))
84 av7 = vec_ld(0,a(i,7))
85 av8 = vec_ld(0,a(i,8))
86 av9 = vec_ld(0,a(i,9))
87 av10 = vec_ld(0,a(i,10))
88 av11 = vec_ld(0,a(i,11))
89 av12 = vec_ld(0,a(i,12))
90 av13 = vec_ld(0,a(i,13))
91 av14 = vec_ld(0,a(i,14))
92 av15 = vec_ld(0,a(i,15))
93 av16 = vec_ld(0,a(i,16))
94
95 do j = 1, n3
96 bv1 = vec_ld(0,b(1,j))
97 bv2 = vec_ld(0,b(5,j))
98 bv3 = vec_ld(0,b(9,j))
99 bv4 = vec_ld(0,b(13,j))
100
101 bsv1 = vec_splat(bv1, 0)
102 bsv2 = vec_splat(bv1, 1)
103 bsv3 = vec_splat(bv1, 2)
104 bsv4 = vec_splat(bv1, 3)
105 bsv5 = vec_splat(bv2, 0)
106 bsv6 = vec_splat(bv2, 1)
107 bsv7 = vec_splat(bv2, 2)
108 bsv8 = vec_splat(bv2, 3)
109 bsv9 = vec_splat(bv3, 0)
110 bsv10 = vec_splat(bv3, 1)
111 bsv11 = vec_splat(bv3, 2)
112 bsv12 = vec_splat(bv3, 3)
113 bsv13 = vec_splat(bv4, 0)
114 bsv14 = vec_splat(bv4, 1)
115 bsv15 = vec_splat(bv4, 2)
116 bsv16 = vec_splat(bv4, 3)
117
118 cv = vec_mul(av1, bsv1)
119 cv = vec_madd(av2, bsv2, cv)
120 cv = vec_madd(av3, bsv3, cv)
121 cv = vec_madd(av4, bsv4, cv)
122 cv = vec_madd(av5, bsv5, cv)
123 cv = vec_madd(av6, bsv6, cv)
124 cv = vec_madd(av7, bsv7, cv)
125 cv = vec_madd(av8, bsv8, cv)
126 cv = vec_madd(av9, bsv9, cv)
127 cv = vec_madd(av10, bsv10, cv)
128 cv = vec_madd(av11, bsv11, cv)
129 cv = vec_madd(av12, bsv12, cv)
130 cv = vec_madd(av13, bsv13, cv)
131 cv = vec_madd(av14, bsv14, cv)
132 cv = vec_madd(av15, bsv15, cv)
133 cv = vec_madd(av16, bsv16, cv)
134
135 call vec_st(cv, 0, c(i,j))
136 end do
137 end do
138 return
139 end subroutine mxm_bgq_16
140
141
142 subroutine mxm_bgq_6(a,n1,b,n2,c,n3)
143 implicit none
144
145 integer n1, n2, n3
146 real(8) a(n1,n2),b(n2,n3)
147 real(8) c(n1,n3)
148
149 integer i, j
150
151 vector(real(8)) av1, av2, av3, av4, av5, av6
152 vector(real(8)) bv1, bsv1, bsv2, bsv3, bsv4
153 vector(real(8)) bv2, bsv5, bsv6
154
155 vector(real(8)) cv
156
157 call alignx(32, a(1,1))
158 call alignx(32, b(1,1))
159 call alignx(32, c(1,1))
160
161 do i = 1, n1, 4
162 av1 = vec_ld(0,a(i,1))
163 av2 = vec_ld(0,a(i,2))
164 av3 = vec_ld(0,a(i,3))
165 av4 = vec_ld(0,a(i,4))
166 av5 = vec_ld(0,a(i,5))
167 av6 = vec_ld(0,a(i,6))
168
169 do j = 1, n3, 2
170 bv1 = vec_ld(0,b(1,j))
171 bv2 = vec_ld(0,b(5,j))
172
173 bsv1 = vec_splat(bv1, 0)
174 bsv2 = vec_splat(bv1, 1)
175 bsv3 = vec_splat(bv1, 2)
176 bsv4 = vec_splat(bv1, 3)
177 bsv5 = vec_splat(bv2, 0)
178 bsv6 = vec_splat(bv2, 1)
179
180 cv = vec_mul(av1, bsv1)
181 cv = vec_madd(av2, bsv2, cv)
182 cv = vec_madd(av3, bsv3, cv)
183 cv = vec_madd(av4, bsv4, cv)
184 cv = vec_madd(av5, bsv5, cv)
185 cv = vec_madd(av6, bsv6, cv)
186
187 call vec_st(cv, 0, c(i,j))
188
189 bv1 = vec_ld(0,b(9,j))
190
191 bsv1 = vec_splat(bv2, 2)
192 bsv2 = vec_splat(bv2, 3)
193 bsv3 = vec_splat(bv1, 0)
194 bsv4 = vec_splat(bv1, 1)
195 bsv5 = vec_splat(bv1, 2)
196 bsv6 = vec_splat(bv1, 3)
197
198 cv = vec_mul(av1, bsv1)
199 cv = vec_madd(av2, bsv2, cv)
200 cv = vec_madd(av3, bsv3, cv)
201 cv = vec_madd(av4, bsv4, cv)
202 cv = vec_madd(av5, bsv5, cv)
203 cv = vec_madd(av6, bsv6, cv)
204
205 call vec_st(cv, 0, c(i,j+1))
206 end do
207 end do
208 return
209 end subroutine mxm_bgq_6
210
211 subroutine mxm_bgq_10(a,n1,b,n2,c,n3)
212 implicit none
213
214 integer n1, n2, n3
215 real(8) a(n1,n2),b(n2,n3)
216 real(8) c(n1,n3)
217
218 integer i, j
219
220 vector(real(8)) av1, av2, av3, av4, av5, av6, av7, av8
221 vector(real(8)) av9, av10
222 vector(real(8)) bv1, bsv1, bsv2, bsv3, bsv4
223 vector(real(8)) bv2, bsv5, bsv6, bsv7, bsv8
224 vector(real(8)) bv3, bsv9, bsv10
225 vector(real(8)) cv
226
227 call alignx(32, a(1,1))
228 call alignx(32, b(1,1))
229 call alignx(32, c(1,1))
230
231 do i = 1, n1, 4
232 av1 = vec_ld(0,a(i,1))
233 av2 = vec_ld(0,a(i,2))
234 av3 = vec_ld(0,a(i,3))
235 av4 = vec_ld(0,a(i,4))
236 av5 = vec_ld(0,a(i,5))
237 av6 = vec_ld(0,a(i,6))
238 av7 = vec_ld(0,a(i,7))
239 av8 = vec_ld(0,a(i,8))
240 av9 = vec_ld(0,a(i,9))
241 av10 = vec_ld(0,a(i,10))
242
243 do j = 1, n3, 2
244 bv1 = vec_ld(0,b(1,j))
245 bv2 = vec_ld(0,b(5,j))
246 bv3 = vec_ld(0,b(9,j))
247
248 bsv1 = vec_splat(bv1, 0)
249 bsv2 = vec_splat(bv1, 1)
250 bsv3 = vec_splat(bv1, 2)
251 bsv4 = vec_splat(bv1, 3)
252 bsv5 = vec_splat(bv2, 0)
253 bsv6 = vec_splat(bv2, 1)
254 bsv7 = vec_splat(bv2, 2)
255 bsv8 = vec_splat(bv2, 3)
256 bsv9 = vec_splat(bv3, 0)
257 bsv10 = vec_splat(bv3, 1)
258
259 cv = vec_mul(av1, bsv1)
260 cv = vec_madd(av2, bsv2, cv)
261 cv = vec_madd(av3, bsv3, cv)
262 cv = vec_madd(av4, bsv4, cv)
263 cv = vec_madd(av5, bsv5, cv)
264 cv = vec_madd(av6, bsv6, cv)
265 cv = vec_madd(av7, bsv7, cv)
266 cv = vec_madd(av8, bsv8, cv)
267 cv = vec_madd(av9, bsv9, cv)
268 cv = vec_madd(av10, bsv10, cv)
269
270 call vec_st(cv, 0, c(i,j))
271
272 bv1 = vec_ld(0,b(13,j))
273 bv2 = vec_ld(0,b(17,j))
274
275 bsv1 = vec_splat(bv3, 2)
276 bsv2 = vec_splat(bv3, 3)
277 bsv3 = vec_splat(bv1, 0)
278 bsv4 = vec_splat(bv1, 1)
279 bsv5 = vec_splat(bv1, 2)
280 bsv6 = vec_splat(bv1, 3)
281 bsv7 = vec_splat(bv2, 0)
282 bsv8 = vec_splat(bv2, 1)
283 bsv9 = vec_splat(bv2, 2)
284 bsv10 = vec_splat(bv2, 3)
285
286 cv = vec_mul(av1, bsv1)
287 cv = vec_madd(av2, bsv2, cv)
288 cv = vec_madd(av3, bsv3, cv)
289 cv = vec_madd(av4, bsv4, cv)
290 cv = vec_madd(av5, bsv5, cv)
291 cv = vec_madd(av6, bsv6, cv)
292 cv = vec_madd(av7, bsv7, cv)
293 cv = vec_madd(av8, bsv8, cv)
294 cv = vec_madd(av9, bsv9, cv)
295 cv = vec_madd(av10, bsv10, cv)
296
297 call vec_st(cv, 0, c(i,j+1))
298 end do
299 end do
300 return
301 end subroutine mxm_bgq_10
302
Note: See TracBrowser for help on using the repository browser.