| [e3f356c] | 1 | !!!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~!!!
|
|---|
| 2 | !!! Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
|
|---|
| 3 | !!! and DataRaceBench project contributors. See the DataRaceBench/COPYRIGHT file for details.
|
|---|
| 4 | !!!
|
|---|
| 5 | !!! SPDX-License-Identifier: (BSD-3-Clause)
|
|---|
| 6 | !!!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~!!!
|
|---|
| 7 |
|
|---|
| 8 | !*****************************************************************************
|
|---|
| 9 | !
|
|---|
| 10 | ! adi.F90: This file is part of the PolyBench/Fortran 1.0 test suite.
|
|---|
| 11 | !
|
|---|
| 12 | ! Contact: Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|---|
| 13 | ! Web address: http://polybench.sourceforge.net
|
|---|
| 14 | !
|
|---|
| 15 | !*****************************************************************************
|
|---|
| 16 |
|
|---|
| 17 | !!No data race pairs.
|
|---|
| 18 |
|
|---|
| 19 | ! Include polybench common header.
|
|---|
| 20 | #include "polybench/fpolybench.h"
|
|---|
| 21 |
|
|---|
| 22 | ! Include benchmark-specific header.
|
|---|
| 23 | ! Default data type is double, default size is 10x1024x1024.
|
|---|
| 24 | #include "polybench/adi.h"
|
|---|
| 25 |
|
|---|
| 26 | program adi
|
|---|
| 27 | use omp_lib
|
|---|
| 28 | implicit none
|
|---|
| 29 |
|
|---|
| 30 | POLYBENCH_2D_ARRAY_DECL(x,DATA_TYPE,500, 500)
|
|---|
| 31 | POLYBENCH_2D_ARRAY_DECL(a,DATA_TYPE,500, 500)
|
|---|
| 32 | POLYBENCH_2D_ARRAY_DECL(b,DATA_TYPE,500, 500)
|
|---|
| 33 | polybench_declare_prevent_dce_vars
|
|---|
| 34 | polybench_declare_instruments
|
|---|
| 35 |
|
|---|
| 36 | ! Allocation of Arrays
|
|---|
| 37 | POLYBENCH_ALLOC_2D_ARRAY(x, 500, 500)
|
|---|
| 38 | POLYBENCH_ALLOC_2D_ARRAY(a, 500, 500)
|
|---|
| 39 | POLYBENCH_ALLOC_2D_ARRAY(b, 500, 500)
|
|---|
| 40 |
|
|---|
| 41 | ! Initialization
|
|---|
| 42 | call init_array(500, x, a, b)
|
|---|
| 43 |
|
|---|
| 44 | ! Kernel Execution
|
|---|
| 45 | polybench_start_instruments
|
|---|
| 46 |
|
|---|
| 47 | call kernel_adi(10, 500, x, a, b)
|
|---|
| 48 |
|
|---|
| 49 | polybench_stop_instruments
|
|---|
| 50 | polybench_print_instruments
|
|---|
| 51 |
|
|---|
| 52 | ! Prevent dead-code elimination. All live-out data must be printed
|
|---|
| 53 | ! by the function call in argument.
|
|---|
| 54 | polybench_prevent_dce(print_array(500, x));
|
|---|
| 55 |
|
|---|
| 56 | ! Deallocation of Arrays
|
|---|
| 57 | POLYBENCH_DEALLOC_ARRAY(x)
|
|---|
| 58 | POLYBENCH_DEALLOC_ARRAY(a)
|
|---|
| 59 | POLYBENCH_DEALLOC_ARRAY(b)
|
|---|
| 60 |
|
|---|
| 61 | contains
|
|---|
| 62 | ! Array initialization.
|
|---|
| 63 | subroutine init_array(n, x, a, b)
|
|---|
| 64 | implicit none
|
|---|
| 65 |
|
|---|
| 66 | DATA_TYPE, dimension(n, n) :: a
|
|---|
| 67 | DATA_TYPE, dimension(n, n) :: x
|
|---|
| 68 | DATA_TYPE, dimension(n, n) :: b
|
|---|
| 69 | integer :: n
|
|---|
| 70 | integer :: c1, c2, c3, c4
|
|---|
| 71 |
|
|---|
| 72 | if (n >= 1) then
|
|---|
| 73 | !$omp parallel do private(c2,c3,c4)
|
|---|
| 74 | do c1 = 1, merge(merge(-((-(n-1) + 17) / 16),-((-(n-1) + 15) / 16),16<0),(n-1) / 16,(n-1) * 16 < 0)
|
|---|
| 75 | do c2 = 1, merge(merge(-((-(n-1) + 17) / 16),-((-(n-1) + 15) / 16),16<0),(n-1) / 16,(n-1) * 16 < 0)
|
|---|
| 76 | do c3 = 16*c1, merge(16*c1+15,n-1,(16*c1+15)<(n-1))
|
|---|
| 77 | !$omp simd
|
|---|
| 78 | do c4 = 16*c2, merge(16*c2+15,n-1,(16*c2+15)<(n-1))
|
|---|
| 79 | x(c3,c4)=(DBLE(c3)*DBLE(c4+1)+1.0D0)/DBLE(n)
|
|---|
| 80 | a(c3,c4)=(DBLE(c3)*DBLE(c4+2)+2.0D0)/DBLE(n)
|
|---|
| 81 | b(c3,c4)=(DBLE(c3)*DBLE(c4+3)+3.0D0)/DBLE(n)
|
|---|
| 82 | end do
|
|---|
| 83 | end do
|
|---|
| 84 | end do
|
|---|
| 85 | end do
|
|---|
| 86 | !$omp end parallel do
|
|---|
| 87 | end if
|
|---|
| 88 | end subroutine
|
|---|
| 89 |
|
|---|
| 90 | ! DCE code. Must scan the entire live-out data.
|
|---|
| 91 | ! Can be used also to check the correctness of the output.
|
|---|
| 92 |
|
|---|
| 93 | subroutine print_array(n, x)
|
|---|
| 94 | implicit none
|
|---|
| 95 |
|
|---|
| 96 | DATA_TYPE, dimension(n, n) :: x
|
|---|
| 97 | integer :: n
|
|---|
| 98 | integer :: i, j
|
|---|
| 99 |
|
|---|
| 100 | do i = 1, n
|
|---|
| 101 | do j = 1, n
|
|---|
| 102 | write(0, DATA_PRINTF_MODIFIER) x(i, j)
|
|---|
| 103 | if (mod((i * 500 + j), 20) == 0) then
|
|---|
| 104 | write(0, *)
|
|---|
| 105 | end if
|
|---|
| 106 | end do
|
|---|
| 107 | end do
|
|---|
| 108 | write(0, *)
|
|---|
| 109 | end subroutine
|
|---|
| 110 |
|
|---|
| 111 | ! Main computational kernel. The whole function will be timed,
|
|---|
| 112 | ! including the call and return.
|
|---|
| 113 | subroutine kernel_adi(tsteps, n, x, a, b)
|
|---|
| 114 | implicit none
|
|---|
| 115 |
|
|---|
| 116 | DATA_TYPE, dimension(n, n) :: a
|
|---|
| 117 | DATA_TYPE, dimension(n, n) :: x
|
|---|
| 118 | DATA_TYPE, dimension(n, n) :: b
|
|---|
| 119 | integer :: n, tsteps
|
|---|
| 120 | !integer :: i1, i2, t
|
|---|
| 121 | integer :: c0,c2,c8, c9, c15
|
|---|
| 122 |
|
|---|
| 123 | if (n >= 1 .and. tsteps >= 1) then
|
|---|
| 124 | do c0 = 1, tsteps
|
|---|
| 125 | if (n>=2) then
|
|---|
| 126 | !$omp parallel do private(c15,c9,c8)
|
|---|
| 127 | do c2 = 1, merge(merge(-((-(n-1) + 17) / 16),-((-(n-1) + 15) / 16),16<0),(n-1) / 16,(n-1) * 16 < 0)
|
|---|
| 128 | do c8 = 1, merge(merge(-((-(n-1) + 17) / 16),-((-(n-1) + 15) / 16),16<0),(n-1) / 16,(n-1) * 16 < 0)
|
|---|
| 129 | do c9 = merge(2,16*c8,1>16*c8), merge(16*c8+15,n-1,(16*c8+15)<(n-1))
|
|---|
| 130 | !$omp simd
|
|---|
| 131 | do c15 = 16*c2, merge(16*c2+15,n-1,16*c2+15<n-1)
|
|---|
| 132 | B(c15,c9) = B(c15,c9) - A(c15,c9) * A(c15,c9) / B(c15,c9 - 1)
|
|---|
| 133 | end do
|
|---|
| 134 | end do
|
|---|
| 135 | end do
|
|---|
| 136 |
|
|---|
| 137 | do c8 = 1, merge(merge(-((-(n-1) + 17) / 16),-((-(n-1) + 15) / 16),16<0),(n-1) / 16,(n-1) * 16 < 0)
|
|---|
| 138 | do c9 = merge(2,16*c8,2>16*c8), merge(16*c8+15,n-1,(16*c8+15)<(n-1))
|
|---|
| 139 | !$omp simd
|
|---|
| 140 | do c15 = 16*c2, merge(16*c2+15,n-1,16*c2+15<n-1)
|
|---|
| 141 | x(c15,c9) = x(c15,c9) - x(c15,c9) * A(c15,c9) / B(c15,c9 - 1)
|
|---|
| 142 | end do
|
|---|
| 143 | end do
|
|---|
| 144 | end do
|
|---|
| 145 |
|
|---|
| 146 | do c8 = 1, merge(merge(-((-(n-3) + 17) / 16),-((-(n-3) + 15) / 16),16<0),(n-3) / 16,(n-3) * 16 < 0)
|
|---|
| 147 | do c9 = 16*c8, merge(16*c8+15,n-3,16*c8+15<n-3)
|
|---|
| 148 | !$omp simd
|
|---|
| 149 | do c15=16*c2, merge(16*c2+15,n-1,16*c2+15 < n-1)
|
|---|
| 150 | X(c15,n - c9 - 2) = (X(c15,n - 2 - c9) - X(c15,n - 2 - c9 - 1) * A(c15,n - c9 - 3)) / B(c15,n - 3 - c9)
|
|---|
| 151 | end do
|
|---|
| 152 | end do
|
|---|
| 153 | end do
|
|---|
| 154 | end do
|
|---|
| 155 | end if
|
|---|
| 156 |
|
|---|
| 157 | !$omp parallel do private(c15)
|
|---|
| 158 | do c2 = 1, merge(merge(-((-(n-1) + 17) / 16),-((-(n-1) + 15) / 16),16<0),(n-1) / 16,(n-1) * 16 < 0)
|
|---|
| 159 | !$omp simd
|
|---|
| 160 | do c15=16*c2, merge(16*c2+15,n-1,16*c2+15<n-1)
|
|---|
| 161 | x(c15,n-1) = x(c15,n-1)/b(c15,n-1)
|
|---|
| 162 | end do
|
|---|
| 163 | end do
|
|---|
| 164 | !$omp end parallel do
|
|---|
| 165 |
|
|---|
| 166 | if (n >= 2) then
|
|---|
| 167 | !$omp parallel do private(c15, c9, c8)
|
|---|
| 168 | do c2 = 1, merge(merge(-((-(n-1) + 17) / 16),-((-(n-1) + 15) / 16),16<0),(n-1) / 16,(n-1) * 16 < 0)
|
|---|
| 169 | do c8 = 1, merge(merge(-((-(n-1) + 17) / 16),-((-(n-1) + 15) / 16),16<0),(n-1) / 16,(n-1) * 16 < 0)
|
|---|
| 170 | do c9 = merge(2,16*c8,2>16*c8), merge(16*c8+15,n-1,16*c8+15<n-1)
|
|---|
| 171 | !$omp simd
|
|---|
| 172 | do c15 = 16*c2, merge(16*c2+15,n-1,16*c2+15<n-1)
|
|---|
| 173 | B(c9,c15) = B(c9,c15) - A(c9,c15) * A(c9,c15) / B(c9 - 1,c15)
|
|---|
| 174 | end do
|
|---|
| 175 | end do
|
|---|
| 176 | end do
|
|---|
| 177 |
|
|---|
| 178 | do c8 = 1, merge(merge(-((-(n-1) + 17) / 16),-((-(n-1) + 15) / 16),16<0),(n-1) / 16,(n-1) * 16 < 0)
|
|---|
| 179 | do c9 = merge(2,16*c8,2>16*c8), merge(16*c8+15,n-1,16*c8+15<n-1)
|
|---|
| 180 | !$omp simd
|
|---|
| 181 | do c15 = 16*c2, merge(16*c2+15,n-1,16*c2+15<n-1)
|
|---|
| 182 | X(c9,c15) = X(c9,c15) - X(c9 - 1,c15) * A(c9,c15) / B(c9 - 1,c15)
|
|---|
| 183 | end do
|
|---|
| 184 | end do
|
|---|
| 185 | end do
|
|---|
| 186 |
|
|---|
| 187 | do c8 = 1, merge(merge(-((-(n-3) + 17) / 16),-((-(n-3) + 15) / 16),16<0),(n-3) / 16,(n-3) * 16 < 0)
|
|---|
| 188 | do c9 = 16*c8, merge(16*c8+15,n-3,16*c8+15<n-3)
|
|---|
| 189 | !$omp simd
|
|---|
| 190 | do c15=16*c2, merge(16*c2+15,n-1,16*c2+15 < n-1)
|
|---|
| 191 | X(n - 2 - c9,c15) = (X(n - 2 - c9,c15) - X(n - c9 - 3,c15) * A(n - 3 - c9,c15)) / B(n - 2 - c9,c15)
|
|---|
| 192 | end do
|
|---|
| 193 | end do
|
|---|
| 194 | end do
|
|---|
| 195 |
|
|---|
| 196 | end do
|
|---|
| 197 | !$omp end parallel do
|
|---|
| 198 | end if
|
|---|
| 199 |
|
|---|
| 200 | !$omp parallel do private(c15)
|
|---|
| 201 | do c2 = 1, merge(merge(-((-(n-1) + 17) / 16),-((-(n-1) + 15) / 16),16<0),(n-1) / 16,(n-1) * 16 < 0)
|
|---|
| 202 | !$omp simd
|
|---|
| 203 | do c15 = 16*c2, merge(16*c2+15,n-1,16*c2+15<n-1)
|
|---|
| 204 | X(n - 1,c15) = X(n - 1,c15) / B(n - 1,c15)
|
|---|
| 205 | end do
|
|---|
| 206 | end do
|
|---|
| 207 | !$omp end parallel do
|
|---|
| 208 | end do
|
|---|
| 209 | end if
|
|---|
| 210 |
|
|---|
| 211 |
|
|---|
| 212 | end subroutine
|
|---|
| 213 |
|
|---|
| 214 | end program
|
|---|