| 1 | c-----------------------------------------------------------------------
|
|---|
| 2 | subroutine setupcomm(comm,newcomm,newcommg,path_in,session_in)
|
|---|
| 3 | include 'mpif.h'
|
|---|
| 4 | include 'SIZE'
|
|---|
| 5 | include 'PARALLEL'
|
|---|
| 6 | include 'TSTEP'
|
|---|
| 7 | include 'INPUT'
|
|---|
| 8 |
|
|---|
| 9 | integer comm, newcomm, newcommg
|
|---|
| 10 | character session_in*(*), path_in*(*)
|
|---|
| 11 | logical flag
|
|---|
| 12 |
|
|---|
| 13 | common /nekmpi/ mid,mp,nekcomm,nekgroup,nekreal
|
|---|
| 14 |
|
|---|
| 15 | integer nid_global_root(0:nsessmax-1)
|
|---|
| 16 | character*132 session_mult(0:nsessmax-1), path_mult(0:nsessmax-1)
|
|---|
| 17 |
|
|---|
| 18 | logical ifhigh
|
|---|
| 19 | logical mpi_is_initialized
|
|---|
| 20 |
|
|---|
| 21 | integer*8 ntags
|
|---|
| 22 |
|
|---|
| 23 | call mpi_initialized(mpi_is_initialized, ierr)
|
|---|
| 24 | if (.not.mpi_is_initialized) call mpi_init(ierr)
|
|---|
| 25 |
|
|---|
| 26 | call mpi_comm_dup(comm,newcommg,ierr)
|
|---|
| 27 | newcomm = newcommg
|
|---|
| 28 | nekcomm = newcommg
|
|---|
| 29 |
|
|---|
| 30 | call mpi_comm_size(nekcomm,np_global,ierr)
|
|---|
| 31 | call mpi_comm_rank(nekcomm,nid_global,ierr)
|
|---|
| 32 |
|
|---|
| 33 | ! check upper tag size limit
|
|---|
| 34 | call mpi_comm_get_attr(MPI_COMM_WORLD,MPI_TAG_UB,ntags,flag,ierr)
|
|---|
| 35 | if (ntags .lt. np_global) then
|
|---|
| 36 | if(nid_global.eq.0) write(6,*) 'ABORT: MPI_TAG_UB too small!'
|
|---|
| 37 | call exitt
|
|---|
| 38 | endif
|
|---|
| 39 |
|
|---|
| 40 | ! set defaults
|
|---|
| 41 | nid = nid_global
|
|---|
| 42 | ifneknek = .false.
|
|---|
| 43 | ifneknekc = .false. ! session are uncoupled
|
|---|
| 44 | nsessions = 1
|
|---|
| 45 |
|
|---|
| 46 | ierr = 0
|
|---|
| 47 | nlin = 0
|
|---|
| 48 | if (nid .eq. 0) then
|
|---|
| 49 | l = ltrunc(session_in,len(session_in))
|
|---|
| 50 | if (l .gt. 0) then
|
|---|
| 51 | call blank(session_mult(0),132)
|
|---|
| 52 | call chcopy(session_mult(0), session_in, l)
|
|---|
| 53 | l = ltrunc(path_in,len(path_in))
|
|---|
| 54 | call blank(path_mult(0) ,132)
|
|---|
| 55 | call chcopy(path_mult(0), path_in, l)
|
|---|
| 56 | else
|
|---|
| 57 | !write(6,*) 'Reading session file ...'
|
|---|
| 58 | open (unit=8,file='SESSION.NAME',status='old',err=24)
|
|---|
| 59 | 21 read (8,*,END=22)
|
|---|
| 60 | nlin = nlin + 1
|
|---|
| 61 | goto 21
|
|---|
| 62 | 22 rewind(8)
|
|---|
| 63 | if (nlin.gt.2) read(8,*,err=24) nsessions
|
|---|
| 64 | if (nsessions.gt.1) read(8,*,err=24) ifneknekc
|
|---|
| 65 | do n=0,nsessions-1
|
|---|
| 66 | call blank(session_mult(n),132)
|
|---|
| 67 | call blank(path_mult(n) ,132)
|
|---|
| 68 | read(8,11,err=24) session_mult(n)
|
|---|
| 69 | read(8,11,err=24) path_mult(n)
|
|---|
| 70 | if (nsessions.gt.1) read(8,*,err=24) npsess(n)
|
|---|
| 71 | enddo
|
|---|
| 72 | 11 format(a132)
|
|---|
| 73 | close(8)
|
|---|
| 74 | endif
|
|---|
| 75 | if (nsessions.gt.1)
|
|---|
| 76 | $ write(6,*) 'Number of sessions:',nsessions
|
|---|
| 77 | goto 23
|
|---|
| 78 | 24 ierr = 1
|
|---|
| 79 | endif
|
|---|
| 80 | 23 continue
|
|---|
| 81 | call err_chk(ierr,' Error while reading SESSION.NAME!$')
|
|---|
| 82 |
|
|---|
| 83 | call bcast(nsessions,ISIZE)
|
|---|
| 84 | if (nsessions .gt. nsessmax)
|
|---|
| 85 | & call exitti('nsessmax in SIZE too low!$',nsessmax)
|
|---|
| 86 | if (nsessions .gt. 1) ifneknek = .true.
|
|---|
| 87 |
|
|---|
| 88 | call bcast(ifneknekc,LSIZE)
|
|---|
| 89 | do n = 0,nsessions-1
|
|---|
| 90 | call bcast(npsess(n),ISIZE)
|
|---|
| 91 | call bcast(session_mult(n),132*CSIZE)
|
|---|
| 92 | call bcast(path_mult(n),132*CSIZE)
|
|---|
| 93 | enddo
|
|---|
| 94 |
|
|---|
| 95 | ! single session run
|
|---|
| 96 | if (.not.ifneknek) then
|
|---|
| 97 | ifneknekc = .false.
|
|---|
| 98 | session = session_mult(0)
|
|---|
| 99 | path = path_mult(0)
|
|---|
| 100 | amgfile = session
|
|---|
| 101 | return
|
|---|
| 102 | endif
|
|---|
| 103 |
|
|---|
| 104 | c Check if specified number of ranks in each session is consistent
|
|---|
| 105 | c with the total number of ranks
|
|---|
| 106 | npall=0
|
|---|
| 107 | do n=0,nsessions-1
|
|---|
| 108 | npall=npall+npsess(n)
|
|---|
| 109 | enddo
|
|---|
| 110 | if (npall.ne.np_global)
|
|---|
| 111 | & call exitti('Number of ranks does not match!$',npall)
|
|---|
| 112 |
|
|---|
| 113 | c Assign key for splitting into multiple groups
|
|---|
| 114 | nid_global_root_next=0
|
|---|
| 115 | do n=0,nsessions-1
|
|---|
| 116 | nid_global_root(n)=nid_global_root_next
|
|---|
| 117 | nid_global_root_next=nid_global_root(n)+npsess(n)
|
|---|
| 118 | if (nid_global.ge.nid_global_root(n).and.
|
|---|
| 119 | & nid_global.lt.nid_global_root_next) idsess = n
|
|---|
| 120 | enddo
|
|---|
| 121 | call mpi_comm_split(comm,idsess,nid,newcomm,ierr)
|
|---|
| 122 |
|
|---|
| 123 | session = session_mult(idsess)
|
|---|
| 124 | path = path_mult (idsess)
|
|---|
| 125 |
|
|---|
| 126 | if (ifneknekc) then
|
|---|
| 127 | if (nsessions.gt.2) call exitti(
|
|---|
| 128 | & 'More than 2 coupled sessions are currently not supported!$',
|
|---|
| 129 | $ nsessions)
|
|---|
| 130 | endif
|
|---|
| 131 |
|
|---|
| 132 | amgfile = session
|
|---|
| 133 |
|
|---|
| 134 | return
|
|---|
| 135 | end
|
|---|
| 136 | c---------------------------------------------------------------------
|
|---|
| 137 | subroutine iniproc()
|
|---|
| 138 | include 'SIZE'
|
|---|
| 139 | include 'PARALLEL'
|
|---|
| 140 | include 'INPUT'
|
|---|
| 141 | include 'mpif.h'
|
|---|
| 142 |
|
|---|
| 143 | common /nekmpi/ nid_,np_,nekcomm,nekgroup,nekreal
|
|---|
| 144 |
|
|---|
| 145 | logical flag
|
|---|
| 146 | integer*8 isize_mpi, lb
|
|---|
| 147 |
|
|---|
| 148 | nid = mynode()
|
|---|
| 149 | nid_ = nid
|
|---|
| 150 | np = numnodes()
|
|---|
| 151 | np_ = np
|
|---|
| 152 |
|
|---|
| 153 | nio = -1 ! Default io flag
|
|---|
| 154 | if (nid.eq.0) nio=0 ! Only node 0 writes
|
|---|
| 155 |
|
|---|
| 156 | if (nid.eq.nio) then
|
|---|
| 157 | if (ifneknek) then
|
|---|
| 158 | call set_stdout(' ',idsess)
|
|---|
| 159 | else
|
|---|
| 160 | call set_stdout(' ',-1)
|
|---|
| 161 | endif
|
|---|
| 162 | endif
|
|---|
| 163 |
|
|---|
| 164 | if (wdsize .eq. 4)
|
|---|
| 165 | $ call exitti('Single precision mode not supported!',wdsize)
|
|---|
| 166 |
|
|---|
| 167 | call MPI_Type_Get_Extent(MPI_DOUBLE_PRECISION,lb,isize_mpi,ierr)
|
|---|
| 168 | if (isize_mpi .ne. wdsize) then
|
|---|
| 169 | call exitti('MPI real size does not match$',isize_mpi)
|
|---|
| 170 | endif
|
|---|
| 171 |
|
|---|
| 172 | call MPI_Type_Get_Extent(MPI_INTEGER,lb,isize_mpi,ierr)
|
|---|
| 173 | if (isize_mpi .ne. isize) then
|
|---|
| 174 | call exitti('MPI integer size does not match$',isize_mpi)
|
|---|
| 175 | endif
|
|---|
| 176 |
|
|---|
| 177 | call MPI_Type_Get_Extent(MPI_INTEGER8,lb,isize_mpi,ierr)
|
|---|
| 178 | if (isize_mpi .ne. isize8) then
|
|---|
| 179 | call exitti('MPI integer8 size does not match$',isize_mpi)
|
|---|
| 180 | endif
|
|---|
| 181 |
|
|---|
| 182 | PID = 0
|
|---|
| 183 | NULLPID=0
|
|---|
| 184 | NODE0=0
|
|---|
| 185 | NODE= NID+1
|
|---|
| 186 |
|
|---|
| 187 | C Test timer accuracy
|
|---|
| 188 | edif = 0.0
|
|---|
| 189 | do i = 1,10
|
|---|
| 190 | e1 = dnekclock()
|
|---|
| 191 | e2 = dnekclock()
|
|---|
| 192 | edif = edif + e2-e1
|
|---|
| 193 | enddo
|
|---|
| 194 | edif = edif/10.
|
|---|
| 195 |
|
|---|
| 196 | call fgslib_crystal_setup(cr_h,nekcomm,np) ! set cr handle to new instance
|
|---|
| 197 | return
|
|---|
| 198 | end
|
|---|
| 199 | c-----------------------------------------------------------------------
|
|---|
| 200 | subroutine gop( x, w, op, n)
|
|---|
| 201 |
|
|---|
| 202 | c Global vector commutative operation
|
|---|
| 203 |
|
|---|
| 204 | include 'CTIMER'
|
|---|
| 205 |
|
|---|
| 206 | include 'mpif.h'
|
|---|
| 207 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 208 |
|
|---|
| 209 | real x(n), w(n)
|
|---|
| 210 | character*3 op
|
|---|
| 211 |
|
|---|
| 212 | if (ifsync) call nekgsync()
|
|---|
| 213 |
|
|---|
| 214 | #ifdef TIMER
|
|---|
| 215 | if (icalld.eq.0) then
|
|---|
| 216 | tgop =0.0d0
|
|---|
| 217 | ngop =0
|
|---|
| 218 | icalld=1
|
|---|
| 219 | endif
|
|---|
| 220 | ngop = ngop + 1
|
|---|
| 221 | etime1=dnekclock()
|
|---|
| 222 | #endif
|
|---|
| 223 | c
|
|---|
| 224 | if (op.eq.'+ ') then
|
|---|
| 225 | call mpi_allreduce(x,w,n,MPI_DOUBLE_PRECISION,mpi_sum,nekcomm,ie)
|
|---|
| 226 | elseif (op.EQ.'M ') then
|
|---|
| 227 | call mpi_allreduce(x,w,n,MPI_DOUBLE_PRECISION,mpi_max,nekcomm,ie)
|
|---|
| 228 | elseif (op.EQ.'m ') then
|
|---|
| 229 | call mpi_allreduce(x,w,n,MPI_DOUBLE_PRECISION,mpi_min,nekcomm,ie)
|
|---|
| 230 | elseif (op.EQ.'* ') then
|
|---|
| 231 | call mpi_allreduce(x,w,n,MPI_DOUBLE_PRECISION,mpi_prod,nekcomm,ie)
|
|---|
| 232 | else
|
|---|
| 233 | write(6,*) nid,' OP ',op,' not supported. ABORT in GOP.'
|
|---|
| 234 | call exitt
|
|---|
| 235 | endif
|
|---|
| 236 |
|
|---|
| 237 | call copy(x,w,n)
|
|---|
| 238 |
|
|---|
| 239 | #ifdef TIMER
|
|---|
| 240 | tgop =tgop +(dnekclock()-etime1)
|
|---|
| 241 | #endif
|
|---|
| 242 |
|
|---|
| 243 | return
|
|---|
| 244 | end
|
|---|
| 245 | c-----------------------------------------------------------------------
|
|---|
| 246 | subroutine igop( x, w, op, n)
|
|---|
| 247 | c
|
|---|
| 248 | c Global vector commutative operation
|
|---|
| 249 | c
|
|---|
| 250 | include 'mpif.h'
|
|---|
| 251 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 252 |
|
|---|
| 253 | integer x(n), w(n)
|
|---|
| 254 | character*3 op
|
|---|
| 255 |
|
|---|
| 256 | if (op.eq.'+ ') then
|
|---|
| 257 | call mpi_allreduce (x,w,n,mpi_integer,mpi_sum ,nekcomm,ierr)
|
|---|
| 258 | elseif (op.EQ.'M ') then
|
|---|
| 259 | call mpi_allreduce (x,w,n,mpi_integer,mpi_max ,nekcomm,ierr)
|
|---|
| 260 | elseif (op.EQ.'m ') then
|
|---|
| 261 | call mpi_allreduce (x,w,n,mpi_integer,mpi_min ,nekcomm,ierr)
|
|---|
| 262 | elseif (op.EQ.'* ') then
|
|---|
| 263 | call mpi_allreduce (x,w,n,mpi_integer,mpi_prod,nekcomm,ierr)
|
|---|
| 264 | else
|
|---|
| 265 | write(6,*) nid,' OP ',op,' not supported. ABORT in igop.'
|
|---|
| 266 | call exitt
|
|---|
| 267 | endif
|
|---|
| 268 |
|
|---|
| 269 | call icopy(x,w,n)
|
|---|
| 270 |
|
|---|
| 271 | return
|
|---|
| 272 | end
|
|---|
| 273 | c-----------------------------------------------------------------------
|
|---|
| 274 | subroutine i8gop( x, w, op, n)
|
|---|
| 275 | c
|
|---|
| 276 | c Global vector commutative operation
|
|---|
| 277 | c
|
|---|
| 278 | include 'mpif.h'
|
|---|
| 279 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 280 |
|
|---|
| 281 | integer*8 x(n), w(n)
|
|---|
| 282 | character*3 op
|
|---|
| 283 |
|
|---|
| 284 | if (op.eq.'+ ') then
|
|---|
| 285 | call mpi_allreduce (x,w,n,mpi_integer8,mpi_sum ,nekcomm,ierr)
|
|---|
| 286 | elseif (op.EQ.'M ') then
|
|---|
| 287 | call mpi_allreduce (x,w,n,mpi_integer8,mpi_max ,nekcomm,ierr)
|
|---|
| 288 | elseif (op.EQ.'m ') then
|
|---|
| 289 | call mpi_allreduce (x,w,n,mpi_integer8,mpi_min ,nekcomm,ierr)
|
|---|
| 290 | elseif (op.EQ.'* ') then
|
|---|
| 291 | call mpi_allreduce (x,w,n,mpi_integer8,mpi_prod,nekcomm,ierr)
|
|---|
| 292 | else
|
|---|
| 293 | write(6,*) nid,' OP ',op,' not supported. ABORT in igop.'
|
|---|
| 294 | call exitt
|
|---|
| 295 | endif
|
|---|
| 296 |
|
|---|
| 297 | call i8copy(x,w,n)
|
|---|
| 298 |
|
|---|
| 299 | return
|
|---|
| 300 | end
|
|---|
| 301 | c-----------------------------------------------------------------------
|
|---|
| 302 | subroutine csend(mtype,buf,len,jnid,jpid)
|
|---|
| 303 | include 'mpif.h'
|
|---|
| 304 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 305 | real*4 buf(1)
|
|---|
| 306 |
|
|---|
| 307 | call mpi_send (buf,len,mpi_byte,jnid,mtype,nekcomm,ierr)
|
|---|
| 308 |
|
|---|
| 309 | return
|
|---|
| 310 | end
|
|---|
| 311 | c-----------------------------------------------------------------------
|
|---|
| 312 | subroutine crecv(mtype,buf,lenm)
|
|---|
| 313 | include 'mpif.h'
|
|---|
| 314 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 315 | integer status(mpi_status_size)
|
|---|
| 316 | C
|
|---|
| 317 | real*4 buf(1)
|
|---|
| 318 | len = lenm
|
|---|
| 319 | jnid = mpi_any_source
|
|---|
| 320 |
|
|---|
| 321 | call mpi_recv (buf,len,mpi_byte
|
|---|
| 322 | $ ,jnid,mtype,nekcomm,status,ierr)
|
|---|
| 323 | c
|
|---|
| 324 | if (len.gt.lenm) then
|
|---|
| 325 | write(6,*) nid,'long message in mpi_crecv:',len,lenm
|
|---|
| 326 | call exitt
|
|---|
| 327 | endif
|
|---|
| 328 | c
|
|---|
| 329 | return
|
|---|
| 330 | end
|
|---|
| 331 | c-----------------------------------------------------------------------
|
|---|
| 332 | subroutine crecv2(mtype,buf,lenm,jnid)
|
|---|
| 333 | include 'mpif.h'
|
|---|
| 334 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 335 | integer status(mpi_status_size)
|
|---|
| 336 | C
|
|---|
| 337 | real*4 buf(1)
|
|---|
| 338 | len = lenm
|
|---|
| 339 |
|
|---|
| 340 | call mpi_recv (buf,len,mpi_byte
|
|---|
| 341 | $ ,jnid,mtype,nekcomm,status,ierr)
|
|---|
| 342 | c
|
|---|
| 343 | if (len.gt.lenm) then
|
|---|
| 344 | write(6,*) nid,'long message in mpi_crecv:',len,lenm
|
|---|
| 345 | call exitt
|
|---|
| 346 | endif
|
|---|
| 347 | c
|
|---|
| 348 | return
|
|---|
| 349 | end
|
|---|
| 350 | c-----------------------------------------------------------------------
|
|---|
| 351 | subroutine crecv3(mtype,buf,len,lenm)
|
|---|
| 352 | include 'mpif.h'
|
|---|
| 353 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 354 | integer status(mpi_status_size)
|
|---|
| 355 | C
|
|---|
| 356 | real*4 buf(1)
|
|---|
| 357 | len = lenm
|
|---|
| 358 | jnid = mpi_any_source
|
|---|
| 359 |
|
|---|
| 360 | call mpi_recv (buf,len,mpi_byte
|
|---|
| 361 | $ ,jnid,mtype,nekcomm,status,ierr)
|
|---|
| 362 | call mpi_get_count (status,mpi_byte,len,ierr)
|
|---|
| 363 | c
|
|---|
| 364 | if (len.gt.lenm) then
|
|---|
| 365 | write(6,*) nid,'long message in mpi_crecv:',len,lenm
|
|---|
| 366 | call exitt
|
|---|
| 367 | endif
|
|---|
| 368 | c
|
|---|
| 369 | return
|
|---|
| 370 | end
|
|---|
| 371 | c-----------------------------------------------------------------------
|
|---|
| 372 | integer function numnodes()
|
|---|
| 373 | include 'mpif.h'
|
|---|
| 374 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 375 |
|
|---|
| 376 | call mpi_comm_size (nekcomm, numnodes , ierr)
|
|---|
| 377 |
|
|---|
| 378 | return
|
|---|
| 379 | end
|
|---|
| 380 | c-----------------------------------------------------------------------
|
|---|
| 381 | integer function mynode()
|
|---|
| 382 | include 'mpif.h'
|
|---|
| 383 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 384 | integer myid
|
|---|
| 385 |
|
|---|
| 386 | call mpi_comm_rank (nekcomm, myid, ierr)
|
|---|
| 387 | mynode = myid
|
|---|
| 388 |
|
|---|
| 389 | return
|
|---|
| 390 | end
|
|---|
| 391 | c-----------------------------------------------------------------------
|
|---|
| 392 | real*8 function dnekclock()
|
|---|
| 393 | include 'mpif.h'
|
|---|
| 394 | c
|
|---|
| 395 | dnekclock=mpi_wtime()
|
|---|
| 396 | c
|
|---|
| 397 | return
|
|---|
| 398 | end
|
|---|
| 399 | c-----------------------------------------------------------------------
|
|---|
| 400 | real*8 function dnekclock_sync()
|
|---|
| 401 | include 'mpif.h'
|
|---|
| 402 | c
|
|---|
| 403 | call nekgsync()
|
|---|
| 404 | dnekclock_sync=mpi_wtime()
|
|---|
| 405 | c
|
|---|
| 406 | return
|
|---|
| 407 | end
|
|---|
| 408 | c-----------------------------------------------------------------------
|
|---|
| 409 | subroutine lbcast(ifif)
|
|---|
| 410 | C
|
|---|
| 411 | C Broadcast logical variable to all processors.
|
|---|
| 412 | C
|
|---|
| 413 | include 'SIZE'
|
|---|
| 414 | include 'PARALLEL'
|
|---|
| 415 | include 'mpif.h'
|
|---|
| 416 |
|
|---|
| 417 | logical ifif
|
|---|
| 418 |
|
|---|
| 419 | if (np.eq.1) return
|
|---|
| 420 |
|
|---|
| 421 | item=0
|
|---|
| 422 | if (ifif) item=1
|
|---|
| 423 | call bcast(item,isize)
|
|---|
| 424 | ifif=.false.
|
|---|
| 425 | if (item.eq.1) ifif=.true.
|
|---|
| 426 |
|
|---|
| 427 | return
|
|---|
| 428 | end
|
|---|
| 429 | c-----------------------------------------------------------------------
|
|---|
| 430 | subroutine bcast(buf,len)
|
|---|
| 431 | include 'mpif.h'
|
|---|
| 432 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 433 | real*4 buf(1)
|
|---|
| 434 |
|
|---|
| 435 | call mpi_bcast (buf,len,mpi_byte,0,nekcomm,ierr)
|
|---|
| 436 |
|
|---|
| 437 | return
|
|---|
| 438 | end
|
|---|
| 439 | c-----------------------------------------------------------------------
|
|---|
| 440 | subroutine create_comm(inewcomm)
|
|---|
| 441 | include 'mpif.h'
|
|---|
| 442 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 443 |
|
|---|
| 444 | c call mpi_comm_group (mpi_comm_world,itmp,ierr)
|
|---|
| 445 | c call mpi_comm_create (mpi_comm_world,itmp,icomm,ierr)
|
|---|
| 446 | c call mpi_group_free (itmp,ierr)
|
|---|
| 447 |
|
|---|
| 448 | call mpi_comm_dup(nekcomm,inewcomm,ierr)
|
|---|
| 449 |
|
|---|
| 450 | return
|
|---|
| 451 | end
|
|---|
| 452 | c-----------------------------------------------------------------------
|
|---|
| 453 | function isend(msgtag,x,len,jnid,jpid)
|
|---|
| 454 | c
|
|---|
| 455 | c Note: len in bytes
|
|---|
| 456 | c
|
|---|
| 457 | integer x(1)
|
|---|
| 458 | C
|
|---|
| 459 | include 'mpif.h'
|
|---|
| 460 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 461 | C
|
|---|
| 462 | call mpi_isend (x,len,mpi_byte,jnid,msgtag
|
|---|
| 463 | $ ,nekcomm,imsg,ierr)
|
|---|
| 464 | isend = imsg
|
|---|
| 465 | c write(6,*) nid,' isend:',imsg,msgtag,len,jnid,(x(k),k=1,len/4)
|
|---|
| 466 | c
|
|---|
| 467 | return
|
|---|
| 468 | end
|
|---|
| 469 | c-----------------------------------------------------------------------
|
|---|
| 470 | function irecv(msgtag,x,len)
|
|---|
| 471 | c
|
|---|
| 472 | c Note: len in bytes
|
|---|
| 473 | c
|
|---|
| 474 | integer x(1)
|
|---|
| 475 | C
|
|---|
| 476 | include 'mpif.h'
|
|---|
| 477 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 478 | C
|
|---|
| 479 | call mpi_irecv (x,len,mpi_byte,mpi_any_source,msgtag
|
|---|
| 480 | $ ,nekcomm,imsg,ierr)
|
|---|
| 481 | irecv = imsg
|
|---|
| 482 | c write(6,*) nid,' irecv:',imsg,msgtag,len
|
|---|
| 483 | c
|
|---|
| 484 | c
|
|---|
| 485 | return
|
|---|
| 486 | end
|
|---|
| 487 | c-----------------------------------------------------------------------
|
|---|
| 488 | subroutine msgwait(imsg)
|
|---|
| 489 | c
|
|---|
| 490 | include 'mpif.h'
|
|---|
| 491 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 492 | integer status(mpi_status_size)
|
|---|
| 493 | c
|
|---|
| 494 | c write(6,*) nid,' msgwait:',imsg
|
|---|
| 495 | c
|
|---|
| 496 | call mpi_wait (imsg,status,ierr)
|
|---|
| 497 | c
|
|---|
| 498 | return
|
|---|
| 499 | end
|
|---|
| 500 | c-----------------------------------------------------------------------
|
|---|
| 501 | subroutine nekgsync()
|
|---|
| 502 |
|
|---|
| 503 | include 'mpif.h'
|
|---|
| 504 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 505 |
|
|---|
| 506 | call mpi_barrier(nekcomm,ierr)
|
|---|
| 507 |
|
|---|
| 508 | return
|
|---|
| 509 | end
|
|---|
| 510 | c-----------------------------------------------------------------------
|
|---|
| 511 | subroutine exittr(stringi,rdata,idata)
|
|---|
| 512 | character*1 stringi(132)
|
|---|
| 513 | character*1 stringo(132)
|
|---|
| 514 | character*25 s25
|
|---|
| 515 | include 'SIZE'
|
|---|
| 516 | include 'TOTAL'
|
|---|
| 517 | include 'CTIMER'
|
|---|
| 518 |
|
|---|
| 519 | call blank(stringo,132)
|
|---|
| 520 | call chcopy(stringo,stringi,132)
|
|---|
| 521 | len = indx1(stringo,'$',1)
|
|---|
| 522 | write(s25,25) rdata,idata
|
|---|
| 523 | 25 format(1x,1p1e14.6,i10)
|
|---|
| 524 | call chcopy(stringo(len),s25,25)
|
|---|
| 525 |
|
|---|
| 526 | if (nid.eq.0) write(6,1) (stringo(k),k=1,len+24)
|
|---|
| 527 | 1 format('EXIT: ',132a1)
|
|---|
| 528 |
|
|---|
| 529 | call exitt
|
|---|
| 530 |
|
|---|
| 531 | return
|
|---|
| 532 | end
|
|---|
| 533 | c-----------------------------------------------------------------------
|
|---|
| 534 | subroutine exitti(stringi,idata)
|
|---|
| 535 | character*1 stringi(132)
|
|---|
| 536 | character*1 stringo(132)
|
|---|
| 537 | character*11 s11
|
|---|
| 538 | include 'SIZE'
|
|---|
| 539 | include 'TOTAL'
|
|---|
| 540 | include 'CTIMER'
|
|---|
| 541 |
|
|---|
| 542 | call blank(stringo,132)
|
|---|
| 543 | call chcopy(stringo,stringi,132)
|
|---|
| 544 | len = indx1(stringo,'$',1)
|
|---|
| 545 | write(s11,11) idata
|
|---|
| 546 | 11 format(1x,i10)
|
|---|
| 547 | call chcopy(stringo(len),s11,11)
|
|---|
| 548 |
|
|---|
| 549 | if (nid.eq.0) write(6,1) (stringo(k),k=1,len+10)
|
|---|
| 550 | 1 format('EXIT: ',132a1)
|
|---|
| 551 |
|
|---|
| 552 | call exitt
|
|---|
| 553 |
|
|---|
| 554 | return
|
|---|
| 555 | end
|
|---|
| 556 | c-----------------------------------------------------------------------
|
|---|
| 557 | subroutine err_chk(ierr,string)
|
|---|
| 558 | character*1 string(132)
|
|---|
| 559 | character*1 ostring(132)
|
|---|
| 560 | character*10 s10
|
|---|
| 561 | include 'SIZE'
|
|---|
| 562 | c include 'TOTAL'
|
|---|
| 563 | c include 'CTIMER'
|
|---|
| 564 |
|
|---|
| 565 | ierr = iglsum(ierr,1)
|
|---|
| 566 | if(ierr.eq.0) return
|
|---|
| 567 |
|
|---|
| 568 | len = indx1(string,'$',1)
|
|---|
| 569 | call blank(ostring,132)
|
|---|
| 570 | write(s10,11) ierr
|
|---|
| 571 | 11 format(1x,' ierr=',i3)
|
|---|
| 572 |
|
|---|
| 573 | call chcopy(ostring,string,len-1)
|
|---|
| 574 | call chcopy(ostring(len),s10,10)
|
|---|
| 575 |
|
|---|
| 576 | if (nid.eq.0) write(6,1) (ostring(k),k=1,len+10)
|
|---|
| 577 | 1 format('ERROR: ',132a1)
|
|---|
| 578 |
|
|---|
| 579 | call exitt
|
|---|
| 580 |
|
|---|
| 581 | return
|
|---|
| 582 | end
|
|---|
| 583 | c
|
|---|
| 584 | c-----------------------------------------------------------------------
|
|---|
| 585 | subroutine exitt0
|
|---|
| 586 |
|
|---|
| 587 | include 'SIZE'
|
|---|
| 588 | include 'TOTAL'
|
|---|
| 589 |
|
|---|
| 590 | if (nid.eq.0) then
|
|---|
| 591 | write(6,*) ' '
|
|---|
| 592 | write(6,'(A)') 'run successful: dying ...'
|
|---|
| 593 | write(6,*) ' '
|
|---|
| 594 | endif
|
|---|
| 595 |
|
|---|
| 596 | c if (nid.eq.0) call close_files
|
|---|
| 597 | call print_runtime_info
|
|---|
| 598 | call nek_die(0)
|
|---|
| 599 |
|
|---|
| 600 | return
|
|---|
| 601 | end
|
|---|
| 602 | c-----------------------------------------------------------------------
|
|---|
| 603 | subroutine exitt
|
|---|
| 604 |
|
|---|
| 605 | include 'SIZE'
|
|---|
| 606 | include 'TOTAL'
|
|---|
| 607 |
|
|---|
| 608 | if (nid.eq.0) then
|
|---|
| 609 | write(6,*) ' '
|
|---|
| 610 | write(6,'(A)') 'an error occured: dying ...'
|
|---|
| 611 | write(6,*) ' '
|
|---|
| 612 | endif
|
|---|
| 613 |
|
|---|
| 614 | c call print_stack()
|
|---|
| 615 | c if (nid.eq.0) call close_files
|
|---|
| 616 | c call print_runtime_info
|
|---|
| 617 | call nek_die(1)
|
|---|
| 618 |
|
|---|
| 619 | return
|
|---|
| 620 | end
|
|---|
| 621 | c-----------------------------------------------------------------------
|
|---|
| 622 | subroutine print_runtime_info
|
|---|
| 623 | include 'SIZE'
|
|---|
| 624 | include 'TOTAL'
|
|---|
| 625 | include 'CTIMER'
|
|---|
| 626 | include 'mpif.h'
|
|---|
| 627 |
|
|---|
| 628 | #ifdef PAPI
|
|---|
| 629 | gflops = glsum(dnekgflops(),1)
|
|---|
| 630 | #endif
|
|---|
| 631 |
|
|---|
| 632 | tstop = dnekclock_sync()
|
|---|
| 633 | ttotal = tstop-etimes
|
|---|
| 634 | tsol = max(ttime - tprep,0.0)
|
|---|
| 635 | nxyz = lx1*ly1*lz1
|
|---|
| 636 |
|
|---|
| 637 | dtmp4 = glsum(getmaxrss(),1)/1e9
|
|---|
| 638 |
|
|---|
| 639 | if (nid.eq.0) then
|
|---|
| 640 | dtmp1 = 0
|
|---|
| 641 | dtmp2 = 0
|
|---|
| 642 | if(istep.gt.0) then
|
|---|
| 643 | dgp = nvtot
|
|---|
| 644 | dgp = max(dgp,1.)*max(istep,1)
|
|---|
| 645 | dtmp0 = np*(ttime-tprep)
|
|---|
| 646 | dtmp1 = 0
|
|---|
| 647 | if (dtmp0.gt.0) dtmp1 = dgp/dtmp0
|
|---|
| 648 | dtmp2 = (ttime-tprep)/max(istep,1)
|
|---|
| 649 | endif
|
|---|
| 650 | write(6,*) ' '
|
|---|
| 651 | write(6,'(5(A,1p1e13.5,A,/))')
|
|---|
| 652 | & 'total elapsed time : ',ttotal, ' sec'
|
|---|
| 653 | & ,'total solver time w/o IO : ',tsol, ' sec'
|
|---|
| 654 | & ,'time/timestep : ',dtmp2 , ' sec'
|
|---|
| 655 | & ,'avg throughput per timestep : ',dtmp1 , ' gridpts/CPUs'
|
|---|
| 656 | & ,'total max memory usage : ',dtmp4 , ' GB'
|
|---|
| 657 | #ifdef PAPI
|
|---|
| 658 | write(6,'(1(A,1p1e13.5,/))')
|
|---|
| 659 | & ,'total Gflops/s : ',gflops
|
|---|
| 660 | #endif
|
|---|
| 661 | endif
|
|---|
| 662 | call flush_io
|
|---|
| 663 |
|
|---|
| 664 | return
|
|---|
| 665 | end
|
|---|
| 666 | c-----------------------------------------------------------------------
|
|---|
| 667 | subroutine nek_die(ierr)
|
|---|
| 668 | include 'SIZE'
|
|---|
| 669 | include 'mpif.h'
|
|---|
| 670 |
|
|---|
| 671 | call mpi_finalize (ierr_)
|
|---|
| 672 | call cexit(ierr)
|
|---|
| 673 |
|
|---|
| 674 | return
|
|---|
| 675 | end
|
|---|
| 676 | c-----------------------------------------------------------------------
|
|---|
| 677 | subroutine fgslib_userExitHandler(istatus)
|
|---|
| 678 |
|
|---|
| 679 | call exitt
|
|---|
| 680 |
|
|---|
| 681 | return
|
|---|
| 682 | end
|
|---|
| 683 | c-----------------------------------------------------------------------
|
|---|
| 684 | subroutine printHeader
|
|---|
| 685 |
|
|---|
| 686 | include 'SIZE'
|
|---|
| 687 | include 'PARALLEL'
|
|---|
| 688 |
|
|---|
| 689 | include 'HEADER'
|
|---|
| 690 | write(6,*) 'Number of MPI ranks :', np
|
|---|
| 691 | c WRITE(6,*) 'REAL wdsize :',WDSIZE
|
|---|
| 692 | c WRITE(6,*) 'INTEGER wdsize :',ISIZE
|
|---|
| 693 | c WRITE(6,*) 'INTEGER8 wdsize :',ISIZE8
|
|---|
| 694 | WRITE(6,*) ' '
|
|---|
| 695 |
|
|---|
| 696 | return
|
|---|
| 697 | end
|
|---|
| 698 | c-----------------------------------------------------------------------
|
|---|
| 699 | function igl_running_sum(in)
|
|---|
| 700 | c
|
|---|
| 701 | include 'mpif.h'
|
|---|
| 702 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 703 | integer status(mpi_status_size)
|
|---|
| 704 | integer x,w,r
|
|---|
| 705 |
|
|---|
| 706 | x = in ! running sum
|
|---|
| 707 | w = in ! working buff
|
|---|
| 708 | r = 0 ! recv buff
|
|---|
| 709 |
|
|---|
| 710 | call mpi_scan(x,r,1,mpi_integer,mpi_sum,nekcomm,ierr)
|
|---|
| 711 | igl_running_sum = r
|
|---|
| 712 |
|
|---|
| 713 | return
|
|---|
| 714 | end
|
|---|
| 715 | c-----------------------------------------------------------------------
|
|---|
| 716 | subroutine platform_timer(ivb) ! mxm, ping-pong, and all_reduce timer
|
|---|
| 717 |
|
|---|
| 718 | include 'SIZE'
|
|---|
| 719 | include 'TOTAL'
|
|---|
| 720 |
|
|---|
| 721 |
|
|---|
| 722 | call mxm_test_all(nid,ivb) ! measure mxm times
|
|---|
| 723 | c call exitti('done mxm_test_all$',ivb)
|
|---|
| 724 |
|
|---|
| 725 | call comm_test(ivb) ! measure message-passing and all-reduce times
|
|---|
| 726 |
|
|---|
| 727 | return
|
|---|
| 728 | end
|
|---|
| 729 | c-----------------------------------------------------------------------
|
|---|
| 730 | subroutine comm_test(ivb) ! measure message-passing and all-reduce times
|
|---|
| 731 | ! ivb = 0 --> minimal verbosity
|
|---|
| 732 | ! ivb = 1 --> fully verbose
|
|---|
| 733 | ! ivb = 2 --> smaller sample set(shorter)
|
|---|
| 734 |
|
|---|
| 735 | include 'SIZE'
|
|---|
| 736 | include 'PARALLEL'
|
|---|
| 737 |
|
|---|
| 738 | call gop_test(ivb) ! added, Jan. 8, 2008
|
|---|
| 739 |
|
|---|
| 740 | log_np=log2(np)
|
|---|
| 741 | np2 = 2**log_np
|
|---|
| 742 | if (np2.eq.np) call gp2_test(ivb) ! added, Jan. 8, 2008
|
|---|
| 743 |
|
|---|
| 744 | io = 6
|
|---|
| 745 | n512 = min(512,np-1)
|
|---|
| 746 |
|
|---|
| 747 | do nodeb=1,n512
|
|---|
| 748 | call pingpongo(alphas,betas,0,nodeb,.0005,io,ivb)
|
|---|
| 749 | if (nid.eq.0) write(6,2) nodeb,np,alphas,betas
|
|---|
| 750 | 2 format(2i10,1p2e15.7,' alpha betao')
|
|---|
| 751 | enddo
|
|---|
| 752 |
|
|---|
| 753 | do kk=0,2
|
|---|
| 754 | do nodeb=1,n512
|
|---|
| 755 | call pingpong (alphas,betas,0,nodeb,.0005,io,ivb,kk)
|
|---|
| 756 | if (nid.eq.0) write(6,1) nodeb,np,alphas,betas,kk
|
|---|
| 757 | 1 format(2i10,1p2e15.7,' alpha beta',i1)
|
|---|
| 758 | enddo
|
|---|
| 759 | enddo
|
|---|
| 760 |
|
|---|
| 761 | return
|
|---|
| 762 | end
|
|---|
| 763 | c-----------------------------------------------------------------------
|
|---|
| 764 | subroutine pingpong(alphas,betas,nodea,nodeb,dt,io,ivb,kk)
|
|---|
| 765 |
|
|---|
| 766 | include 'SIZE'
|
|---|
| 767 | common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
|
|---|
| 768 |
|
|---|
| 769 | parameter (lt=lx1*ly1*lz1*lelt)
|
|---|
| 770 | parameter (mwd = 3*lt/2)
|
|---|
| 771 | common /scrns/ x(mwd),y(mwd),x1(mwd),y1(mwd)
|
|---|
| 772 |
|
|---|
| 773 | include 'mpif.h'
|
|---|
| 774 | integer status(mpi_status_size)
|
|---|
| 775 |
|
|---|
| 776 | character*10 fname
|
|---|
| 777 |
|
|---|
| 778 | if (nid.eq.nodea) then
|
|---|
| 779 | write(fname,3) np,nodeb
|
|---|
| 780 | 3 format('t',i4.4,'.',i4.4)
|
|---|
| 781 | if (io.ne.6) open (unit=io,file=fname)
|
|---|
| 782 | endif
|
|---|
| 783 |
|
|---|
| 784 | call nekgsync
|
|---|
| 785 | call get_msg_vol(msg_vol,dt,nodea,nodeb) ! Est. msg vol for dt s
|
|---|
| 786 |
|
|---|
| 787 | nwds = 0
|
|---|
| 788 | if (nid.eq.nodea.and.ivb.gt.0) write(io,*)
|
|---|
| 789 |
|
|---|
| 790 | betas = 0 ! Reported inverse bandwidth
|
|---|
| 791 | count = 0
|
|---|
| 792 |
|
|---|
| 793 | do itest = 1,500
|
|---|
| 794 |
|
|---|
| 795 | nloop = msg_vol/(nwds+2)
|
|---|
| 796 | nloop = min(nloop,1000)
|
|---|
| 797 | nloop = max(nloop,1)
|
|---|
| 798 |
|
|---|
| 799 | len = 8*nwds
|
|---|
| 800 |
|
|---|
| 801 | if (kk.eq.0)
|
|---|
| 802 | $ call ping_loop (t1,t0,len,nloop,nodea,nodeb,nid,x,y,x1,y1)
|
|---|
| 803 | if (kk.eq.1)
|
|---|
| 804 | $ call ping_loop1(t1,t0,len,nloop,nodea,nodeb,nid,x,y)
|
|---|
| 805 | if (kk.eq.2)
|
|---|
| 806 | $ call ping_loop2(t1,t0,len,nloop,nodea,nodeb,nid,x,y)
|
|---|
| 807 |
|
|---|
| 808 | if (nid.eq.nodea) then
|
|---|
| 809 | tmsg = (t1-t0)/(2*nloop) ! 2*nloop--> Double Buffer
|
|---|
| 810 | tmsg = tmsg / 2. ! one-way cost = 1/2 round-trip
|
|---|
| 811 | tpwd = tmsg ! time-per-word
|
|---|
| 812 | if (nwds.gt.0) tpwd = tmsg/nwds
|
|---|
| 813 | if (ivb.gt.0) write(io,1) nodeb,np,nloop,nwds,tmsg,tpwd,kk
|
|---|
| 814 | 1 format(3i6,i12,1p2e16.8,' pgn',i1)
|
|---|
| 815 |
|
|---|
| 816 | if (nwds.eq.1) then
|
|---|
| 817 | alphas = tmsg
|
|---|
| 818 | elseif (nwds.gt.10000) then ! "average" beta
|
|---|
| 819 | betas = (betas*count + tpwd)/(count+1)
|
|---|
| 820 | count = count + 1
|
|---|
| 821 | endif
|
|---|
| 822 | endif
|
|---|
| 823 |
|
|---|
| 824 | if (ivb.eq.2) then
|
|---|
| 825 | nwds = (nwds+1)*1.25
|
|---|
| 826 | else
|
|---|
| 827 | nwds = (nwds+1)*1.016
|
|---|
| 828 | endif
|
|---|
| 829 | if (nwds.gt.mwd) then
|
|---|
| 830 | c if (nwds.gt.1024) then
|
|---|
| 831 | if (nid.eq.nodea.and.io.ne.6) close(unit=io)
|
|---|
| 832 | call nekgsync
|
|---|
| 833 | return
|
|---|
| 834 | endif
|
|---|
| 835 |
|
|---|
| 836 | enddo
|
|---|
| 837 |
|
|---|
| 838 | if (nid.eq.nodea.and.io.ne.6) close(unit=io)
|
|---|
| 839 | call nekgsync
|
|---|
| 840 |
|
|---|
| 841 | return
|
|---|
| 842 | end
|
|---|
| 843 | c-----------------------------------------------------------------------
|
|---|
| 844 | subroutine pingpongo(alphas,betas,nodea,nodeb,dt,io,ivb)
|
|---|
| 845 |
|
|---|
| 846 | include 'SIZE'
|
|---|
| 847 | common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
|
|---|
| 848 |
|
|---|
| 849 | parameter (lt=lx1*ly1*lz1*lelt)
|
|---|
| 850 | parameter (mwd = 3*lt)
|
|---|
| 851 | common /scrns/ x(mwd),y(mwd)
|
|---|
| 852 |
|
|---|
| 853 | include 'mpif.h'
|
|---|
| 854 | integer status(mpi_status_size)
|
|---|
| 855 |
|
|---|
| 856 | character*10 fname
|
|---|
| 857 |
|
|---|
| 858 | if (nid.eq.nodea) then
|
|---|
| 859 | write(fname,3) np,nodeb
|
|---|
| 860 | 3 format('t',i4.4,'.',i4.4)
|
|---|
| 861 | if (io.ne.6) open (unit=io,file=fname)
|
|---|
| 862 | endif
|
|---|
| 863 |
|
|---|
| 864 | call nekgsync
|
|---|
| 865 | call get_msg_vol(msg_vol,dt,nodea,nodeb) ! Est. msg vol for dt s
|
|---|
| 866 |
|
|---|
| 867 | nwds = 0
|
|---|
| 868 | if (nid.eq.nodea.and.ivb.gt.0) write(io,*)
|
|---|
| 869 |
|
|---|
| 870 | betas = 0 ! Reported inverse bandwidth
|
|---|
| 871 | count = 0
|
|---|
| 872 |
|
|---|
| 873 | do itest = 1,500
|
|---|
| 874 | call nekgsync
|
|---|
| 875 | nloop = msg_vol/(nwds+2)
|
|---|
| 876 | nloop = min(nloop,1000)
|
|---|
| 877 | nloop = max(nloop,1)
|
|---|
| 878 |
|
|---|
| 879 | len = 8*nwds
|
|---|
| 880 | jnid = mpi_any_source
|
|---|
| 881 |
|
|---|
| 882 | if (nid.eq.nodea) then
|
|---|
| 883 |
|
|---|
| 884 | msg = irecv(itest,y,1)
|
|---|
| 885 | call csend(itest,x,1,nodeb,0) ! Initiate send, to synch.
|
|---|
| 886 | call msgwait(msg)
|
|---|
| 887 |
|
|---|
| 888 | t0 = mpi_wtime ()
|
|---|
| 889 | do i=1,nloop
|
|---|
| 890 | call mpi_irecv(y,len,mpi_byte,mpi_any_source,i
|
|---|
| 891 | $ ,nekcomm,msg,ierr)
|
|---|
| 892 | call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr)
|
|---|
| 893 | call mpi_wait (msg,status,ierr)
|
|---|
| 894 | enddo
|
|---|
| 895 | t1 = mpi_wtime ()
|
|---|
| 896 | tmsg = (t1-t0)/nloop
|
|---|
| 897 | tmsg = tmsg / 2. ! Round-trip message time = twice one-way
|
|---|
| 898 | tpwd = tmsg
|
|---|
| 899 | if (nwds.gt.0) tpwd = tmsg/nwds
|
|---|
| 900 | if (ivb.gt.0) write(io,1) nodeb,np,nloop,nwds,tmsg,tpwd
|
|---|
| 901 | 1 format(3i6,i12,1p2e16.8,' pgo')
|
|---|
| 902 |
|
|---|
| 903 | if (nwds.eq.1) then
|
|---|
| 904 | alphas = tmsg
|
|---|
| 905 | elseif (nwds.gt.10000) then
|
|---|
| 906 | betas = (betas*count + tpwd)/(count+1)
|
|---|
| 907 | count = count + 1
|
|---|
| 908 | endif
|
|---|
| 909 |
|
|---|
| 910 | elseif (nid.eq.nodeb) then
|
|---|
| 911 |
|
|---|
| 912 | call crecv(itest,y,1) ! Initiate send, to synch.
|
|---|
| 913 | call csend(itest,x,1,nodea,0)
|
|---|
| 914 |
|
|---|
| 915 | t0 = dnekclock()
|
|---|
| 916 | do i=1,nloop
|
|---|
| 917 | call mpi_recv (y,len,mpi_byte
|
|---|
| 918 | $ ,jnid,i,nekcomm,status,ierr)
|
|---|
| 919 | call mpi_send (x,len,mpi_byte,nodea,i,nekcomm,ierr)
|
|---|
| 920 | enddo
|
|---|
| 921 | t1 = dnekclock()
|
|---|
| 922 | tmsg = (t1-t0)/nloop
|
|---|
| 923 |
|
|---|
| 924 | endif
|
|---|
| 925 |
|
|---|
| 926 | nwds = (nwds+1)*1.016
|
|---|
| 927 | if (nwds.gt.mwd) then
|
|---|
| 928 | if (nid.eq.nodea.and.io.ne.6) close(unit=io)
|
|---|
| 929 | call nekgsync
|
|---|
| 930 | return
|
|---|
| 931 | endif
|
|---|
| 932 |
|
|---|
| 933 | enddo
|
|---|
| 934 |
|
|---|
| 935 | if (nid.eq.nodea.and.io.ne.6) close(unit=io)
|
|---|
| 936 | call nekgsync
|
|---|
| 937 |
|
|---|
| 938 | return
|
|---|
| 939 | end
|
|---|
| 940 | c-----------------------------------------------------------------------
|
|---|
| 941 | subroutine get_msg_vol(msg_vol,dt,nodea,nodeb)
|
|---|
| 942 | include 'SIZE'
|
|---|
| 943 | common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
|
|---|
| 944 | parameter (lt=lx1*ly1*lz1*lelt)
|
|---|
| 945 | common /scrns/ x(3*lt),y(3*lt)
|
|---|
| 946 | !
|
|---|
| 947 | ! Est. msg vol for dt s
|
|---|
| 948 | !
|
|---|
| 949 | msg_vol = 1000
|
|---|
| 950 |
|
|---|
| 951 | nwds = min(1000,lt)
|
|---|
| 952 | nloop = 50
|
|---|
| 953 |
|
|---|
| 954 | tmsg = 0.
|
|---|
| 955 | call gop(tmsg,t1,'+ ',1)
|
|---|
| 956 |
|
|---|
| 957 | len = 8*nwds
|
|---|
| 958 | if (nid.eq.nodea) then
|
|---|
| 959 |
|
|---|
| 960 | msg = irecv(1,y,1)
|
|---|
| 961 | call csend(1,x,1,nodeb,0) ! Initiate send, to synch.
|
|---|
| 962 | call msgwait(msg)
|
|---|
| 963 |
|
|---|
| 964 | t0 = dnekclock()
|
|---|
| 965 | do i=1,nloop
|
|---|
| 966 | msg = irecv(i,y,len)
|
|---|
| 967 | call csend(i,x,len,nodeb,0)
|
|---|
| 968 | call msgwait(msg)
|
|---|
| 969 | enddo
|
|---|
| 970 | t1 = dnekclock()
|
|---|
| 971 | tmsg = (t1-t0)/nloop
|
|---|
| 972 | tpwd = tmsg/nwds
|
|---|
| 973 |
|
|---|
| 974 | elseif (nid.eq.nodeb) then
|
|---|
| 975 |
|
|---|
| 976 | call crecv(1,y,1) ! Initiate send, to synch.
|
|---|
| 977 | call csend(1,x,1,nodea,0)
|
|---|
| 978 |
|
|---|
| 979 | t0 = dnekclock()
|
|---|
| 980 | do i=1,nloop
|
|---|
| 981 | call crecv(i,y,len)
|
|---|
| 982 | call csend(i,x,len,nodea,0)
|
|---|
| 983 | enddo
|
|---|
| 984 | t1 = dnekclock()
|
|---|
| 985 | tmsg = (t1-t0)/nloop
|
|---|
| 986 | tmsg = 0.
|
|---|
| 987 |
|
|---|
| 988 | endif
|
|---|
| 989 |
|
|---|
| 990 | call gop(tmsg,t1,'+ ',1)
|
|---|
| 991 | msg_vol = nwds*(dt/tmsg)
|
|---|
| 992 | c if (nid.eq.nodea) write(6,*) nid,msg_vol,nwds,dt,tmsg,' msgvol'
|
|---|
| 993 |
|
|---|
| 994 | return
|
|---|
| 995 | end
|
|---|
| 996 | c-----------------------------------------------------------------------
|
|---|
| 997 | subroutine gop_test(ivb)
|
|---|
| 998 | include 'SIZE'
|
|---|
| 999 | common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
|
|---|
| 1000 | include 'mpif.h'
|
|---|
| 1001 | integer status(mpi_status_size)
|
|---|
| 1002 |
|
|---|
| 1003 | parameter (lt=lx1*ly1*lz1*lelt)
|
|---|
| 1004 | parameter (mwd = 3*lt)
|
|---|
| 1005 | common /scrns/ x(mwd),y(mwd)
|
|---|
| 1006 | common /scruz/ times(2,500)
|
|---|
| 1007 | common /scrcg/ nwd(500)
|
|---|
| 1008 |
|
|---|
| 1009 | nwds = 1
|
|---|
| 1010 | mtest = 0
|
|---|
| 1011 | do itest = 1,500
|
|---|
| 1012 | nwds = (nwds+1)*1.016
|
|---|
| 1013 | if (nwds.gt.mwd) goto 100
|
|---|
| 1014 | mtest = mtest+1
|
|---|
| 1015 | nwd(mtest) = nwds
|
|---|
| 1016 | enddo
|
|---|
| 1017 | 100 continue
|
|---|
| 1018 |
|
|---|
| 1019 | nwds = 1
|
|---|
| 1020 | do itest = mtest,1,-1
|
|---|
| 1021 |
|
|---|
| 1022 | tiny = 1.e-27
|
|---|
| 1023 | call cfill(x,tiny,mwd)
|
|---|
| 1024 | nwds = nwd(itest)
|
|---|
| 1025 | call nekgsync
|
|---|
| 1026 |
|
|---|
| 1027 | t0 = mpi_wtime ()
|
|---|
| 1028 | call gop(x,y,'+ ',nwds)
|
|---|
| 1029 | call gop(x,y,'+ ',nwds)
|
|---|
| 1030 | call gop(x,y,'+ ',nwds)
|
|---|
| 1031 | call gop(x,y,'+ ',nwds)
|
|---|
| 1032 | call gop(x,y,'+ ',nwds)
|
|---|
| 1033 | call gop(x,y,'+ ',nwds)
|
|---|
| 1034 | t1 = mpi_wtime ()
|
|---|
| 1035 |
|
|---|
| 1036 | tmsg = (t1-t0)/6 ! six calls
|
|---|
| 1037 | tpwd = tmsg
|
|---|
| 1038 | if (nwds.gt.0) tpwd = tmsg/nwds
|
|---|
| 1039 | times(1,itest) = tmsg
|
|---|
| 1040 | times(2,itest) = tpwd
|
|---|
| 1041 |
|
|---|
| 1042 | enddo
|
|---|
| 1043 | 101 continue
|
|---|
| 1044 |
|
|---|
| 1045 |
|
|---|
| 1046 | if (nid.eq.0) then
|
|---|
| 1047 | nwds = 1
|
|---|
| 1048 | do itest=1,500
|
|---|
| 1049 | if (ivb.gt.0.or.itest.eq.1)
|
|---|
| 1050 | $ write(6,1) np,nwds,(times(k,itest),k=1,2)
|
|---|
| 1051 | 1 format(i12,i12,1p2e16.8,' gop')
|
|---|
| 1052 | nwds = (nwds+1)*1.016
|
|---|
| 1053 | if (nwds.gt.mwd) goto 102
|
|---|
| 1054 | enddo
|
|---|
| 1055 | 102 continue
|
|---|
| 1056 | endif
|
|---|
| 1057 |
|
|---|
| 1058 | return
|
|---|
| 1059 | end
|
|---|
| 1060 | c-----------------------------------------------------------------------
|
|---|
| 1061 | subroutine gp2_test(ivb)
|
|---|
| 1062 |
|
|---|
| 1063 | include 'SIZE'
|
|---|
| 1064 | include 'mpif.h'
|
|---|
| 1065 |
|
|---|
| 1066 | common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
|
|---|
| 1067 | integer status(mpi_status_size)
|
|---|
| 1068 |
|
|---|
| 1069 | parameter (lt=lx1*ly1*lz1*lelt)
|
|---|
| 1070 | parameter (mwd = 3*lt)
|
|---|
| 1071 | common /scrns/ x(mwd),y(mwd)
|
|---|
| 1072 | common /scruz/ times(2,500)
|
|---|
| 1073 |
|
|---|
| 1074 | call rzero(x,mwd)
|
|---|
| 1075 |
|
|---|
| 1076 | nwds = 1
|
|---|
| 1077 | do itest = 1,500
|
|---|
| 1078 | call gp2(x,y,'+ ',1,nid,np)
|
|---|
| 1079 |
|
|---|
| 1080 | t0 = mpi_wtime ()
|
|---|
| 1081 | call gp2(x,y,'+ ',nwds,nid,np)
|
|---|
| 1082 | call gp2(x,y,'+ ',nwds,nid,np)
|
|---|
| 1083 | call gp2(x,y,'+ ',nwds,nid,np)
|
|---|
| 1084 | call gp2(x,y,'+ ',nwds,nid,np)
|
|---|
| 1085 | t1 = mpi_wtime ()
|
|---|
| 1086 |
|
|---|
| 1087 | tmsg = (t1-t0)/4 ! four calls
|
|---|
| 1088 | tpwd = tmsg
|
|---|
| 1089 | if (nwds.gt.0) tpwd = tmsg/nwds
|
|---|
| 1090 | times(1,itest) = tmsg
|
|---|
| 1091 | times(2,itest) = tpwd
|
|---|
| 1092 |
|
|---|
| 1093 | nwds = (nwds+1)*1.016
|
|---|
| 1094 | if (nwds.gt.mwd) goto 101
|
|---|
| 1095 | enddo
|
|---|
| 1096 | 101 continue
|
|---|
| 1097 |
|
|---|
| 1098 |
|
|---|
| 1099 | if (nid.eq.0) then
|
|---|
| 1100 | nwds = 1
|
|---|
| 1101 | do itest=1,500
|
|---|
| 1102 | if (ivb.gt.0.or.itest.eq.1)
|
|---|
| 1103 | $ write(6,1) np,nwds,(times(k,itest),k=1,2)
|
|---|
| 1104 | 1 format(i12,i12,1p2e16.8,' gp2')
|
|---|
| 1105 | nwds = (nwds+1)*1.016
|
|---|
| 1106 | if (nwds.gt.mwd) goto 102
|
|---|
| 1107 | enddo
|
|---|
| 1108 | 102 continue
|
|---|
| 1109 | endif
|
|---|
| 1110 |
|
|---|
| 1111 | return
|
|---|
| 1112 | end
|
|---|
| 1113 | c-----------------------------------------------------------------------
|
|---|
| 1114 | integer function xor(m,n)
|
|---|
| 1115 | c
|
|---|
| 1116 | c If NOT running on a parallel processor, it is sufficient to
|
|---|
| 1117 | c have this routine return a value of XOR=1.
|
|---|
| 1118 | c
|
|---|
| 1119 | c Pick one of the following:
|
|---|
| 1120 | c
|
|---|
| 1121 | c UNIX 4.2, f77:
|
|---|
| 1122 | XOR = OR(M,N)-AND(M,N)
|
|---|
| 1123 | c
|
|---|
| 1124 | c Intel FTN286:
|
|---|
| 1125 | c XOR = M.NEQV.N
|
|---|
| 1126 | c
|
|---|
| 1127 | c Ryan-McFarland Fortran
|
|---|
| 1128 | C XOR = IEOR(M,N)
|
|---|
| 1129 | c
|
|---|
| 1130 | c XOR = 0
|
|---|
| 1131 | c IF(M.EQ.1 .OR. N.EQ.1) XOR=1
|
|---|
| 1132 | c IF(M.EQ.0 .AND. N.EQ.0) XOR=0
|
|---|
| 1133 | c IF(M.EQ.1 .AND. N.EQ.1) XOR=0
|
|---|
| 1134 | c IF(M.GT.1 .OR.N.GT.1 .OR.M.LT.0.OR.N.LT.0) THEN
|
|---|
| 1135 | c PRINT*,'ERROR IN XOR'
|
|---|
| 1136 | c STOP
|
|---|
| 1137 | c ENDIF
|
|---|
| 1138 | C
|
|---|
| 1139 | return
|
|---|
| 1140 | end
|
|---|
| 1141 | c-----------------------------------------------------------------------
|
|---|
| 1142 | subroutine gp2( x, w, op, n, nid, np)
|
|---|
| 1143 | c
|
|---|
| 1144 | c Global vector commutative operation using spanning tree.
|
|---|
| 1145 | c
|
|---|
| 1146 | c Std. fan-in/fan-out
|
|---|
| 1147 |
|
|---|
| 1148 | real x(n), w(n)
|
|---|
| 1149 | character*3 op
|
|---|
| 1150 |
|
|---|
| 1151 | integer bit, bytes, cnt, diff, spsize, i,
|
|---|
| 1152 | * parent, troot, xor, root, lnp, log2
|
|---|
| 1153 | logical ifgot
|
|---|
| 1154 |
|
|---|
| 1155 | integer type
|
|---|
| 1156 | save type
|
|---|
| 1157 | data type /998/
|
|---|
| 1158 |
|
|---|
| 1159 | type = type+100
|
|---|
| 1160 | if (type.gt.9992) type=type-998
|
|---|
| 1161 | typer = type-1
|
|---|
| 1162 | bytes = 8*n
|
|---|
| 1163 |
|
|---|
| 1164 | root = 0
|
|---|
| 1165 | troot = max0((nid/np)*np, root)
|
|---|
| 1166 | diff = xor(nid,troot)
|
|---|
| 1167 | nullpid = 0
|
|---|
| 1168 |
|
|---|
| 1169 | c Accumulate contributions from children, if any
|
|---|
| 1170 | level2=1
|
|---|
| 1171 | 5 continue
|
|---|
| 1172 | level=level2
|
|---|
| 1173 | level2=level+level
|
|---|
| 1174 | if (mod(nid,level2).ne.0) goto 20
|
|---|
| 1175 | call crecv(type,w,bytes)
|
|---|
| 1176 | if (op.eq.'+ ') then
|
|---|
| 1177 | do i=1,n
|
|---|
| 1178 | x(i) = x(i) + w(i)
|
|---|
| 1179 | enddo
|
|---|
| 1180 | elseif (op.eq.'* ') then
|
|---|
| 1181 | do i=1,n
|
|---|
| 1182 | x(i) = x(i) * w(i)
|
|---|
| 1183 | enddo
|
|---|
| 1184 | elseif (op.eq.'M ') then
|
|---|
| 1185 | do i=1,n
|
|---|
| 1186 | x(i) = max(x(i),w(i))
|
|---|
| 1187 | enddo
|
|---|
| 1188 | elseif (op.eq.'m ') then
|
|---|
| 1189 | do i=1,n
|
|---|
| 1190 | x(i) = min(x(i),w(i))
|
|---|
| 1191 | enddo
|
|---|
| 1192 | endif
|
|---|
| 1193 | if (level2.lt.np) goto 5
|
|---|
| 1194 |
|
|---|
| 1195 | c Pass result back to parent
|
|---|
| 1196 | 20 parent = nid-level
|
|---|
| 1197 | if (nid .ne. 0) call csend(type,x,bytes,parent,nullpid)
|
|---|
| 1198 |
|
|---|
| 1199 | c Await final answer from node 0 via log_2 fan out
|
|---|
| 1200 | level=np/2
|
|---|
| 1201 | ifgot=.false.
|
|---|
| 1202 | if (nid.eq.root) ifgot=.true.
|
|---|
| 1203 |
|
|---|
| 1204 | lnp = log2(np)
|
|---|
| 1205 | do i=1,lnp
|
|---|
| 1206 | if (ifgot) then
|
|---|
| 1207 | jnid=nid+level
|
|---|
| 1208 | call csend(typer,x,bytes,jnid,nullpid)
|
|---|
| 1209 | elseif (mod(nid,level).eq.0) then
|
|---|
| 1210 | call crecv(typer,x,bytes)
|
|---|
| 1211 | ifgot=.true.
|
|---|
| 1212 | endif
|
|---|
| 1213 | level=level/2
|
|---|
| 1214 | enddo
|
|---|
| 1215 |
|
|---|
| 1216 | return
|
|---|
| 1217 | end
|
|---|
| 1218 | c-----------------------------------------------------------------------
|
|---|
| 1219 | subroutine ping_loop1(t1,t0,len,nloop,nodea,nodeb,nid,x,y)
|
|---|
| 1220 |
|
|---|
| 1221 | common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
|
|---|
| 1222 |
|
|---|
| 1223 | real x(1),y(1)
|
|---|
| 1224 |
|
|---|
| 1225 | include 'mpif.h'
|
|---|
| 1226 | integer status(mpi_status_size)
|
|---|
| 1227 |
|
|---|
| 1228 | i=0
|
|---|
| 1229 | if (nid.eq.nodea) then
|
|---|
| 1230 | call nekgsync
|
|---|
| 1231 | call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr) ! 1b
|
|---|
| 1232 | call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 1a
|
|---|
| 1233 | c call mpi_rsend(x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 1a
|
|---|
| 1234 | call msgwait(msg) ! 1b
|
|---|
| 1235 |
|
|---|
| 1236 | t0 = mpi_wtime ()
|
|---|
| 1237 | do i=1,nloop
|
|---|
| 1238 | call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr) ! 2b
|
|---|
| 1239 | call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 2a
|
|---|
| 1240 | c call mpi_rsend(x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 2a
|
|---|
| 1241 | call mpi_wait (msg,status,ierr) ! 2b
|
|---|
| 1242 | enddo
|
|---|
| 1243 | t1 = mpi_wtime ()
|
|---|
| 1244 |
|
|---|
| 1245 | elseif (nid.eq.nodeb) then
|
|---|
| 1246 |
|
|---|
| 1247 | call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr) ! 1a
|
|---|
| 1248 | call nekgsync
|
|---|
| 1249 | call mpi_wait (msg,status,ierr) ! 1a
|
|---|
| 1250 |
|
|---|
| 1251 | j=i
|
|---|
| 1252 | do i=1,nloop
|
|---|
| 1253 | call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr) ! 2a
|
|---|
| 1254 | c call mpi_rsend(x,len,mpi_byte,nodea,j,nekcomm,ierr) ! 1b
|
|---|
| 1255 | call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr) ! 1b
|
|---|
| 1256 | call mpi_wait (msg,status,ierr) ! 2a
|
|---|
| 1257 | j=i
|
|---|
| 1258 | enddo
|
|---|
| 1259 | c call mpi_rsend(x,len,mpi_byte,nodea,j,nekcomm,ierr) ! nb
|
|---|
| 1260 | call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr) ! nb
|
|---|
| 1261 |
|
|---|
| 1262 | else
|
|---|
| 1263 | call nekgsync
|
|---|
| 1264 | endif
|
|---|
| 1265 |
|
|---|
| 1266 | return
|
|---|
| 1267 | end
|
|---|
| 1268 | c-----------------------------------------------------------------------
|
|---|
| 1269 | subroutine ping_loop2(t1,t0,len,nloop,nodea,nodeb,nid,x,y)
|
|---|
| 1270 |
|
|---|
| 1271 | common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
|
|---|
| 1272 |
|
|---|
| 1273 | real x(1),y(1)
|
|---|
| 1274 |
|
|---|
| 1275 | include 'mpif.h'
|
|---|
| 1276 | integer status(mpi_status_size)
|
|---|
| 1277 |
|
|---|
| 1278 | i=0
|
|---|
| 1279 | if (nid.eq.nodea) then
|
|---|
| 1280 | call nekgsync
|
|---|
| 1281 | call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr) ! 1b
|
|---|
| 1282 | call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 1a
|
|---|
| 1283 | call msgwait(msg) ! 1b
|
|---|
| 1284 |
|
|---|
| 1285 | t0 = mpi_wtime ()
|
|---|
| 1286 | do i=1,nloop
|
|---|
| 1287 | call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 2a
|
|---|
| 1288 | call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr) ! 2b
|
|---|
| 1289 | call mpi_wait (msg,status,ierr) ! 2b
|
|---|
| 1290 | enddo
|
|---|
| 1291 | t1 = mpi_wtime ()
|
|---|
| 1292 |
|
|---|
| 1293 | elseif (nid.eq.nodeb) then
|
|---|
| 1294 |
|
|---|
| 1295 | call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr) ! 1a
|
|---|
| 1296 | call nekgsync
|
|---|
| 1297 | call mpi_wait (msg,status,ierr) ! 1a
|
|---|
| 1298 |
|
|---|
| 1299 | j=i
|
|---|
| 1300 | do i=1,nloop
|
|---|
| 1301 | call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr) ! 1b
|
|---|
| 1302 | call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr) ! 2a
|
|---|
| 1303 | call mpi_wait (msg,status,ierr) ! 2a
|
|---|
| 1304 | j=i
|
|---|
| 1305 | enddo
|
|---|
| 1306 | call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr) ! nb
|
|---|
| 1307 |
|
|---|
| 1308 | else
|
|---|
| 1309 | call nekgsync
|
|---|
| 1310 | endif
|
|---|
| 1311 |
|
|---|
| 1312 | return
|
|---|
| 1313 | end
|
|---|
| 1314 | c-----------------------------------------------------------------------
|
|---|
| 1315 | subroutine ping_loop(t1,t0,len,nloop,nodea,nodeb,nid,x1,y1,x2,y2)
|
|---|
| 1316 | c Double Buffer : does 2*nloop timings
|
|---|
| 1317 |
|
|---|
| 1318 | common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
|
|---|
| 1319 |
|
|---|
| 1320 | real x1(1),y1(1),x2(1),y2(1)
|
|---|
| 1321 |
|
|---|
| 1322 | include 'mpif.h'
|
|---|
| 1323 | integer status(mpi_status_size)
|
|---|
| 1324 |
|
|---|
| 1325 | itag=1
|
|---|
| 1326 | if (nid.eq.nodea) then
|
|---|
| 1327 | call mpi_irecv(y1,len,mpi_byte,nodeb,itag,nekcomm,msg1,ierr) ! 1b
|
|---|
| 1328 | call nekgsync
|
|---|
| 1329 |
|
|---|
| 1330 |
|
|---|
| 1331 | t0 = mpi_wtime ()
|
|---|
| 1332 | do i=1,nloop
|
|---|
| 1333 | call mpi_send (x1,len,mpi_byte,nodeb,itag,nekcomm,ierr) ! 1a
|
|---|
| 1334 | call mpi_irecv(y2,len,mpi_byte,nodeb,itag,nekcomm,msg2,ierr)! 2b
|
|---|
| 1335 | call mpi_wait (msg1,status,ierr) ! 1b
|
|---|
| 1336 | call mpi_send (x2,len,mpi_byte,nodeb,itag,nekcomm,ierr) ! 2a
|
|---|
| 1337 | call mpi_irecv(y1,len,mpi_byte,nodeb,itag,nekcomm,msg1,ierr)! 3b
|
|---|
| 1338 | call mpi_wait (msg2,status,ierr) ! 2b
|
|---|
| 1339 | enddo
|
|---|
| 1340 | t1 = mpi_wtime ()
|
|---|
| 1341 | call mpi_send (x1,len,mpi_byte,nodeb,itag,nekcomm,ierr) ! nb
|
|---|
| 1342 | call mpi_wait (msg1,status,ierr) ! nb
|
|---|
| 1343 |
|
|---|
| 1344 | elseif (nid.eq.nodeb) then
|
|---|
| 1345 |
|
|---|
| 1346 | call mpi_irecv(y1,len,mpi_byte,nodea,itag,nekcomm,msg1,ierr) ! nb
|
|---|
| 1347 | call nekgsync
|
|---|
| 1348 |
|
|---|
| 1349 |
|
|---|
| 1350 | do i=1,nloop
|
|---|
| 1351 | call mpi_wait (msg1,status,ierr) ! 1a
|
|---|
| 1352 | call mpi_send (x1,len,mpi_byte,nodea,itag,nekcomm,ierr) ! 1b
|
|---|
| 1353 | call mpi_irecv(y2,len,mpi_byte,nodea,itag,nekcomm,msg2,ierr)! 2a
|
|---|
| 1354 | call mpi_wait (msg2,status,ierr) ! 2a
|
|---|
| 1355 | call mpi_send (x2,len,mpi_byte,nodea,itag,nekcomm,ierr) ! 2b
|
|---|
| 1356 | call mpi_irecv(y1,len,mpi_byte,nodea,itag,nekcomm,msg1,ierr)! 3a
|
|---|
| 1357 | enddo
|
|---|
| 1358 | call mpi_wait (msg1,status,ierr) ! 2a
|
|---|
| 1359 | call mpi_send (x1,len,mpi_byte,nodea,itag,nekcomm,ierr) ! nb
|
|---|
| 1360 |
|
|---|
| 1361 | else
|
|---|
| 1362 | call nekgsync
|
|---|
| 1363 | endif
|
|---|
| 1364 |
|
|---|
| 1365 | return
|
|---|
| 1366 | end
|
|---|
| 1367 | c-----------------------------------------------------------------------
|
|---|
| 1368 | integer*8 function i8gl_running_sum(in)
|
|---|
| 1369 | c
|
|---|
| 1370 | include 'mpif.h'
|
|---|
| 1371 |
|
|---|
| 1372 | integer*8 in
|
|---|
| 1373 |
|
|---|
| 1374 | common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
|
|---|
| 1375 | integer status(mpi_status_size)
|
|---|
| 1376 | integer*8 x,r
|
|---|
| 1377 |
|
|---|
| 1378 | x = in ! running sum
|
|---|
| 1379 | r = 0 ! recv buff
|
|---|
| 1380 |
|
|---|
| 1381 | call mpi_scan(x,r,1,mpi_integer8,mpi_sum,nekcomm,ierr)
|
|---|
| 1382 | i8gl_running_sum = r
|
|---|
| 1383 |
|
|---|
| 1384 | return
|
|---|
| 1385 | end
|
|---|
| 1386 | c-----------------------------------------------------------------------
|
|---|
| 1387 | subroutine close_files
|
|---|
| 1388 | logical ifopen
|
|---|
| 1389 |
|
|---|
| 1390 | do ii=1,99
|
|---|
| 1391 | if(ii.ne.5.and.ii.ne.6) then
|
|---|
| 1392 | inquire(unit=ii,opened=ifopen)
|
|---|
| 1393 | if(ifopen) close(ii)
|
|---|
| 1394 | endif
|
|---|
| 1395 | enddo
|
|---|
| 1396 |
|
|---|
| 1397 | return
|
|---|
| 1398 | end
|
|---|
| 1399 | c----------------------------------------------------------------------
|
|---|
| 1400 | subroutine neknekgsync()
|
|---|
| 1401 |
|
|---|
| 1402 | include 'SIZE'
|
|---|
| 1403 | include 'PARALLEL'
|
|---|
| 1404 |
|
|---|
| 1405 | call mpi_barrier(iglobalcomm,ierr)
|
|---|
| 1406 |
|
|---|
| 1407 | return
|
|---|
| 1408 | end
|
|---|
| 1409 | c------------------------------------------------------------------------
|
|---|
| 1410 | subroutine setnekcomm(comm_in)
|
|---|
| 1411 |
|
|---|
| 1412 | include 'SIZE'
|
|---|
| 1413 |
|
|---|
| 1414 | integer comm_in
|
|---|
| 1415 | common /nekmpi/ mid,mp,nekcomm,nekgroup,nekreal
|
|---|
| 1416 |
|
|---|
| 1417 | nekcomm = comm_in
|
|---|
| 1418 | call mpi_comm_size(nekcomm,mp,ierr)
|
|---|
| 1419 | np = mp
|
|---|
| 1420 |
|
|---|
| 1421 | return
|
|---|
| 1422 | end
|
|---|