source: CIVL/examples/fortran/nek5000/core/comm_mpi.f

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100755
File size: 38.3 KB
Line 
1c-----------------------------------------------------------------------
2 subroutine setupcomm(comm,newcomm,newcommg,path_in,session_in)
3 include 'mpif.h'
4 include 'SIZE'
5 include 'PARALLEL'
6 include 'TSTEP'
7 include 'INPUT'
8
9 integer comm, newcomm, newcommg
10 character session_in*(*), path_in*(*)
11 logical flag
12
13 common /nekmpi/ mid,mp,nekcomm,nekgroup,nekreal
14
15 integer nid_global_root(0:nsessmax-1)
16 character*132 session_mult(0:nsessmax-1), path_mult(0:nsessmax-1)
17
18 logical ifhigh
19 logical mpi_is_initialized
20
21 integer*8 ntags
22
23 call mpi_initialized(mpi_is_initialized, ierr)
24 if (.not.mpi_is_initialized) call mpi_init(ierr)
25
26 call mpi_comm_dup(comm,newcommg,ierr)
27 newcomm = newcommg
28 nekcomm = newcommg
29
30 call mpi_comm_size(nekcomm,np_global,ierr)
31 call mpi_comm_rank(nekcomm,nid_global,ierr)
32
33 ! check upper tag size limit
34 call mpi_comm_get_attr(MPI_COMM_WORLD,MPI_TAG_UB,ntags,flag,ierr)
35 if (ntags .lt. np_global) then
36 if(nid_global.eq.0) write(6,*) 'ABORT: MPI_TAG_UB too small!'
37 call exitt
38 endif
39
40 ! set defaults
41 nid = nid_global
42 ifneknek = .false.
43 ifneknekc = .false. ! session are uncoupled
44 nsessions = 1
45
46 ierr = 0
47 nlin = 0
48 if (nid .eq. 0) then
49 l = ltrunc(session_in,len(session_in))
50 if (l .gt. 0) then
51 call blank(session_mult(0),132)
52 call chcopy(session_mult(0), session_in, l)
53 l = ltrunc(path_in,len(path_in))
54 call blank(path_mult(0) ,132)
55 call chcopy(path_mult(0), path_in, l)
56 else
57 !write(6,*) 'Reading session file ...'
58 open (unit=8,file='SESSION.NAME',status='old',err=24)
59 21 read (8,*,END=22)
60 nlin = nlin + 1
61 goto 21
62 22 rewind(8)
63 if (nlin.gt.2) read(8,*,err=24) nsessions
64 if (nsessions.gt.1) read(8,*,err=24) ifneknekc
65 do n=0,nsessions-1
66 call blank(session_mult(n),132)
67 call blank(path_mult(n) ,132)
68 read(8,11,err=24) session_mult(n)
69 read(8,11,err=24) path_mult(n)
70 if (nsessions.gt.1) read(8,*,err=24) npsess(n)
71 enddo
72 11 format(a132)
73 close(8)
74 endif
75 if (nsessions.gt.1)
76 $ write(6,*) 'Number of sessions:',nsessions
77 goto 23
78 24 ierr = 1
79 endif
80 23 continue
81 call err_chk(ierr,' Error while reading SESSION.NAME!$')
82
83 call bcast(nsessions,ISIZE)
84 if (nsessions .gt. nsessmax)
85 & call exitti('nsessmax in SIZE too low!$',nsessmax)
86 if (nsessions .gt. 1) ifneknek = .true.
87
88 call bcast(ifneknekc,LSIZE)
89 do n = 0,nsessions-1
90 call bcast(npsess(n),ISIZE)
91 call bcast(session_mult(n),132*CSIZE)
92 call bcast(path_mult(n),132*CSIZE)
93 enddo
94
95 ! single session run
96 if (.not.ifneknek) then
97 ifneknekc = .false.
98 session = session_mult(0)
99 path = path_mult(0)
100 amgfile = session
101 return
102 endif
103
104c Check if specified number of ranks in each session is consistent
105c with the total number of ranks
106 npall=0
107 do n=0,nsessions-1
108 npall=npall+npsess(n)
109 enddo
110 if (npall.ne.np_global)
111 & call exitti('Number of ranks does not match!$',npall)
112
113c Assign key for splitting into multiple groups
114 nid_global_root_next=0
115 do n=0,nsessions-1
116 nid_global_root(n)=nid_global_root_next
117 nid_global_root_next=nid_global_root(n)+npsess(n)
118 if (nid_global.ge.nid_global_root(n).and.
119 & nid_global.lt.nid_global_root_next) idsess = n
120 enddo
121 call mpi_comm_split(comm,idsess,nid,newcomm,ierr)
122
123 session = session_mult(idsess)
124 path = path_mult (idsess)
125
126 if (ifneknekc) then
127 if (nsessions.gt.2) call exitti(
128 & 'More than 2 coupled sessions are currently not supported!$',
129 $ nsessions)
130 endif
131
132 amgfile = session
133
134 return
135 end
136c---------------------------------------------------------------------
137 subroutine iniproc()
138 include 'SIZE'
139 include 'PARALLEL'
140 include 'INPUT'
141 include 'mpif.h'
142
143 common /nekmpi/ nid_,np_,nekcomm,nekgroup,nekreal
144
145 logical flag
146 integer*8 isize_mpi, lb
147
148 nid = mynode()
149 nid_ = nid
150 np = numnodes()
151 np_ = np
152
153 nio = -1 ! Default io flag
154 if (nid.eq.0) nio=0 ! Only node 0 writes
155
156 if (nid.eq.nio) then
157 if (ifneknek) then
158 call set_stdout(' ',idsess)
159 else
160 call set_stdout(' ',-1)
161 endif
162 endif
163
164 if (wdsize .eq. 4)
165 $ call exitti('Single precision mode not supported!',wdsize)
166
167 call MPI_Type_Get_Extent(MPI_DOUBLE_PRECISION,lb,isize_mpi,ierr)
168 if (isize_mpi .ne. wdsize) then
169 call exitti('MPI real size does not match$',isize_mpi)
170 endif
171
172 call MPI_Type_Get_Extent(MPI_INTEGER,lb,isize_mpi,ierr)
173 if (isize_mpi .ne. isize) then
174 call exitti('MPI integer size does not match$',isize_mpi)
175 endif
176
177 call MPI_Type_Get_Extent(MPI_INTEGER8,lb,isize_mpi,ierr)
178 if (isize_mpi .ne. isize8) then
179 call exitti('MPI integer8 size does not match$',isize_mpi)
180 endif
181
182 PID = 0
183 NULLPID=0
184 NODE0=0
185 NODE= NID+1
186
187C Test timer accuracy
188 edif = 0.0
189 do i = 1,10
190 e1 = dnekclock()
191 e2 = dnekclock()
192 edif = edif + e2-e1
193 enddo
194 edif = edif/10.
195
196 call fgslib_crystal_setup(cr_h,nekcomm,np) ! set cr handle to new instance
197 return
198 end
199c-----------------------------------------------------------------------
200 subroutine gop( x, w, op, n)
201
202c Global vector commutative operation
203
204 include 'CTIMER'
205
206 include 'mpif.h'
207 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
208
209 real x(n), w(n)
210 character*3 op
211
212 if (ifsync) call nekgsync()
213
214#ifdef TIMER
215 if (icalld.eq.0) then
216 tgop =0.0d0
217 ngop =0
218 icalld=1
219 endif
220 ngop = ngop + 1
221 etime1=dnekclock()
222#endif
223c
224 if (op.eq.'+ ') then
225 call mpi_allreduce(x,w,n,MPI_DOUBLE_PRECISION,mpi_sum,nekcomm,ie)
226 elseif (op.EQ.'M ') then
227 call mpi_allreduce(x,w,n,MPI_DOUBLE_PRECISION,mpi_max,nekcomm,ie)
228 elseif (op.EQ.'m ') then
229 call mpi_allreduce(x,w,n,MPI_DOUBLE_PRECISION,mpi_min,nekcomm,ie)
230 elseif (op.EQ.'* ') then
231 call mpi_allreduce(x,w,n,MPI_DOUBLE_PRECISION,mpi_prod,nekcomm,ie)
232 else
233 write(6,*) nid,' OP ',op,' not supported. ABORT in GOP.'
234 call exitt
235 endif
236
237 call copy(x,w,n)
238
239#ifdef TIMER
240 tgop =tgop +(dnekclock()-etime1)
241#endif
242
243 return
244 end
245c-----------------------------------------------------------------------
246 subroutine igop( x, w, op, n)
247c
248c Global vector commutative operation
249c
250 include 'mpif.h'
251 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
252
253 integer x(n), w(n)
254 character*3 op
255
256 if (op.eq.'+ ') then
257 call mpi_allreduce (x,w,n,mpi_integer,mpi_sum ,nekcomm,ierr)
258 elseif (op.EQ.'M ') then
259 call mpi_allreduce (x,w,n,mpi_integer,mpi_max ,nekcomm,ierr)
260 elseif (op.EQ.'m ') then
261 call mpi_allreduce (x,w,n,mpi_integer,mpi_min ,nekcomm,ierr)
262 elseif (op.EQ.'* ') then
263 call mpi_allreduce (x,w,n,mpi_integer,mpi_prod,nekcomm,ierr)
264 else
265 write(6,*) nid,' OP ',op,' not supported. ABORT in igop.'
266 call exitt
267 endif
268
269 call icopy(x,w,n)
270
271 return
272 end
273c-----------------------------------------------------------------------
274 subroutine i8gop( x, w, op, n)
275c
276c Global vector commutative operation
277c
278 include 'mpif.h'
279 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
280
281 integer*8 x(n), w(n)
282 character*3 op
283
284 if (op.eq.'+ ') then
285 call mpi_allreduce (x,w,n,mpi_integer8,mpi_sum ,nekcomm,ierr)
286 elseif (op.EQ.'M ') then
287 call mpi_allreduce (x,w,n,mpi_integer8,mpi_max ,nekcomm,ierr)
288 elseif (op.EQ.'m ') then
289 call mpi_allreduce (x,w,n,mpi_integer8,mpi_min ,nekcomm,ierr)
290 elseif (op.EQ.'* ') then
291 call mpi_allreduce (x,w,n,mpi_integer8,mpi_prod,nekcomm,ierr)
292 else
293 write(6,*) nid,' OP ',op,' not supported. ABORT in igop.'
294 call exitt
295 endif
296
297 call i8copy(x,w,n)
298
299 return
300 end
301c-----------------------------------------------------------------------
302 subroutine csend(mtype,buf,len,jnid,jpid)
303 include 'mpif.h'
304 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
305 real*4 buf(1)
306
307 call mpi_send (buf,len,mpi_byte,jnid,mtype,nekcomm,ierr)
308
309 return
310 end
311c-----------------------------------------------------------------------
312 subroutine crecv(mtype,buf,lenm)
313 include 'mpif.h'
314 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
315 integer status(mpi_status_size)
316C
317 real*4 buf(1)
318 len = lenm
319 jnid = mpi_any_source
320
321 call mpi_recv (buf,len,mpi_byte
322 $ ,jnid,mtype,nekcomm,status,ierr)
323c
324 if (len.gt.lenm) then
325 write(6,*) nid,'long message in mpi_crecv:',len,lenm
326 call exitt
327 endif
328c
329 return
330 end
331c-----------------------------------------------------------------------
332 subroutine crecv2(mtype,buf,lenm,jnid)
333 include 'mpif.h'
334 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
335 integer status(mpi_status_size)
336C
337 real*4 buf(1)
338 len = lenm
339
340 call mpi_recv (buf,len,mpi_byte
341 $ ,jnid,mtype,nekcomm,status,ierr)
342c
343 if (len.gt.lenm) then
344 write(6,*) nid,'long message in mpi_crecv:',len,lenm
345 call exitt
346 endif
347c
348 return
349 end
350c-----------------------------------------------------------------------
351 subroutine crecv3(mtype,buf,len,lenm)
352 include 'mpif.h'
353 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
354 integer status(mpi_status_size)
355C
356 real*4 buf(1)
357 len = lenm
358 jnid = mpi_any_source
359
360 call mpi_recv (buf,len,mpi_byte
361 $ ,jnid,mtype,nekcomm,status,ierr)
362 call mpi_get_count (status,mpi_byte,len,ierr)
363c
364 if (len.gt.lenm) then
365 write(6,*) nid,'long message in mpi_crecv:',len,lenm
366 call exitt
367 endif
368c
369 return
370 end
371c-----------------------------------------------------------------------
372 integer function numnodes()
373 include 'mpif.h'
374 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
375
376 call mpi_comm_size (nekcomm, numnodes , ierr)
377
378 return
379 end
380c-----------------------------------------------------------------------
381 integer function mynode()
382 include 'mpif.h'
383 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
384 integer myid
385
386 call mpi_comm_rank (nekcomm, myid, ierr)
387 mynode = myid
388
389 return
390 end
391c-----------------------------------------------------------------------
392 real*8 function dnekclock()
393 include 'mpif.h'
394c
395 dnekclock=mpi_wtime()
396c
397 return
398 end
399c-----------------------------------------------------------------------
400 real*8 function dnekclock_sync()
401 include 'mpif.h'
402c
403 call nekgsync()
404 dnekclock_sync=mpi_wtime()
405c
406 return
407 end
408c-----------------------------------------------------------------------
409 subroutine lbcast(ifif)
410C
411C Broadcast logical variable to all processors.
412C
413 include 'SIZE'
414 include 'PARALLEL'
415 include 'mpif.h'
416
417 logical ifif
418
419 if (np.eq.1) return
420
421 item=0
422 if (ifif) item=1
423 call bcast(item,isize)
424 ifif=.false.
425 if (item.eq.1) ifif=.true.
426
427 return
428 end
429c-----------------------------------------------------------------------
430 subroutine bcast(buf,len)
431 include 'mpif.h'
432 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
433 real*4 buf(1)
434
435 call mpi_bcast (buf,len,mpi_byte,0,nekcomm,ierr)
436
437 return
438 end
439c-----------------------------------------------------------------------
440 subroutine create_comm(inewcomm)
441 include 'mpif.h'
442 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
443
444c call mpi_comm_group (mpi_comm_world,itmp,ierr)
445c call mpi_comm_create (mpi_comm_world,itmp,icomm,ierr)
446c call mpi_group_free (itmp,ierr)
447
448 call mpi_comm_dup(nekcomm,inewcomm,ierr)
449
450 return
451 end
452c-----------------------------------------------------------------------
453 function isend(msgtag,x,len,jnid,jpid)
454c
455c Note: len in bytes
456c
457 integer x(1)
458C
459 include 'mpif.h'
460 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
461C
462 call mpi_isend (x,len,mpi_byte,jnid,msgtag
463 $ ,nekcomm,imsg,ierr)
464 isend = imsg
465c write(6,*) nid,' isend:',imsg,msgtag,len,jnid,(x(k),k=1,len/4)
466c
467 return
468 end
469c-----------------------------------------------------------------------
470 function irecv(msgtag,x,len)
471c
472c Note: len in bytes
473c
474 integer x(1)
475C
476 include 'mpif.h'
477 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
478C
479 call mpi_irecv (x,len,mpi_byte,mpi_any_source,msgtag
480 $ ,nekcomm,imsg,ierr)
481 irecv = imsg
482c write(6,*) nid,' irecv:',imsg,msgtag,len
483c
484c
485 return
486 end
487c-----------------------------------------------------------------------
488 subroutine msgwait(imsg)
489c
490 include 'mpif.h'
491 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
492 integer status(mpi_status_size)
493c
494c write(6,*) nid,' msgwait:',imsg
495c
496 call mpi_wait (imsg,status,ierr)
497c
498 return
499 end
500c-----------------------------------------------------------------------
501 subroutine nekgsync()
502
503 include 'mpif.h'
504 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
505
506 call mpi_barrier(nekcomm,ierr)
507
508 return
509 end
510c-----------------------------------------------------------------------
511 subroutine exittr(stringi,rdata,idata)
512 character*1 stringi(132)
513 character*1 stringo(132)
514 character*25 s25
515 include 'SIZE'
516 include 'TOTAL'
517 include 'CTIMER'
518
519 call blank(stringo,132)
520 call chcopy(stringo,stringi,132)
521 len = indx1(stringo,'$',1)
522 write(s25,25) rdata,idata
523 25 format(1x,1p1e14.6,i10)
524 call chcopy(stringo(len),s25,25)
525
526 if (nid.eq.0) write(6,1) (stringo(k),k=1,len+24)
527 1 format('EXIT: ',132a1)
528
529 call exitt
530
531 return
532 end
533c-----------------------------------------------------------------------
534 subroutine exitti(stringi,idata)
535 character*1 stringi(132)
536 character*1 stringo(132)
537 character*11 s11
538 include 'SIZE'
539 include 'TOTAL'
540 include 'CTIMER'
541
542 call blank(stringo,132)
543 call chcopy(stringo,stringi,132)
544 len = indx1(stringo,'$',1)
545 write(s11,11) idata
546 11 format(1x,i10)
547 call chcopy(stringo(len),s11,11)
548
549 if (nid.eq.0) write(6,1) (stringo(k),k=1,len+10)
550 1 format('EXIT: ',132a1)
551
552 call exitt
553
554 return
555 end
556c-----------------------------------------------------------------------
557 subroutine err_chk(ierr,string)
558 character*1 string(132)
559 character*1 ostring(132)
560 character*10 s10
561 include 'SIZE'
562c include 'TOTAL'
563c include 'CTIMER'
564
565 ierr = iglsum(ierr,1)
566 if(ierr.eq.0) return
567
568 len = indx1(string,'$',1)
569 call blank(ostring,132)
570 write(s10,11) ierr
571 11 format(1x,' ierr=',i3)
572
573 call chcopy(ostring,string,len-1)
574 call chcopy(ostring(len),s10,10)
575
576 if (nid.eq.0) write(6,1) (ostring(k),k=1,len+10)
577 1 format('ERROR: ',132a1)
578
579 call exitt
580
581 return
582 end
583c
584c-----------------------------------------------------------------------
585 subroutine exitt0
586
587 include 'SIZE'
588 include 'TOTAL'
589
590 if (nid.eq.0) then
591 write(6,*) ' '
592 write(6,'(A)') 'run successful: dying ...'
593 write(6,*) ' '
594 endif
595
596c if (nid.eq.0) call close_files
597 call print_runtime_info
598 call nek_die(0)
599
600 return
601 end
602c-----------------------------------------------------------------------
603 subroutine exitt
604
605 include 'SIZE'
606 include 'TOTAL'
607
608 if (nid.eq.0) then
609 write(6,*) ' '
610 write(6,'(A)') 'an error occured: dying ...'
611 write(6,*) ' '
612 endif
613
614c call print_stack()
615c if (nid.eq.0) call close_files
616c call print_runtime_info
617 call nek_die(1)
618
619 return
620 end
621c-----------------------------------------------------------------------
622 subroutine print_runtime_info
623 include 'SIZE'
624 include 'TOTAL'
625 include 'CTIMER'
626 include 'mpif.h'
627
628#ifdef PAPI
629 gflops = glsum(dnekgflops(),1)
630#endif
631
632 tstop = dnekclock_sync()
633 ttotal = tstop-etimes
634 tsol = max(ttime - tprep,0.0)
635 nxyz = lx1*ly1*lz1
636
637 dtmp4 = glsum(getmaxrss(),1)/1e9
638
639 if (nid.eq.0) then
640 dtmp1 = 0
641 dtmp2 = 0
642 if(istep.gt.0) then
643 dgp = nvtot
644 dgp = max(dgp,1.)*max(istep,1)
645 dtmp0 = np*(ttime-tprep)
646 dtmp1 = 0
647 if (dtmp0.gt.0) dtmp1 = dgp/dtmp0
648 dtmp2 = (ttime-tprep)/max(istep,1)
649 endif
650 write(6,*) ' '
651 write(6,'(5(A,1p1e13.5,A,/))')
652 & 'total elapsed time : ',ttotal, ' sec'
653 & ,'total solver time w/o IO : ',tsol, ' sec'
654 & ,'time/timestep : ',dtmp2 , ' sec'
655 & ,'avg throughput per timestep : ',dtmp1 , ' gridpts/CPUs'
656 & ,'total max memory usage : ',dtmp4 , ' GB'
657#ifdef PAPI
658 write(6,'(1(A,1p1e13.5,/))')
659 & ,'total Gflops/s : ',gflops
660#endif
661 endif
662 call flush_io
663
664 return
665 end
666c-----------------------------------------------------------------------
667 subroutine nek_die(ierr)
668 include 'SIZE'
669 include 'mpif.h'
670
671 call mpi_finalize (ierr_)
672 call cexit(ierr)
673
674 return
675 end
676c-----------------------------------------------------------------------
677 subroutine fgslib_userExitHandler(istatus)
678
679 call exitt
680
681 return
682 end
683c-----------------------------------------------------------------------
684 subroutine printHeader
685
686 include 'SIZE'
687 include 'PARALLEL'
688
689 include 'HEADER'
690 write(6,*) 'Number of MPI ranks :', np
691c WRITE(6,*) 'REAL wdsize :',WDSIZE
692c WRITE(6,*) 'INTEGER wdsize :',ISIZE
693c WRITE(6,*) 'INTEGER8 wdsize :',ISIZE8
694 WRITE(6,*) ' '
695
696 return
697 end
698c-----------------------------------------------------------------------
699 function igl_running_sum(in)
700c
701 include 'mpif.h'
702 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
703 integer status(mpi_status_size)
704 integer x,w,r
705
706 x = in ! running sum
707 w = in ! working buff
708 r = 0 ! recv buff
709
710 call mpi_scan(x,r,1,mpi_integer,mpi_sum,nekcomm,ierr)
711 igl_running_sum = r
712
713 return
714 end
715c-----------------------------------------------------------------------
716 subroutine platform_timer(ivb) ! mxm, ping-pong, and all_reduce timer
717
718 include 'SIZE'
719 include 'TOTAL'
720
721
722 call mxm_test_all(nid,ivb) ! measure mxm times
723c call exitti('done mxm_test_all$',ivb)
724
725 call comm_test(ivb) ! measure message-passing and all-reduce times
726
727 return
728 end
729c-----------------------------------------------------------------------
730 subroutine comm_test(ivb) ! measure message-passing and all-reduce times
731 ! ivb = 0 --> minimal verbosity
732 ! ivb = 1 --> fully verbose
733 ! ivb = 2 --> smaller sample set(shorter)
734
735 include 'SIZE'
736 include 'PARALLEL'
737
738 call gop_test(ivb) ! added, Jan. 8, 2008
739
740 log_np=log2(np)
741 np2 = 2**log_np
742 if (np2.eq.np) call gp2_test(ivb) ! added, Jan. 8, 2008
743
744 io = 6
745 n512 = min(512,np-1)
746
747 do nodeb=1,n512
748 call pingpongo(alphas,betas,0,nodeb,.0005,io,ivb)
749 if (nid.eq.0) write(6,2) nodeb,np,alphas,betas
750 2 format(2i10,1p2e15.7,' alpha betao')
751 enddo
752
753 do kk=0,2
754 do nodeb=1,n512
755 call pingpong (alphas,betas,0,nodeb,.0005,io,ivb,kk)
756 if (nid.eq.0) write(6,1) nodeb,np,alphas,betas,kk
757 1 format(2i10,1p2e15.7,' alpha beta',i1)
758 enddo
759 enddo
760
761 return
762 end
763c-----------------------------------------------------------------------
764 subroutine pingpong(alphas,betas,nodea,nodeb,dt,io,ivb,kk)
765
766 include 'SIZE'
767 common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
768
769 parameter (lt=lx1*ly1*lz1*lelt)
770 parameter (mwd = 3*lt/2)
771 common /scrns/ x(mwd),y(mwd),x1(mwd),y1(mwd)
772
773 include 'mpif.h'
774 integer status(mpi_status_size)
775
776 character*10 fname
777
778 if (nid.eq.nodea) then
779 write(fname,3) np,nodeb
780 3 format('t',i4.4,'.',i4.4)
781 if (io.ne.6) open (unit=io,file=fname)
782 endif
783
784 call nekgsync
785 call get_msg_vol(msg_vol,dt,nodea,nodeb) ! Est. msg vol for dt s
786
787 nwds = 0
788 if (nid.eq.nodea.and.ivb.gt.0) write(io,*)
789
790 betas = 0 ! Reported inverse bandwidth
791 count = 0
792
793 do itest = 1,500
794
795 nloop = msg_vol/(nwds+2)
796 nloop = min(nloop,1000)
797 nloop = max(nloop,1)
798
799 len = 8*nwds
800
801 if (kk.eq.0)
802 $ call ping_loop (t1,t0,len,nloop,nodea,nodeb,nid,x,y,x1,y1)
803 if (kk.eq.1)
804 $ call ping_loop1(t1,t0,len,nloop,nodea,nodeb,nid,x,y)
805 if (kk.eq.2)
806 $ call ping_loop2(t1,t0,len,nloop,nodea,nodeb,nid,x,y)
807
808 if (nid.eq.nodea) then
809 tmsg = (t1-t0)/(2*nloop) ! 2*nloop--> Double Buffer
810 tmsg = tmsg / 2. ! one-way cost = 1/2 round-trip
811 tpwd = tmsg ! time-per-word
812 if (nwds.gt.0) tpwd = tmsg/nwds
813 if (ivb.gt.0) write(io,1) nodeb,np,nloop,nwds,tmsg,tpwd,kk
814 1 format(3i6,i12,1p2e16.8,' pgn',i1)
815
816 if (nwds.eq.1) then
817 alphas = tmsg
818 elseif (nwds.gt.10000) then ! "average" beta
819 betas = (betas*count + tpwd)/(count+1)
820 count = count + 1
821 endif
822 endif
823
824 if (ivb.eq.2) then
825 nwds = (nwds+1)*1.25
826 else
827 nwds = (nwds+1)*1.016
828 endif
829 if (nwds.gt.mwd) then
830c if (nwds.gt.1024) then
831 if (nid.eq.nodea.and.io.ne.6) close(unit=io)
832 call nekgsync
833 return
834 endif
835
836 enddo
837
838 if (nid.eq.nodea.and.io.ne.6) close(unit=io)
839 call nekgsync
840
841 return
842 end
843c-----------------------------------------------------------------------
844 subroutine pingpongo(alphas,betas,nodea,nodeb,dt,io,ivb)
845
846 include 'SIZE'
847 common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
848
849 parameter (lt=lx1*ly1*lz1*lelt)
850 parameter (mwd = 3*lt)
851 common /scrns/ x(mwd),y(mwd)
852
853 include 'mpif.h'
854 integer status(mpi_status_size)
855
856 character*10 fname
857
858 if (nid.eq.nodea) then
859 write(fname,3) np,nodeb
860 3 format('t',i4.4,'.',i4.4)
861 if (io.ne.6) open (unit=io,file=fname)
862 endif
863
864 call nekgsync
865 call get_msg_vol(msg_vol,dt,nodea,nodeb) ! Est. msg vol for dt s
866
867 nwds = 0
868 if (nid.eq.nodea.and.ivb.gt.0) write(io,*)
869
870 betas = 0 ! Reported inverse bandwidth
871 count = 0
872
873 do itest = 1,500
874 call nekgsync
875 nloop = msg_vol/(nwds+2)
876 nloop = min(nloop,1000)
877 nloop = max(nloop,1)
878
879 len = 8*nwds
880 jnid = mpi_any_source
881
882 if (nid.eq.nodea) then
883
884 msg = irecv(itest,y,1)
885 call csend(itest,x,1,nodeb,0) ! Initiate send, to synch.
886 call msgwait(msg)
887
888 t0 = mpi_wtime ()
889 do i=1,nloop
890 call mpi_irecv(y,len,mpi_byte,mpi_any_source,i
891 $ ,nekcomm,msg,ierr)
892 call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr)
893 call mpi_wait (msg,status,ierr)
894 enddo
895 t1 = mpi_wtime ()
896 tmsg = (t1-t0)/nloop
897 tmsg = tmsg / 2. ! Round-trip message time = twice one-way
898 tpwd = tmsg
899 if (nwds.gt.0) tpwd = tmsg/nwds
900 if (ivb.gt.0) write(io,1) nodeb,np,nloop,nwds,tmsg,tpwd
901 1 format(3i6,i12,1p2e16.8,' pgo')
902
903 if (nwds.eq.1) then
904 alphas = tmsg
905 elseif (nwds.gt.10000) then
906 betas = (betas*count + tpwd)/(count+1)
907 count = count + 1
908 endif
909
910 elseif (nid.eq.nodeb) then
911
912 call crecv(itest,y,1) ! Initiate send, to synch.
913 call csend(itest,x,1,nodea,0)
914
915 t0 = dnekclock()
916 do i=1,nloop
917 call mpi_recv (y,len,mpi_byte
918 $ ,jnid,i,nekcomm,status,ierr)
919 call mpi_send (x,len,mpi_byte,nodea,i,nekcomm,ierr)
920 enddo
921 t1 = dnekclock()
922 tmsg = (t1-t0)/nloop
923
924 endif
925
926 nwds = (nwds+1)*1.016
927 if (nwds.gt.mwd) then
928 if (nid.eq.nodea.and.io.ne.6) close(unit=io)
929 call nekgsync
930 return
931 endif
932
933 enddo
934
935 if (nid.eq.nodea.and.io.ne.6) close(unit=io)
936 call nekgsync
937
938 return
939 end
940c-----------------------------------------------------------------------
941 subroutine get_msg_vol(msg_vol,dt,nodea,nodeb)
942 include 'SIZE'
943 common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
944 parameter (lt=lx1*ly1*lz1*lelt)
945 common /scrns/ x(3*lt),y(3*lt)
946!
947! Est. msg vol for dt s
948!
949 msg_vol = 1000
950
951 nwds = min(1000,lt)
952 nloop = 50
953
954 tmsg = 0.
955 call gop(tmsg,t1,'+ ',1)
956
957 len = 8*nwds
958 if (nid.eq.nodea) then
959
960 msg = irecv(1,y,1)
961 call csend(1,x,1,nodeb,0) ! Initiate send, to synch.
962 call msgwait(msg)
963
964 t0 = dnekclock()
965 do i=1,nloop
966 msg = irecv(i,y,len)
967 call csend(i,x,len,nodeb,0)
968 call msgwait(msg)
969 enddo
970 t1 = dnekclock()
971 tmsg = (t1-t0)/nloop
972 tpwd = tmsg/nwds
973
974 elseif (nid.eq.nodeb) then
975
976 call crecv(1,y,1) ! Initiate send, to synch.
977 call csend(1,x,1,nodea,0)
978
979 t0 = dnekclock()
980 do i=1,nloop
981 call crecv(i,y,len)
982 call csend(i,x,len,nodea,0)
983 enddo
984 t1 = dnekclock()
985 tmsg = (t1-t0)/nloop
986 tmsg = 0.
987
988 endif
989
990 call gop(tmsg,t1,'+ ',1)
991 msg_vol = nwds*(dt/tmsg)
992c if (nid.eq.nodea) write(6,*) nid,msg_vol,nwds,dt,tmsg,' msgvol'
993
994 return
995 end
996c-----------------------------------------------------------------------
997 subroutine gop_test(ivb)
998 include 'SIZE'
999 common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
1000 include 'mpif.h'
1001 integer status(mpi_status_size)
1002
1003 parameter (lt=lx1*ly1*lz1*lelt)
1004 parameter (mwd = 3*lt)
1005 common /scrns/ x(mwd),y(mwd)
1006 common /scruz/ times(2,500)
1007 common /scrcg/ nwd(500)
1008
1009 nwds = 1
1010 mtest = 0
1011 do itest = 1,500
1012 nwds = (nwds+1)*1.016
1013 if (nwds.gt.mwd) goto 100
1014 mtest = mtest+1
1015 nwd(mtest) = nwds
1016 enddo
1017 100 continue
1018
1019 nwds = 1
1020 do itest = mtest,1,-1
1021
1022 tiny = 1.e-27
1023 call cfill(x,tiny,mwd)
1024 nwds = nwd(itest)
1025 call nekgsync
1026
1027 t0 = mpi_wtime ()
1028 call gop(x,y,'+ ',nwds)
1029 call gop(x,y,'+ ',nwds)
1030 call gop(x,y,'+ ',nwds)
1031 call gop(x,y,'+ ',nwds)
1032 call gop(x,y,'+ ',nwds)
1033 call gop(x,y,'+ ',nwds)
1034 t1 = mpi_wtime ()
1035
1036 tmsg = (t1-t0)/6 ! six calls
1037 tpwd = tmsg
1038 if (nwds.gt.0) tpwd = tmsg/nwds
1039 times(1,itest) = tmsg
1040 times(2,itest) = tpwd
1041
1042 enddo
1043 101 continue
1044
1045
1046 if (nid.eq.0) then
1047 nwds = 1
1048 do itest=1,500
1049 if (ivb.gt.0.or.itest.eq.1)
1050 $ write(6,1) np,nwds,(times(k,itest),k=1,2)
1051 1 format(i12,i12,1p2e16.8,' gop')
1052 nwds = (nwds+1)*1.016
1053 if (nwds.gt.mwd) goto 102
1054 enddo
1055 102 continue
1056 endif
1057
1058 return
1059 end
1060c-----------------------------------------------------------------------
1061 subroutine gp2_test(ivb)
1062
1063 include 'SIZE'
1064 include 'mpif.h'
1065
1066 common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
1067 integer status(mpi_status_size)
1068
1069 parameter (lt=lx1*ly1*lz1*lelt)
1070 parameter (mwd = 3*lt)
1071 common /scrns/ x(mwd),y(mwd)
1072 common /scruz/ times(2,500)
1073
1074 call rzero(x,mwd)
1075
1076 nwds = 1
1077 do itest = 1,500
1078 call gp2(x,y,'+ ',1,nid,np)
1079
1080 t0 = mpi_wtime ()
1081 call gp2(x,y,'+ ',nwds,nid,np)
1082 call gp2(x,y,'+ ',nwds,nid,np)
1083 call gp2(x,y,'+ ',nwds,nid,np)
1084 call gp2(x,y,'+ ',nwds,nid,np)
1085 t1 = mpi_wtime ()
1086
1087 tmsg = (t1-t0)/4 ! four calls
1088 tpwd = tmsg
1089 if (nwds.gt.0) tpwd = tmsg/nwds
1090 times(1,itest) = tmsg
1091 times(2,itest) = tpwd
1092
1093 nwds = (nwds+1)*1.016
1094 if (nwds.gt.mwd) goto 101
1095 enddo
1096 101 continue
1097
1098
1099 if (nid.eq.0) then
1100 nwds = 1
1101 do itest=1,500
1102 if (ivb.gt.0.or.itest.eq.1)
1103 $ write(6,1) np,nwds,(times(k,itest),k=1,2)
1104 1 format(i12,i12,1p2e16.8,' gp2')
1105 nwds = (nwds+1)*1.016
1106 if (nwds.gt.mwd) goto 102
1107 enddo
1108 102 continue
1109 endif
1110
1111 return
1112 end
1113c-----------------------------------------------------------------------
1114 integer function xor(m,n)
1115c
1116c If NOT running on a parallel processor, it is sufficient to
1117c have this routine return a value of XOR=1.
1118c
1119c Pick one of the following:
1120c
1121c UNIX 4.2, f77:
1122 XOR = OR(M,N)-AND(M,N)
1123c
1124c Intel FTN286:
1125c XOR = M.NEQV.N
1126c
1127c Ryan-McFarland Fortran
1128C XOR = IEOR(M,N)
1129c
1130c XOR = 0
1131c IF(M.EQ.1 .OR. N.EQ.1) XOR=1
1132c IF(M.EQ.0 .AND. N.EQ.0) XOR=0
1133c IF(M.EQ.1 .AND. N.EQ.1) XOR=0
1134c IF(M.GT.1 .OR.N.GT.1 .OR.M.LT.0.OR.N.LT.0) THEN
1135c PRINT*,'ERROR IN XOR'
1136c STOP
1137c ENDIF
1138C
1139 return
1140 end
1141c-----------------------------------------------------------------------
1142 subroutine gp2( x, w, op, n, nid, np)
1143c
1144c Global vector commutative operation using spanning tree.
1145c
1146c Std. fan-in/fan-out
1147
1148 real x(n), w(n)
1149 character*3 op
1150
1151 integer bit, bytes, cnt, diff, spsize, i,
1152 * parent, troot, xor, root, lnp, log2
1153 logical ifgot
1154
1155 integer type
1156 save type
1157 data type /998/
1158
1159 type = type+100
1160 if (type.gt.9992) type=type-998
1161 typer = type-1
1162 bytes = 8*n
1163
1164 root = 0
1165 troot = max0((nid/np)*np, root)
1166 diff = xor(nid,troot)
1167 nullpid = 0
1168
1169c Accumulate contributions from children, if any
1170 level2=1
1171 5 continue
1172 level=level2
1173 level2=level+level
1174 if (mod(nid,level2).ne.0) goto 20
1175 call crecv(type,w,bytes)
1176 if (op.eq.'+ ') then
1177 do i=1,n
1178 x(i) = x(i) + w(i)
1179 enddo
1180 elseif (op.eq.'* ') then
1181 do i=1,n
1182 x(i) = x(i) * w(i)
1183 enddo
1184 elseif (op.eq.'M ') then
1185 do i=1,n
1186 x(i) = max(x(i),w(i))
1187 enddo
1188 elseif (op.eq.'m ') then
1189 do i=1,n
1190 x(i) = min(x(i),w(i))
1191 enddo
1192 endif
1193 if (level2.lt.np) goto 5
1194
1195c Pass result back to parent
1196 20 parent = nid-level
1197 if (nid .ne. 0) call csend(type,x,bytes,parent,nullpid)
1198
1199c Await final answer from node 0 via log_2 fan out
1200 level=np/2
1201 ifgot=.false.
1202 if (nid.eq.root) ifgot=.true.
1203
1204 lnp = log2(np)
1205 do i=1,lnp
1206 if (ifgot) then
1207 jnid=nid+level
1208 call csend(typer,x,bytes,jnid,nullpid)
1209 elseif (mod(nid,level).eq.0) then
1210 call crecv(typer,x,bytes)
1211 ifgot=.true.
1212 endif
1213 level=level/2
1214 enddo
1215
1216 return
1217 end
1218c-----------------------------------------------------------------------
1219 subroutine ping_loop1(t1,t0,len,nloop,nodea,nodeb,nid,x,y)
1220
1221 common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
1222
1223 real x(1),y(1)
1224
1225 include 'mpif.h'
1226 integer status(mpi_status_size)
1227
1228 i=0
1229 if (nid.eq.nodea) then
1230 call nekgsync
1231 call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr) ! 1b
1232 call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 1a
1233c call mpi_rsend(x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 1a
1234 call msgwait(msg) ! 1b
1235
1236 t0 = mpi_wtime ()
1237 do i=1,nloop
1238 call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr) ! 2b
1239 call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 2a
1240c call mpi_rsend(x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 2a
1241 call mpi_wait (msg,status,ierr) ! 2b
1242 enddo
1243 t1 = mpi_wtime ()
1244
1245 elseif (nid.eq.nodeb) then
1246
1247 call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr) ! 1a
1248 call nekgsync
1249 call mpi_wait (msg,status,ierr) ! 1a
1250
1251 j=i
1252 do i=1,nloop
1253 call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr) ! 2a
1254c call mpi_rsend(x,len,mpi_byte,nodea,j,nekcomm,ierr) ! 1b
1255 call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr) ! 1b
1256 call mpi_wait (msg,status,ierr) ! 2a
1257 j=i
1258 enddo
1259c call mpi_rsend(x,len,mpi_byte,nodea,j,nekcomm,ierr) ! nb
1260 call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr) ! nb
1261
1262 else
1263 call nekgsync
1264 endif
1265
1266 return
1267 end
1268c-----------------------------------------------------------------------
1269 subroutine ping_loop2(t1,t0,len,nloop,nodea,nodeb,nid,x,y)
1270
1271 common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
1272
1273 real x(1),y(1)
1274
1275 include 'mpif.h'
1276 integer status(mpi_status_size)
1277
1278 i=0
1279 if (nid.eq.nodea) then
1280 call nekgsync
1281 call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr) ! 1b
1282 call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 1a
1283 call msgwait(msg) ! 1b
1284
1285 t0 = mpi_wtime ()
1286 do i=1,nloop
1287 call mpi_send (x,len,mpi_byte,nodeb,i,nekcomm,ierr) ! 2a
1288 call mpi_irecv(y,len,mpi_byte,nodeb,i,nekcomm,msg,ierr) ! 2b
1289 call mpi_wait (msg,status,ierr) ! 2b
1290 enddo
1291 t1 = mpi_wtime ()
1292
1293 elseif (nid.eq.nodeb) then
1294
1295 call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr) ! 1a
1296 call nekgsync
1297 call mpi_wait (msg,status,ierr) ! 1a
1298
1299 j=i
1300 do i=1,nloop
1301 call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr) ! 1b
1302 call mpi_irecv(y,len,mpi_byte,nodea,i,nekcomm,msg,ierr) ! 2a
1303 call mpi_wait (msg,status,ierr) ! 2a
1304 j=i
1305 enddo
1306 call mpi_send (x,len,mpi_byte,nodea,j,nekcomm,ierr) ! nb
1307
1308 else
1309 call nekgsync
1310 endif
1311
1312 return
1313 end
1314c-----------------------------------------------------------------------
1315 subroutine ping_loop(t1,t0,len,nloop,nodea,nodeb,nid,x1,y1,x2,y2)
1316c Double Buffer : does 2*nloop timings
1317
1318 common /nekmpi/ mid,np,nekcomm,nekgroup,nekreal
1319
1320 real x1(1),y1(1),x2(1),y2(1)
1321
1322 include 'mpif.h'
1323 integer status(mpi_status_size)
1324
1325 itag=1
1326 if (nid.eq.nodea) then
1327 call mpi_irecv(y1,len,mpi_byte,nodeb,itag,nekcomm,msg1,ierr) ! 1b
1328 call nekgsync
1329
1330
1331 t0 = mpi_wtime ()
1332 do i=1,nloop
1333 call mpi_send (x1,len,mpi_byte,nodeb,itag,nekcomm,ierr) ! 1a
1334 call mpi_irecv(y2,len,mpi_byte,nodeb,itag,nekcomm,msg2,ierr)! 2b
1335 call mpi_wait (msg1,status,ierr) ! 1b
1336 call mpi_send (x2,len,mpi_byte,nodeb,itag,nekcomm,ierr) ! 2a
1337 call mpi_irecv(y1,len,mpi_byte,nodeb,itag,nekcomm,msg1,ierr)! 3b
1338 call mpi_wait (msg2,status,ierr) ! 2b
1339 enddo
1340 t1 = mpi_wtime ()
1341 call mpi_send (x1,len,mpi_byte,nodeb,itag,nekcomm,ierr) ! nb
1342 call mpi_wait (msg1,status,ierr) ! nb
1343
1344 elseif (nid.eq.nodeb) then
1345
1346 call mpi_irecv(y1,len,mpi_byte,nodea,itag,nekcomm,msg1,ierr) ! nb
1347 call nekgsync
1348
1349
1350 do i=1,nloop
1351 call mpi_wait (msg1,status,ierr) ! 1a
1352 call mpi_send (x1,len,mpi_byte,nodea,itag,nekcomm,ierr) ! 1b
1353 call mpi_irecv(y2,len,mpi_byte,nodea,itag,nekcomm,msg2,ierr)! 2a
1354 call mpi_wait (msg2,status,ierr) ! 2a
1355 call mpi_send (x2,len,mpi_byte,nodea,itag,nekcomm,ierr) ! 2b
1356 call mpi_irecv(y1,len,mpi_byte,nodea,itag,nekcomm,msg1,ierr)! 3a
1357 enddo
1358 call mpi_wait (msg1,status,ierr) ! 2a
1359 call mpi_send (x1,len,mpi_byte,nodea,itag,nekcomm,ierr) ! nb
1360
1361 else
1362 call nekgsync
1363 endif
1364
1365 return
1366 end
1367c-----------------------------------------------------------------------
1368 integer*8 function i8gl_running_sum(in)
1369c
1370 include 'mpif.h'
1371
1372 integer*8 in
1373
1374 common /nekmpi/ nid,np,nekcomm,nekgroup,nekreal
1375 integer status(mpi_status_size)
1376 integer*8 x,r
1377
1378 x = in ! running sum
1379 r = 0 ! recv buff
1380
1381 call mpi_scan(x,r,1,mpi_integer8,mpi_sum,nekcomm,ierr)
1382 i8gl_running_sum = r
1383
1384 return
1385 end
1386c-----------------------------------------------------------------------
1387 subroutine close_files
1388 logical ifopen
1389
1390 do ii=1,99
1391 if(ii.ne.5.and.ii.ne.6) then
1392 inquire(unit=ii,opened=ifopen)
1393 if(ifopen) close(ii)
1394 endif
1395 enddo
1396
1397 return
1398 end
1399c----------------------------------------------------------------------
1400 subroutine neknekgsync()
1401
1402 include 'SIZE'
1403 include 'PARALLEL'
1404
1405 call mpi_barrier(iglobalcomm,ierr)
1406
1407 return
1408 end
1409c------------------------------------------------------------------------
1410 subroutine setnekcomm(comm_in)
1411
1412 include 'SIZE'
1413
1414 integer comm_in
1415 common /nekmpi/ mid,mp,nekcomm,nekgroup,nekreal
1416
1417 nekcomm = comm_in
1418 call mpi_comm_size(nekcomm,mp,ierr)
1419 np = mp
1420
1421 return
1422 end
Note: See TracBrowser for help on using the repository browser.