Skip to content

Commit

Permalink
Added safety check to ensure bcast is done correctly under SH_MEM_COMM
Browse files Browse the repository at this point in the history
  • Loading branch information
josephzhang8 committed Dec 13, 2024
1 parent afc1638 commit 5539588
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 13 deletions.
2 changes: 1 addition & 1 deletion cmake/SCHISM.local.build
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#####################################################################
#Leave this on
set(BLD_STANDALONE ON CACHE BOOLEAN "SCHISM standalone")
set(SH_MEM_COMM OFF CACHE BOOLEAN "Use shared memory communicator")
set(SH_MEM_COMM ON CACHE BOOLEAN "Use shared memory communicator")

#Default is NO_PARMETIS=OFF, i.e. use ParMETIS
set(NO_PARMETIS OFF CACHE BOOLEAN "Turn off ParMETIS")
Expand Down
36 changes: 25 additions & 11 deletions src/Core/schism_msgp.F90
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ module schism_msgp
TYPE(C_PTR),public,save :: c_window_ptr
! real(4),public,save,pointer :: ath3(:,:,:,:)
integer,public,save :: disp_unit ! displacement stride in shared window
#endif ! SH_MEM_COMM
#endif /*SH_MEM_COMM*/

!-----------------------------------------------------------------------------
! Private data
Expand Down Expand Up @@ -394,7 +394,7 @@ subroutine parallel_init(communicator)
implicit none
integer, optional :: communicator

integer :: comm2,nproc3,myrank3,nproc_compute
integer :: comm2,nproc3,myrank3,nproc_compute,itmp,itmp2


if (present(communicator)) then
Expand Down Expand Up @@ -433,15 +433,6 @@ subroutine parallel_init(communicator)
CALL MPI_Comm_size(comm2, nproc3, ierr)
CALL MPI_Comm_rank(comm2, myrank3,ierr)

#ifdef SH_MEM_COMM
! Set up by-node shared-memory communicator for compute nodes
if(task_id==1) then
CALL MPI_Comm_split_type(comm2,MPI_COMM_TYPE_SHARED,0,MPI_INFO_NULL,comm_node,ierr)
CALL MPI_Comm_size(comm_node, nproc_node, ierr)
CALL MPI_Comm_rank(comm_node, myrank_node,ierr)
endif
#endif ! SH_MEM_COMM

if(task_id==1) then !compute
comm=comm2
nproc=nproc3
Expand All @@ -454,6 +445,29 @@ subroutine parallel_init(communicator)
! print*, 'Scribes:',myrank_schism,nproc_schism,myrank3,nproc3,nproc_compute,nscribes,task_id
endif

#ifdef SH_MEM_COMM
! Set up by-node shared-memory communicator for compute nodes
if(task_id==1) then
CALL MPI_Comm_split_type(comm,MPI_COMM_TYPE_SHARED,0,MPI_INFO_NULL,comm_node,ierr)
CALL MPI_Comm_size(comm_node, nproc_node, ierr)
CALL MPI_Comm_rank(comm_node, myrank_node,ierr)

!Ensure that myrank_node=0 includes myrank=0 for some bcast (read in by
!myrank_node=0 and then bcast from myrank=0 in comm)
if(myrank_node==0.and.myrank==0) then
itmp=1 !true
else
itmp=0
endif
!comm_node seems to cause error, maybe due to non-unique ranks?
call mpi_allreduce(itmp,itmp2,1,itype,MPI_SUM,comm,ierr)
if(itmp2==0) then
write(*,*)'Ranks:',myrank_node,myrank,itmp,itmp2
call parallel_abort('MSGP: myrank=0 is not the head process in share mem')
endif
endif !task_id
#endif /*SH_MEM_COMM*/

end subroutine parallel_init


Expand Down
2 changes: 1 addition & 1 deletion src/Readme.beta_notes
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ git versions:
161. `ca44ba0` (1 May 2024): fixed bugs in vegetation&marsh module (uninited sav_h0 etc)
162. `cfb58a5` [5 July 2024] replace fatal errors with warning in btrack (no intersecting edges). These are exaordinary exceptions that occur rarely.
163. `2f88fc5` (7 Aug 2024): changed horizontal diffusion method to filter;
164. `c441ace` (12 Dec 2024): added share memory communicator for efficient bcast;
164. `c441ace` (12 Dec 2024): added share memory communicator for efficient bcast for if_source/=0;

================================================================================================
(D) Auto-test history:
Expand Down

0 comments on commit 5539588

Please sign in to comment.