Skip to content

Commit

Permalink
feat(hpc): add option to print partition table to file (#2098)
Browse files Browse the repository at this point in the history
* - option to print partition table to file

* - print load balance upon request, also when default
- add test for default partitioning and printing

* - add to release notes
  • Loading branch information
mjr-deltares authored Dec 10, 2024
1 parent 2d8c5b6 commit 4acac0d
Show file tree
Hide file tree
Showing 5 changed files with 161 additions and 63 deletions.
56 changes: 31 additions & 25 deletions autotest/test_par_hpc01.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
cases = [
"par_hpc01-9cpu",
"par_hpc01-3cpu",
"par_hpc01-3cpu-default",
"par_hpc01-2cpu",
"par_hpc01-1cpu",
]
Expand All @@ -28,8 +29,8 @@
domain_grid = (3, 3)

# specified per case
models_per_cpu = [1, 3, 5, 9]
ncpus_cases = [9, 3, 2, 1]
models_per_cpu = [1, 3, 3, 5, 9]
ncpus_cases = [9, 3, 3, 2, 1]

nlay = 1
nrow = 3
Expand Down Expand Up @@ -83,7 +84,11 @@ def get_simulation(idx, ws):
highest_rank = rnk
partitions = [(n, highest_rank - r) for n, r in partitions]

hpc = flopy.mf6.ModflowUtlhpc(sim, partitions=partitions)
if cases[idx] == "par_hpc01-3cpu-default":
# apply default partitioning in this case
hpc = flopy.mf6.ModflowUtlhpc(sim, print_table=True)
else:
hpc = flopy.mf6.ModflowUtlhpc(sim, print_table=True, partitions=partitions)

tdis = flopy.mf6.ModflowTdis(sim, time_units="DAYS", nper=nper, perioddata=tdis_rc)

Expand Down Expand Up @@ -253,28 +258,29 @@ def check_output(idx, test):

ncpus = ncpus_cases[idx]
partitions = mf6_sim.name_file.package_dict["hpc"].partitions.array
if ncpus > 1:
for name, rank in partitions:
model_id = mf6_sim.model_names.index(name) + 1
list_file = pl.Path(test.workspace, f"mfsim.p{rank}.lst")
success_msg = f"GWF6 model {model_id} will be created"
success = False
for line in open(list_file).readlines():
if success_msg in line:
success = True
break
assert success, f"Model {model_id} not created on target process {rank}"
elif ncpus == 1:
list_file = pl.Path(test.workspace, "mfsim.lst")
for name, rank in partitions:
model_id = mf6_sim.model_names.index(name) + 1
success_msg = f"GWF6 model {model_id} will be created"
success = False
for line in open(list_file).readlines():
if success_msg in line:
success = True
break
assert success, f"Model {model_id} not created on target process {rank}"
if cases[idx] != "par_hpc01-3cpu-default":
if ncpus > 1:
for name, rank in partitions:
model_id = mf6_sim.model_names.index(name) + 1
list_file = pl.Path(test.workspace, f"mfsim.p{rank}.lst")
success_msg = f"GWF6 model {model_id} will be created"
success = False
for line in open(list_file).readlines():
if success_msg in line:
success = True
break
assert success, f"Model {model_id} not created on target process {rank}"
elif ncpus == 1:
list_file = pl.Path(test.workspace, "mfsim.lst")
for name, rank in partitions:
model_id = mf6_sim.model_names.index(name) + 1
success_msg = f"GWF6 model {model_id} will be created"
success = False
for line in open(list_file).readlines():
if success_msg in line:
success = True
break
assert success, f"Model {model_id} not created on target process {rank}"


@pytest.mark.parallel
Expand Down
1 change: 1 addition & 0 deletions doc/ReleaseNotes/develop.tex
Original file line number Diff line number Diff line change
Expand Up @@ -92,5 +92,6 @@
\underline{PARALLEL}
\begin{itemize}
\item Writing budget files for an MVR package in the GWF-GWF Exchange caused an error when running in parallel; this has been fixed.
\item An option has been added to the HPC package that prints the partition table to the simulation listing file.
\end{itemize}

8 changes: 4 additions & 4 deletions doc/mf6io/mf6ivar/dfn/utl-hpc.dfn
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
# flopy parent_name_type parent_package MFSimulation

block options
name print_input
name print_table
type keyword
reader urword
optional true
longname model print input to listing file
description keyword to indicate that the input will be printed.
longname model print table to listing file
description keyword to indicate that the partition table will be printed to the listing file.

block options
name dev_log_mpi
Expand All @@ -25,7 +25,7 @@ block partitions
name partitions
type recarray mname mrank
reader urword
optional
optional true
longname list of partition numbers
description is the list of zero-based partition numbers.

Expand Down
143 changes: 117 additions & 26 deletions src/Distributed/DistributedSim.f90
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
module DistributedSimModule
use KindModule, only: I4B, LGP
use SimVariablesModule, only: idm_context, nr_procs, proc_id, errmsg, warnmsg
use SimVariablesModule, only: idm_context, simulation_mode, nr_procs, proc_id, &
errmsg, warnmsg
use ConstantsModule, only: LENMEMPATH, LENMODELNAME, LINELENGTH, LENPACKAGETYPE
use ArrayHandlersModule, only: ifind
use CharacterStringModule, only: CharacterStringType
use SimModule, only: store_error, store_error_filename, count_errors, &
store_warning
use MemoryManagerModule, only: mem_allocate, mem_deallocate, mem_setptr, &
mem_print_detailed, get_isize
use MemoryManagerExtModule, only: mem_set_value
use MemoryHelperModule, only: create_mem_path

implicit none
Expand All @@ -21,6 +23,7 @@ module DistributedSimModule
integer(I4B), pointer :: nr_models !< the total (global) number of models, equals the length of the model block in mfsim.nam
integer(I4B), dimension(:), pointer, contiguous :: load_mask => null() !< mask for loading models and exchanges, 1 when active on this processor, else 0
integer(I4B), dimension(:), pointer, contiguous :: model_ranks => null() !< load balance: model rank (0,...,nr_procs-1) per global model id (array index)
logical(LGP), pointer :: print_ptable !< when true, the partition table is printed to file
contains
procedure :: create
procedure :: get_load_mask
Expand All @@ -31,6 +34,7 @@ module DistributedSimModule
procedure, private :: set_load_balance_from_input
procedure, private :: set_load_balance_default
procedure, private :: validate_load_balance
procedure, private :: print_load_balance
end type

! singleton, private member
Expand Down Expand Up @@ -67,6 +71,9 @@ subroutine create(this)
call mem_allocate(this%nr_models, 'NUMMODELS', this%memory_path)
this%nr_models = nmod

call mem_allocate(this%print_ptable, 'PRINT_PTABLE', this%memory_path)
this%print_ptable = .false.

end subroutine create

!> @brief Return pointer to the load mask for models
Expand Down Expand Up @@ -118,11 +125,15 @@ end subroutine create_load_mask
!> @brief Get the model load balance for the simulation
!<
function get_load_balance(this) result(mranks)
use SimVariablesModule, only: iout
use UtlHpcInputModule, only: UtlHpcParamFoundType
class(DistributedSimType) :: this !< this distributed sim instance
integer(I4B), dimension(:), pointer :: mranks !< the load balance: array of ranks per model id
! local
integer(I4B) :: isize
character(len=LENMEMPATH) :: hpc_mempath
logical(LGP) :: hpc6_present, partitions_present
character(len=LENMEMPATH) :: simnam_mempath, hpc_mempath
type(UtlHpcParamFoundType) :: found

! if load balance available, return here:
if (associated(this%model_ranks)) then
Expand All @@ -133,38 +144,57 @@ function get_load_balance(this) result(mranks)
call mem_allocate(this%model_ranks, this%nr_models, 'MODELRANKS', &
this%memory_path)

! check if exists (partitions block is optional in HPC file)
hpc_mempath = create_mem_path('UTL', 'HPC', idm_context)
call get_isize('MNAME', hpc_mempath, isize)
! check for optional HPC file
simnam_mempath = create_mem_path('SIM', 'NAM', idm_context)
call get_isize('HPC6_FILENAME', simnam_mempath, isize)
hpc6_present = isize > 0

if (isize > 0) then
! HPC file present
if (nr_procs == 1) then
! handle serial case
if (simulation_mode == 'SEQUENTIAL') then
if (hpc6_present) then
write (warnmsg, *) "Ignoring PARTITIONS block in HPC file when "// &
"running a serial process"
call store_warning(warnmsg)

! single process, everything on cpu 0:
this%model_ranks = 0
else
! set balance from HPC file
call this%set_load_balance_from_input()
! check if valid configuration
call this%validate_load_balance()
end if

! single process, everything on cpu 0:
this%model_ranks = 0
mranks => this%model_ranks
return
end if

! continue for PARALLEL mode only:
write (iout, '(/1x,a)') 'PROCESSING HPC DATA'

hpc_mempath = create_mem_path('UTL', 'HPC', idm_context)
! source the optional PRINT_TABLE flag from the HPC options block
call mem_set_value(this%print_ptable, 'PRINT_TABLE', hpc_mempath, &
found%print_table)
! check if optional partition block exists
call get_isize('MNAME', hpc_mempath, isize)
partitions_present = isize > 0

! fill model ranks (i.e. the load balance)
if (partitions_present) then
! set balance from HPC file
call this%set_load_balance_from_input()
call this%validate_load_balance()
write (iout, '(1x,a)') 'Read partition data from HPC file'
else
! no HPC file present
if (nr_procs == 1) then
! single process, everything on cpu 0:
this%model_ranks = 0
else
! set balance from default algorithm
call this%set_load_balance_default()
end if
! no PARTITIONS block present, set balance with default algorithm
call this%set_load_balance_default()
write (iout, '(1x,a)') 'Generate default partition data'
end if

mranks => this%model_ranks

! print to listing file
if (this%print_ptable) then
call this%print_load_balance()
end if

write (iout, '(1x,a)') 'END OF HPC DATA'

end function get_load_balance

!> @brief Load load balance from the input configuration
Expand Down Expand Up @@ -390,6 +420,7 @@ end subroutine set_load_balance_default
!<
subroutine validate_load_balance(this)
class(DistributedSimType) :: this
! local
character(len=LENMEMPATH) :: input_mempath
type(CharacterStringType), dimension(:), contiguous, &
pointer :: mtypes !< model types
Expand Down Expand Up @@ -450,15 +481,75 @@ subroutine validate_load_balance(this)

end subroutine validate_load_balance

!> @brief Print the load balance table to the listing file
!!
!! Writes one table row per model listed in the simulation name file
!! input context: the global model id, the model name, the model type,
!! the rank stored for that model in this%model_ranks, and an 'X'
!! marker when that rank equals the id of the current process.
!<
subroutine print_load_balance(this)
  use TableModule, only: TableType, table_cr
  use ConstantsModule, only: TABLEFT
  use SimVariablesModule, only: iout, proc_id
  class(DistributedSimType) :: this !< this distributed sim instance
  ! local
  type(TableType), pointer :: inputtab !< table object used for printing
  character(len=LINELENGTH) :: tag, term
  character(len=LENMEMPATH) :: input_mempath
  type(CharacterStringType), dimension(:), contiguous, &
    pointer :: mtypes !< model types
  type(CharacterStringType), dimension(:), contiguous, &
    pointer :: mnames !< model names
  integer(I4B) :: im, nr_models

  ! give the pointer a defined (disassociated) status on every call;
  ! initializing it in the declaration would imply the SAVE attribute
  nullify (inputtab)

  ! model names and types are read from the simulation name file input
  input_mempath = create_mem_path('SIM', 'NAM', idm_context)

  call mem_setptr(mtypes, 'MTYPE', input_mempath)
  call mem_setptr(mnames, 'MNAME', input_mempath)

  ! setup table: one row per model, five columns, written to unit iout
  nr_models = size(mnames)
  call table_cr(inputtab, 'HPC', 'HPC PARTITION DATA')
  call inputtab%table_df(nr_models, 5, iout)

  ! add columns
  tag = 'ID'
  call inputtab%initialize_column(tag, 8, alignment=TABLEFT)
  tag = 'NAME'
  call inputtab%initialize_column(tag, LENMODELNAME + 4, alignment=TABLEFT)
  tag = 'TYPE'
  call inputtab%initialize_column(tag, 8, alignment=TABLEFT)
  tag = 'RANK'
  call inputtab%initialize_column(tag, 8, alignment=TABLEFT)
  tag = 'LOCAL'
  call inputtab%initialize_column(tag, 8, alignment=TABLEFT)

  ! add one row per model
  do im = 1, nr_models
    call inputtab%add_term(im)
    term = mnames(im)
    call inputtab%add_term(term)
    term = mtypes(im)
    call inputtab%add_term(term)
    call inputtab%add_term(this%model_ranks(im))
    ! flag the models that are assigned to the current process
    term = ''
    if (this%model_ranks(im) == proc_id) term = 'X'
    call inputtab%add_term(term)
  end do

  ! deallocate
  call inputtab%table_da()
  deallocate (inputtab)

end subroutine print_load_balance

!> @brief clean up
!<
subroutine destroy(this)
class(DistributedSimType) :: this

call mem_deallocate(this%load_mask)
call mem_deallocate(this%model_ranks)
if (associated(this%load_mask)) then
call mem_deallocate(this%load_mask)
call mem_deallocate(this%model_ranks)
end if

call mem_deallocate(this%nr_models)
call mem_deallocate(this%print_ptable)

! delete singleton instance
if (associated(dist_sim)) deallocate (dist_sim)
Expand Down
16 changes: 8 additions & 8 deletions src/Idm/utl-hpcidm.f90
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ module UtlHpcInputModule
public utl_hpc_subpackages

type UtlHpcParamFoundType
logical :: print_input = .false.
logical :: print_table = .false.
logical :: dev_log_mpi = .false.
logical :: mname = .false.
logical :: mrank = .false.
Expand All @@ -27,16 +27,16 @@ module UtlHpcInputModule
]

type(InputParamDefinitionType), parameter :: &
utlhpc_print_input = InputParamDefinitionType &
utlhpc_print_table = InputParamDefinitionType &
( &
'UTL', & ! component
'HPC', & ! subcomponent
'OPTIONS', & ! block
'PRINT_INPUT', & ! tag name
'PRINT_INPUT', & ! fortran variable
'PRINT_TABLE', & ! tag name
'PRINT_TABLE', & ! fortran variable
'KEYWORD', & ! type
'', & ! shape
'model print input to listing file', & ! longname
'model print table to listing file', & ! longname
.false., & ! required
.false., & ! multi-record
.false., & ! preserve case
Expand Down Expand Up @@ -101,7 +101,7 @@ module UtlHpcInputModule
type(InputParamDefinitionType), parameter :: &
utl_hpc_param_definitions(*) = &
[ &
utlhpc_print_input, &
utlhpc_print_table, &
utlhpc_dev_log_mpi, &
utlhpc_mname, &
utlhpc_mrank &
Expand All @@ -118,7 +118,7 @@ module UtlHpcInputModule
'RECARRAY MNAME MRANK', & ! type
'', & ! shape
'list of partition numbers', & ! longname
.true., & ! required
.false., & ! required
.false., & ! multi-record
.false., & ! preserve case
.false., & ! layered
Expand All @@ -142,7 +142,7 @@ module UtlHpcInputModule
), &
InputBlockDefinitionType( &
'PARTITIONS', & ! blockname
.true., & ! required
.false., & ! required
.true., & ! aggregate
.false. & ! block_variable
) &
Expand Down

0 comments on commit 4acac0d

Please sign in to comment.