Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

code to use HPE SMARTREDIS #213

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion cime_config/buildexe
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def _main_func():
atm_model = case.get_value("COMP_ATM")
gmake_args = get_standard_makefile_args(case)
esmf_aware_threading = case.get_value("ESMF_AWARE_THREADING")
use_smartsim = case.get_value("USE_SMARTSIM")

# Determine valid components
valid_comps = []
Expand Down Expand Up @@ -77,6 +78,10 @@ def _main_func():
if esmf_aware_threading:
gmake_args += " USER_CPPDEFS=-DESMF_AWARE_THREADING"

if use_smartsim:
gmake_args += " USE_SMARTSIM=TRUE"


gmake_args += " IAC_PRESENT=FALSE"
expect((num_esp is None) or (int(num_esp) == 1), "ESP component restricted to one instance")

Expand Down Expand Up @@ -109,7 +114,7 @@ def _main_func():
.format(gmake, gmake_j, exename, gmake_args, makefile)


rc, out, err = run_cmd(cmd,from_dir=bld_root)
rc, out, err = run_cmd(cmd,from_dir=bld_root, verbose=True)
expect(rc==0,"Command {} failed rc={}\nout={}\nerr={}".format(cmd,rc,out,err))
logger.info(out)

Expand Down
2 changes: 1 addition & 1 deletion cime_config/buildnml
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def _create_drv_namelists(case, infile, confdir, nmlgen, files):
# (2) Write namelist file drv_in and initial input dataset list.
#--------------------------------
namelist_file = os.path.join(confdir, "drv_in")
drv_namelist_groups = ["papi_inparm", "pio_default_inparm", "prof_inparm", "debug_inparm"]
drv_namelist_groups = ["smartsim_inparm", "papi_inparm", "pio_default_inparm", "prof_inparm", "debug_inparm"]
nmlgen.write_output_file(namelist_file, data_list_path=data_list_path, groups=drv_namelist_groups)

#--------------------------------
Expand Down
42 changes: 30 additions & 12 deletions cime_config/config_component.xml
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@
<group>build_def</group>
<file>env_build.xml</file>
<desc>Output root directory for each machine.
Base directory for build and run directories.
Base directory for build and run directories.
</desc>
</entry>

Expand Down Expand Up @@ -1003,9 +1003,9 @@
<file>env_run.xml</file>
<desc>
Determines what ESMF log files (if any) are generated when
USE_ESMF_LIB is TRUE.
USE_ESMF_LIB is TRUE.
ESMF_LOGKIND_SINGLE: Use a single log file, combining messages from
all of the PETs. Not supported on some platforms.
all of the PETs. Not supported on some platforms.
ESMF_LOGKIND_MULTI: Use multiple log files -- one per PET.
ESMF_LOGKIND_NONE: Do not issue messages to a log file.
By default, no ESMF log files are generated.
Expand Down Expand Up @@ -1046,8 +1046,8 @@
<group>run_flags</group>
<file>env_run.xml</file>
<desc>Turns on component barriers for component timing.
This variable is for testing and debugging only and should never
be set for a production run.
This variable is for testing and debugging only and should never
be set for a production run.
</desc>
</entry>

Expand Down Expand Up @@ -1255,7 +1255,7 @@
<group>run_domain</group>
<file>env_run.xml</file>
<desc>Latitude of grid location, in single column mode interpolate datasets to this location
in single point mode assume all datasets are at this location</desc>
in single point mode assume all datasets are at this location</desc>
</entry>

<entry id="PTS_LON">
Expand All @@ -1264,7 +1264,7 @@
<group>run_domain</group>
<file>env_run.xml</file>
<desc>Longitude of grid location, in single column mode interpolate datasets to this location
in single point mode assume all datasets are at this location</desc>
in single point mode assume all datasets are at this location</desc>
</entry>

<entry id="PTS_DOMAINFILE">
Expand All @@ -1273,10 +1273,10 @@
<group>run_domain</group>
<file>env_run.xml</file>
<desc>used only if if PTS_LAT and PTS_LON are greater than or
equal to 0. If this is the case then if PTS_DOMAINFILE is not
equal to UNSET a nearest neighbor search of PTS_DOMAINFILE using
PTS_LAT and PTS_LON will be done and the component mesh will have
this nearest neighbor value. </desc>
equal to 0. If this is the case then if PTS_DOMAINFILE is not
equal to UNSET a nearest neighbor search of PTS_DOMAINFILE using
PTS_LAT and PTS_LON will be done and the component mesh will have
this nearest neighbor value. </desc>
</entry>

<!-- ======================================================================= -->
Expand Down Expand Up @@ -1947,7 +1947,7 @@
<desc>pio rearranger communication max pending requests (io2comp) :
-2 implies that CIME internally calculates the value ( = 64),
-1 implies no bound on max pending requests
0 implies that MPI_ALLTOALL will be used
0 implies that MPI_ALLTOALL will be used
</desc>
</entry>

Expand Down Expand Up @@ -2528,6 +2528,24 @@
<desc>if true, create ESMF PET log files even if there is no error encountered </desc>
</entry>

<entry id="USE_SMARTSIM">
<type>logical</type>
<valid_values>TRUE,FALSE</valid_values>
<default_value>FALSE</default_value>
<group>smartsim</group>
<file>env_build.xml</file>
<desc>if true, link with the HPE SmartRedis library </desc>
</entry>

<entry id="CREATE_SMARTSIM_CLUSTER">
<type>logical</type>
<valid_values>TRUE,FALSE</valid_values>
<default_value>FALSE</default_value>
<group>smartsim</group>
<file>env_run.xml</file>
<desc>Set to true if SmartSim database spans multiple nodes </desc>
</entry>

<!-- ===================================================================== -->
<!-- Include the AOFLUX calculation for this compset -->
<!-- ===================================================================== -->
Expand Down
38 changes: 31 additions & 7 deletions cime_config/namelist_definition_drv.xml
Original file line number Diff line number Diff line change
Expand Up @@ -987,11 +987,11 @@
'on': always do this renormalization
'off': never do this renormalization (see WARNING below)
'on_if_glc_coupled_fluxes': Determine at runtime whether to do this renormalization.
Does the renormalization if we're running a two-way-coupled glc that sends fluxes
to other components (which is the case where we need conservation).
Does NOT do the renormalization if we're running a one-way-coupled glc, or if
we're running a glc-only compset (T compsets).
(In these cases, conservation is not important.)
Does the renormalization if we're running a two-way-coupled glc that sends fluxes
to other components (which is the case where we need conservation).
Does NOT do the renormalization if we're running a one-way-coupled glc, or if
we're running a glc-only compset (T compsets).
(In these cases, conservation is not important.)

Only used if running with a prognostic GLC component.

Expand Down Expand Up @@ -3322,9 +3322,9 @@
<group>PELAYOUT_attributes</group>
<desc>
Determines what ESMF log files (if any) are generated when
USE_ESMF_LIB is TRUE.
USE_ESMF_LIB is TRUE.
ESMF_LOGKIND_SINGLE: Use a single log file, combining messages from
all of the PETs. Not supported on some platforms.
all of the PETs. Not supported on some platforms.
ESMF_LOGKIND_MULTI: Use multiple log files — one per PET.
ESMF_LOGKIND_NONE: Do not issue messages to a log file.
By default, no ESMF log files are generated.
Expand All @@ -3350,6 +3350,30 @@
</values>
</entry>

<entry id="use_smartredis" modify_via_xml="USE_SMARTSIM">
<type>logical</type>
<category>smartsim</category>
<group>smartsim_inparm</group>
<desc>
Link in the CrayLabs SmartRedis code https://github.com/CrayLabs/SmartRedis for use with SmartSim https://github.com/CrayLabs/SmartSim
</desc>
<values>
<value>$USE_SMARTSIM</value>
</values>
</entry>

<entry id="create_smartsim_cluster" modify_via_xml="CREATE_SMARTSIM_CLUSTER">
<type>logical</type>
<category>smartsim</category>
<group>smartsim_inparm</group>
<desc>
If the SmartSim database spans more than one node, set this to true.
</desc>
<values>
<value>$CREATE_SMARTSIM_CLUSTER</value>
</values>
</entry>

<!-- =========================== -->
<!-- group prof_inparm -->
<!-- in perf_mod.F90 -->
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
./xmlchange USE_SMARTSIM=TRUE
4 changes: 2 additions & 2 deletions drivers/cime/ensemble_driver.F90
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ subroutine SetModelServices(ensemble_driver, rc)
call ReadAttributes(ensemble_driver, config, "CLOCK_attributes::", rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

call NUOPC_CompAttributeGet(ensemble_driver, 'calendar', calendar, rc=rc)
call NUOPC_CompAttributeGet(ensemble_driver, 'calendar', calendar, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
if (calendar == 'NO_LEAP') then
call ESMF_CalendarSetDefault(ESMF_CALKIND_NOLEAP, rc=rc)
Expand Down Expand Up @@ -247,7 +247,7 @@ subroutine SetModelServices(ensemble_driver, rc)
call ReadAttributes(driver, config, "DRV_modelio"//trim(inst_suffix)//"::", rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

! Set the driver log to the driver task 0
! Set the driver log to the driver task 0
if (mod(localPet, ntasks_per_member) == 0) then
call NUOPC_CompAttributeGet(driver, name="diro", value=diro, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
Expand Down
37 changes: 35 additions & 2 deletions drivers/cime/esmApp.F90
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ program esmApp
use ensemble_driver, only : SetServices
use shr_pio_mod, only : shr_pio_init1
use shr_sys_mod, only : shr_sys_abort
!
! The CrayLabs SmartSim interface is provided in directory share https://github.com/ESCOMP/CESM_share
! Please see file cime/tools/smartsim/README.md for a complete explanation of the CESM interface to smartsim
! create_smartsim_cluster is set to true if the database is using 3 or more nodes and false if it's using 1;
! 2 nodes is not allowed (in the pbs interface at least).
!
use nuopc_shr_methods, only : sr_client, use_smartredis

implicit none

Expand All @@ -30,9 +37,15 @@ program esmApp
integer :: fileunit
integer :: provided
type(ESMF_VM) :: vm
logical :: create_smartsim_cluster = .false.

namelist /debug_inparm / create_esmf_pet_files

!
! The CrayLabs SmartSim interface is provided in directory share https://github.com/ESCOMP/CESM_share
! Please see file cime/tools/smartsim/README.md for a complete explanation of the CESM interface to smartsim
! The use_smartredis variable is set in file drv_in and if true the variable sr_client is initialized in esmApp.F90
!
namelist /smartsim_inparm/ use_smartredis, create_smartsim_cluster
!-----------------------------------------------------------------------------
! Initialize MPI
!-----------------------------------------------------------------------------
Expand Down Expand Up @@ -66,7 +79,7 @@ program esmApp
open(newunit=fileunit, status="old", file="drv_in")
read(fileunit, debug_inparm, iostat=ier)
if (ier > 0) then
call shr_sys_abort('esmApp: error reading in debug_inparm namelist from drv_in')
call shr_sys_abort('esmApp: error reading in debug_inparm namelist from drv_in')
end if
close(fileunit)
end if
Expand Down Expand Up @@ -99,6 +112,26 @@ program esmApp
file=__FILE__)) &
call ESMF_Finalize(endflag=ESMF_END_ABORT)

!-----------------------------------------------------------------------------
! Initialize the CrayLabs SmartRedis client; a stub is provided in share if
! smartsim is not used. This client shall be used by all component models.
!-----------------------------------------------------------------------------

if (iam==0) then
open(newunit=fileunit, status="old", file="drv_in")
read(fileunit, smartsim_inparm, iostat=ier)
if (ier > 0) then
call shr_sys_abort('esmApp: error reading in smartsim_inparm namelist from drv_in')
end if
close(fileunit)
end if
call mpi_bcast (use_smartredis, 1, MPI_LOGICAL, 0, COMP_COMM, ier)
call mpi_bcast (create_smartsim_cluster, 1, MPI_LOGICAL, 0, COMP_COMM, ier)
if (use_smartredis) then
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please write to the cmeps logunit and not the PET? It would be good to have this in the mediator log file. Or does this belong in the driver write file?

call ESMF_Logwrite("Using SmartSim interface", ESMF_LOGMSG_INFO, rc=rc)
call sr_client%initialize(create_smartsim_cluster)
endif

!-----------------------------------------------------------------------------
! Operate on the NUOPC Field dictionary
!-----------------------------------------------------------------------------
Expand Down
2 changes: 0 additions & 2 deletions drivers/cime/esm_utils_mod.F90
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
module esm_utils_mod

implicit none
public

logical :: mastertask
integer :: logunit
integer :: dbug_flag = 0

character(*), parameter :: u_FILE_u = &
__FILE__

Expand Down
17 changes: 14 additions & 3 deletions nuopc_cap_share/nuopc_shr_methods.F90
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
module nuopc_shr_methods

use ESMF , only : operator(<), operator(/=), operator(+)
use ESMF , only : operator(-), operator(*) , operator(>=)
use ESMF , only : operator(<=), operator(>), operator(==)
Expand All @@ -23,6 +22,11 @@ module nuopc_shr_methods
use shr_kind_mod , only : r8 => shr_kind_r8, cl=>shr_kind_cl, cs=>shr_kind_cs
use shr_sys_mod , only : shr_sys_abort
use shr_file_mod , only : shr_file_setlogunit, shr_file_getLogUnit
!
! The CrayLabs SmartSim interface is provided in directory share https://github.com/ESCOMP/CESM_share
! Please see file cime/tools/smartsim/README.md for a complete explanation of the CESM interface to smartsim
!
use smartredis_client, only : client_type

implicit none
private
Expand All @@ -36,10 +40,10 @@ module nuopc_shr_methods
public :: state_diagnose
public :: alarmInit
public :: chkerr

private :: timeInit
private :: field_getfldptr


! Clock and alarm options
character(len=*), private, parameter :: &
optNONE = "none" , &
Expand Down Expand Up @@ -68,6 +72,14 @@ module nuopc_shr_methods
character(len=1024) :: msgString
character(len=*), parameter :: u_FILE_u = &
__FILE__
!
! The CrayLabs SmartSim interface is provided in directory share https://github.com/ESCOMP/CESM_share
! Please see file cime/tools/smartsim/README.md for a complete explanation of the CESM interface to smartsim
! The use_smartredis variable is set in file drv_in and if true the variable sr_client is initialized in esmApp.F90
!
logical, public :: use_smartredis = .false.
type(client_type), public :: sr_client


!===============================================================================
contains
Expand Down Expand Up @@ -222,7 +234,6 @@ subroutine state_getscalar(state, scalar_id, scalar_value, flds_scalar_name, fld

call ESMF_VMGet(vm, localPet=mytask, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

call ESMF_StateGet(State, itemName=trim(flds_scalar_name), field=field, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

Expand Down