From d6628292da0d04974cb25a405b490f2a637f4ae8 Mon Sep 17 00:00:00 2001 From: Tom Clune Date: Wed, 8 May 2024 14:19:57 -0400 Subject: [PATCH 1/3] Appears to run. --- .../HistoryCollectionGridComp_private.F90 | 1 + gridcomps/cap3g/tests/basic_captest/cap.yaml | 27 ++ mapl3g/CMakeLists.txt | 2 +- mapl3g/GEOS.F90 | 9 +- mapl3g/MaplFramework.F90 | 444 ++++++++++++++---- mapl3g/ServerDriver.F90 | 59 +++ 6 files changed, 451 insertions(+), 91 deletions(-) create mode 100644 mapl3g/ServerDriver.F90 diff --git a/gridcomps/History3G/HistoryCollectionGridComp_private.F90 b/gridcomps/History3G/HistoryCollectionGridComp_private.F90 index c8a7e6af90e2..6e2bc792dfec 100644 --- a/gridcomps/History3G/HistoryCollectionGridComp_private.F90 +++ b/gridcomps/History3G/HistoryCollectionGridComp_private.F90 @@ -66,6 +66,7 @@ subroutine register_imports(gridcomp, hconfig, rc) iter_end = ESMF_HConfigIterEnd(var_list,_RC) iter = iter_begin do while (ESMF_HConfigIterLoop(iter,iter_begin,iter_end,rc=status)) + _VERIFY(status) call parse_item(iter, item_name, variable_names, _RC) call add_specs(gridcomp, variable_names, _RC) end do diff --git a/gridcomps/cap3g/tests/basic_captest/cap.yaml b/gridcomps/cap3g/tests/basic_captest/cap.yaml index 3306c41fb67e..f9e9b397a040 100644 --- a/gridcomps/cap3g/tests/basic_captest/cap.yaml +++ b/gridcomps/cap3g/tests/basic_captest/cap.yaml @@ -3,6 +3,14 @@ esmf: #mapl: # pflogger_cfg_file: pflogger.yaml +# +# petcount_model: 1 +# +# servers: +# pfio: +# nodes: 1 +# mit: +# nodes: 0 cap: name: cap @@ -13,6 +21,25 @@ cap: stop: 2999-03-02T21:00:00 segment_duration: PT10H + + + + + + + + + + + + + + + + + + + num_segments: 1 # segments per batch submission servers: diff --git a/mapl3g/CMakeLists.txt b/mapl3g/CMakeLists.txt index 7026154e9b5f..16c33a04e8c9 100644 --- a/mapl3g/CMakeLists.txt +++ b/mapl3g/CMakeLists.txt @@ -2,7 +2,7 @@ esma_set_this() esma_add_library (${this} - SRCS mapl3g.F90 MaplFramework.F90 + SRCS mapl3g.F90 MaplFramework.F90 ServerDriver.F90 DEPENDENCIES MAPL.generic3g MAPL.pfio MAPL.cap3g MAPL.gridcomps MAPL.griddedio MAPL.field_utils ${EXTDATA_TARGET} ESMF::ESMF NetCDF::NetCDF_Fortran MPI::MPI_Fortran PFLOGGER::pflogger TYPE ${MAPL_LIBRARY_TYPE} diff --git a/mapl3g/GEOS.F90 b/mapl3g/GEOS.F90 index 31e3765aaafe..16772acc13c9 100644 --- a/mapl3g/GEOS.F90 +++ b/mapl3g/GEOS.F90 @@ -8,9 +8,12 @@ program geos integer :: status type(ESMF_HConfig) :: hconfig + logical :: is_model_pet - call MAPL_Initialize(hconfig, _RC) - call run_geos(hconfig, _RC) + call MAPL_Initialize(hconfig, is_model_pet=is_model_pet, _RC) + if (is_model_pet) then + call run_geos(hconfig, _RC) + end if call MAPL_Finalize(_RC) contains @@ -29,7 +32,9 @@ subroutine run_geos(hconfig, rc) has_cap_hconfig = ESMF_HConfigIsDefined(hconfig, keystring='cap', _RC) _ASSERT(has_cap_hconfig, 'No cap section found in configuration file') cap_hconfig = ESMF_HConfigCreateAt(hconfig, keystring='cap', _RC) + call MAPL_run_driver(cap_hconfig, _RC) + call ESMF_HConfigDestroy(cap_hconfig, _RC) _RETURN(_SUCCESS) diff --git a/mapl3g/MaplFramework.F90 b/mapl3g/MaplFramework.F90 index 5b331a4675c5..72167ad0a627 100644 --- a/mapl3g/MaplFramework.F90 +++ b/mapl3g/MaplFramework.F90 @@ -6,6 +6,7 @@ module mapl3g_MaplFramework + use mapl3g_ServerDriver use mapl_ErrorHandling use mapl_KeywordEnforcerMod use mapl_profiler, only: DistributedProfiler @@ -16,6 +17,7 @@ module mapl3g_MaplFramework use pfio_AbstractDirectoryServiceMod, only: PortInfo use pflogger, only: logging use pflogger, only: Logger + use mpi use esmf implicit none private @@ -29,6 +31,8 @@ module mapl3g_MaplFramework private logical :: mapl_initialized = .false. logical :: esmf_internally_initialized = .false. + type(ESMF_VM) :: mapl_vm + type(ESMF_HConfig) :: mapl_hconfig type(DirectoryService) :: directory_service type(MpiServer), pointer :: o_server => null() @@ -36,9 +40,16 @@ module mapl3g_MaplFramework contains procedure :: initialize procedure :: initialize_esmf - procedure :: initialize_mapl + procedure :: initialize_pflogger + procedure :: initialize_profilers + procedure :: initialize_servers procedure :: initialize_simple_oserver + procedure :: finalize + procedure :: finalize_servers + procedure :: finalize_profiler + procedure :: finalize_pflogger + procedure :: finalize_esmf procedure :: get procedure :: is_initialized end type MaplFramework @@ -60,124 +71,307 @@ module mapl3g_MaplFramework ! Type-bound procedures ! Note: HConfig is an output if ESMF is not already initialized. Otherwise it is an input. - subroutine initialize(this, hconfig, unusable, mpiCommunicator, rc) + subroutine initialize(this, hconfig, unusable, is_model_pet, mpiCommunicator, rc) class(MaplFramework), intent(inout) :: this type(ESMF_HConfig), intent(inout) :: hconfig class(KeywordEnforcer), optional, intent(in) :: unusable + logical, optional, intent(out) :: is_model_pet integer, optional, intent(in) :: mpiCommunicator integer, optional, intent(out) :: rc integer :: status _ASSERT(.not. this%mapl_initialized, "MaplFramework object is already initialized") - this%mapl_hconfig = hconfig + this%mapl_initialized = .true. + this%mapl_hconfig = hconfig call this%initialize_esmf(hconfig, mpiCommunicator=mpiCommunicator, _RC) + call ESMF_VMGetCurrent(this%mapl_vm, _RC) - call this%initialize_mapl(_RC) - this%mapl_initialized = .true. + call this%initialize_pflogger(_RC) + call this%initialize_profilers(_RC) + call this%initialize_servers(is_model_pet=is_model_pet, _RC) _RETURN(_SUCCESS) + _UNUSED_DUMMY(unusable) end subroutine initialize + ! If ESMF is already initialized, then we expect hconfig to be + ! externally provided. Otherwise, we retrieve the top level + ! hconfig from ESMF_Initialize and return that. subroutine initialize_esmf(this, hconfig, unusable, mpiCommunicator, rc) class(MaplFramework), intent(inout) :: this type(ESMF_HConfig), intent(inout) :: hconfig class(KeywordEnforcer), optional, intent(in) :: unusable integer, optional, intent(in) :: mpiCommunicator integer, optional, intent(out) :: rc - + integer :: status type(ESMF_Config) :: config logical :: esmf_is_initialized - logical :: has_mapl_section - - esmf_is_initialized = ESMF_IsInitialized(_RC) + + esmf_is_initialized = ESMF_IsInitialized(_RC) _RETURN_IF(esmf_is_initialized) this%esmf_internally_initialized = .true. call ESMF_Initialize(configFilenameFromArgNum=1, configKey=['esmf'], config=config, mpiCommunicator=mpiCommunicator, _RC) - ! If ESMF is externally initialized, then we expect the mapl hconfig to be passed in. Otherwise, it - ! must be extracted from the top level ESMF Config. - call ESMF_ConfigGet(config, hconfig=hconfig, _RC) - has_mapl_section = ESMF_HConfigIsDefined(hconfig, keystring='mapl', _RC) - if (has_mapl_section) then - this%mapl_hconfig = ESMF_HConfigCreateAt(hconfig, keystring='mapl', _RC) - _RETURN(_SUCCESS) - end if - - this%mapl_hconfig = ESMF_HConfigCreate(content='{}', _RC) + this%mapl_hconfig = get_subconfig(hconfig, keystring='mapl', _RC) _RETURN(_SUCCESS) + _UNUSED_DUMMY(unusable) + + contains + + ! Return an empty mapping unless named dictionary is found. + function get_subconfig(hconfig, keystring, rc) result(subcfg) + type(ESMF_HConfig) :: subcfg + type(ESMF_HConfig), intent(in) :: hconfig + character(*), intent(in) :: keystring + integer, optional, intent(out) :: rc + + integer :: status + logical :: has_keystring + + has_keystring = ESMF_HConfigIsDefined(hconfig, keystring=keystring, _RC) + if (has_keystring) then + subcfg = ESMF_HConfigCreateAt(hconfig, keystring='mapl', _RC) + _RETURN(_SUCCESS) + end if + + subcfg = ESMF_HConfigCreate(content='{}', _RC) + _RETURN(_SUCCESS) + end function get_subconfig + end subroutine initialize_esmf - subroutine initialize_mapl(this, unusable, rc) +#ifdef BUILD_WITH_PFLOGGER + subroutine initialize_pflogger(this, unusable, rc) + use PFL_Formatter, only: get_sim_time + use pflogger, only: pfl_initialize => initialize + use mapl_SimulationTime, only: fill_time_dict + class(MaplFramework), intent(inout) :: this - class(KeywordEnforcer), optional, intent(out) :: unusable + class(KeywordEnforcer), optional, intent(in) :: unusable integer, optional, intent(out) :: rc integer :: status - integer :: comm_world - type(ESMF_VM) :: mapl_vm + integer :: world_comm logical :: has_pflogger_cfg_file character(:), allocatable :: pflogger_cfg_file - call ESMF_VMGetCurrent(mapl_vm, _RC) - call ESMF_VMGet(mapl_vm, mpiCommunicator=comm_world, _RC) + call pfl_initialize() + get_sim_time => fill_time_dict -#ifdef BUILD_WITH_PFLOGGER has_pflogger_cfg_file = ESMF_HConfigIsDefined(this%mapl_hconfig, keystring="pflogger_cfg_file", _RC) if (has_pflogger_cfg_file) then pflogger_cfg_file = ESMF_HConfigAsString(this%mapl_hconfig, keystring="pflogger_cfg_file", _RC) + call logging%load_file(pflogger_cfg_file) + _RETURN(_SUCCESS) end if - call initialize_pflogger(pflogger_cfg_file=pflogger_cfg_file, comm_world=comm_world, _RC) + + call ESMF_VMGet(this%mapl_vm, mpiCommunicator=world_comm, _RC) + call default_initialize_pflogger(world_comm=world_comm, _RC) + + _RETURN(_SUCCESS) + _UNUSED_DUMMY(unusable) + end subroutine initialize_pflogger #endif -!# call initialize_profiler(comm=comm_world, enable_global_timeprof=enable_global_timeprof, enable_global_memprof=enable_global_memprof, _RC) - call this%initialize_simple_oserver(_RC) + subroutine initialize_profilers(this, unusable, rc) + class(MaplFramework), target, intent(inout) :: this + class(KeywordEnforcer), optional, intent(in) :: unusable + integer, optional, intent(out) :: rc + + integer :: status + integer :: world_comm + call ESMF_VMGet(this%mapl_vm, mpiCommunicator=world_comm, _RC) +!# call initialize_profiler(comm=world_comm, enable_global_timeprof=enable_global_timeprof, & +! # enable_global_memprof=enable_global_memprof, _RC) _RETURN(_SUCCESS) - end subroutine initialize_mapl + _UNUSED_DUMMY(unusable) + end subroutine initialize_profilers - subroutine initialize_simple_oserver(this, unusable, rc) + subroutine initialize_servers(this, unusable, is_model_pet, rc) class(MaplFramework), target, intent(inout) :: this - class(KeywordEnforcer), optional, intent(out) :: unusable + class(KeywordEnforcer), optional, intent(in) :: unusable + logical, optional, intent(out) :: is_model_pet integer, optional, intent(out) :: rc - integer :: status, stat_alloc, comm_world - type(ESMF_VM) :: vm - type(ClientThread), pointer :: clientPtr + integer :: status + type(ESMF_HConfig) :: servers_hconfig + logical :: has_server_section + integer :: model_petcount + integer :: world_group, model_group, server_group, model_server_group + integer :: world_comm, model_comm, server_comm, model_server_comm + integer :: ssiCount ! total number of nodes participating + integer, allocatable :: ssiMap(:) + integer, allocatable :: model_pets(:), server_pets(:) + integer, allocatable :: ssis_per_server(:) + integer :: required_ssis + type(ServerDriver), allocatable :: server_drivers(:) + integer :: num_model_ssis + type(ESMF_HConfig), allocatable :: server_hconfigs(:) + integer :: n + integer :: ssi_0, ssi_1, i_server + class(Logger), pointer :: lgr + integer :: ignore ! workaround for ESMF bug in v8.6.0 + + call ESMF_VMGet(this%mapl_vm, ssiMap=ssiMap, ssiCount=ssiCount, mpiCommunicator=world_comm, petCount=ignore, _RC) + ! do something with this line + + has_server_section = ESMF_HConfigIsDefined(this%mapl_hconfig, keystring='servers', _RC) + if (.not. has_server_section) then + this%directory_service = DirectoryService(world_comm) + call this%initialize_simple_oserver(_RC) + _RETURN(_SUCCESS) + end if + + model_petCount = get_model_petcount(this%mapl_hconfig, _RC) + num_model_ssis = get_num_ssis(model_petCount, ssiCount, ssiMap, ssiOffset=0, _RC) + + servers_hconfig = ESMF_HConfigCreateAt(this%mapl_hconfig, keystring='servers', _RC) + server_hconfigs = get_server_hconfigs(servers_hconfig, _RC) + + ssis_per_server = get_ssis_per_server(server_hconfigs, _RC) + required_ssis = num_model_ssis + sum(ssis_per_server) + + _ASSERT(required_ssis <= ssiCount, "Insufficient resources for requested servers.") + if (required_ssis < ssiCount) then + call lgr%warning("Unused nodes. Required %i0 nodes, but %i0 available.", required_ssis, ssicount) + end if + + call MPI_Comm_group(world_comm, world_group, _IERROR) + + model_pets = pack([(n, n = 0, size(ssiMap))], ssiMap <= num_model_ssis) + call MPI_Group_incl(world_group, model_petCount, model_pets, model_group, _IERROR) + is_model_pet = (model_group /= MPI_GROUP_NULL) + + call MPI_Comm_create_group(world_comm, model_group, 0, model_comm, _IERROR) + + ssi_0 = num_model_ssis + do i_server = 1, size(server_hconfigs) + ssi_1 = ssi_0 + ssis_per_server(i_server) + server_pets = pack([(n, n = 0, size(ssiMap))], ssiMap >= ssi_0 .and. ssiMap < ssi_1) + + call MPI_Group_incl(world_group, size(server_pets), server_pets, server_group, _IERROR) + call MPI_Group_union(server_group, model_group, model_server_group, _IERROR) + + call MPI_Comm_create_group(world_comm, server_group, 0, server_comm, _IERROR) + call MPI_Comm_create_group(world_comm, model_server_group, 0, model_server_comm, _IERROR) + + call MPI_Group_Free(server_group, _IERROR) + call MPI_Group_Free(model_server_group, _IERROR) + + server_drivers(i_server) = ServerDriver(server_hconfigs(i_server), model_server_comm, model_comm, server_comm) + + ssi_0 = ssi_1 + end do + + do i_server = 1, size(server_drivers) + call server_drivers(i_server)%run(_RC) + end do + + call MPI_Group_Free(world_group, _IERROR) + call ESMF_HConfigDestroy(servers_hconfig, _RC) + + _RETURN(_SUCCESS) + _UNUSED_DUMMY(unusable) + end subroutine initialize_servers + + function get_server_hconfigs(servers_hconfig, rc) result(server_hconfigs) + type(ESMF_HConfig), allocatable :: server_hconfigs(:) + type(ESMF_HConfig), intent(in) :: servers_hconfig + integer, optional, intent(out) :: rc + + integer :: status + + integer :: n_servers, i_server + type(ESMF_HConfigIter) :: iter_begin, iter_end, iter + + n_servers = ESMF_HConfigGetSize(servers_hconfig, _RC) + allocate(server_hconfigs(n_servers)) + + iter_begin = ESMF_HConfigIterBegin(servers_hconfig,_RC) + iter_end = ESMF_HConfigIterEnd(servers_hconfig, _RC) + iter = iter_begin + + i_server = 0 + do while (ESMF_HConfigIterLoop(iter, iter_begin, iter_end, rc=status)) + i_server = i_server + 1 + server_hconfigs(i_server) = ESMF_HConfigCreateAt(iter, _RC) + end do + + _RETURN(_SUCCESS) + end function get_server_hconfigs + + function get_ssis_per_server(server_hconfigs, rc) result(ssis_per_server) + integer, allocatable :: ssis_per_server(:) + type(ESMF_HConfig), intent(in) :: server_hconfigs(:) + integer, optional, intent(out) :: rc + + integer :: status + integer :: i_server + + associate (n_servers => size(server_hconfigs)) + allocate(ssis_per_server(n_servers)) + do i_server = 1, n_servers + ssis_per_server(i_server) = ESMF_HConfigAsI4(server_hconfigs(i_server), keystring='num_nodes', _RC) + end do + end associate + _RETURN(_SUCCESS) + end function get_ssis_per_server + + + integer function get_model_petCount(hconfig, rc) result(model_petcount) + type(ESMF_HConfig), intent(in) :: hconfig + integer, optional, intent(out) :: rc + + integer :: status + logical :: has_model_petcount + + has_model_petcount = ESMF_HConfigIsDefined(hconfig, keystring='model_petcount', _RC) + _ASSERT(has_model_petcount, 'Unknown petcount reservation for model.') + model_petcount = ESMF_HConfigAsI4(hconfig, keystring='model_petcount', _RC) + + _RETURN(_SUCCESS) + end function get_model_petCount + + subroutine initialize_simple_oserver(this, unusable, rc) + class(MaplFramework), target, intent(inout) :: this + class(KeywordEnforcer), optional, intent(in) :: unusable + integer, optional, intent(out) :: rc - call ESMF_VMGetCurrent(vm, _RC) - call ESMF_VMGet(vm, mpiCommunicator=comm_world, _RC) + integer :: status, stat_alloc + integer :: model_comm + type(ClientThread), pointer :: clientPtr - this%directory_service = DirectoryService(comm_world) - call init_IO_ClientManager(comm_world, _RC) - allocate(this%o_server, source=MpiServer(comm_world, 'o_server', rc=status), stat=stat_alloc) + call ESMF_VMGet(this%mapl_vm, mpiCommunicator=model_comm, _RC) + call init_IO_ClientManager(model_comm, _RC) + allocate(this%o_server, source=MpiServer(model_comm, 'o_server', rc=status), stat=stat_alloc) _VERIFY(status) _VERIFY(stat_alloc) call this%directory_service%publish(PortInfo('o_server', this%o_server), this%o_server) clientPtr => o_Clients%current() - call this%directory_service%connect_to_server('o_server', clientPtr, comm_world) - - _RETURN(_SUCCESS) + call this%directory_service%connect_to_server('o_server', clientPtr, model_comm) + _RETURN(_SUCCESS) + _UNUSED_DUMMY(unusable) end subroutine initialize_simple_oserver - + subroutine get(this, unusable, directory_service, rc) class(MaplFramework), target, intent(in) :: this - class(KeywordEnforcer), optional, intent(out) :: unusable + class(KeywordEnforcer), optional, intent(in) :: unusable type(DirectoryService), pointer, optional, intent(out) :: directory_service integer, optional, intent(out) :: rc - integer :: status - _ASSERT(this%is_initialized(), "MaplFramework object is not initialized") if (present(directory_service)) directory_service => this%directory_service _RETURN(_SUCCESS) + _UNUSED_DUMMY(unusable) end subroutine get logical function is_initialized(this) @@ -185,27 +379,87 @@ logical function is_initialized(this) is_initialized = this%mapl_initialized end function is_initialized - subroutine finalize(this, rc) + subroutine finalize(this, unusable, rc) class(MaplFramework), intent(inout) :: this + class(KeywordEnforcer), optional, intent(in) :: unusable integer, optional, intent(out) :: rc integer :: status -!# call finalize_profiler(_RC) - call logging%free() call this%directory_service%free_directory_resources() + call this%finalize_servers(_RC) +!# call server_comm%free_comms(_RC) +!# if (server_comm /= MPI_COMM_NULL) then +!# call MPI_Comm_free(server_comm, _IERROR) +!# end if +!# if (server_comm_model /= MPI_COMM_NULL) then +!# call MPI_Comm_free(server_comm_model, _IERROR) +!# end if + + call this%finalize_profiler(_RC) + call this%finalize_pflogger(_RC) + call this%finalize_esmf(_RC) - if (this%esmf_internally_initialized) then - call ESMF_HConfigDestroy(this%mapl_hconfig, _RC) - call ESMF_Finalize(_RC) - end if - _RETURN(_SUCCESS) + _UNUSED_DUMMY(unusable) end subroutine finalize - ! Procedures using singleton object + subroutine finalize_servers(this, unusable, rc) + class(MaplFramework), intent(inout) :: this + class(KeywordEnforcer), optional, intent(in) :: unusable + integer, optional, intent(out) :: rc + +!# integer :: status + + _RETURN(_SUCCESS) + _UNUSED_DUMMY(unusable) + _UNUSED_DUMMY(this) + end subroutine finalize_servers + + subroutine finalize_profiler(this, unusable, rc) + class(MaplFramework), intent(inout) :: this + class(KeywordEnforcer), optional, intent(in) :: unusable + integer, optional, intent(out) :: rc + +!# integer :: status + + _RETURN(_SUCCESS) + _UNUSED_DUMMY(unusable) + _UNUSED_DUMMY(this) + end subroutine finalize_profiler + + subroutine finalize_pflogger(this, unusable, rc) + class(MaplFramework), intent(inout) :: this + class(KeywordEnforcer), optional, intent(in) :: unusable + integer, optional, intent(out) :: rc + +!# integer :: status + call logging%free() + + _RETURN(_SUCCESS) + _UNUSED_DUMMY(unusable) + _UNUSED_DUMMY(this) + end subroutine finalize_pflogger + + subroutine finalize_esmf(this, unusable, rc) + class(MaplFramework), intent(inout) :: this + class(KeywordEnforcer), optional, intent(in) :: unusable + integer, optional, intent(out) :: rc + + integer :: status + + _RETURN_UNLESS(this%esmf_internally_initialized) + + call ESMF_HConfigDestroy(this%mapl_hconfig, _RC) + call ESMF_Finalize(_RC) + + _RETURN(_SUCCESS) + _UNUSED_DUMMY(unusable) + end subroutine finalize_esmf + + ! Public interfaces that rely on the singleton object subroutine mapl_get(unusable, directory_service, rc) - class(KeywordEnforcer), optional, intent(out) :: unusable + class(KeywordEnforcer), optional, intent(in) :: unusable type(DirectoryService), pointer, optional, intent(out) :: directory_service integer, optional, intent(out) :: rc @@ -214,7 +468,8 @@ subroutine mapl_get(unusable, directory_service, rc) call the_mapl_object%get(directory_service=directory_service, _RC) _RETURN(_SUCCESS) - end subroutine mapl_get + _UNUSED_DUMMY(unusable) + end subroutine mapl_get subroutine mapl_get_mapl(mapl) type(MaplFramework), pointer, intent(out) :: mapl @@ -223,18 +478,19 @@ subroutine mapl_get_mapl(mapl) end subroutine mapl_get_mapl - subroutine mapl_initialize(hconfig, unusable, mpiCommunicator, rc) - use mapl_KeywordEnforcerMod + subroutine mapl_initialize(hconfig, unusable, is_model_pet, mpiCommunicator, rc) type(ESMF_HConfig), intent(inout) :: hconfig class(KeywordEnforcer), optional, intent(in) :: unusable + logical, optional, intent(out) :: is_model_pet integer, optional, intent(in) :: mpiCommunicator integer, optional, intent(out) :: rc integer :: status - call the_mapl_object%initialize(hconfig=hconfig, mpiCommunicator=mpiCommunicator, _RC) + call the_mapl_object%initialize(hconfig=hconfig, is_model_pet=is_model_pet, mpiCommunicator=mpiCommunicator, _RC) _RETURN(_SUCCESS) + _UNUSED_DUMMY(unusable) end subroutine mapl_initialize subroutine mapl_finalize(rc) @@ -248,20 +504,15 @@ subroutine mapl_finalize(rc) end subroutine mapl_finalize #ifdef BUILD_WITH_PFLOGGER - subroutine initialize_pflogger(comm_world, unusable, pflogger_cfg_file, rc) - use pflogger, only: pfl_initialize => initialize + subroutine default_initialize_pflogger(world_comm, unusable, rc) use pflogger, only: StreamHandler, FileHandler, HandlerVector use pflogger, only: MpiLock, MpiFormatter use pflogger, only: INFO, WARNING - use PFL_Formatter, only: get_sim_time - use mapl_SimulationTime, only: fill_time_dict use, intrinsic :: iso_fortran_env, only: OUTPUT_UNIT - - integer, intent(in) :: comm_world + integer, intent(in) :: world_comm class (KeywordEnforcer), optional, intent(in) :: unusable - character(len=*), optional,intent(in) :: pflogger_cfg_file integer, optional, intent(out) :: rc type (HandlerVector) :: handlers @@ -270,46 +521,63 @@ subroutine initialize_pflogger(comm_world, unusable, pflogger_cfg_file, rc) integer :: level,rank,status type(Logger), pointer :: lgr - - call pfl_initialize() - get_sim_time => fill_time_dict - - if (present(pflogger_cfg_file)) then - call logging%load_file(pflogger_cfg_file) - _RETURN(_SUCCESS) - end if - ! Default configuration if no file provided - call MPI_COMM_Rank(comm_world,rank,status) + call MPI_COMM_Rank(world_comm,rank,status) console = StreamHandler(OUTPUT_UNIT) call console%set_level(INFO) - call console%set_formatter(MpiFormatter(comm_world, fmt='%(short_name)a10~: %(message)a')) + call console%set_formatter(MpiFormatter(world_comm, fmt='%(short_name)a10~: %(message)a')) call handlers%push_back(console) - + file_handler = FileHandler('warnings_and_errors.log') call file_handler%set_level(WARNING) - call file_handler%set_formatter(MpiFormatter(comm_world, fmt='pe=%(mpi_rank)i5.5~: %(short_name)a~: %(message)a')) - call file_handler%set_lock(MpiLock(comm_world)) + call file_handler%set_formatter(MpiFormatter(world_comm, fmt='pe=%(mpi_rank)i5.5~: %(short_name)a~: %(message)a')) + call file_handler%set_lock(MpiLock(world_comm)) call handlers%push_back(file_handler) - + level = WARNING if (rank == 0) then level = INFO end if - + call logging%basic_config(level=level, handlers=handlers, rc=status) _VERIFY(status) - + if (rank == 0) then lgr => logging%get_logger('MAPL') call lgr%warning('No configure file specified for logging layer. Using defaults.') end if _RETURN(_SUCCESS) - - _UNUSED_DUMMY(unusable) - end subroutine initialize_pflogger + _UNUSED_DUMMY(unusable) + end subroutine default_initialize_pflogger #endif + + integer function get_num_ssis(petCount, ssiCount, ssiMap, ssiOffset, rc) result(num_ssis) + integer, intent(in) :: petCount + integer, intent(in) :: ssiCount + integer, intent(in) :: ssiMap(:) + integer, intent(in) :: ssiOffset + integer, optional, intent(out) :: rc + + integer :: n + integer :: found + + num_ssis = 0 + + found = 0 + do n = ssiOffset, ssiCount - 1 + found = found + count(ssiMap == n) + if (found >= petCount) exit + end do + + _ASSERT(found >= petCount, 'Insufficient resources for running model.') + num_ssis = 1 + (n - ssiOffset) + + _RETURN(_SUCCESS) + end function get_num_ssis + end module mapl3g_MaplFramework + + diff --git a/mapl3g/ServerDriver.F90 b/mapl3g/ServerDriver.F90 new file mode 100644 index 000000000000..141e705c3ac5 --- /dev/null +++ b/mapl3g/ServerDriver.F90 @@ -0,0 +1,59 @@ +#include "MAPL_Generic.h" + +module mapl3g_ServerDriver + use mapl_ErrorHandling + use mpi + use esmf +!# use dll + implicit none + private + + public :: ServerDriver + + type :: ServerDriver + type(ESMF_HConfig) :: hconfig + integer :: world_comm + integer :: model_comm + integer :: server_comm + contains + procedure :: run + end type ServerDriver + + interface ServerDriver + procedure :: new_ServerDriver + end interface ServerDriver + +contains + + function new_ServerDriver(hconfig, world_comm, model_comm, server_comm) result(driver) + type(ServerDriver) :: driver + type(ESMF_HConfig), optional, intent(in) :: hconfig + integer, intent(in) :: world_comm + integer, intent(in) :: model_comm + integer, intent(in) :: server_comm + + end function new_ServerDriver + + + subroutine run(this, rc) + class(ServerDriver), intent(in) :: this + integer, optional, intent(out) :: rc + + integer :: status + character(:), allocatable :: dso_name, dso_procedure + + _RETURN_IF(this%server_comm == MPI_COMM_NULL) + + dso_name = ESMF_HConfigAsString(this%hconfig, keystring="dso_name", _RC) + dso_procedure = ESMF_HConfigAsString(this%hconfig, keystring="dso_procedure", _RC) + +!# call dlopen(dso_name,...) +!# call dlload(dso_procedure ...) +!# +!# call server_initialize(this%hconfig, this%world_comm, this%model_comm, this%server_comm, _RC) + + _RETURN(_SUCCESS) + end subroutine run + + +end module mapl3g_ServerDriver From bff85e202b35ab2eca1848c78b88f03dfa01fb49 Mon Sep 17 00:00:00 2001 From: Tom Clune Date: Wed, 8 May 2024 14:53:22 -0400 Subject: [PATCH 2/3] Small correction to idle unused cores --- gridcomps/cap3g/tests/basic_captest/cap.yaml | 5 ++-- .../cap3g/tests/parent_child_captest/cap.yaml | 3 ++- mapl3g/MaplFramework.F90 | 27 ++++++++++--------- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/gridcomps/cap3g/tests/basic_captest/cap.yaml b/gridcomps/cap3g/tests/basic_captest/cap.yaml index f9e9b397a040..44049e622592 100644 --- a/gridcomps/cap3g/tests/basic_captest/cap.yaml +++ b/gridcomps/cap3g/tests/basic_captest/cap.yaml @@ -1,11 +1,10 @@ esmf: logKindFlag: ESMF_LOGKIND_MULTI_ON_ERROR -#mapl: +mapl: + model_petcount: 1 # pflogger_cfg_file: pflogger.yaml # -# petcount_model: 1 -# # servers: # pfio: # nodes: 1 diff --git a/gridcomps/cap3g/tests/parent_child_captest/cap.yaml b/gridcomps/cap3g/tests/parent_child_captest/cap.yaml index 5e486a162624..0e01364eb339 100644 --- a/gridcomps/cap3g/tests/parent_child_captest/cap.yaml +++ b/gridcomps/cap3g/tests/parent_child_captest/cap.yaml @@ -1,7 +1,8 @@ esmf: logKindFlag: ESMF_LOGKIND_MULTI_ON_ERROR -#mapl: +mapl: + model_petcount: 1 # pflogger_cfg_file: pflogger.yaml cap: diff --git a/mapl3g/MaplFramework.F90 b/mapl3g/MaplFramework.F90 index 72167ad0a627..74cee49894ce 100644 --- a/mapl3g/MaplFramework.F90 +++ b/mapl3g/MaplFramework.F90 @@ -32,6 +32,7 @@ module mapl3g_MaplFramework logical :: mapl_initialized = .false. logical :: esmf_internally_initialized = .false. type(ESMF_VM) :: mapl_vm + integer :: model_comm type(ESMF_HConfig) :: mapl_hconfig type(DirectoryService) :: directory_service @@ -205,7 +206,7 @@ subroutine initialize_servers(this, unusable, is_model_pet, rc) logical :: has_server_section integer :: model_petcount integer :: world_group, model_group, server_group, model_server_group - integer :: world_comm, model_comm, server_comm, model_server_comm + integer :: world_comm, server_comm, model_server_comm integer :: ssiCount ! total number of nodes participating integer, allocatable :: ssiMap(:) integer, allocatable :: model_pets(:), server_pets(:) @@ -220,16 +221,20 @@ subroutine initialize_servers(this, unusable, is_model_pet, rc) integer :: ignore ! workaround for ESMF bug in v8.6.0 call ESMF_VMGet(this%mapl_vm, ssiMap=ssiMap, ssiCount=ssiCount, mpiCommunicator=world_comm, petCount=ignore, _RC) - ! do something with this line + call MPI_Comm_group(world_comm, world_group, _IERROR) + model_petCount = get_model_petcount(this%mapl_hconfig, _RC) has_server_section = ESMF_HConfigIsDefined(this%mapl_hconfig, keystring='servers', _RC) if (.not. has_server_section) then - this%directory_service = DirectoryService(world_comm) + ! Should only run on model PETs + call MPI_Group_range_incl(world_group, 1, [0, model_petCount-1, 1], model_group, _IERROR) + call MPI_Comm_create_group(world_comm, model_group, 0, this%model_comm, _IERROR) + call MPI_Group_free(model_group, _IERROR) + this%directory_service = DirectoryService(this%model_comm) call this%initialize_simple_oserver(_RC) _RETURN(_SUCCESS) end if - model_petCount = get_model_petcount(this%mapl_hconfig, _RC) num_model_ssis = get_num_ssis(model_petCount, ssiCount, ssiMap, ssiOffset=0, _RC) servers_hconfig = ESMF_HConfigCreateAt(this%mapl_hconfig, keystring='servers', _RC) @@ -243,13 +248,12 @@ subroutine initialize_servers(this, unusable, is_model_pet, rc) call lgr%warning("Unused nodes. Required %i0 nodes, but %i0 available.", required_ssis, ssicount) end if - call MPI_Comm_group(world_comm, world_group, _IERROR) model_pets = pack([(n, n = 0, size(ssiMap))], ssiMap <= num_model_ssis) call MPI_Group_incl(world_group, model_petCount, model_pets, model_group, _IERROR) is_model_pet = (model_group /= MPI_GROUP_NULL) - call MPI_Comm_create_group(world_comm, model_group, 0, model_comm, _IERROR) + call MPI_Comm_create_group(world_comm, model_group, 0, this%model_comm, _IERROR) ssi_0 = num_model_ssis do i_server = 1, size(server_hconfigs) @@ -265,7 +269,7 @@ subroutine initialize_servers(this, unusable, is_model_pet, rc) call MPI_Group_Free(server_group, _IERROR) call MPI_Group_Free(model_server_group, _IERROR) - server_drivers(i_server) = ServerDriver(server_hconfigs(i_server), model_server_comm, model_comm, server_comm) + server_drivers(i_server) = ServerDriver(server_hconfigs(i_server), model_server_comm, this%model_comm, server_comm) ssi_0 = ssi_1 end do @@ -345,17 +349,15 @@ subroutine initialize_simple_oserver(this, unusable, rc) integer, optional, intent(out) :: rc integer :: status, stat_alloc - integer :: model_comm type(ClientThread), pointer :: clientPtr - call ESMF_VMGet(this%mapl_vm, mpiCommunicator=model_comm, _RC) - call init_IO_ClientManager(model_comm, _RC) - allocate(this%o_server, source=MpiServer(model_comm, 'o_server', rc=status), stat=stat_alloc) + call init_IO_ClientManager(this%model_comm, _RC) + allocate(this%o_server, source=MpiServer(this%model_comm, 'o_server', rc=status), stat=stat_alloc) _VERIFY(status) _VERIFY(stat_alloc) call this%directory_service%publish(PortInfo('o_server', this%o_server), this%o_server) clientPtr => o_Clients%current() - call this%directory_service%connect_to_server('o_server', clientPtr, model_comm) + call this%directory_service%connect_to_server('o_server', clientPtr, this%model_comm) _RETURN(_SUCCESS) _UNUSED_DUMMY(unusable) @@ -450,6 +452,7 @@ subroutine finalize_esmf(this, unusable, rc) _RETURN_UNLESS(this%esmf_internally_initialized) + call MPI_Comm_free(this%model_comm, _IERROR) call ESMF_HConfigDestroy(this%mapl_hconfig, _RC) call ESMF_Finalize(_RC) From 868a6eb517c15554e14f18c78a8aedeeb0f9e16d Mon Sep 17 00:00:00 2001 From: Tom Clune Date: Thu, 9 May 2024 12:42:06 -0400 Subject: [PATCH 3/3] Progress towards support for remote servers. --- generic3g/GriddedComponentDriver_smod.F90 | 3 ++ gridcomps/cap3g/Cap.F90 | 47 +++++++++++++++++++---- mapl3g/GEOS.F90 | 10 ++--- mapl3g/MaplFramework.F90 | 19 +++++---- 4 files changed, 58 insertions(+), 21 deletions(-) diff --git a/generic3g/GriddedComponentDriver_smod.F90 b/generic3g/GriddedComponentDriver_smod.F90 index 6add63a3acf6..31480c622bd1 100644 --- a/generic3g/GriddedComponentDriver_smod.F90 +++ b/generic3g/GriddedComponentDriver_smod.F90 @@ -78,6 +78,8 @@ module recursive subroutine finalize(this, unusable, phase_idx, rc) end associate + call ESMF_GridCompDestroy(this%gridcomp, _RC) + _RETURN(_SUCCESS) _UNUSED_DUMMY(unusable) end subroutine finalize @@ -145,6 +147,7 @@ recursive module subroutine run_export_couplers(this, unusable, phase_idx, rc) end associate _RETURN(_SUCCESS) + _UNUSED_DUMMY(unusable) end subroutine run_export_couplers module subroutine clock_advance(this, rc) diff --git a/gridcomps/cap3g/Cap.F90 b/gridcomps/cap3g/Cap.F90 index 87da25d7a86e..a9bbcbc8e3ee 100644 --- a/gridcomps/cap3g/Cap.F90 +++ b/gridcomps/cap3g/Cap.F90 @@ -17,28 +17,33 @@ module mapl3g_Cap contains - subroutine MAPL_run_driver(hconfig, unusable, rc) + subroutine MAPL_run_driver(hconfig, is_model_pet, unusable, rc) USE MAPL_ApplicationSupport type(ESMF_HConfig), intent(inout) :: hconfig + logical, intent(in) :: is_model_pet class(KeywordEnforcer), optional, intent(in) :: unusable integer, optional, intent(out) :: rc type(GriddedComponentDriver) :: driver integer :: status - driver = make_driver(hconfig, _RC) + driver = make_driver(hconfig, is_model_pet, _RC) - call initialize_phases(driver, phases=GENERIC_INIT_PHASE_SEQUENCE, _RC) - call integrate(driver, _RC) - call driver%finalize(_RC) + if (is_model_pet) then + call initialize_phases(driver, phases=GENERIC_INIT_PHASE_SEQUENCE, _RC) + call integrate(driver, _RC) + call driver%finalize(_RC) + end if _RETURN(_SUCCESS) + _UNUSED_DUMMY(unusable) end subroutine MAPL_run_driver - function make_driver(hconfig, rc) result(driver) + function make_driver(hconfig, is_model_pet, rc) result(driver) use mapl3g_GenericGridComp, only: generic_SetServices => setServices type(GriddedComponentDriver) :: driver type(ESMF_HConfig), intent(inout) :: hconfig + logical, intent(in) :: is_model_pet integer, optional, intent(out) :: rc type(ESMF_GridComp) :: cap_gridcomp @@ -46,12 +51,15 @@ function make_driver(hconfig, rc) result(driver) character(:), allocatable :: cap_name integer :: status, user_status type(ESMF_HConfig) :: cap_gc_hconfig + integer, allocatable :: petList(:) cap_name = ESMF_HConfigAsString(hconfig, keystring='name', _RC) - ! TODO: Rename to MAPL_CreateGridComp() ? clock = create_clock(hconfig, _RC) + cap_gc_hconfig = ESMF_HConfigCreateAt(hconfig, keystring='cap_gc', _RC) - cap_gridcomp = create_grid_comp(cap_name, user_setservices(cap_setservices), cap_gc_hconfig, clock, _RC) + petList = get_model_pets(is_model_pet, _RC) + cap_gridcomp = create_grid_comp(cap_name, user_setservices(cap_setservices), cap_gc_hconfig, clock, petList=petList, _RC) + call ESMF_GridCompSetServices(cap_gridcomp, generic_setServices, userRC=user_status, _RC) _VERIFY(user_status) @@ -60,6 +68,29 @@ function make_driver(hconfig, rc) result(driver) _RETURN(_SUCCESS) end function make_driver + ! Create function that accepts a logical flag returns list of mpi processes that have .true.. + function get_model_pets(flag, rc) result(petList) + use mpi + integer, allocatable :: petList(:) + logical, intent(in) :: flag + integer, optional, intent(out) :: rc + + integer :: status + type(ESMF_VM) :: vm + logical, allocatable, target :: flags(:) + integer :: world_comm + integer :: i, petCount + + call ESMF_VMGetCurrent(vm, _RC) + call ESMF_VMGet(vm, petCount=petCount, mpiCommunicator=world_comm, _RC) + allocate(flags(petCount)) + call MPI_Allgather(flag, 1, MPI_LOGICAL, flags, 1, MPI_LOGICAL, world_comm, status) + _VERIFY(status) + petList = pack([(i, i=0,petCount-1)], flags) + + _RETURN(_SUCCESS) + end function get_model_pets + function create_clock(hconfig, rc) result(clock) type(ESMF_Clock) :: clock type(ESMF_HConfig), intent(in) :: hconfig diff --git a/mapl3g/GEOS.F90 b/mapl3g/GEOS.F90 index 16772acc13c9..d304caeec99e 100644 --- a/mapl3g/GEOS.F90 +++ b/mapl3g/GEOS.F90 @@ -11,9 +11,7 @@ program geos logical :: is_model_pet call MAPL_Initialize(hconfig, is_model_pet=is_model_pet, _RC) - if (is_model_pet) then - call run_geos(hconfig, _RC) - end if + call run_geos(hconfig, is_model_pet=is_model_pet, _RC) call MAPL_Finalize(_RC) contains @@ -21,8 +19,9 @@ program geos #undef I_AM_MAIN #include "MAPL_Generic.h" - subroutine run_geos(hconfig, rc) + subroutine run_geos(hconfig, is_model_pet, rc) type(ESMF_HConfig), intent(inout) :: hconfig + logical, intent(in) :: is_model_pet integer, optional, intent(out) :: rc logical :: has_cap_hconfig @@ -33,8 +32,7 @@ subroutine run_geos(hconfig, rc) _ASSERT(has_cap_hconfig, 'No cap section found in configuration file') cap_hconfig = ESMF_HConfigCreateAt(hconfig, keystring='cap', _RC) - call MAPL_run_driver(cap_hconfig, _RC) - + call MAPL_run_driver(cap_hconfig, is_model_pet=is_model_pet, _RC) call ESMF_HConfigDestroy(cap_hconfig, _RC) _RETURN(_SUCCESS) diff --git a/mapl3g/MaplFramework.F90 b/mapl3g/MaplFramework.F90 index 74cee49894ce..784f49b7b010 100644 --- a/mapl3g/MaplFramework.F90 +++ b/mapl3g/MaplFramework.F90 @@ -230,6 +230,10 @@ subroutine initialize_servers(this, unusable, is_model_pet, rc) call MPI_Group_range_incl(world_group, 1, [0, model_petCount-1, 1], model_group, _IERROR) call MPI_Comm_create_group(world_comm, model_group, 0, this%model_comm, _IERROR) call MPI_Group_free(model_group, _IERROR) + if (present(is_model_pet)) then + is_model_pet = (this%model_comm /= MPI_COMM_NULL) + end if + _RETURN_IF(this%model_comm == MPI_COMM_NULL) this%directory_service = DirectoryService(this%model_comm) call this%initialize_simple_oserver(_RC) _RETURN(_SUCCESS) @@ -248,12 +252,10 @@ subroutine initialize_servers(this, unusable, is_model_pet, rc) call lgr%warning("Unused nodes. Required %i0 nodes, but %i0 available.", required_ssis, ssicount) end if - model_pets = pack([(n, n = 0, size(ssiMap))], ssiMap <= num_model_ssis) call MPI_Group_incl(world_group, model_petCount, model_pets, model_group, _IERROR) - is_model_pet = (model_group /= MPI_GROUP_NULL) - call MPI_Comm_create_group(world_comm, model_group, 0, this%model_comm, _IERROR) + is_model_pet = (this%model_comm /= MPI_COMM_NULL) ssi_0 = num_model_ssis do i_server = 1, size(server_hconfigs) @@ -266,6 +268,7 @@ subroutine initialize_servers(this, unusable, is_model_pet, rc) call MPI_Comm_create_group(world_comm, server_group, 0, server_comm, _IERROR) call MPI_Comm_create_group(world_comm, model_server_group, 0, model_server_comm, _IERROR) + call MPI_Group_Free(model_group, _IERROR) call MPI_Group_Free(server_group, _IERROR) call MPI_Group_Free(model_server_group, _IERROR) @@ -351,6 +354,7 @@ subroutine initialize_simple_oserver(this, unusable, rc) integer :: status, stat_alloc type(ClientThread), pointer :: clientPtr + call init_IO_ClientManager(this%model_comm, _RC) allocate(this%o_server, source=MpiServer(this%model_comm, 'o_server', rc=status), stat=stat_alloc) _VERIFY(status) @@ -358,7 +362,7 @@ subroutine initialize_simple_oserver(this, unusable, rc) call this%directory_service%publish(PortInfo('o_server', this%o_server), this%o_server) clientPtr => o_Clients%current() call this%directory_service%connect_to_server('o_server', clientPtr, this%model_comm) - + _RETURN(_SUCCESS) _UNUSED_DUMMY(unusable) end subroutine initialize_simple_oserver @@ -388,7 +392,10 @@ subroutine finalize(this, unusable, rc) integer :: status - call this%directory_service%free_directory_resources() + if (this%model_comm /= MPI_COMM_NULL) then + call this%directory_service%free_directory_resources() + call MPI_Comm_free(this%model_comm, _IERROR) + end if call this%finalize_servers(_RC) !# call server_comm%free_comms(_RC) !# if (server_comm /= MPI_COMM_NULL) then @@ -452,7 +459,6 @@ subroutine finalize_esmf(this, unusable, rc) _RETURN_UNLESS(this%esmf_internally_initialized) - call MPI_Comm_free(this%model_comm, _IERROR) call ESMF_HConfigDestroy(this%mapl_hconfig, _RC) call ESMF_Finalize(_RC) @@ -583,4 +589,3 @@ end function get_num_ssis end module mapl3g_MaplFramework -