From b2a9b0fe669b0f3b9bc51fabebd24001889a030a Mon Sep 17 00:00:00 2001 From: Anton Steketee <79179784+anton-seaice@users.noreply.github.com> Date: Sat, 11 May 2024 06:23:20 +1000 Subject: [PATCH] Autoset nprocs and improve max_blocks estimate (#949) This change allows nprocs to be set to -1 in 'ice_in' and then the number of processors will be automatically detected. This change improves the automatic calculation of max_blocks to give a better (but still not foolproof) estimate of max_blocks if it is not set in ice_in. --- .../cicedyn/infrastructure/ice_domain.F90 | 48 +++++++++++-------- cicecore/shared/ice_distribution.F90 | 3 +- configuration/scripts/ice_in | 2 +- doc/source/user_guide/ug_case_settings.rst | 3 +- 4 files changed, 32 insertions(+), 24 deletions(-) diff --git a/cicecore/cicedyn/infrastructure/ice_domain.F90 b/cicecore/cicedyn/infrastructure/ice_domain.F90 index 8b680f2d4..df112eb50 100644 --- a/cicecore/cicedyn/infrastructure/ice_domain.F90 +++ b/cicecore/cicedyn/infrastructure/ice_domain.F90 @@ -101,7 +101,7 @@ subroutine init_domain_blocks ! This routine reads in domain information and calls the routine ! to set up the block decomposition. - use ice_distribution, only: processor_shape + use ice_distribution, only: processor_shape, proc_decomposition use ice_domain_size, only: ncat, nilyr, nslyr, max_blocks, & nx_global, ny_global, block_size_x, block_size_y use ice_fileunits, only: goto_nml @@ -112,7 +112,8 @@ subroutine init_domain_blocks !---------------------------------------------------------------------- integer (int_kind) :: & - nml_error ! namelist read error flag + nml_error, & ! namelist read error flag + nprocs_x, nprocs_y ! procs decomposed into blocks character(len=char_len) :: nml_name ! text namelist name character(len=char_len_long) :: tmpstr2 ! for namelist check @@ -216,21 +217,36 @@ subroutine init_domain_blocks call broadcast_scalar(maskhalo_bound, master_task) call broadcast_scalar(add_mpi_barriers, master_task) call broadcast_scalar(debug_blocks, master_task) - if (my_task == master_task) then - if (max_blocks < 1) then - max_blocks=( ((nx_global-1)/block_size_x + 1) * & - ((ny_global-1)/block_size_y + 1) - 1) / nprocs + 1 - max_blocks=max(1,max_blocks) - write(nu_diag,'(/,a52,i6,/)') & - '(ice_domain): max_block < 1: max_block estimated to ',max_blocks - endif - endif call broadcast_scalar(max_blocks, master_task) call broadcast_scalar(block_size_x, master_task) call broadcast_scalar(block_size_y, master_task) call broadcast_scalar(nx_global, master_task) call broadcast_scalar(ny_global, master_task) + ! Set nprocs if not set in namelist +#ifdef CESMCOUPLED + nprocs = get_num_procs() +#else + if (nprocs < 0) then + nprocs = get_num_procs() + else if (nprocs /= get_num_procs()) then + write(nu_diag,*) subname,' ERROR: nprocs, get_num_procs = ',nprocs,get_num_procs() + call abort_ice(subname//' ERROR: Input nprocs not same as system (e.g MPI) request', file=__FILE__, line=__LINE__) + endif +#endif + + ! Determine max_blocks if not set + if (max_blocks < 1) then + call proc_decomposition(nprocs, nprocs_x, nprocs_y) + max_blocks=((nx_global-1)/block_size_x/nprocs_x+1) * & + ((ny_global-1)/block_size_y/nprocs_y+1) + max_blocks=max(1,max_blocks) + if (my_task == master_task) then + write(nu_diag,'(/,a52,i6,/)') & + '(ice_domain): max_block < 1: max_block estimated to ',max_blocks + endif + endif + !---------------------------------------------------------------------- ! ! perform some basic checks on domain @@ -242,16 +258,6 @@ subroutine init_domain_blocks !*** domain size zero or negative !*** call abort_ice(subname//' ERROR: Invalid domain: size < 1', file=__FILE__, line=__LINE__) ! no domain - else if (nprocs /= get_num_procs()) then - !*** - !*** input nprocs does not match system (eg MPI) request - !*** -#if (defined CESMCOUPLED) - nprocs = get_num_procs() -#else - write(nu_diag,*) subname,' ERROR: nprocs, get_num_procs = ',nprocs,get_num_procs() - call abort_ice(subname//' ERROR: Input nprocs not same as system request', file=__FILE__, line=__LINE__) -#endif else if (nghost < 1) then !*** !*** must have at least 1 layer of ghost cells diff --git a/cicecore/shared/ice_distribution.F90 b/cicecore/shared/ice_distribution.F90 index 0f3f6c198..6e06069ab 100644 --- a/cicecore/shared/ice_distribution.F90 +++ b/cicecore/shared/ice_distribution.F90 @@ -41,7 +41,8 @@ module ice_distribution ice_distributionGet, & ice_distributionGetBlockLoc, & ice_distributionGetBlockID, & - create_local_block_ids + create_local_block_ids, & + proc_decomposition character (char_len), public :: & processor_shape ! 'square-pop' (approx) POP default config diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index 63a97d7d8..ad29e05ce 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -302,7 +302,7 @@ / &domain_nml - nprocs = 4 + nprocs = -1 nx_global = 100 ny_global = 116 block_size_x = 25 diff --git a/doc/source/user_guide/ug_case_settings.rst b/doc/source/user_guide/ug_case_settings.rst index 9f1f8a259..6deab8c11 100644 --- a/doc/source/user_guide/ug_case_settings.rst +++ b/doc/source/user_guide/ug_case_settings.rst @@ -369,7 +369,8 @@ domain_nml "``maskhalo_remap``", "logical", "mask unused halo cells for transport", "``.false.``" "``maskhalo_bound``", "logical", "mask unused halo cells for boundary updates", "``.false.``" "``max_blocks``", "integer", "maximum number of blocks per MPI task for memory allocation", "-1" - "``nprocs``", "integer", "number of processors to use", "-1" + "``nprocs``", "integer", "number of MPI tasks to use", "-1" + "", "``-1``", "find number of MPI tasks automatically", "" "``ns_boundary_type``", "``cyclic``", "periodic boundary conditions in y-direction", "``open``" "", "``open``", "Dirichlet boundary conditions in y", "" "", "``tripole``", "U-fold tripole boundary conditions in y", ""