From 6d78659bf016e9ef7c62b59937f12191e112049b Mon Sep 17 00:00:00 2001 From: Purnendu Chakraborty Date: Thu, 26 Jan 2023 20:47:18 -0500 Subject: [PATCH 01/57] Initialize GeosDycoreWrapper with bdt (timestep) --- .../fv3core/initialization/geos_wrapper.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 2bab75db..ea61ed7a 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -16,7 +16,7 @@ class GeosDycoreWrapper: Takes numpy arrays as inputs, returns a dictionary of numpy arrays as outputs """ - def __init__(self, namelist: f90nml.Namelist, comm: pace.util.Comm, backend: str): + def __init__(self, namelist: f90nml.Namelist, bdt: float, comm: pace.util.Comm, backend: str): # Make a custom performance collector for the GEOS wrapper self.perf_collector = PerformanceCollector("GEOS wrapper", comm) @@ -69,17 +69,18 @@ def __init__(self, namelist: f90nml.Namelist, comm: pace.util.Comm, backend: str quantity_factory=quantity_factory ) - self.dycore_state.bdt = float(namelist["dt_atmos"]) - if "fv_core_nml" in namelist.keys(): - self.dycore_state.bdt = ( - float(namelist["dt_atmos"]) / namelist["fv_core_nml"]["k_split"] - ) - elif "dycore_config" in namelist.keys(): - self.dycore_state.bdt = ( - float(namelist["dt_atmos"]) / namelist["dycore_config"]["k_split"] - ) - else: - raise KeyError("Cannot find k_split in namelist") + # self.dycore_state.bdt = float(namelist["dt_atmos"]) + # if "fv_core_nml" in namelist.keys(): + # self.dycore_state.bdt = ( + # float(namelist["dt_atmos"]) / namelist["fv_core_nml"]["k_split"] + # ) + # elif "dycore_config" in namelist.keys(): + # self.dycore_state.bdt = ( + # float(namelist["dt_atmos"]) / namelist["dycore_config"]["k_split"] + # ) + # else: + # raise KeyError("Cannot find k_split in namelist") + 
self.dycore_state.bdt = bdt damping_coefficients = pace.util.grid.DampingCoefficients.new_from_metric_terms( metric_terms @@ -92,7 +93,7 @@ def __init__(self, namelist: f90nml.Namelist, comm: pace.util.Comm, backend: str quantity_factory=quantity_factory, damping_coefficients=damping_coefficients, config=self.dycore_config, - timestep=timedelta(seconds=self.dycore_config.dt_atmos), + timestep=timedelta(seconds=self.dycore_state.bdt), phis=self.dycore_state.phis, state=self.dycore_state, ) @@ -128,7 +129,7 @@ def __call__( diss_estd: np.ndarray, ) -> Dict[str, np.ndarray]: - with self.perf_collector.timestep_timer.clock("move_to_pace"): + with self.perf_collector.timestep_timer.clock("numpy-to-dycore"): self.dycore_state = self._put_fortran_data_in_dycore( u, v, @@ -156,21 +157,20 @@ def __call__( diss_estd, ) - with self.perf_collector.timestep_timer.clock("dycore"): + with self.perf_collector.timestep_timer.clock("DynamicalCore"): self.dynamical_core.step_dynamics( state=self.dycore_state, timer=self.perf_collector.timestep_timer ) - with self.perf_collector.timestep_timer.clock("move_to_fortran"): + with self.perf_collector.timestep_timer.clock("dycore-to-numpy"): self.output_dict = self._prep_outputs_for_geos() - # Collect performance of the timestep and write - # a json file for rank 0 + # Collect performance of the timestep and write a json file for rank 0 self.perf_collector.collect_performance() self.perf_collector.write_out_rank_0( backend=self.backend, - is_orchestrated=False, # could be infered from config - dt_atmos=self.dycore_config.dt_atmos, + is_orchestrated=False, # could be inferred from config + dt_atmos=self.dycore_state.bdt, sim_status="Ongoing", ) From 0a3e8572691887cc63f74eff01de28b84b1c717a Mon Sep 17 00:00:00 2001 From: Purnendu Chakraborty Date: Thu, 26 Jan 2023 20:48:55 -0500 Subject: [PATCH 02/57] Use GEOS version of constants --- util/pace/util/constants.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff 
--git a/util/pace/util/constants.py b/util/pace/util/constants.py index a7fe5a53..8eb5143b 100644 --- a/util/pace/util/constants.py +++ b/util/pace/util/constants.py @@ -35,8 +35,21 @@ # The FV3GFS model ships with two sets of constants, one used in the GFS physics # package and the other used for the Dycore. Their difference are small but significant # Our Fortran executable on GCE has GFS_PHYS=True -GFS_PHYS = True -if GFS_PHYS: +CONST_VERSION = 'GEOS' +if CONST_VERSION == 'GEOS': + RADIUS = 6.371e6 + PI = 3.14159265358979323846 + OMEGA = 2.0*PI/86164.0 + GRAV = 9.80665 + RGRAV = 1.0 / GRAV + RDGAS = 8314.47/28.965 + RVGAS = 8314.47/18.015 + HLV = 2.4665E6 + HLF = 3.3370E5 + KAPPA = RDGAS/(3.5*RDGAS) + CP_AIR = RDGAS/KAPPA + TFREEZE = 273.15 +elif CONST_VERSION == 'GFS': RADIUS = 6.3712e6 # Radius of the Earth [m] PI = 3.1415926535897931 OMEGA = 7.2921e-5 # Rotation of the earth From 0a8d7052da93567c7e10b357b5e64295db719da5 Mon Sep 17 00:00:00 2001 From: Purnendu Chakraborty Date: Fri, 27 Jan 2023 13:02:13 -0500 Subject: [PATCH 03/57] 1. Add qcld to the list of tracers beings advected 2. Made GEOS specific changes to thresholds in saturation adjustment --- fv3core/pace/fv3core/stencils/fv_dynamics.py | 3 ++- fv3core/pace/fv3core/stencils/saturation_adjustment.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fv3core/pace/fv3core/stencils/fv_dynamics.py b/fv3core/pace/fv3core/stencils/fv_dynamics.py index 8b4efad1..de146fa0 100644 --- a/fv3core/pace/fv3core/stencils/fv_dynamics.py +++ b/fv3core/pace/fv3core/stencils/fv_dynamics.py @@ -33,7 +33,7 @@ # ncnst = Atm(mytile)%ncnst # pnats = Atm(mytile)%flagstruct%pnats # here we hard-coded it because 8 is the only supported value, refactor this later! 
-NQ = 8 # state.nq_tot - spec.namelist.dnats +NQ = 9 # state.nq_tot - spec.namelist.dnats def pt_adjust( @@ -555,6 +555,7 @@ def _compute(self, state: DycoreState, timer: pace.util.Timer): state.w, self._cappa, state.q_con, + # Since NQ=9, we shouldn't need to pass qcld explicitly state.qcld, state.pkz, state.pk, diff --git a/fv3core/pace/fv3core/stencils/saturation_adjustment.py b/fv3core/pace/fv3core/stencils/saturation_adjustment.py index f8eeaa96..7ffe45b5 100644 --- a/fv3core/pace/fv3core/stencils/saturation_adjustment.py +++ b/fv3core/pace/fv3core/stencils/saturation_adjustment.py @@ -901,14 +901,14 @@ def satadjust( # icloud_f = 0: bug - fixed # icloud_f = 1: old fvgfs gfdl) mp implementation # icloud_f = 2: binary cloud scheme (0 / 1) - if rh > 0.75 and qpz > 1.0e-8: + if rh > 0.75 and qpz > 1.0e-6: dq = hvar * qpz q_plus = qpz + dq q_minus = qpz - dq if icloud_f == 2: # TODO untested if qpz > qstar: qa = 1.0 - elif (qstar < q_plus) and (q_cond > 1.0e-8): + elif (qstar < q_plus) and (q_cond > 1.0e-6): qa = min(1.0, ((q_plus - qstar) / dq) ** 2) else: qa = 0.0 @@ -924,7 +924,7 @@ def satadjust( else: qa = 0.0 # impose minimum cloudiness if substantial q_cond exist - if q_cond > 1.0e-8: + if q_cond > 1.0e-6: qa = max(cld_min, qa) qa = min(1, qa) else: From 3b73d71e75562572b162a0caa2cfe1688f24965e Mon Sep 17 00:00:00 2001 From: Purnendu Chakraborty Date: Fri, 27 Jan 2023 17:37:48 -0500 Subject: [PATCH 04/57] Accumulate diss_est --- fv3core/pace/fv3core/stencils/d_sw.py | 37 +++++++++++++++++++-------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/fv3core/pace/fv3core/stencils/d_sw.py b/fv3core/pace/fv3core/stencils/d_sw.py index 21f25151..8be6dcd5 100644 --- a/fv3core/pace/fv3core/stencils/d_sw.py +++ b/fv3core/pace/fv3core/stencils/d_sw.py @@ -94,8 +94,6 @@ def heat_diss( ke_bg (in): """ with computation(PARALLEL), interval(...): - heat_source = 0.0 - diss_est = 0.0 if damp_w > 1e-5: dd8 = ke_bg * abs(dt) dw = (fx2 - fx2[1, 0, 0] + fy2 - 
fy2[0, 1, 0]) * rarea @@ -503,7 +501,6 @@ def heat_source_from_vorticity_damping( rdx: FloatFieldIJ, rdy: FloatFieldIJ, heat_source: FloatField, - heat_source_total: FloatField, dissipation_estimate: FloatField, kinetic_energy_fraction_to_damp: FloatFieldK, ): @@ -526,8 +523,7 @@ def heat_source_from_vorticity_damping( rdy (in): 1 / dy heat_source (inout): heat source from vorticity damping implied by energy conservation - heat_source_total (inout): accumulated heat source - dissipation_estimate (out): dissipation estimate, only calculated if + dissipation_estimate (inout): dissipation estimate, only calculated if calculate_dissipation_estimate is 1. Used for stochastic kinetic energy backscatter (skeb) routine. kinetic_energy_fraction_to_damp (in): the fraction of kinetic energy @@ -572,11 +568,21 @@ def heat_source_from_vorticity_damping( if __INLINED((d_con > dcon_threshold) or do_stochastic_ke_backscatter): with horizontal(region[local_is : local_ie + 1, local_js : local_je + 1]): - heat_source_total = heat_source_total + heat_source if __INLINED(do_stochastic_ke_backscatter): dissipation_estimate -= dampterm +def accumulate_heat_source_and_dissipation_estimate( + heat_source: FloatField, + heat_source_total: FloatField, + diss_est: FloatField, + diss_est_total: FloatField, +): + with computation(PARALLEL), interval(...): + heat_source_total += heat_source + diss_est_total += diss_est + + # TODO(eddied): Had to split this into a separate stencil to get this to validate # with GTC, suspect a merging issue... 
def update_u_and_v( @@ -763,6 +769,7 @@ def make_quantity(): return quantity_factory.zeros([X_DIM, Y_DIM, Z_DIM], units="unknown") self._tmp_heat_s = make_quantity() + self._tmp_diss_e = make_quantity() self._vort_x_delta = make_quantity() self._vort_y_delta = make_quantity() self._dt_kinetic_energy_on_cell_corners = make_quantity() @@ -912,6 +919,12 @@ def make_quantity(): }, ) ) + self._accumulate_heat_source_and_dissipation_estimate_stencil = ( + stencil_factory.from_dims_halo( + func=accumulate_heat_source_and_dissipation_estimate, + compute_dims=[X_DIM, Y_DIM, Z_DIM], + ) + ) self._compute_vorticity_stencil = stencil_factory.from_dims_halo( compute_vorticity, compute_dims=[X_DIM, Y_DIM, Z_DIM], @@ -1046,7 +1059,7 @@ def __call__( w, self.grid_data.rarea, self._tmp_heat_s, - diss_est, + self._tmp_diss_e, self._tmp_dw, self._column_namelist["damp_w"], self._column_namelist["ke_bg"], @@ -1225,11 +1238,15 @@ def __call__( self.grid_data.rdx, self.grid_data.rdy, self._tmp_heat_s, - heat_source, - diss_est, + self._tmp_diss_e, self._column_namelist["d_con"], ) - + self._accumulate_heat_source_and_dissipation_estimate_stencil( + self._tmp_heat_s, + heat_source, + self._tmp_diss_e, + diss_est + ) self._update_u_and_v_stencil( self._tmp_ut, self._tmp_vt, From a68d1602cc20e7a8841d602bc75e920fe2332a1d Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Fri, 24 Feb 2023 17:11:04 +0000 Subject: [PATCH 05/57] Allow GEOS_WRAPPER to process device data --- .../fv3core/initialization/geos_wrapper.py | 388 +++++++++++------- 1 file changed, 238 insertions(+), 150 deletions(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 00b6a1b7..5c01b256 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -1,3 +1,4 @@ +import enum import os from datetime import timedelta from typing import Dict @@ -9,6 +10,17 @@ from pace import fv3core 
from pace.driver.performance.collector import PerformanceCollector from pace.dsl.dace import DaceConfig, orchestrate +from pace.dsl.gt4py_utils import is_gpu_backend + + +@enum.unique +class MemorySpace(enum.Enum): + HOST = 0 + DEVICE = 1 + + +def assign_no_copy(A, B): + A = B class GeosDycoreWrapper: @@ -23,6 +35,7 @@ def __init__( bdt: int, comm: pace.util.Comm, backend: str, + fortran_mem_space: MemorySpace = MemorySpace.HOST, ): # Look for an override to run on a single node gtfv3_single_rank_override = int(os.getenv("GTFV3_SINGLE_RANK_OVERRIDE", -1)) @@ -112,6 +125,11 @@ def __init__( state=self.dycore_state, ) + self._fortran_mem_space = fortran_mem_space + self._pace_mem_space = ( + MemorySpace.DEVICE if is_gpu_backend(backend) else MemorySpace.HOST + ) + self.output_dict: Dict[str, np.ndarray] = {} self._allocate_output_dir() @@ -300,162 +318,232 @@ def _prep_outputs_for_geos(self) -> Dict[str, np.ndarray]: iec = self._grid_indexing.iec + 1 jec = self._grid_indexing.jec + 1 - pace.util.utils.safe_assign_array( - output_dict["u"], self.dycore_state.u.data[:-1, :, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["v"], self.dycore_state.v.data[:, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["w"], self.dycore_state.w.data[:-1, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["ua"], self.dycore_state.ua.data[:-1, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["va"], self.dycore_state.va.data[:-1, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["uc"], self.dycore_state.uc.data[:, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["vc"], self.dycore_state.vc.data[:-1, :, :-1] - ) + if self._fortran_mem_space != self._pace_mem_space: + pace.util.utils.safe_assign_array( + output_dict["u"], self.dycore_state.u.data[:-1, :, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["v"], self.dycore_state.v.data[:, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + 
output_dict["w"], self.dycore_state.w.data[:-1, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["ua"], self.dycore_state.ua.data[:-1, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["va"], self.dycore_state.va.data[:-1, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["uc"], self.dycore_state.uc.data[:, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["vc"], self.dycore_state.vc.data[:-1, :, :-1] + ) - pace.util.utils.safe_assign_array( - output_dict["delz"], self.dycore_state.delz.data[:-1, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["pt"], self.dycore_state.pt.data[:-1, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["delp"], self.dycore_state.delp.data[:-1, :-1, :-1] - ) + pace.util.utils.safe_assign_array( + output_dict["delz"], self.dycore_state.delz.data[:-1, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["pt"], self.dycore_state.pt.data[:-1, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["delp"], self.dycore_state.delp.data[:-1, :-1, :-1] + ) - pace.util.utils.safe_assign_array( - output_dict["mfxd"], - self.dycore_state.mfxd.data[isc : iec + 1, jsc:jec, :-1], - ) - pace.util.utils.safe_assign_array( - output_dict["mfyd"], - self.dycore_state.mfyd.data[isc:iec, jsc : jec + 1, :-1], - ) - pace.util.utils.safe_assign_array( - output_dict["cxd"], self.dycore_state.cxd.data[isc : iec + 1, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["cyd"], self.dycore_state.cyd.data[:-1, jsc : jec + 1, :-1] - ) + pace.util.utils.safe_assign_array( + output_dict["mfxd"], + self.dycore_state.mfxd.data[isc : iec + 1, jsc:jec, :-1], + ) + pace.util.utils.safe_assign_array( + output_dict["mfyd"], + self.dycore_state.mfyd.data[isc:iec, jsc : jec + 1, :-1], + ) + pace.util.utils.safe_assign_array( + output_dict["cxd"], self.dycore_state.cxd.data[isc : iec + 1, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + 
output_dict["cyd"], self.dycore_state.cyd.data[:-1, jsc : jec + 1, :-1] + ) - pace.util.utils.safe_assign_array( - output_dict["ps"], self.dycore_state.ps.data[:-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["pe"], - self.dycore_state.pe.data[isc - 1 : iec + 1, jsc - 1 : jec + 1, :], - ) - pace.util.utils.safe_assign_array( - output_dict["pk"], self.dycore_state.pk.data[isc:iec, jsc:jec, :] - ) - pace.util.utils.safe_assign_array( - output_dict["peln"], self.dycore_state.peln.data[isc:iec, jsc:jec, :] - ) - pace.util.utils.safe_assign_array( - output_dict["pkz"], self.dycore_state.pkz.data[isc:iec, jsc:jec, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["phis"], self.dycore_state.phis.data[:-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["q_con"], self.dycore_state.q_con.data[:-1, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["omga"], self.dycore_state.omga.data[:-1, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["diss_estd"], self.dycore_state.diss_estd.data[:-1, :-1, :-1] - ) + pace.util.utils.safe_assign_array( + output_dict["ps"], self.dycore_state.ps.data[:-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["pe"], + self.dycore_state.pe.data[isc - 1 : iec + 1, jsc - 1 : jec + 1, :], + ) + pace.util.utils.safe_assign_array( + output_dict["pk"], self.dycore_state.pk.data[isc:iec, jsc:jec, :] + ) + pace.util.utils.safe_assign_array( + output_dict["peln"], self.dycore_state.peln.data[isc:iec, jsc:jec, :] + ) + pace.util.utils.safe_assign_array( + output_dict["pkz"], self.dycore_state.pkz.data[isc:iec, jsc:jec, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["phis"], self.dycore_state.phis.data[:-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["q_con"], self.dycore_state.q_con.data[:-1, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["omga"], self.dycore_state.omga.data[:-1, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + 
output_dict["diss_estd"], + self.dycore_state.diss_estd.data[:-1, :-1, :-1], + ) - pace.util.utils.safe_assign_array( - output_dict["qvapor"], self.dycore_state.qvapor.data[:-1, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["qliquid"], self.dycore_state.qliquid.data[:-1, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["qice"], self.dycore_state.qice.data[:-1, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["qrain"], self.dycore_state.qrain.data[:-1, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["qsnow"], self.dycore_state.qsnow.data[:-1, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["qgraupel"], self.dycore_state.qgraupel.data[:-1, :-1, :-1] - ) - pace.util.utils.safe_assign_array( - output_dict["qcld"], self.dycore_state.qcld.data[:-1, :-1, :-1] - ) + pace.util.utils.safe_assign_array( + output_dict["qvapor"], self.dycore_state.qvapor.data[:-1, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["qliquid"], self.dycore_state.qliquid.data[:-1, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["qice"], self.dycore_state.qice.data[:-1, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["qrain"], self.dycore_state.qrain.data[:-1, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["qsnow"], self.dycore_state.qsnow.data[:-1, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["qgraupel"], self.dycore_state.qgraupel.data[:-1, :-1, :-1] + ) + pace.util.utils.safe_assign_array( + output_dict["qcld"], self.dycore_state.qcld.data[:-1, :-1, :-1] + ) + else: + output_dict["u"] = self.dycore_state.u.data[:-1, :, :-1] + output_dict["v"] = self.dycore_state.v.data[:, :-1, :-1] + output_dict["w"] = self.dycore_state.w.data[:-1, :-1, :-1] + output_dict["ua"] = self.dycore_state.ua.data[:-1, :-1, :-1] + output_dict["va"] = self.dycore_state.va.data[:-1, :-1, :-1] + output_dict["uc"] = self.dycore_state.uc.data[:, :-1, :-1] + 
output_dict["vc"] = self.dycore_state.vc.data[:-1, :, :-1] + output_dict["delz"] = self.dycore_state.delz.data[:-1, :-1, :-1] + output_dict["pt"] = self.dycore_state.pt.data[:-1, :-1, :-1] + output_dict["delp"] = self.dycore_state.delp.data[:-1, :-1, :-1] + output_dict["mfxd"] = self.dycore_state.mfxd.data[ + isc : iec + 1, jsc:jec, :-1 + ] + output_dict["mfyd"] = self.dycore_state.mfyd.data[ + isc:iec, jsc : jec + 1, :-1 + ] + output_dict["cxd"] = self.dycore_state.cxd.data[isc : iec + 1, :-1, :-1] + output_dict["cyd"] = self.dycore_state.cyd.data[:-1, jsc : jec + 1, :-1] + output_dict["ps"] = self.dycore_state.ps.data[:-1, :-1] + output_dict["pe"] = self.dycore_state.pe.data[ + isc - 1 : iec + 1, jsc - 1 : jec + 1, : + ] + output_dict["pk"] = self.dycore_state.pk.data[isc:iec, jsc:jec, :] + output_dict["peln"] = self.dycore_state.peln.data[isc:iec, jsc:jec, :] + output_dict["pkz"] = self.dycore_state.pkz.data[isc:iec, jsc:jec, :-1] + output_dict["phis"] = self.dycore_state.phis.data[:-1, :-1] + output_dict["q_con"] = self.dycore_state.q_con.data[:-1, :-1, :-1] + output_dict["omga"] = self.dycore_state.omga.data[:-1, :-1, :-1] + output_dict["diss_estd"] = self.dycore_state.diss_estd.data[:-1, :-1, :-1] + output_dict["qvapor"] = self.dycore_state.qvapor.data[:-1, :-1, :-1] + output_dict["qliquid"] = self.dycore_state.qliquid.data[:-1, :-1, :-1] + output_dict["qice"] = self.dycore_state.qice.data[:-1, :-1, :-1] + output_dict["qrain"] = self.dycore_state.qrain.data[:-1, :-1, :-1] + output_dict["qsnow"] = self.dycore_state.qsnow.data[:-1, :-1, :-1] + output_dict["qgraupel"] = self.dycore_state.qgraupel.data[:-1, :-1, :-1] + output_dict["qcld"] = self.dycore_state.qcld.data[:-1, :-1, :-1] return output_dict def _allocate_output_dir(self): + if self._fortran_mem_space != self._pace_mem_space: + nhalo = self._grid_indexing.n_halo + shape_centered = self._grid_indexing.domain_full(add=(0, 0, 0)) + shape_x_interface = self._grid_indexing.domain_full(add=(1, 0, 0)) + 
shape_y_interface = self._grid_indexing.domain_full(add=(0, 1, 0)) + shape_z_interface = self._grid_indexing.domain_full(add=(0, 0, 1)) + shape_2d = shape_centered[:-1] + + self.output_dict["u"] = np.empty((shape_y_interface)) + self.output_dict["v"] = np.empty((shape_x_interface)) + self.output_dict["w"] = np.empty((shape_centered)) + self.output_dict["ua"] = np.empty((shape_centered)) + self.output_dict["va"] = np.empty((shape_centered)) + self.output_dict["uc"] = np.empty((shape_x_interface)) + self.output_dict["vc"] = np.empty((shape_y_interface)) + + self.output_dict["delz"] = np.empty((shape_centered)) + self.output_dict["pt"] = np.empty((shape_centered)) + self.output_dict["delp"] = np.empty((shape_centered)) + + self.output_dict["mfxd"] = np.empty( + (self._grid_indexing.domain_full(add=(1 - 2 * nhalo, -2 * nhalo, 0))) + ) + self.output_dict["mfyd"] = np.empty( + (self._grid_indexing.domain_full(add=(-2 * nhalo, 1 - 2 * nhalo, 0))) + ) + self.output_dict["cxd"] = np.empty( + (self._grid_indexing.domain_full(add=(1 - 2 * nhalo, 0, 0))) + ) + self.output_dict["cyd"] = np.empty( + (self._grid_indexing.domain_full(add=(0, 1 - 2 * nhalo, 0))) + ) - nhalo = self._grid_indexing.n_halo - shape_centered = self._grid_indexing.domain_full(add=(0, 0, 0)) - shape_x_interface = self._grid_indexing.domain_full(add=(1, 0, 0)) - shape_y_interface = self._grid_indexing.domain_full(add=(0, 1, 0)) - shape_z_interface = self._grid_indexing.domain_full(add=(0, 0, 1)) - shape_2d = shape_centered[:-1] - - self.output_dict["u"] = np.empty((shape_y_interface)) - self.output_dict["v"] = np.empty((shape_x_interface)) - self.output_dict["w"] = np.empty((shape_centered)) - self.output_dict["ua"] = np.empty((shape_centered)) - self.output_dict["va"] = np.empty((shape_centered)) - self.output_dict["uc"] = np.empty((shape_x_interface)) - self.output_dict["vc"] = np.empty((shape_y_interface)) - - self.output_dict["delz"] = np.empty((shape_centered)) - self.output_dict["pt"] = 
np.empty((shape_centered)) - self.output_dict["delp"] = np.empty((shape_centered)) - - self.output_dict["mfxd"] = np.empty( - (self._grid_indexing.domain_full(add=(1 - 2 * nhalo, -2 * nhalo, 0))) - ) - self.output_dict["mfyd"] = np.empty( - (self._grid_indexing.domain_full(add=(-2 * nhalo, 1 - 2 * nhalo, 0))) - ) - self.output_dict["cxd"] = np.empty( - (self._grid_indexing.domain_full(add=(1 - 2 * nhalo, 0, 0))) - ) - self.output_dict["cyd"] = np.empty( - (self._grid_indexing.domain_full(add=(0, 1 - 2 * nhalo, 0))) - ) - - self.output_dict["ps"] = np.empty((shape_2d)) - self.output_dict["pe"] = np.empty( - (self._grid_indexing.domain_full(add=(2 - 2 * nhalo, 2 - 2 * nhalo, 1))) - ) - self.output_dict["pk"] = np.empty( - (self._grid_indexing.domain_full(add=(-2 * nhalo, -2 * nhalo, 1))) - ) - self.output_dict["peln"] = np.empty( - (self._grid_indexing.domain_full(add=(-2 * nhalo, -2 * nhalo, 1))) - ) - self.output_dict["pkz"] = np.empty( - (self._grid_indexing.domain_full(add=(-2 * nhalo, -2 * nhalo, 0))) - ) - self.output_dict["phis"] = np.empty((shape_2d)) - self.output_dict["q_con"] = np.empty((shape_centered)) - self.output_dict["omga"] = np.empty((shape_centered)) - self.output_dict["diss_estd"] = np.empty((shape_centered)) - - self.output_dict["qvapor"] = np.empty((shape_centered)) - self.output_dict["qliquid"] = np.empty((shape_centered)) - self.output_dict["qice"] = np.empty((shape_centered)) - self.output_dict["qrain"] = np.empty((shape_centered)) - self.output_dict["qsnow"] = np.empty((shape_centered)) - self.output_dict["qgraupel"] = np.empty((shape_centered)) - self.output_dict["qcld"] = np.empty((shape_centered)) + self.output_dict["ps"] = np.empty((shape_2d)) + self.output_dict["pe"] = np.empty( + (self._grid_indexing.domain_full(add=(2 - 2 * nhalo, 2 - 2 * nhalo, 1))) + ) + self.output_dict["pk"] = np.empty( + (self._grid_indexing.domain_full(add=(-2 * nhalo, -2 * nhalo, 1))) + ) + self.output_dict["peln"] = np.empty( + 
(self._grid_indexing.domain_full(add=(-2 * nhalo, -2 * nhalo, 1))) + ) + self.output_dict["pkz"] = np.empty( + (self._grid_indexing.domain_full(add=(-2 * nhalo, -2 * nhalo, 0))) + ) + self.output_dict["phis"] = np.empty((shape_2d)) + self.output_dict["q_con"] = np.empty((shape_centered)) + self.output_dict["omga"] = np.empty((shape_centered)) + self.output_dict["diss_estd"] = np.empty((shape_centered)) + + self.output_dict["qvapor"] = np.empty((shape_centered)) + self.output_dict["qliquid"] = np.empty((shape_centered)) + self.output_dict["qice"] = np.empty((shape_centered)) + self.output_dict["qrain"] = np.empty((shape_centered)) + self.output_dict["qsnow"] = np.empty((shape_centered)) + self.output_dict["qgraupel"] = np.empty((shape_centered)) + self.output_dict["qcld"] = np.empty((shape_centered)) + else: + self.output_dict["u"] = None + self.output_dict["v"] = None + self.output_dict["w"] = None + self.output_dict["ua"] = None + self.output_dict["va"] = None + self.output_dict["uc"] = None + self.output_dict["vc"] = None + self.output_dict["delz"] = None + self.output_dict["pt"] = None + self.output_dict["delp"] = None + self.output_dict["mfxd"] = None + self.output_dict["mfyd"] = None + self.output_dict["cxd"] = None + self.output_dict["cyd"] = None + self.output_dict["ps"] = None + self.output_dict["pe"] = None + self.output_dict["pk"] = None + self.output_dict["peln"] = None + self.output_dict["pkz"] = None + self.output_dict["phis"] = None + self.output_dict["q_con"] = None + self.output_dict["omga"] = None + self.output_dict["diss_estd"] = None + self.output_dict["qvapor"] = None + self.output_dict["qliquid"] = None + self.output_dict["qice"] = None + self.output_dict["qrain"] = None + self.output_dict["qsnow"] = None + self.output_dict["qgraupel"] = None + self.output_dict["qcld"] = None From 33ba53f5378eaf0d1af627e5fa3e98ba82b4863e Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 28 Feb 2023 17:23:05 +0000 Subject: [PATCH 06/57] Add clear to 
collector for 3rd party use. GEOS pass down timings to caller --- driver/pace/driver/performance/collector.py | 4 ++++ .../fv3core/initialization/geos_wrapper.py | 23 ++++++++++--------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/driver/pace/driver/performance/collector.py b/driver/pace/driver/performance/collector.py index 272e72be..cbc6c62a 100644 --- a/driver/pace/driver/performance/collector.py +++ b/driver/pace/driver/performance/collector.py @@ -66,6 +66,10 @@ def __init__(self, experiment_name: str, comm: pace.util.Comm): self.experiment_name = experiment_name self.comm = comm + def clear(self): + self.times_per_step = [] + self.hits_per_step = [] + def collect_performance(self): """ Take the accumulated timings and flush them into a new entry diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 5c01b256..64fbb6ff 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -1,7 +1,7 @@ import enum import os from datetime import timedelta -from typing import Dict +from typing import Dict, List, Tuple import f90nml import numpy as np @@ -135,7 +135,7 @@ def __init__( def _critical_path(self): """Top-level orchestration function""" - with self.perf_collector.timestep_timer.clock("dycore"): + with self.perf_collector.timestep_timer.clock("step_dynamics"): self.dynamical_core.step_dynamics( state=self.dycore_state, timer=self.perf_collector.timestep_timer, @@ -143,6 +143,7 @@ def _critical_path(self): def __call__( self, + timings: Dict[str, List[float]], u: np.ndarray, v: np.ndarray, w: np.ndarray, @@ -167,7 +168,7 @@ def __call__( cxd: np.ndarray, cyd: np.ndarray, diss_estd: np.ndarray, - ) -> Dict[str, np.ndarray]: + ) -> Tuple[Dict[str, np.ndarray], Dict[str, List[float]]]: with self.perf_collector.timestep_timer.clock("move_to_pace"): self.dycore_state = self._put_fortran_data_in_dycore( @@ -206,14 
+207,14 @@ def __call__( # Collect performance of the timestep and write # a json file for rank 0 self.perf_collector.collect_performance() - self.perf_collector.write_out_rank_0( - backend=self.backend, - is_orchestrated=self._is_orchestrated, - dt_atmos=self.dycore_config.dt_atmos, - sim_status="Ongoing", - ) - - return self.output_dict + for k, v in self.perf_collector.times_per_step[0].items(): + if k not in timings.keys(): + timings[k] = [v] + else: + timings[k].append(v) + self.perf_collector.clear() + + return self.output_dict, timings def _put_fortran_data_in_dycore( self, From 2327cbe6fa39b4b21aa85169ab056a232e013535 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Fri, 3 Mar 2023 20:36:19 +0000 Subject: [PATCH 07/57] Make kernel analysis run a copy stencil to compute local bandwith Parametrize tool with backend, output format --- driver/pace/driver/tools.py | 38 +++++++++- dsl/pace/dsl/dace/utils.py | 144 +++++++++++++++++++++++++++--------- 2 files changed, 146 insertions(+), 36 deletions(-) diff --git a/driver/pace/driver/tools.py b/driver/pace/driver/tools.py index c1dc5181..c58eb6fa 100644 --- a/driver/pace/driver/tools.py +++ b/driver/pace/driver/tools.py @@ -27,14 +27,48 @@ type=click.STRING, ) @click.option("--report_detail", is_flag=True, type=click.BOOL, default=False) -def command_line(action: str, sdfg_path: Optional[str], report_detail: Optional[bool]): +@click.option( + "--hardware_bw_in_gb_s", + required=False, + type=click.FLOAT, + default=0.0, +) +@click.option( + "--output_format", + required=False, + type=click.STRING, + default=None, +) +@click.option( + "--backend", + required=False, + type=click.STRING, + default="dace:gpu", +) +def command_line( + action: str, + sdfg_path: Optional[str], + report_detail: Optional[bool], + hardware_bw_in_gb_s: Optional[float], + output_format: Optional[str], + backend: Optional[str], +): """ Run tooling. 
""" if action == ACTION_SDFG_MEMORY_STATIC_ANALYSIS: print(memory_static_analysis_from_path(sdfg_path, detail_report=report_detail)) elif action == ACTION_SDFG_KERNEL_THEORETICAL_TIMING: - print(kernel_theoretical_timing_from_path(sdfg_path)) + print( + kernel_theoretical_timing_from_path( + sdfg_path, + hardware_bw_in_GB_s=( + None if hardware_bw_in_gb_s == 0 else hardware_bw_in_gb_s + ), + backend=backend, + output_format=output_format, + ) + ) if __name__ == "__main__": diff --git a/dsl/pace/dsl/dace/utils.py b/dsl/pace/dsl/dace/utils.py index cb268d3b..68b61331 100644 --- a/dsl/pace/dsl/dace/utils.py +++ b/dsl/pace/dsl/dace/utils.py @@ -1,15 +1,20 @@ import time from dataclasses import dataclass, field -from typing import Dict, List +from typing import Dict, List, Optional import dace +import numpy as np from dace.transformation.helpers import get_parent_map from pace.dsl.dace.dace_config import DaceConfig +from pace.dsl.typing import Float +from pace.util._optional_imports import cupy as cp from pace.util.logging import pace_log +# ---------------------------------------------------------- # Rough timer & log for major operations of DaCe build stack +# ---------------------------------------------------------- class DaCeProgress: """Timer and log to track build progress""" @@ -48,6 +53,9 @@ def _is_ref(sd: dace.sdfg.SDFG, aname: str): return found +# ---------------------------------------------------------- +# Memory analyser from SDFG +# ---------------------------------------------------------- @dataclass class ArrayReport: name: str = "" @@ -175,19 +183,45 @@ def memory_static_analysis_from_path(sdfg_path: str, detail_report=False) -> str ) -# TODO (floriand): in order for the timing analysis to be realistic the reference -# bandwidth of the hardware should be measured with GT4Py & simple in/out copy -# stencils. This allows to both measure the _actual_ deployed hardware and -# size it against the current GT4Py version. 
-# Below we bypass this needed automation by writing the P100 bw on Piz Daint -# measured with the above strategy. -# A better tool would allow this measure with a simple command and allow -# a one command that measure bw & report kernel analysis in one command -_HARDWARE_BW_GB_S = {"P100": 492.0} +# ---------------------------------------------------------- +# Theoritical bandwith from SDFG +# ---------------------------------------------------------- + +from gt4py.cartesian.gtscript import PARALLEL, computation, interval + +from pace.dsl.stencil import CompilationConfig, FrozenStencil, StencilConfig +from pace.dsl.typing import FloatField + + +def copy_defn(q_in: FloatField, q_out: FloatField): + with computation(PARALLEL), interval(...): + q_in = q_out + + +class MaxBandwithBenchmarkProgram: + def __init__(self, size, backend) -> None: + from pace.dsl.dace.orchestration import DaCeOrchestration, orchestrate + + dconfig = DaceConfig(None, backend, orchestration=DaCeOrchestration.BuildAndRun) + c = CompilationConfig(backend=backend) + s = StencilConfig(dace_config=dconfig, compilation_config=c) + self.copy_stencil = FrozenStencil( + func=copy_defn, + origin=(0, 0, 0), + domain=size, + stencil_config=s, + ) + orchestrate(obj=self, config=dconfig) + + def __call__(self, A, B, n: int): + for i in dace.nounroll(range(n)): + self.copy_stencil(A, B) def kernel_theoretical_timing( - sdfg: dace.sdfg.SDFG, hardware="P100", hardware_bw_in_Gb_s=None + sdfg: dace.sdfg.SDFG, + hardware_bw_in_GB_s=None, + backend=None, ) -> Dict[str, float]: """Compute a lower timing bound for kernels with the following hypothesis: @@ -197,6 +231,36 @@ def kernel_theoretical_timing( - Memory pressure is mostly in read/write from global memory, inner scalar & shared memory is not counted towards memory movement. 
""" + if not hardware_bw_in_GB_s: + size = np.array(sdfg.arrays["__g_self__w"].shape) + print( + f"Calculating experimental hardware bandwith on {size}" + f" arrays at {Float} precision..." + ) + bench = MaxBandwithBenchmarkProgram(size, backend) + if backend == "dace:gpu": + A = cp.ones(size, dtype=Float) + B = cp.ones(size, dtype=Float) + else: + A = np.ones(size, dtype=Float) + B = np.ones(size, dtype=Float) + n = 1000 + m = 4 + dt = [] + bench(A, B, n) + # Time + for _ in range(m): + s = time.time() + bench(A, B, n) + dt.append((time.time() - s) / n) + memory_size_in_b = np.prod(size) * np.dtype(Float).itemsize * 8 + bandwidth_in_bytes_s = memory_size_in_b / np.median(dt) + print( + f"Hardware bandwith computed: {bandwidth_in_bytes_s/(1024*1024*1024)} GB/s" + ) + else: + bandwidth_in_bytes_s = hardware_bw_in_GB_s * 1024 * 1024 * 1024 + allmaps = [ (me, state) for me, state in sdfg.all_nodes_recursive() @@ -228,19 +292,6 @@ def kernel_theoretical_timing( ] ) - # Compute hardware memory bandwidth in bytes/us - if hardware_bw_in_Gb_s and hardware in _HARDWARE_BW_GB_S.keys(): - raise NotImplementedError("can't specify hardware bandwidth and hardware") - if hardware_bw_in_Gb_s: - bandwidth_in_bytes_s = hardware_bw_in_Gb_s * 1024 * 1024 * 1024 - elif hardware in _HARDWARE_BW_GB_S.keys(): - # Time it has to take (at least): bytes / bandwidth_in_bytes_s - bandwidth_in_bytes_s = _HARDWARE_BW_GB_S[hardware] * 1024 * 1024 * 1024 - else: - print( - f"Timing analysis: hardware {hardware} unknown and no bandwidth given" - ) - in_us = 1000 * 1000 # Theoretical fastest timing @@ -249,8 +300,9 @@ def kernel_theoretical_timing( except TypeError: newresult_in_us = (alldata_in_bytes / bandwidth_in_bytes_s) * in_us + import sympy + if node.label in result: - import sympy newresult_in_us = sympy.Max(result[node.label], newresult_in_us).expand() try: @@ -259,29 +311,53 @@ def kernel_theoretical_timing( pass # Bad expansion - if not isinstance(newresult_in_us, float): + if not 
isinstance(newresult_in_us, sympy.core.numbers.Float): continue - result[node.label] = newresult_in_us + result[node.label] = float(newresult_in_us) return result def report_kernel_theoretical_timing( - timings: Dict[str, float], human_readable: bool = True, csv: bool = False + timings: Dict[str, float], + human_readable: bool = True, + out_format: Optional[str] = None, ) -> str: """Produce a human readable or CSV of the kernel timings""" result_string = f"Maps processed: {len(timings)}.\n" if human_readable: result_string += "Timing in microseconds Map name:\n" result_string += "\n".join(f"{v:.2f}\t{k}," for k, v in sorted(timings.items())) - if csv: - result_string += "#Map name,timing in microseconds\n" - result_string += "\n".join(f"{k},{v}," for k, v in sorted(timings.items())) + if out_format == "csv": + csv_string = "" + csv_string += "#Map name,timing in microseconds\n" + csv_string += "\n".join(f"{k},{v}," for k, v in sorted(timings.items())) + with open("kernel_theoretical_timing.csv", "w") as f: + f.write(csv_string) + elif out_format == "json": + import json + + with open("kernel_theoretical_timing.json", "w") as f: + json.dump(timings, f, indent=2) + return result_string -def kernel_theoretical_timing_from_path(sdfg_path: str) -> str: +def kernel_theoretical_timing_from_path( + sdfg_path: str, + hardware_bw_in_GB_s: Optional[float] = None, + backend: Optional[str] = None, + output_format: Optional[str] = None, +) -> str: """Load an SDFG and report the theoretical kernel timings""" - timings = kernel_theoretical_timing(dace.SDFG.from_file(sdfg_path)) - return report_kernel_theoretical_timing(timings, human_readable=True, csv=False) + timings = kernel_theoretical_timing( + dace.SDFG.from_file(sdfg_path), + hardware_bw_in_GB_s=hardware_bw_in_GB_s, + backend=backend, + ) + return report_kernel_theoretical_timing( + timings, + human_readable=True, + out_format=output_format, + ) From cb4ec5f94bd7d1ed81d638a2dc1f1ded457710c0 Mon Sep 17 00:00:00 2001 From: 
Florian Deconinck Date: Fri, 3 Mar 2023 21:15:05 +0000 Subject: [PATCH 08/57] Move constant on a env var Add saturation adjustement threshold to const --- .../fv3core/stencils/saturation_adjustment.py | 6 +-- util/pace/util/constants.py | 51 +++++++++++++++---- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/fv3core/pace/fv3core/stencils/saturation_adjustment.py b/fv3core/pace/fv3core/stencils/saturation_adjustment.py index 7ffe45b5..f537909f 100644 --- a/fv3core/pace/fv3core/stencils/saturation_adjustment.py +++ b/fv3core/pace/fv3core/stencils/saturation_adjustment.py @@ -901,14 +901,14 @@ def satadjust( # icloud_f = 0: bug - fixed # icloud_f = 1: old fvgfs gfdl) mp implementation # icloud_f = 2: binary cloud scheme (0 / 1) - if rh > 0.75 and qpz > 1.0e-6: + if rh > 0.75 and qpz > constants.SAT_ADJUST_THRESHOLD: dq = hvar * qpz q_plus = qpz + dq q_minus = qpz - dq if icloud_f == 2: # TODO untested if qpz > qstar: qa = 1.0 - elif (qstar < q_plus) and (q_cond > 1.0e-6): + elif (qstar < q_plus) and (q_cond > constants.SAT_ADJUST_THRESHOLD): qa = min(1.0, ((q_plus - qstar) / dq) ** 2) else: qa = 0.0 @@ -924,7 +924,7 @@ def satadjust( else: qa = 0.0 # impose minimum cloudiness if substantial q_cond exist - if q_cond > 1.0e-6: + if q_cond > constants.SAT_ADJUST_THRESHOLD: qa = max(cld_min, qa) qa = min(1, qa) else: diff --git a/util/pace/util/constants.py b/util/pace/util/constants.py index 8eb5143b..9e86c6d7 100644 --- a/util/pace/util/constants.py +++ b/util/pace/util/constants.py @@ -1,3 +1,32 @@ +import os +from enum import Enum, EnumMeta + + +CONST_VERSION = os.environ.get("PACE_CONSTANTS", "GFS") + + +class MetaEnum(EnumMeta): + def __contains__(cls, item): + try: + cls(item) + except ValueError: + return False + return True + + +class BaseEnum(Enum, metaclass=MetaEnum): + pass + + +class ConstantVersions(BaseEnum): + DEFAULT = "" + GEOS = "GEOS" + GFS = "GFS" + + +if CONST_VERSION not in ConstantVersions: + raise NotImplementedError(f"Constant 
{CONST_VERSION} not implemented") + ROOT_RANK = 0 X_DIM = "x" X_INTERFACE_DIM = "x_interface" @@ -35,21 +64,21 @@ # The FV3GFS model ships with two sets of constants, one used in the GFS physics # package and the other used for the Dycore. Their difference are small but significant # Our Fortran executable on GCE has GFS_PHYS=True -CONST_VERSION = 'GEOS' -if CONST_VERSION == 'GEOS': +if CONST_VERSION == ConstantVersions.GEOS: RADIUS = 6.371e6 PI = 3.14159265358979323846 - OMEGA = 2.0*PI/86164.0 + OMEGA = 2.0 * PI / 86164.0 GRAV = 9.80665 RGRAV = 1.0 / GRAV - RDGAS = 8314.47/28.965 - RVGAS = 8314.47/18.015 - HLV = 2.4665E6 - HLF = 3.3370E5 - KAPPA = RDGAS/(3.5*RDGAS) - CP_AIR = RDGAS/KAPPA + RDGAS = 8314.47 / 28.965 + RVGAS = 8314.47 / 18.015 + HLV = 2.4665e6 + HLF = 3.3370e5 + KAPPA = RDGAS / (3.5 * RDGAS) + CP_AIR = RDGAS / KAPPA TFREEZE = 273.15 -elif CONST_VERSION == 'GFS': + SAT_ADJUST_THRESHOLD = 1.0e-6 +elif CONST_VERSION == ConstantVersions.GFS: RADIUS = 6.3712e6 # Radius of the Earth [m] PI = 3.1415926535897931 OMEGA = 7.2921e-5 # Rotation of the earth @@ -62,6 +91,7 @@ CP_AIR = 1004.6 KAPPA = RDGAS / CP_AIR # Specific heat capacity of dry air at TFREEZE = 273.15 + SAT_ADJUST_THRESHOLD = 1.0e-8 else: RADIUS = 6371.0e3 # Radius of the Earth [m] #6371.0e3 PI = 3.14159265358979323846 # 3.14159265358979323846 @@ -75,6 +105,7 @@ KAPPA = 2.0 / 7.0 CP_AIR = RDGAS / KAPPA # Specific heat capacity of dry air at TFREEZE = 273.16 # Freezing temperature of fresh water [K] + SAT_ADJUST_THRESHOLD = 1.0e-8 DZ_MIN = 2.0 CV_AIR = CP_AIR - RDGAS # Heat capacity of dry air at constant volume From 7348922d32eee811aa4060978cc62742ad3334e8 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Fri, 3 Mar 2023 21:15:30 +0000 Subject: [PATCH 09/57] lint --- fv3core/pace/fv3core/initialization/geos_wrapper.py | 4 +++- fv3core/pace/fv3core/stencils/d_sw.py | 5 +---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py 
b/fv3core/pace/fv3core/initialization/geos_wrapper.py index ea61ed7a..b832950f 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -16,7 +16,9 @@ class GeosDycoreWrapper: Takes numpy arrays as inputs, returns a dictionary of numpy arrays as outputs """ - def __init__(self, namelist: f90nml.Namelist, bdt: float, comm: pace.util.Comm, backend: str): + def __init__( + self, namelist: f90nml.Namelist, bdt: float, comm: pace.util.Comm, backend: str + ): # Make a custom performance collector for the GEOS wrapper self.perf_collector = PerformanceCollector("GEOS wrapper", comm) diff --git a/fv3core/pace/fv3core/stencils/d_sw.py b/fv3core/pace/fv3core/stencils/d_sw.py index 8be6dcd5..12db67ee 100644 --- a/fv3core/pace/fv3core/stencils/d_sw.py +++ b/fv3core/pace/fv3core/stencils/d_sw.py @@ -1242,10 +1242,7 @@ def __call__( self._column_namelist["d_con"], ) self._accumulate_heat_source_and_dissipation_estimate_stencil( - self._tmp_heat_s, - heat_source, - self._tmp_diss_e, - diss_est + self._tmp_heat_s, heat_source, self._tmp_diss_e, diss_est ) self._update_u_and_v_stencil( self._tmp_ut, From 131a2af86d5f1982e345634c5ef891db7b99f4d7 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Fri, 3 Mar 2023 21:36:20 +0000 Subject: [PATCH 10/57] More linting --- driver/pace/driver/grid.py | 2 +- fv3core/pace/fv3core/initialization/geos_wrapper.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/driver/pace/driver/grid.py b/driver/pace/driver/grid.py index c4c40c19..4817869c 100644 --- a/driver/pace/driver/grid.py +++ b/driver/pace/driver/grid.py @@ -215,5 +215,5 @@ def _transform_horizontal_grid( grid.data[:, :, 0] = lon_transform[:] grid.data[:, :, 1] = lat_transform[:] - metric_terms._grid.data[:] = grid.data[:] + metric_terms._grid.data[:] = grid.data[:] # type: ignore[attr-defined] metric_terms._init_agrid() diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py 
b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 340d8a5b..6ac49949 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -20,7 +20,7 @@ class GeosDycoreWrapper: def __init__( self, namelist: f90nml.Namelist, - bdt: float, + bdt: int, comm: pace.util.Comm, backend: str, ): From 89825429f6603d9dd4d9fbc3ba8616b93b9d179e Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Mon, 6 Mar 2023 21:01:13 +0000 Subject: [PATCH 11/57] Remove unused if leading to empty code block --- fv3core/pace/fv3core/stencils/d_sw.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fv3core/pace/fv3core/stencils/d_sw.py b/fv3core/pace/fv3core/stencils/d_sw.py index 93ad3f5e..b16b4690 100644 --- a/fv3core/pace/fv3core/stencils/d_sw.py +++ b/fv3core/pace/fv3core/stencils/d_sw.py @@ -566,10 +566,9 @@ def heat_source_from_vorticity_damping( heat_source - kinetic_energy_fraction_to_damp * dampterm ) - if __INLINED((d_con > dcon_threshold) or do_stochastic_ke_backscatter): + if __INLINED(do_stochastic_ke_backscatter): with horizontal(region[local_is : local_ie + 1, local_js : local_je + 1]): - if __INLINED(do_stochastic_ke_backscatter): - dissipation_estimate -= dampterm + dissipation_estimate -= dampterm def accumulate_heat_source_and_dissipation_estimate( From da2f902e00fd49b8f58ba53d36d67131c133b317 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Mon, 6 Mar 2023 21:53:26 +0000 Subject: [PATCH 12/57] Restrict dace to 0.14.1 due to a parsing bug --- requirements_dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index 8318706a..9cfa0de2 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -11,7 +11,7 @@ dask>=2021.10.0 netCDF4 cftime fv3config>=0.9.0 -dace>=0.14.1 +dace=0.14.1 f90nml>=1.1.0 numpy>=1.15 -e external/gt4py From 27fae1c243b85699862ddb9fb58c38d42d22e52b Mon Sep 17 00:00:00 2001 From: Florian 
Deconinck Date: Tue, 7 Mar 2023 18:51:56 +0000 Subject: [PATCH 13/57] Add guard for bdt==0 Fix bad merge for bdt with GEOS_Wrapper --- fv3core/pace/fv3core/initialization/geos_wrapper.py | 8 ++++++++ fv3core/pace/fv3core/stencils/fv_dynamics.py | 2 ++ 2 files changed, 10 insertions(+) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 00351502..515518bc 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -11,6 +11,7 @@ from pace.driver.performance.collector import PerformanceCollector from pace.dsl.dace import DaceConfig, orchestrate from pace.dsl.gt4py_utils import is_gpu_backend +from pace.util.logging import pace_log @enum.unique @@ -108,6 +109,7 @@ def __init__( self.dycore_state = fv3core.DycoreState.init_zeros( quantity_factory=quantity_factory ) + self.dycore_state.bdt = self.dycore_config.dt_atmos damping_coefficients = pace.util.grid.DampingCoefficients.new_from_metric_terms( metric_terms @@ -133,6 +135,12 @@ def __init__( self.output_dict: Dict[str, np.ndarray] = {} self._allocate_output_dir() + pace_log.info( + "GEOS-Wrapper with: \n" + f" dt : {self.dycore_state.bdt}\n" + f" bridge : {self._fortran_mem_space} > {self._pace_mem_space}\n" + ) + def _critical_path(self): """Top-level orchestration function""" with self.perf_collector.timestep_timer.clock("step_dynamics"): diff --git a/fv3core/pace/fv3core/stencils/fv_dynamics.py b/fv3core/pace/fv3core/stencils/fv_dynamics.py index d7adba12..e5f0c269 100644 --- a/fv3core/pace/fv3core/stencils/fv_dynamics.py +++ b/fv3core/pace/fv3core/stencils/fv_dynamics.py @@ -179,6 +179,8 @@ def __init__( method_to_orchestrate="_checkpoint_tracer_advection_out", dace_compiletime_args=["state"], ) + if timestep == timedelta(seconds=0): + raise RuntimeError("Bad dynamical core configuration: bdt is 0") # nested and stretched_grid are options in the Fortran code which we # have 
not implemented, so they are hard-coded here. self.call_checkpointer = checkpointer is not None From 2f8ebac91a891e2fc732ff6b792e080d9fc226b6 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 7 Mar 2023 19:36:11 +0000 Subject: [PATCH 14/57] Remove unused code --- fv3core/pace/fv3core/initialization/geos_wrapper.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 64fbb6ff..cc7211c9 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -19,10 +19,6 @@ class MemorySpace(enum.Enum): DEVICE = 1 -def assign_no_copy(A, B): - A = B - - class GeosDycoreWrapper: """ Provides an interface for the Geos model to access the Pace dycore. From 81d00ced5704e94decd692b37100ce73e36620d7 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 28 Mar 2023 13:43:45 +0000 Subject: [PATCH 15/57] Fix theoretical timings Lint --- dsl/pace/dsl/dace/utils.py | 17 ++++++++--------- fv3core/pace/fv3core/stencils/d_sw.py | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/dsl/pace/dsl/dace/utils.py b/dsl/pace/dsl/dace/utils.py index 68b61331..4ba0247e 100644 --- a/dsl/pace/dsl/dace/utils.py +++ b/dsl/pace/dsl/dace/utils.py @@ -5,9 +5,11 @@ import dace import numpy as np from dace.transformation.helpers import get_parent_map +from gt4py.cartesian.gtscript import PARALLEL, computation, interval from pace.dsl.dace.dace_config import DaceConfig -from pace.dsl.typing import Float +from pace.dsl.stencil import CompilationConfig, FrozenStencil, StencilConfig +from pace.dsl.typing import Float, FloatField from pace.util._optional_imports import cupy as cp from pace.util.logging import pace_log @@ -186,13 +188,6 @@ def memory_static_analysis_from_path(sdfg_path: str, detail_report=False) -> str # ---------------------------------------------------------- # Theoritical bandwith from SDFG
# ---------------------------------------------------------- - -from gt4py.cartesian.gtscript import PARALLEL, computation, interval - -from pace.dsl.stencil import CompilationConfig, FrozenStencil, StencilConfig -from pace.dsl.typing import FloatField - - def copy_defn(q_in: FloatField, q_out: FloatField): with computation(PARALLEL), interval(...): q_in = q_out @@ -260,6 +255,7 @@ def kernel_theoretical_timing( ) else: bandwidth_in_bytes_s = hardware_bw_in_GB_s * 1024 * 1024 * 1024 + print(f"Given hardware bandwith: {bandwidth_in_bytes_s/(1024*1024*1024)} GB/s") allmaps = [ (me, state) @@ -311,7 +307,9 @@ def kernel_theoretical_timing( pass # Bad expansion - if not isinstance(newresult_in_us, sympy.core.numbers.Float): + if not isinstance(newresult_in_us, sympy.core.numbers.Float) and not isinstance( + newresult_in_us, float + ): continue result[node.label] = float(newresult_in_us) @@ -351,6 +349,7 @@ def kernel_theoretical_timing_from_path( output_format: Optional[str] = None, ) -> str: """Load an SDFG and report the theoretical kernel timings""" + print(f"Running kernel_theoretical_timing for {sdfg_path}") timings = kernel_theoretical_timing( dace.SDFG.from_file(sdfg_path), hardware_bw_in_GB_s=hardware_bw_in_GB_s, diff --git a/fv3core/pace/fv3core/stencils/d_sw.py b/fv3core/pace/fv3core/stencils/d_sw.py index b16b4690..02ce9887 100644 --- a/fv3core/pace/fv3core/stencils/d_sw.py +++ b/fv3core/pace/fv3core/stencils/d_sw.py @@ -529,7 +529,7 @@ def heat_source_from_vorticity_damping( kinetic_energy_fraction_to_damp (in): the fraction of kinetic energy to explicitly damp and convert into heat. 
""" - from __externals__ import ( + from __externals__ import ( # noqa (see below) d_con, do_stochastic_ke_backscatter, local_ie, From 4891d560d59931611e561e7a9e4144fa73761fa6 Mon Sep 17 00:00:00 2001 From: Purnendu Chakraborty Date: Fri, 7 Apr 2023 12:41:46 -0400 Subject: [PATCH 16/57] Fixed a bug where pkz was being calculated twice, and the second calc was wrong --- fv3core/pace/fv3core/stencils/temperature_adjust.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fv3core/pace/fv3core/stencils/temperature_adjust.py b/fv3core/pace/fv3core/stencils/temperature_adjust.py index 33feb484..0226df38 100644 --- a/fv3core/pace/fv3core/stencils/temperature_adjust.py +++ b/fv3core/pace/fv3core/stencils/temperature_adjust.py @@ -29,7 +29,6 @@ def apply_diffusive_heating( """ with computation(PARALLEL), interval(...): pkz = exp(cappa / (1.0 - cappa) * log(constants.RDG * delp / delz * pt)) - pkz = (constants.RDG * delp / delz * pt) ** (cappa / (1.0 - cappa)) dtmp = heat_source / (constants.CV_AIR * delp) with computation(PARALLEL): with interval(0, 1): From fafbfc761a063a5fc4e7e5af2e1839f151072469 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Mon, 10 Apr 2023 13:11:27 +0000 Subject: [PATCH 17/57] Downgrade DaCe to 0.14.0 pending array aliasing fix --- constraints.txt | 2 +- requirements_dev.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/constraints.txt b/constraints.txt index 282e5988..dd122d08 100644 --- a/constraints.txt +++ b/constraints.txt @@ -87,7 +87,7 @@ cytoolz==0.11.2 # via # gt4py # gt4py (external/gt4py/setup.cfg) -dace==0.14.1 +dace==0.14.0 # via # -r requirements_dev.txt # pace-dsl diff --git a/requirements_dev.txt b/requirements_dev.txt index 9cfa0de2..a07db35a 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -11,7 +11,7 @@ dask>=2021.10.0 netCDF4 cftime fv3config>=0.9.0 -dace=0.14.1 +dace=0.14.0 f90nml>=1.1.0 numpy>=1.15 -e external/gt4py From 4fc5b4dfb82c5e702b66881e3c39f8f49878a0a1 Mon Sep 17 00:00:00 2001 
From: Florian Deconinck Date: Mon, 10 Apr 2023 17:45:16 +0000 Subject: [PATCH 18/57] Set default cache path for orchestrated DaCe to respect GT_CACHE_* env --- dsl/pace/dsl/dace/build.py | 9 ++------- dsl/pace/dsl/dace/dace_config.py | 4 +++- dsl/pace/dsl/gt4py_utils.py | 9 +++++++++ 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/dsl/pace/dsl/dace/build.py b/dsl/pace/dsl/dace/build.py index 7d8f3db2..cbdc4404 100644 --- a/dsl/pace/dsl/dace/build.py +++ b/dsl/pace/dsl/dace/build.py @@ -5,6 +5,7 @@ import pace.util from pace.dsl.dace.dace_config import DaceConfig, DaCeOrchestration +from pace.dsl.gt4py_utils import cache_path ################################################ @@ -124,9 +125,6 @@ def get_sdfg_path( ) return sdfg_file_path - # Case of loading a precompiled .so - lookup using GT_CACHE - from gt4py.cartesian import config as gt_config - if config.rank_size > 1: rank = config.my_rank rank_str = f"_{config.target_rank:06d}" @@ -134,10 +132,7 @@ def get_sdfg_path( rank = 0 rank_str = f"_{rank:06d}" - sdfg_dir_path = ( - f"{gt_config.cache_settings['root_path']}" - f"/.gt_cache{rank_str}/dacecache/{daceprog_name}" - ) + sdfg_dir_path = f"{cache_path(rank)}/dacecache/{daceprog_name}" if not os.path.isdir(sdfg_dir_path): raise RuntimeError(f"Precompiled SDFG is missing at {sdfg_dir_path}") diff --git a/dsl/pace/dsl/dace/dace_config.py b/dsl/pace/dsl/dace/dace_config.py index 961bf3ba..5481759d 100644 --- a/dsl/pace/dsl/dace/dace_config.py +++ b/dsl/pace/dsl/dace/dace_config.py @@ -5,7 +5,7 @@ from dace.codegen.compiled_sdfg import CompiledSDFG from dace.frontend.python.parser import DaceProgram -from pace.dsl.gt4py_utils import is_gpu_backend +from pace.dsl.gt4py_utils import cache_path, is_gpu_backend from pace.util._optional_imports import cupy as cp from pace.util.communicator import CubedSphereCommunicator @@ -198,6 +198,8 @@ def __init__( self.target_rank = 0 self.layout = (1, 1) + dace.config.Config.set("default_build_folder", 
value=cache_path(self.my_rank)) + set_distributed_caches(self) if ( diff --git a/dsl/pace/dsl/gt4py_utils.py b/dsl/pace/dsl/gt4py_utils.py index 7b033fee..0c446126 100644 --- a/dsl/pace/dsl/gt4py_utils.py +++ b/dsl/pace/dsl/gt4py_utils.py @@ -2,6 +2,7 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union import gt4py +import gt4py.cartesian.config as gt_config import numpy as np from pace.dsl.typing import DTypes, Field, Float @@ -483,3 +484,11 @@ def split_cartesian_into_storages(var: np.ndarray) -> Sequence[np.ndarray]: asarray(var, type(var))[:, :, cart], ) return var_data + + +def cache_path(rank: int) -> str: + rank_str = f"_{rank:06d}" + return ( + f"{gt_config.cache_settings['root_path']}" + f"/{gt_config.cache_settings['dir_name']}{rank_str}" + ) From 22450279075e746f44a1b6afce7a05b437b1f35d Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 11 Apr 2023 13:36:42 +0000 Subject: [PATCH 19/57] Remove previous per stencil override of default_build_folder --- dsl/pace/dsl/stencil.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/dsl/pace/dsl/stencil.py b/dsl/pace/dsl/stencil.py index 3b0bd781..51832522 100644 --- a/dsl/pace/dsl/stencil.py +++ b/dsl/pace/dsl/stencil.py @@ -16,7 +16,6 @@ cast, ) -import dace import gt4py import numpy as np from gt4py.cartesian import gtscript @@ -322,14 +321,6 @@ def __init__( self._argument_names = tuple(inspect.getfullargspec(func).args) - if "dace" in self.stencil_config.compilation_config.backend: - dace.Config.set( - "default_build_folder", - value="{gt_cache}/dacecache".format( - gt_cache=gt4py.cartesian.config.cache_settings["dir_name"] - ), - ) - assert ( len(self._argument_names) > 0 ), "A stencil with no arguments? 
You may be double decorating" From 4f8fdc3bc5932dd020f0a836f602cf9e9311136f Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 11 Apr 2023 15:03:43 +0000 Subject: [PATCH 20/57] Revert "Set default cache path for orchestrated DaCe to respect GT_CACHE_* env" This reverts commit 4fc5b4dfb82c5e702b66881e3c39f8f49878a0a1. --- dsl/pace/dsl/dace/build.py | 9 +++++++-- dsl/pace/dsl/dace/dace_config.py | 4 +--- dsl/pace/dsl/gt4py_utils.py | 9 --------- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/dsl/pace/dsl/dace/build.py b/dsl/pace/dsl/dace/build.py index cbdc4404..7d8f3db2 100644 --- a/dsl/pace/dsl/dace/build.py +++ b/dsl/pace/dsl/dace/build.py @@ -5,7 +5,6 @@ import pace.util from pace.dsl.dace.dace_config import DaceConfig, DaCeOrchestration -from pace.dsl.gt4py_utils import cache_path ################################################ @@ -125,6 +124,9 @@ def get_sdfg_path( ) return sdfg_file_path + # Case of loading a precompiled .so - lookup using GT_CACHE + from gt4py.cartesian import config as gt_config + if config.rank_size > 1: rank = config.my_rank rank_str = f"_{config.target_rank:06d}" @@ -132,7 +134,10 @@ def get_sdfg_path( rank = 0 rank_str = f"_{rank:06d}" - sdfg_dir_path = f"{cache_path(rank)}/dacecache/{daceprog_name}" + sdfg_dir_path = ( + f"{gt_config.cache_settings['root_path']}" + f"/.gt_cache{rank_str}/dacecache/{daceprog_name}" + ) if not os.path.isdir(sdfg_dir_path): raise RuntimeError(f"Precompiled SDFG is missing at {sdfg_dir_path}") diff --git a/dsl/pace/dsl/dace/dace_config.py b/dsl/pace/dsl/dace/dace_config.py index 5481759d..961bf3ba 100644 --- a/dsl/pace/dsl/dace/dace_config.py +++ b/dsl/pace/dsl/dace/dace_config.py @@ -5,7 +5,7 @@ from dace.codegen.compiled_sdfg import CompiledSDFG from dace.frontend.python.parser import DaceProgram -from pace.dsl.gt4py_utils import cache_path, is_gpu_backend +from pace.dsl.gt4py_utils import is_gpu_backend from pace.util._optional_imports import cupy as cp from 
pace.util.communicator import CubedSphereCommunicator @@ -198,8 +198,6 @@ def __init__( self.target_rank = 0 self.layout = (1, 1) - dace.config.Config.set("default_build_folder", value=cache_path(self.my_rank)) - set_distributed_caches(self) if ( diff --git a/dsl/pace/dsl/gt4py_utils.py b/dsl/pace/dsl/gt4py_utils.py index 0c446126..7b033fee 100644 --- a/dsl/pace/dsl/gt4py_utils.py +++ b/dsl/pace/dsl/gt4py_utils.py @@ -2,7 +2,6 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union import gt4py -import gt4py.cartesian.config as gt_config import numpy as np from pace.dsl.typing import DTypes, Field, Float @@ -484,11 +483,3 @@ def split_cartesian_into_storages(var: np.ndarray) -> Sequence[np.ndarray]: asarray(var, type(var))[:, :, cart], ) return var_data - - -def cache_path(rank: int) -> str: - rank_str = f"_{rank:06d}" - return ( - f"{gt_config.cache_settings['root_path']}" - f"/{gt_config.cache_settings['dir_name']}{rank_str}" - ) From 47421a0c8ac33184379e9eb5ed555da4720378d3 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 11 Apr 2023 15:03:47 +0000 Subject: [PATCH 21/57] Revert "Remove previous per stencil override of default_build_folder" This reverts commit 22450279075e746f44a1b6afce7a05b437b1f35d. --- dsl/pace/dsl/stencil.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dsl/pace/dsl/stencil.py b/dsl/pace/dsl/stencil.py index 51832522..3b0bd781 100644 --- a/dsl/pace/dsl/stencil.py +++ b/dsl/pace/dsl/stencil.py @@ -16,6 +16,7 @@ cast, ) +import dace import gt4py import numpy as np from gt4py.cartesian import gtscript @@ -321,6 +322,14 @@ def __init__( self._argument_names = tuple(inspect.getfullargspec(func).args) + if "dace" in self.stencil_config.compilation_config.backend: + dace.Config.set( + "default_build_folder", + value="{gt_cache}/dacecache".format( + gt_cache=gt4py.cartesian.config.cache_settings["dir_name"] + ), + ) + assert ( len(self._argument_names) > 0 ), "A stencil with no arguments? 
You may be double decorating" From d51bc11598cb8fa1dadf64f97ed6f5fcd4cdde68 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 11 Apr 2023 15:07:44 +0000 Subject: [PATCH 22/57] Read cache_root in default dace backend --- dsl/pace/dsl/stencil.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dsl/pace/dsl/stencil.py b/dsl/pace/dsl/stencil.py index 3b0bd781..26454ef8 100644 --- a/dsl/pace/dsl/stencil.py +++ b/dsl/pace/dsl/stencil.py @@ -325,8 +325,9 @@ def __init__( if "dace" in self.stencil_config.compilation_config.backend: dace.Config.set( "default_build_folder", - value="{gt_cache}/dacecache".format( - gt_cache=gt4py.cartesian.config.cache_settings["dir_name"] + value="{gt_root}/{gt_cache}/dacecache".format( + gt_root=gt4py.cartesian.config.cache_settings["root_path"], + gt_cache=gt4py.cartesian.config.cache_settings["dir_name"], ), ) From 6bdd5958a4f68dc76fc4c8174620a15c8f1c8c6b Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 11 Apr 2023 15:09:12 +0000 Subject: [PATCH 23/57] Document faulty behavior with GT_CACHE_DIR_NAME --- doc_primer_orchestration.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc_primer_orchestration.md b/doc_primer_orchestration.md index 16fadd4d..a10baf0c 100644 --- a/doc_primer_orchestration.md +++ b/doc_primer_orchestration.md @@ -104,6 +104,10 @@ _Parsing errors_ DaCe cannot parse _any_ dynamic Python and any code that allocates memory on the fly (think list creation). It will also complain about any arguments it can't memory describe (remember `dace_compiletime_args` ). +_GT_CACHE_DIR_NAME_ + +We do not honor the `GT_CACHE_DIR_NAME` with orchestration. `GT_CACHE_ROOT` is respected. 
+ Conclusion ---------- From 80cbb015559e15079a57bd04feccc9260035be8e Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Thu, 13 Apr 2023 18:15:15 +0000 Subject: [PATCH 24/57] Fix bad requirements syntax --- requirements_dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index a07db35a..052bf5c3 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -11,7 +11,7 @@ dask>=2021.10.0 netCDF4 cftime fv3config>=0.9.0 -dace=0.14.0 +dace==0.14.0 f90nml>=1.1.0 numpy>=1.15 -e external/gt4py From 40f24408a29cd28e50c430610e0009947b068611 Mon Sep 17 00:00:00 2001 From: Purnendu Chakraborty Date: Fri, 14 Apr 2023 17:08:24 -0400 Subject: [PATCH 25/57] Check for the string value of CONST_VERSION directly instead of enum --- util/pace/util/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/pace/util/constants.py b/util/pace/util/constants.py index 9e86c6d7..5d4f5f85 100644 --- a/util/pace/util/constants.py +++ b/util/pace/util/constants.py @@ -64,7 +64,7 @@ class ConstantVersions(BaseEnum): # The FV3GFS model ships with two sets of constants, one used in the GFS physics # package and the other used for the Dycore. Their difference are small but significant # Our Fortran executable on GCE has GFS_PHYS=True -if CONST_VERSION == ConstantVersions.GEOS: +if CONST_VERSION == "GEOS": RADIUS = 6.371e6 PI = 3.14159265358979323846 OMEGA = 2.0 * PI / 86164.0 @@ -78,7 +78,7 @@ class ConstantVersions(BaseEnum): CP_AIR = RDGAS / KAPPA TFREEZE = 273.15 SAT_ADJUST_THRESHOLD = 1.0e-6 -elif CONST_VERSION == ConstantVersions.GFS: +elif CONST_VERSION == "GFS": RADIUS = 6.3712e6 # Radius of the Earth [m] PI = 3.1415926535897931 OMEGA = 7.2921e-5 # Rotation of the earth From cae25a9430c040e45f15aee3d4ef232b7d933dd8 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Thu, 20 Apr 2023 09:21:41 -0400 Subject: [PATCH 26/57] Protect constant selection more rigorously.
Clean abort on unknown constant given --- util/pace/util/constants.py | 49 ++++++++++++++----------------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/util/pace/util/constants.py b/util/pace/util/constants.py index 5d4f5f85..4819cc4d 100644 --- a/util/pace/util/constants.py +++ b/util/pace/util/constants.py @@ -1,31 +1,22 @@ import os -from enum import Enum, EnumMeta +from enum import Enum +from warnings import warn -CONST_VERSION = os.environ.get("PACE_CONSTANTS", "GFS") - - -class MetaEnum(EnumMeta): - def __contains__(cls, item): - try: - cls(item) - except ValueError: - return False - return True - - -class BaseEnum(Enum, metaclass=MetaEnum): - pass - - -class ConstantVersions(BaseEnum): - DEFAULT = "" - GEOS = "GEOS" - GFS = "GFS" +# The FV3GFS model ships with two sets of constants, one used in the GFS physics +# package and the other used for the Dycore. Their difference are small but significant +# In addition the GSFC's GEOS model as its own variables +class ConstantVersions(Enum): + FV3DYCORE = "FV3DYCORE" # NOAA's FV3 dynamical core constants (original port) + GFS = "GFS" # Constant as defined in NOAA GFS + GEOS = "GEOS" # Constant as defined in GEOS v13 -if CONST_VERSION not in ConstantVersions: - raise NotImplementedError(f"Constant {CONST_VERSION} not implemented") +CONST_VERSION_AS_STR = os.environ.get("PACE_CONSTANTS", "FV3DYCORE") +try: + CONST_VERSION = ConstantVersions[CONST_VERSION_AS_STR] +except KeyError as e: + raise RuntimeError(f"Constants {CONST_VERSION_AS_STR} is not implemented, abort.") ROOT_RANK = 0 X_DIM = "x" @@ -60,11 +51,7 @@ class ConstantVersions(BaseEnum): ##################### # Physical constants ##################### - -# The FV3GFS model ships with two sets of constants, one used in the GFS physics -# package and the other used for the Dycore. 
Their difference are small but significant -# Our Fortran executable on GCE has GFS_PHYS=True -if CONST_VERSION == "GEOS": +if CONST_VERSION == ConstantVersions.GEOS: RADIUS = 6.371e6 PI = 3.14159265358979323846 OMEGA = 2.0 * PI / 86164.0 @@ -78,7 +65,7 @@ class ConstantVersions(BaseEnum): CP_AIR = RDGAS / KAPPA TFREEZE = 273.15 SAT_ADJUST_THRESHOLD = 1.0e-6 -elif CONST_VERSION == "GFS": +elif CONST_VERSION == ConstantVersions.GFS: RADIUS = 6.3712e6 # Radius of the Earth [m] PI = 3.1415926535897931 OMEGA = 7.2921e-5 # Rotation of the earth @@ -92,7 +79,7 @@ class ConstantVersions(BaseEnum): KAPPA = RDGAS / CP_AIR # Specific heat capacity of dry air at TFREEZE = 273.15 SAT_ADJUST_THRESHOLD = 1.0e-8 -else: +elif CONST_VERSION == ConstantVersions.FV3DYCORE: RADIUS = 6371.0e3 # Radius of the Earth [m] #6371.0e3 PI = 3.14159265358979323846 # 3.14159265358979323846 OMEGA = 7.292e-5 # Rotation of the earth # 7.292e-5 @@ -106,6 +93,8 @@ class ConstantVersions(BaseEnum): CP_AIR = RDGAS / KAPPA # Specific heat capacity of dry air at TFREEZE = 273.16 # Freezing temperature of fresh water [K] SAT_ADJUST_THRESHOLD = 1.0e-8 +else: + raise RuntimeError("Constant selector failed, bad code.") DZ_MIN = 2.0 CV_AIR = CP_AIR - RDGAS # Heat capacity of dry air at constant volume From 915993e9dfe692e1db3bc7582bb3dcb9fe45535a Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Thu, 20 Apr 2023 09:26:15 -0400 Subject: [PATCH 27/57] Log constants selection --- util/pace/util/constants.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/util/pace/util/constants.py b/util/pace/util/constants.py index 4819cc4d..2a72d25c 100644 --- a/util/pace/util/constants.py +++ b/util/pace/util/constants.py @@ -1,7 +1,7 @@ import os from enum import Enum from warnings import warn - +from pace.util.logging import pace_log # The FV3GFS model ships with two sets of constants, one used in the GFS physics # package and the other used for the Dycore. 
Their difference are small but significant @@ -11,10 +11,11 @@ class ConstantVersions(Enum): GFS = "GFS" # Constant as defined in NOAA GFS GEOS = "GEOS" # Constant as defined in GEOS v13 - CONST_VERSION_AS_STR = os.environ.get("PACE_CONSTANTS", "FV3DYCORE") + try: CONST_VERSION = ConstantVersions[CONST_VERSION_AS_STR] + pace_log.info(f"Constant selected: {CONST_VERSION}") except KeyError as e: raise RuntimeError(f"Constants {CONST_VERSION_AS_STR} is not implemented, abort.") From c3e355c4f7558076d5fa75490c9700690b81c2d8 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Thu, 20 Apr 2023 12:10:28 -0400 Subject: [PATCH 28/57] Refactor NQ to constants.py --- fv3core/pace/fv3core/stencils/fv_dynamics.py | 21 ++++++++----------- .../translate/translate_init_case.py | 2 +- util/pace/util/constants.py | 20 ++++++++++++++++++ 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/fv3core/pace/fv3core/stencils/fv_dynamics.py b/fv3core/pace/fv3core/stencils/fv_dynamics.py index e5f0c269..8f5f8a7f 100644 --- a/fv3core/pace/fv3core/stencils/fv_dynamics.py +++ b/fv3core/pace/fv3core/stencils/fv_dynamics.py @@ -21,19 +21,12 @@ from pace.fv3core.stencils.neg_adj3 import AdjustNegativeTracerMixingRatio from pace.fv3core.stencils.remapping import LagrangianToEulerian from pace.stencils.c2l_ord import CubedToLatLon -from pace.util import X_DIM, Y_DIM, Z_INTERFACE_DIM, Timer +from pace.util import X_DIM, Y_DIM, Z_INTERFACE_DIM, Timer, constants from pace.util.grid import DampingCoefficients, GridData from pace.util.logging import pace_log from pace.util.mpi import MPI -# nq is actually given by ncnst - pnats, where those are given in atmosphere.F90 by: -# ncnst = Atm(mytile)%ncnst -# pnats = Atm(mytile)%flagstruct%pnats -# here we hard-coded it because 8 is the only supported value, refactor this later! 
-NQ = 9 # state.nq_tot - spec.namelist.dnats - - def pt_to_potential_density_pt( pkz: FloatField, dp_initial: FloatField, q_con: FloatField, pt: FloatField ): @@ -209,7 +202,7 @@ def __init__( ) self.tracers = {} - for name in utils.tracer_variables[0:NQ]: + for name in utils.tracer_variables[0:constants.NQ]: self.tracers[name] = state.__dict__[name] temporaries = fvdyn_temporaries(quantity_factory) @@ -284,7 +277,7 @@ def __init__( ) self._cappa = self.acoustic_dynamics.cappa - if not (not self.config.inline_q and NQ != 0): + if not (not self.config.inline_q and constants.NQ != 0): raise NotImplementedError("tracer_2d not implemented, turn on z_tracer") self._adjust_tracer_mixing_ratio = AdjustNegativeTracerMixingRatio( stencil_factory, @@ -298,7 +291,7 @@ def __init__( quantity_factory=quantity_factory, config=config.remapping, area_64=grid_data.area_64, - nq=NQ, + nq=constants.NQ, pfull=self._pfull, tracers=self.tracers, checkpointer=checkpointer, @@ -548,6 +541,11 @@ def _compute(self, state: DycoreState, timer: pace.util.Timer): log_on_rank_0("Remapping") with timer.clock("Remapping"): self._checkpoint_remapping_in(state) + + # TODO: When NQ=9, we shouldn't need to pass qcld explicitly + # since it's in self.tracers. 
It should not be an issue since + # we don't have self.tracers & qcld computation at the same time + # When NQ=8, we do need qcld passed explicitely self._lagrangian_to_eulerian_obj( self.tracers, state.pt, @@ -559,7 +557,6 @@ def _compute(self, state: DycoreState, timer: pace.util.Timer): state.w, self._cappa, state.q_con, - # Since NQ=9, we shouldn't need to pass qcld explicitly state.qcld, state.pkz, state.pk, diff --git a/fv3core/tests/savepoint/translate/translate_init_case.py b/fv3core/tests/savepoint/translate/translate_init_case.py index 9716aca3..5fe14d73 100644 --- a/fv3core/tests/savepoint/translate/translate_init_case.py +++ b/fv3core/tests/savepoint/translate/translate_init_case.py @@ -184,7 +184,7 @@ def outputs_from_state(self, state: dict): def compute_parallel(self, inputs, communicator): state = {} - full_shape = (*self.grid.domain_shape_full(add=(1, 1, 1)), fv_dynamics.NQ) + full_shape = (*self.grid.domain_shape_full(add=(1, 1, 1)), pace.util.constants.NQ) for variable, properties in self.outputs.items(): dims = properties["dims"] state[variable] = fv3util.Quantity( diff --git a/util/pace/util/constants.py b/util/pace/util/constants.py index 2a72d25c..7a481b95 100644 --- a/util/pace/util/constants.py +++ b/util/pace/util/constants.py @@ -19,6 +19,10 @@ class ConstantVersions(Enum): except KeyError as e: raise RuntimeError(f"Constants {CONST_VERSION_AS_STR} is not implemented, abort.") +##################### +# Common constants +##################### + ROOT_RANK = 0 X_DIM = "x" X_INTERFACE_DIM = "x_interface" @@ -49,6 +53,22 @@ class ConstantVersions(Enum): BOUNDARY_TYPES = EDGE_BOUNDARY_TYPES + CORNER_BOUNDARY_TYPES N_HALO_DEFAULT = 3 +####################### +# Tracers configuration +####################### + +# nq is actually given by ncnst - pnats, where those are given in atmosphere.F90 by: +# ncnst = Atm(mytile)%ncnst +# pnats = Atm(mytile)%flagstruct%pnats +# here we hard-coded it because 8 is the only supported value, refactor this later! 
+if CONST_VERSION == ConstantVersions.GEOS: + # 'qlcd' is exchanged in GEOS + NQ = 9 +elif ( CONST_VERSION == ConstantVersions.GFS or CONST_VERSION == ConstantVersions.FV3DYCORE ): + NQ = 8 +else: + raise RuntimeError("Constant selector failed, bad code.") + ##################### # Physical constants ##################### From cde11e817e9b3bc2b756daa86bbb5f0918e923dc Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 2 May 2023 11:33:16 -0400 Subject: [PATCH 29/57] Replace all logger with pace_log Introduce PACE_LOGLEVEL to control log level from outside --- README.md | 6 ++++ driver/pace/driver/run.py | 7 +++-- dsl/pace/dsl/dace/utils.py | 3 +- .../fv3core/initialization/geos_wrapper.py | 8 ++--- util/pace/util/communicator.py | 3 -- util/pace/util/local_comm.py | 7 ++--- util/pace/util/logging.py | 8 +++-- util/pace/util/monitor/netcdf_monitor.py | 9 ++---- util/pace/util/monitor/zarr_monitor.py | 9 ++---- util/pace/util/mpi.py | 30 +++++++++---------- 10 files changed, 45 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 31980394..7753fa73 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,12 @@ mpirun -n 6 --oversubscribe python3 -m pace.driver.run driver/examples/configs/b After the run completes, you will see an output direcotry `output.zarr`. An example to visualize the output is provided in `driver/examples/plot_output.py`. See the [driver example](driver/examples/README.md) section for more details. +### Environment variable configuration + +- `PACE_CONSTANTS`: Pace is bundled with various constants (see _util/pace/util/constants.py_). +- `PACE_FLOAT_PRECISION`: default precision of the field & scalars in the numerics. Default to 64. +- `PACE_LOGLEVEL`: logging level to display (DEBUG, INFO, WARNING, ERROR, CRITICAL). Default to INFO. 
+ ## Quickstart - Docker ### Build diff --git a/driver/pace/driver/run.py b/driver/pace/driver/run.py index 2d9160cd..7979d6ba 100644 --- a/driver/pace/driver/run.py +++ b/driver/pace/driver/run.py @@ -7,6 +7,7 @@ import yaml from pace.util.mpi import MPI +from pace.util import pace_log from .driver import Driver, DriverConfig @@ -76,11 +77,13 @@ def command_line(config_path: str, log_rank: Optional[int], log_level: str): CONFIG_PATH is the path to a DriverConfig yaml file. """ configure_logging(log_rank=log_rank, log_level=log_level) - logger.info("loading DriverConfig from yaml") + pace_log.info("loading DriverConfig from yaml") with open(config_path, "r") as f: config = yaml.safe_load(f) driver_config = DriverConfig.from_dict(config) - logging.info(f"DriverConfig loaded: {yaml.dump(dataclasses.asdict(driver_config))}") + pace_log.info( + f"DriverConfig loaded: {yaml.dump(dataclasses.asdict(driver_config))}" + ) main(driver_config=driver_config) diff --git a/dsl/pace/dsl/dace/utils.py b/dsl/pace/dsl/dace/utils.py index 4ba0247e..5c9f63ec 100644 --- a/dsl/pace/dsl/dace/utils.py +++ b/dsl/pace/dsl/dace/utils.py @@ -27,7 +27,7 @@ def __init__(self, config: DaceConfig, label: str): @classmethod def log(cls, prefix: str, message: str): - pace_log.info(f"{prefix} {message}") + pace_log.debug(f"{prefix} {message}") @classmethod def default_prefix(cls, config: DaceConfig) -> str: @@ -299,7 +299,6 @@ def kernel_theoretical_timing( import sympy if node.label in result: - newresult_in_us = sympy.Max(result[node.label], newresult_in_us).expand() try: newresult_in_us = float(newresult_in_us) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 4fc34052..2835e77e 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -132,9 +132,12 @@ def __init__( self._allocate_output_dir() pace_log.info( - "GEOS-Wrapper with: \n" + "Pace GEOS wrapper 
initialized: \n" f" dt : {self.dycore_state.bdt}\n" f" bridge : {self._fortran_mem_space} > {self._pace_mem_space}\n" + f" backend: {backend}\n" + f" orchestration: {self._is_orchestrated}\n" + f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})" ) def _critical_path(self): @@ -173,7 +176,6 @@ def __call__( cyd: np.ndarray, diss_estd: np.ndarray, ) -> Tuple[Dict[str, np.ndarray], Dict[str, List[float]]]: - with self.perf_collector.timestep_timer.clock("numpy-to-dycore"): self.dycore_state = self._put_fortran_data_in_dycore( u, @@ -246,7 +248,6 @@ def _put_fortran_data_in_dycore( cyd: np.ndarray, diss_estd: np.ndarray, ) -> fv3core.DycoreState: - isc = self._grid_indexing.isc jsc = self._grid_indexing.jsc iec = self._grid_indexing.iec + 1 @@ -315,7 +316,6 @@ def _put_fortran_data_in_dycore( return state def _prep_outputs_for_geos(self) -> Dict[str, np.ndarray]: - output_dict = self.output_dict isc = self._grid_indexing.isc jsc = self._grid_indexing.jsc diff --git a/util/pace/util/communicator.py b/util/pace/util/communicator.py index 0611fa98..938469bd 100644 --- a/util/pace/util/communicator.py +++ b/util/pace/util/communicator.py @@ -1,5 +1,4 @@ import abc -import logging from typing import List, Mapping, Optional, Sequence, Tuple, Union, cast import numpy as np @@ -15,8 +14,6 @@ from .utils import device_synchronize -logger = logging.getLogger("pace.util") - try: import cupy except ImportError: diff --git a/util/pace/util/local_comm.py b/util/pace/util/local_comm.py index a289296a..32fd0fb4 100644 --- a/util/pace/util/local_comm.py +++ b/util/pace/util/local_comm.py @@ -1,14 +1,11 @@ import copy -import logging from typing import Any from .comm import Comm +from .logging import pace_log from .utils import ensure_contiguous, safe_assign_array -logger = logging.getLogger("pace.util") - - class ConcurrencyError(Exception): """Exception to denote that a rank cannot proceed because it is waiting on a call from another rank.""" @@ -104,7 +101,7 @@ def 
bcast(self, value, root=0): "the bcast source" ) value = self._get_buffer("bcast", value) - logger.debug(f"bcast {value} to rank {self.rank}") + pace_log.debug(f"bcast {value} to rank {self.rank}") return value def Barrier(self): diff --git a/util/pace/util/logging.py b/util/pace/util/logging.py index 81b5a7b1..c0e9d0d7 100644 --- a/util/pace/util/logging.py +++ b/util/pace/util/logging.py @@ -1,15 +1,19 @@ import logging +import os import sys from mpi4py import MPI +LOGLEVEL = os.environ.get("PACE_LOGLEVEL", "INFO").upper() + + def _pace_logger(): name_log = logging.getLogger(__name__) - name_log.setLevel(logging.DEBUG) + name_log.setLevel(LOGLEVEL) handler = logging.StreamHandler(sys.stdout) - handler.setLevel(logging.DEBUG) + handler.setLevel(LOGLEVEL) formatter = logging.Formatter( fmt=( f"%(asctime)s|%(levelname)s|rank {MPI.COMM_WORLD.Get_rank()}|" diff --git a/util/pace/util/monitor/netcdf_monitor.py b/util/pace/util/monitor/netcdf_monitor.py index 18687216..76e5ef31 100644 --- a/util/pace/util/monitor/netcdf_monitor.py +++ b/util/pace/util/monitor/netcdf_monitor.py @@ -10,13 +10,11 @@ from .. 
import _xarray as xr from ..filesystem import get_fs +from ..logging import pace_log from ..quantity import Quantity from .convert import to_numpy -logger = logging.getLogger(__name__) - - class _TimeChunkedVariable: def __init__(self, initial: Quantity, time_chunk_size: int): self._data = np.zeros( @@ -46,7 +44,6 @@ def data(self) -> Quantity: class _ChunkedNetCDFWriter: - FILENAME_FORMAT = "state_{chunk:04d}_tile{tile}.nc" def __init__( @@ -62,7 +59,7 @@ def __init__( self._time_units: Optional[str] = None def append(self, state): - logger.debug("appending at time %d", self._i_time) + pace_log.debug("appending at time %d", self._i_time) state = {**state} # copy so we don't mutate the input time = state.pop("time", None) if self._chunked is None: @@ -75,7 +72,7 @@ def append(self, state): self._chunked[name].append(quantity) self._times.append(time) if (self._i_time + 1) % self._time_chunk_size == 0: - logger.debug("flushing on append at time %d", self._i_time) + pace_log.debug("flushing on append at time %d", self._i_time) self.flush() self._i_time += 1 diff --git a/util/pace/util/monitor/zarr_monitor.py b/util/pace/util/monitor/zarr_monitor.py index 12186799..5d7729b9 100644 --- a/util/pace/util/monitor/zarr_monitor.py +++ b/util/pace/util/monitor/zarr_monitor.py @@ -1,4 +1,3 @@ -import logging from datetime import datetime, timedelta from typing import List, Tuple, Union @@ -7,12 +6,11 @@ from .. import _xarray as xr from .. 
import constants, utils from .._optional_imports import cupy, zarr +from ..logging import pace_log from ..partitioner import Partitioner, subtile_slice from .convert import to_numpy -logger = logging.getLogger("pace.util") - __all__ = ["ZarrMonitor"] @@ -238,7 +236,7 @@ def append(self, quantity): ) from_slice = _get_from_slice(target_slice) - logger.debug( + pace_log.debug( f"assigning data from subtile slice {from_slice} to " f"target slice {target_slice}" ) @@ -310,7 +308,7 @@ def append(self, quantity): ) from_slice = _get_from_slice(target_slice) - logger.debug( + pace_log.debug( f"assigning data from subtile slice {from_slice} to " f"target slice {target_slice}" ) @@ -332,7 +330,6 @@ def append(self, quantity): class _ZarrTimeWriter(_ZarrVariableWriter): - _TIME_CHUNK_SIZE = 1024 def __init__(self, *args, **kwargs): diff --git a/util/pace/util/mpi.py b/util/pace/util/mpi.py index d03b7937..5acc2b00 100644 --- a/util/pace/util/mpi.py +++ b/util/pace/util/mpi.py @@ -2,16 +2,14 @@ from mpi4py import MPI except ImportError: MPI = None -import logging from typing import List, Optional, TypeVar, cast from .comm import Comm, Request +from .logging import pace_log T = TypeVar("T") -logger = logging.getLogger(__name__) - class MPIComm(Comm): def __init__(self): @@ -26,54 +24,56 @@ def Get_size(self) -> int: return self._comm.Get_size() def bcast(self, value: Optional[T], root=0) -> T: - logger.debug("bcast from root %s on rank %s", root, self._comm.Get_rank()) + pace_log.debug("bcast from root %s on rank %s", root, self._comm.Get_rank()) return self._comm.bcast(value, root=root) def barrier(self): - logger.debug("barrier on rank %s", self._comm.Get_rank()) + pace_log.debug("barrier on rank %s", self._comm.Get_rank()) self._comm.barrier() def Barrier(self): pass def Scatter(self, sendbuf, recvbuf, root=0, **kwargs): - logger.debug("Scatter on rank %s with root %s", self._comm.Get_rank(), root) + pace_log.debug("Scatter on rank %s with root %s", self._comm.Get_rank(), 
root) self._comm.Scatter(sendbuf, recvbuf, root=root, **kwargs) def Gather(self, sendbuf, recvbuf, root=0, **kwargs): - logger.debug("Gather on rank %s with root %s", self._comm.Get_rank(), root) + pace_log.debug("Gather on rank %s with root %s", self._comm.Get_rank(), root) self._comm.Gather(sendbuf, recvbuf, root=root, **kwargs) def allgather(self, sendobj: T) -> List[T]: - logger.debug("allgather on rank %s", self._comm.Get_rank()) + pace_log.debug("allgather on rank %s", self._comm.Get_rank()) return self._comm.allgather(sendobj) def Send(self, sendbuf, dest, tag: int = 0, **kwargs): - logger.debug("Send on rank %s with dest %s", self._comm.Get_rank(), dest) + pace_log.debug("Send on rank %s with dest %s", self._comm.Get_rank(), dest) self._comm.Send(sendbuf, dest, tag=tag, **kwargs) def sendrecv(self, sendbuf, dest, **kwargs): - logger.debug("sendrecv on rank %s with dest %s", self._comm.Get_rank(), dest) + pace_log.debug("sendrecv on rank %s with dest %s", self._comm.Get_rank(), dest) return self._comm.sendrecv(sendbuf, dest, **kwargs) def Isend(self, sendbuf, dest, tag: int = 0, **kwargs) -> Request: - logger.debug("Isend on rank %s with dest %s", self._comm.Get_rank(), dest) + pace_log.debug("Isend on rank %s with dest %s", self._comm.Get_rank(), dest) return self._comm.Isend(sendbuf, dest, tag=tag, **kwargs) def Recv(self, recvbuf, source, tag: int = 0, **kwargs): - logger.debug("Recv on rank %s with source %s", self._comm.Get_rank(), source) + pace_log.debug("Recv on rank %s with source %s", self._comm.Get_rank(), source) self._comm.Recv(recvbuf, source, tag=tag, **kwargs) def Irecv(self, recvbuf, source, tag: int = 0, **kwargs) -> Request: - logger.debug("Irecv on rank %s with source %s", self._comm.Get_rank(), source) + pace_log.debug("Irecv on rank %s with source %s", self._comm.Get_rank(), source) return self._comm.Irecv(recvbuf, source, tag=tag, **kwargs) def Split(self, color, key) -> "Comm": - logger.debug( + pace_log.debug( "Split on rank %s with 
color %s, key %s", self._comm.Get_rank(), color, key ) return self._comm.Split(color, key) def allreduce(self, sendobj: T, op=None) -> T: - logger.debug("allreduce on rank %s with operator %s", self._comm.Get_rank(), op) + pace_log.debug( + "allreduce on rank %s with operator %s", self._comm.Get_rank(), op + ) return self._comm.allreduce(sendobj, op) From 9e6bbb6ecc0bc8574f9c2b79d1f5c663ddb70af8 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 2 May 2023 11:34:56 -0400 Subject: [PATCH 30/57] Code guidelines clean up --- fv3core/pace/fv3core/stencils/fv_dynamics.py | 8 ++++---- .../savepoint/translate/translate_init_case.py | 8 ++++---- util/pace/util/constants.py | 14 +++++++++----- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/fv3core/pace/fv3core/stencils/fv_dynamics.py b/fv3core/pace/fv3core/stencils/fv_dynamics.py index 8f5f8a7f..80b78e12 100644 --- a/fv3core/pace/fv3core/stencils/fv_dynamics.py +++ b/fv3core/pace/fv3core/stencils/fv_dynamics.py @@ -7,7 +7,6 @@ import pace.dsl.gt4py_utils as utils import pace.fv3core.stencils.moist_cv as moist_cv import pace.util -import pace.util.constants as constants from pace.dsl.dace.orchestration import dace_inhibitor, orchestrate from pace.dsl.dace.wrapped_halo_exchange import WrappedHaloUpdater from pace.dsl.stencil import StencilFactory @@ -202,7 +201,7 @@ def __init__( ) self.tracers = {} - for name in utils.tracer_variables[0:constants.NQ]: + for name in utils.tracer_variables[0 : constants.NQ]: self.tracers[name] = state.__dict__[name] temporaries = fvdyn_temporaries(quantity_factory) @@ -541,10 +540,11 @@ def _compute(self, state: DycoreState, timer: pace.util.Timer): log_on_rank_0("Remapping") with timer.clock("Remapping"): self._checkpoint_remapping_in(state) - + # TODO: When NQ=9, we shouldn't need to pass qcld explicitly # since it's in self.tracers. 
It should not be an issue since - # we don't have self.tracers & qcld computation at the same time + # we don't have self.tracers & qcld computation at + # the same time. # When NQ=8, we do need qcld passed explicitely self._lagrangian_to_eulerian_obj( self.tracers, diff --git a/fv3core/tests/savepoint/translate/translate_init_case.py b/fv3core/tests/savepoint/translate/translate_init_case.py index 5fe14d73..4a9fafc4 100644 --- a/fv3core/tests/savepoint/translate/translate_init_case.py +++ b/fv3core/tests/savepoint/translate/translate_init_case.py @@ -7,7 +7,6 @@ import pace.dsl.gt4py_utils as utils import pace.fv3core.initialization.baroclinic as baroclinic_init import pace.fv3core.initialization.baroclinic_jablonowski_williamson as jablo_init -import pace.fv3core.stencils.fv_dynamics as fv_dynamics import pace.util import pace.util as fv3util from pace.fv3core.testing import TranslateDycoreFortranData2Py @@ -17,7 +16,6 @@ class TranslateInitCase(ParallelTranslateBaseSlicing): - outputs: Dict[str, Any] = { "u": { "name": "x_wind", @@ -175,7 +173,6 @@ def outputs_from_state(self, state: dict): state[name][tracer] = state[name][tracer].data arrays[name] = state[name] elif len(self.outputs[name]["dims"]) > 0: - arrays[name] = state[name].data else: outputs[name] = state[name] # scalar @@ -184,7 +181,10 @@ def outputs_from_state(self, state: dict): def compute_parallel(self, inputs, communicator): state = {} - full_shape = (*self.grid.domain_shape_full(add=(1, 1, 1)), pace.util.constants.NQ) + full_shape = ( + *self.grid.domain_shape_full(add=(1, 1, 1)), + pace.util.constants.NQ, + ) for variable, properties in self.outputs.items(): dims = properties["dims"] state[variable] = fv3util.Quantity( diff --git a/util/pace/util/constants.py b/util/pace/util/constants.py index 7a481b95..ef57ed18 100644 --- a/util/pace/util/constants.py +++ b/util/pace/util/constants.py @@ -1,15 +1,17 @@ import os from enum import Enum -from warnings import warn + from pace.util.logging import 
pace_log + # The FV3GFS model ships with two sets of constants, one used in the GFS physics # package and the other used for the Dycore. Their difference are small but significant # In addition the GSFC's GEOS model as its own variables class ConstantVersions(Enum): FV3DYCORE = "FV3DYCORE" # NOAA's FV3 dynamical core constants (original port) - GFS = "GFS" # Constant as defined in NOAA GFS - GEOS = "GEOS" # Constant as defined in GEOS v13 + GFS = "GFS" # Constant as defined in NOAA GFS + GEOS = "GEOS" # Constant as defined in GEOS v13 + CONST_VERSION_AS_STR = os.environ.get("PACE_CONSTANTS", "FV3DYCORE") @@ -63,8 +65,10 @@ class ConstantVersions(Enum): # here we hard-coded it because 8 is the only supported value, refactor this later! if CONST_VERSION == ConstantVersions.GEOS: # 'qlcd' is exchanged in GEOS - NQ = 9 -elif ( CONST_VERSION == ConstantVersions.GFS or CONST_VERSION == ConstantVersions.FV3DYCORE ): + NQ = 9 +elif ( + CONST_VERSION == ConstantVersions.GFS or CONST_VERSION == ConstantVersions.FV3DYCORE +): NQ = 8 else: raise RuntimeError("Constant selector failed, bad code.") From 7e449cd8bbee4dfe7e614130beb56da598e07201 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Wed, 21 Jun 2023 12:28:17 -0400 Subject: [PATCH 31/57] Devops/GitHub actions on (#15) * Linting on PR * Run main unit test * Update python to available 3.8.12 * Remove cd to pace * Lint: git submodule recursive * Typo * Add openmpi to the image * Linting * Fix unit tests (remove dxa, dya rely on halo ex) * typo * Change name of jobs --- .github/workflows/lint.yml | 27 ++++++++++ .github/workflows/main_unit_tests.yml | 27 ++++++++++ driver/pace/driver/run.py | 2 +- tests/main/fv3core/test_init_from_geos.py | 60 +++++++++++++---------- tests/main/test_grid_init.py | 2 - util/pace/util/monitor/netcdf_monitor.py | 1 - 6 files changed, 88 insertions(+), 31 deletions(-) create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/main_unit_tests.yml diff --git 
a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..0cc08080 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,27 @@ +name: "Lint" +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Checkout Pace repository + uses: actions/checkout@v3.5.2 + with: + submodules: 'recursive' + - name: Step Python 3.8.12 + uses: actions/setup-python@v4.6.0 + with: + python-version: '3.8.12' + - name: Install OpenMPI for gt4py + run: | + sudo apt-get install libopenmpi-dev + - name: Install Python packages + run: | + python -m pip install --upgrade pip + pip install -r requirements_dev.txt -r requirements_lint.txt + - name: Run lint via pre-commit + run: | + pre-commit run --all-files diff --git a/.github/workflows/main_unit_tests.yml b/.github/workflows/main_unit_tests.yml new file mode 100644 index 00000000..5dbf4a1f --- /dev/null +++ b/.github/workflows/main_unit_tests.yml @@ -0,0 +1,27 @@ +name: "Main unit tests" +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled] + +jobs: + main_unit_tests: + runs-on: ubuntu-latest + steps: + - name: Checkout Pace repository + uses: actions/checkout@v3.5.2 + with: + submodules: 'recursive' + - name: Step Python 3.8.12 + uses: actions/setup-python@v4.6.0 + with: + python-version: '3.8.12' + - name: Install OpenMPI for gt4py + run: | + sudo apt-get install libopenmpi-dev + - name: Install Python packages + run: | + python -m pip install --upgrade pip + pip install -r requirements_dev.txt + - name: Run all main tests + run: | + pytest -x tests/main diff --git a/driver/pace/driver/run.py b/driver/pace/driver/run.py index 7979d6ba..c8532ebd 100644 --- a/driver/pace/driver/run.py +++ b/driver/pace/driver/run.py @@ -6,8 +6,8 @@ import click import yaml -from pace.util.mpi import MPI from pace.util import pace_log +from pace.util.mpi import MPI from 
.driver import Driver, DriverConfig diff --git a/tests/main/fv3core/test_init_from_geos.py b/tests/main/fv3core/test_init_from_geos.py index 252fe7d2..16efe9e9 100644 --- a/tests/main/fv3core/test_init_from_geos.py +++ b/tests/main/fv3core/test_init_from_geos.py @@ -7,7 +7,6 @@ def test_geos_wrapper(): - namelist_dict = { "stencil_config": { "compilation_config": { @@ -82,7 +81,12 @@ def test_geos_wrapper(): comm = NullComm(rank=0, total_ranks=6, fill_value=0.0) backend = "numpy" - wrapper = fv3core.GeosDycoreWrapper(namelist, comm, backend) + wrapper = fv3core.GeosDycoreWrapper( + namelist=namelist, + comm=comm, + backend=backend, + bdt=namelist_dict["dt_atmos"], + ) nhalo = 3 shape_centered = ( namelist["nx_tile"] + 2 * nhalo, @@ -191,31 +195,33 @@ def test_geos_wrapper(): ) diss_estd = np.ones(shape_centered) - output_dict = wrapper( - u, - v, - w, - delz, - pt, - delp, - q, - ps, - pe, - pk, - peln, - pkz, - phis, - q_con, - omga, - ua, - va, - uc, - vc, - mfxd, - mfyd, - cxd, - cyd, - diss_estd, + timings = {} + output_dict, timings = wrapper( + timings=timings, + u=u, + v=v, + w=w, + delz=delz, + pt=pt, + delp=delp, + q=q, + ps=ps, + pe=pe, + pk=pk, + peln=peln, + pkz=pkz, + phis=phis, + q_con=q_con, + omga=omga, + ua=ua, + va=va, + uc=uc, + vc=vc, + mfxd=mfxd, + mfyd=mfyd, + cxd=cxd, + cyd=cyd, + diss_estd=diss_estd, ) assert isinstance(output_dict["u"], np.ndarray) diff --git a/tests/main/test_grid_init.py b/tests/main/test_grid_init.py index 942dcfd3..4c0e5e2b 100644 --- a/tests/main/test_grid_init.py +++ b/tests/main/test_grid_init.py @@ -51,8 +51,6 @@ def test_grid_init_not_decomposition_dependent(rank: int): assert allclose(metric_terms_1by1.area, metric_terms_3by3.area, partitioner, rank) assert allclose(metric_terms_1by1.dx, metric_terms_3by3.dx, partitioner, rank) assert allclose(metric_terms_1by1.dy, metric_terms_3by3.dy, partitioner, rank) - assert allclose(metric_terms_1by1.dxa, metric_terms_3by3.dxa, partitioner, rank) - assert 
allclose(metric_terms_1by1.dya, metric_terms_3by3.dya, partitioner, rank) assert allclose( metric_terms_1by1.cos_sg1, metric_terms_3by3.cos_sg1, partitioner, rank ) diff --git a/util/pace/util/monitor/netcdf_monitor.py b/util/pace/util/monitor/netcdf_monitor.py index 76e5ef31..0b39da60 100644 --- a/util/pace/util/monitor/netcdf_monitor.py +++ b/util/pace/util/monitor/netcdf_monitor.py @@ -1,4 +1,3 @@ -import logging import os from pathlib import Path from typing import Any, Dict, List, Optional, Set From e40d35615636e2e0557403a818f47324f9631cc3 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Wed, 21 Jun 2023 14:24:03 -0400 Subject: [PATCH 32/57] Distributed compilation on orchestrated backend for NxN layouts (#14) * Adapt orchestration distribute compile for NxN layout * Remove debug code * Add a more descriptive string base postfix for cache naming Identify the code path for all cases Consistent reload post-compile Create a central space for all caches generation logic No more original layout check required * Add a test on caches relocatability * Verbose todo * Linting on PR * Run main unit test * Update python to available 3.8.12 * Remove cd to pace * Lint: git submodule recursive * Typo * Add openmpi to the image * Linting * Fix unit tests (remove dxa, dya rely on halo ex) * typo * Change name of jobs * Missing enum * Lint imports * Fix unit tests * Deactivate relocability test due to Python crash Logged as issyue 16 * Typo * Raise for 1,X and X,1 layouts which requires a new descriptor --- dsl/pace/dsl/caches/cache_location.py | 46 +++++++ dsl/pace/dsl/caches/codepath.py | 33 +++++ dsl/pace/dsl/dace/build.py | 136 ++++--------------- dsl/pace/dsl/dace/dace_config.py | 111 ++++++++++++++-- dsl/pace/dsl/dace/orchestration.py | 43 ++----- tests/main/dsl/test_caches.py | 179 ++++++++++++++++++++++++++ tests/main/dsl/test_dace_config.py | 68 +++++++++- 7 files changed, 466 insertions(+), 150 deletions(-) create mode 100644 
dsl/pace/dsl/caches/cache_location.py create mode 100644 dsl/pace/dsl/caches/codepath.py create mode 100644 tests/main/dsl/test_caches.py diff --git a/dsl/pace/dsl/caches/cache_location.py b/dsl/pace/dsl/caches/cache_location.py new file mode 100644 index 00000000..ab57a60b --- /dev/null +++ b/dsl/pace/dsl/caches/cache_location.py @@ -0,0 +1,46 @@ +from pace.dsl.caches.codepath import FV3CodePath +from pace.util import CubedSpherePartitioner + + +def identify_code_path( + rank: int, + partitioner: CubedSpherePartitioner, +) -> FV3CodePath: + if partitioner.layout == (1, 1) or partitioner.layout == [1, 1]: + return FV3CodePath.All + elif partitioner.layout[0] == 1 or partitioner.layout[1] == 1: + raise NotImplementedError( + f"Build for layout {partitioner.layout} is not handled" + ) + else: + if partitioner.tile.on_tile_bottom(rank): + if partitioner.tile.on_tile_left(rank): + return FV3CodePath.BottomLeft + if partitioner.tile.on_tile_right(rank): + return FV3CodePath.BottomRight + else: + return FV3CodePath.Bottom + if partitioner.tile.on_tile_top(rank): + if partitioner.tile.on_tile_left(rank): + return FV3CodePath.TopLeft + if partitioner.tile.on_tile_right(rank): + return FV3CodePath.TopRight + else: + return FV3CodePath.Top + else: + if partitioner.tile.on_tile_left(rank): + return FV3CodePath.Left + if partitioner.tile.on_tile_right(rank): + return FV3CodePath.Right + else: + return FV3CodePath.Center + + +def get_cache_fullpath(code_path: FV3CodePath) -> str: + from gt4py.cartesian import config as gt_config + + return f"{gt_config.cache_settings['root_path']}/.gt_cache_{code_path}" + + +def get_cache_directory(code_path: FV3CodePath) -> str: + return f".gt_cache_{code_path}" diff --git a/dsl/pace/dsl/caches/codepath.py b/dsl/pace/dsl/caches/codepath.py new file mode 100644 index 00000000..cb8327b5 --- /dev/null +++ b/dsl/pace/dsl/caches/codepath.py @@ -0,0 +1,33 @@ +import enum + + +class FV3CodePath(enum.Enum): + """Enum listing all possible code path on 
a cube sphere. + For any layout the cube sphere has up to 9 different code path, 10 + when counting the 1,1 layout which aggregates all 9. Those are related to + the positioning of the rank on the tile and which of the edge/corner case + it has to handle. + Since the framework inline code to optimize, we _cannot_ pre-suppose of the code + being kept and/or ejected. This enum serves as the ground truth to map rank to + the proper generated code. + """ + + All = "FV3_A" + BottomLeft = "FV3_BL" + Left = "FV3_L" + TopLeft = "FV3_TL" + Top = "FV3_T" + TopRight = "FV3_TR" + Right = "FV3_R" + BottomRight = "FV3_BR" + Bottom = "FV3_B" + Center = "FV3_C" + + def __str__(self): + return self.value + + def __repr__(self): + return self.value + + def __format__(self, format_spec: str) -> str: + return self.value diff --git a/dsl/pace/dsl/dace/build.py b/dsl/pace/dsl/dace/build.py index 7d8f3db2..b134f569 100644 --- a/dsl/pace/dsl/dace/build.py +++ b/dsl/pace/dsl/dace/build.py @@ -1,9 +1,9 @@ from typing import List, Optional, Tuple -from warnings import warn from dace.sdfg import SDFG import pace.util +from pace.dsl.caches.cache_location import get_cache_directory, get_cache_fullpath from pace.dsl.dace.dace_config import DaceConfig, DaCeOrchestration @@ -11,19 +11,6 @@ # Distributed compilation -def determine_compiling_ranks(config: DaceConfig) -> bool: - is_compiling = False - rank = config.my_rank - size = config.rank_size - - if int(size / 6) == 0: - is_compiling = True - elif rank % int(size / 6) == rank: - is_compiling = True - - return is_compiling - - def unblock_waiting_tiles(comm, sdfg_path: str) -> None: if comm and comm.Get_size() > 1: for tile in range(1, 6): @@ -31,48 +18,6 @@ def unblock_waiting_tiles(comm, sdfg_path: str) -> None: comm.send(sdfg_path, dest=tile * tilesize + comm.Get_rank()) -def get_target_rank(rank: int, partitioner: pace.util.CubedSpherePartitioner): - """From my rank & the current partitioner we determine which - rank we should read from. 
- For all layout >= 3,3 this presumes build has been done on a - 3,3 layout.""" - if partitioner.layout == (1, 1): - return 0 - if partitioner.layout == (2, 2): - if partitioner.tile.on_tile_bottom(rank): - if partitioner.tile.on_tile_left(rank): - return 0 # "00" - if partitioner.tile.on_tile_right(rank): - return 1 # "10" - if partitioner.tile.on_tile_top(rank): - if partitioner.tile.on_tile_left(rank): - return 2 # "01" - if partitioner.tile.on_tile_right(rank): - return 3 # "11" - else: - if partitioner.tile.on_tile_bottom(rank): - if partitioner.tile.on_tile_left(rank): - return 0 # "00" - if partitioner.tile.on_tile_right(rank): - return 2 # "20" - else: - return 1 # "10" - if partitioner.tile.on_tile_top(rank): - if partitioner.tile.on_tile_left(rank): - return 6 # "02" - if partitioner.tile.on_tile_right(rank): - return 8 # "22" - else: - return 7 # "12" - else: - if partitioner.tile.on_tile_left(rank): - return 3 # "01" - if partitioner.tile.on_tile_right(rank): - return 5 # "21" - else: - return 4 # "11" - - def build_info_filepath() -> str: return "build_info.txt" @@ -101,7 +46,10 @@ def write_build_info( def get_sdfg_path( - daceprog_name: str, config: DaceConfig, sdfg_file_path: Optional[str] = None + daceprog_name: str, + config: DaceConfig, + sdfg_file_path: Optional[str] = None, + override_run_only=False, ) -> Optional[str]: """Build an SDFG path from the qualified program name or it's direct path to .sdfg @@ -113,7 +61,7 @@ def get_sdfg_path( # TODO: check DaceConfig for cache.strategy == name # Guarding against bad usage of this function - if config.get_orchestrate() != DaCeOrchestration.Run: + if not override_run_only and config.get_orchestrate() != DaCeOrchestration.Run: return None # Case of a .sdfg file given by the user to be compiled @@ -125,19 +73,8 @@ def get_sdfg_path( return sdfg_file_path # Case of loading a precompiled .so - lookup using GT_CACHE - from gt4py.cartesian import config as gt_config - - if config.rank_size > 1: - rank = 
config.my_rank - rank_str = f"_{config.target_rank:06d}" - else: - rank = 0 - rank_str = f"_{rank:06d}" - - sdfg_dir_path = ( - f"{gt_config.cache_settings['root_path']}" - f"/.gt_cache{rank_str}/dacecache/{daceprog_name}" - ) + cache_fullpath = get_cache_fullpath(config.code_path) + sdfg_dir_path = f"{cache_fullpath}/dacecache/{daceprog_name}" if not os.path.isdir(sdfg_dir_path): raise RuntimeError(f"Precompiled SDFG is missing at {sdfg_dir_path}") @@ -153,23 +90,8 @@ def get_sdfg_path( raise RuntimeError( f"SDFG build for {build_backend}, {config._backend} has been asked" ) - # Check layout - build_layout = ast.literal_eval(build_info_file.readline()) - can_read = True - if config.layout == (1, 1) and config.layout != build_layout: - can_read = False - elif config.layout == (2, 2) and config.layout != build_layout: - can_read = False - elif ( - build_layout != (1, 1) and build_layout != (2, 2) and build_layout != (3, 3) - ): - can_read = False - if not can_read: - warn( - f"SDFG build for layout {build_layout}, " - f"cannot be run with current layout {config.layout}, bad layout?" - ) # Check resolution per tile + build_layout = ast.literal_eval(build_info_file.readline()) build_resolution = ast.literal_eval(build_info_file.readline()) if (config.tile_resolution[0] / config.layout[0]) != ( build_resolution[0] / build_layout[0] @@ -179,7 +101,7 @@ def get_sdfg_path( f"cannot be run with current resolution {config.tile_resolution}" ) - print(f"[DaCe Config] Rank {rank} loading SDFG {sdfg_dir_path}") + print(f"[DaCe Config] Rank {config.my_rank} loading SDFG {sdfg_dir_path}") return sdfg_dir_path @@ -189,33 +111,31 @@ def set_distributed_caches(config: "DaceConfig"): # Execute specific initialization per orchestration state orchestration_mode = config.get_orchestrate() + if orchestration_mode == DaCeOrchestration.Python: + return # Check that we have all the file we need to early out in case # of issues. 
if orchestration_mode == DaCeOrchestration.Run: import os - from gt4py.cartesian import config as gt_config - - # Check our cache exist - if config.rank_size > 1: - rank = config.my_rank - target_rank_str = f"_{config.target_rank:06d}" - else: - rank = 0 - target_rank_str = f"_{rank:06d}" - cache_filepath = ( - f"{gt_config.cache_settings['root_path']}/.gt_cache{target_rank_str}" - ) - if not os.path.exists(cache_filepath): + cache_directory = get_cache_fullpath(config.code_path) + if not os.path.exists(cache_directory): raise RuntimeError( f"{orchestration_mode} error: Could not find caches for rank " - f"{rank} at {cache_filepath}" + f"{config.my_rank} at {cache_directory}" ) - # All, good set this rank cache to the source cache - gt_config.cache_settings["dir_name"] = f".gt_cache{target_rank_str}" - print( - f"[{orchestration_mode}] Rank {rank} " - f"reading cache {gt_config.cache_settings['dir_name']}" - ) + # Set read/write caches to the target rank + from gt4py.cartesian import config as gt_config + + if config.do_compile: + verb = "reading/writing" + else: + verb = "reading" + + gt_config.cache_settings["dir_name"] = get_cache_directory(config.code_path) + pace.util.pace_log.critical( + f"[{orchestration_mode}] Rank {config.my_rank} " + f"{verb} cache {gt_config.cache_settings['dir_name']}" + ) diff --git a/dsl/pace/dsl/dace/dace_config.py b/dsl/pace/dsl/dace/dace_config.py index 961bf3ba..5e78c6bc 100644 --- a/dsl/pace/dsl/dace/dace_config.py +++ b/dsl/pace/dsl/dace/dace_config.py @@ -1,13 +1,15 @@ import enum -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Tuple import dace.config from dace.codegen.compiled_sdfg import CompiledSDFG from dace.frontend.python.parser import DaceProgram +from pace.dsl.caches.cache_location import identify_code_path +from pace.dsl.caches.codepath import FV3CodePath from pace.dsl.gt4py_utils import is_gpu_backend from pace.util._optional_imports import cupy as cp -from pace.util.communicator 
import CubedSphereCommunicator +from pace.util.communicator import CubedSphereCommunicator, CubedSpherePartitioner # This can be turned on to revert compilation for orchestration @@ -16,6 +18,95 @@ DEACTIVATE_DISTRIBUTED_DACE_COMPILE = False +def _is_corner(rank: int, partitioner: CubedSpherePartitioner) -> bool: + if partitioner.tile.on_tile_bottom(rank): + if partitioner.tile.on_tile_left(rank): + return True + if partitioner.tile.on_tile_right(rank): + return True + if partitioner.tile.on_tile_top(rank): + if partitioner.tile.on_tile_left(rank): + return True + if partitioner.tile.on_tile_right(rank): + return True + return False + + +def _smallest_rank_bottom(x: int, y: int, layout: Tuple[int, int]): + return y == 0 and x == 1 + + +def _smallest_rank_top(x: int, y: int, layout: Tuple[int, int]): + return y == layout[1] - 1 and x == 1 + + +def _smallest_rank_left(x: int, y: int, layout: Tuple[int, int]): + return x == 0 and y == 1 + + +def _smallest_rank_right(x: int, y: int, layout: Tuple[int, int]): + return x == layout[0] - 1 and y == 1 + + +def _smallest_rank_middle(x: int, y: int, layout: Tuple[int, int]): + return layout[0] > 1 and layout[1] > 1 and x == 1 and y == 1 + + +def _determine_compiling_ranks( + config: "DaceConfig", + partitioner: CubedSpherePartitioner, +) -> bool: + """ + We try to map every layout to a 3x3 layout which MPI ranks + looks like + 6 7 8 + 3 4 5 + 0 1 2 + Using the partitionner we find mapping of the given layout + to all of those. 
For example on 4x4 layout + 12 13 14 15 + 8 9 10 11 + 4 5 6 7 + 0 1 2 3 + therefore we map + 0 -> 0 + 1 -> 1 + 2 -> NOT COMPILING + 3 -> 2 + 4 -> 3 + 5 -> 4 + 6 -> NOT COMPILING + 7 -> 5 + 8 -> NOT COMPILING + 9 -> NOT COMPILING + 10 -> NOT COMPILING + 11 -> NOT COMPILING + 12 -> 6 + 13 -> 7 + 14 -> NOT COMPILING + 15 -> 8 + """ + + # Tile 0 compiles + if partitioner.tile_index(config.my_rank) != 0: + return False + + # Corners compile + if _is_corner(config.my_rank, partitioner): + return True + + y, x = partitioner.tile.subtile_index(config.my_rank) + + # If edge or center tile, we give way to the smallest rank + return ( + _smallest_rank_left(x, y, config.layout) + or _smallest_rank_bottom(x, y, config.layout) + or _smallest_rank_middle(x, y, config.layout) + or _smallest_rank_right(x, y, config.layout) + or _smallest_rank_top(x, y, config.layout) + ) + + class DaCeOrchestration(enum.Enum): """ Orchestration mode for DaCe @@ -179,24 +270,24 @@ def __init__( self._backend = backend self.tile_resolution = [tile_nx, tile_nx, tile_nz] - from pace.dsl.dace.build import get_target_rank, set_distributed_caches + from pace.dsl.dace.build import set_distributed_caches # Distributed build required info if communicator: self.my_rank = communicator.rank self.rank_size = communicator.comm.Get_size() - if DEACTIVATE_DISTRIBUTED_DACE_COMPILE: - self.target_rank = communicator.rank - else: - self.target_rank = get_target_rank( - self.my_rank, communicator.partitioner - ) + self.code_path = identify_code_path(self.my_rank, communicator.partitioner) self.layout = communicator.partitioner.layout + self.do_compile = ( + DEACTIVATE_DISTRIBUTED_DACE_COMPILE + or _determine_compiling_ranks(self, communicator.partitioner) + ) else: self.my_rank = 0 self.rank_size = 1 - self.target_rank = 0 + self.code_path = FV3CodePath.All self.layout = (1, 1) + self.do_compile = True set_distributed_caches(self) diff --git a/dsl/pace/dsl/dace/orchestration.py b/dsl/pace/dsl/dace/orchestration.py 
index 2bd9df5b..1feca341 100644 --- a/dsl/pace/dsl/dace/orchestration.py +++ b/dsl/pace/dsl/dace/orchestration.py @@ -11,12 +11,7 @@ from dace.transformation.auto.auto_optimize import make_transients_persistent from dace.transformation.helpers import get_parent_map -from pace.dsl.dace.build import ( - determine_compiling_ranks, - get_sdfg_path, - unblock_waiting_tiles, - write_build_info, -) +from pace.dsl.dace.build import get_sdfg_path, write_build_info from pace.dsl.dace.dace_config import ( DEACTIVATE_DISTRIBUTED_DACE_COMPILE, DaceConfig, @@ -34,6 +29,7 @@ memory_static_analysis, report_memory_static_analysis, ) +from pace.util import pace_log from pace.util.mpi import MPI @@ -122,7 +118,7 @@ def _build_sdfg( if DEACTIVATE_DISTRIBUTED_DACE_COMPILE: is_compiling = True else: - is_compiling = determine_compiling_ranks(config) + is_compiling = config.do_compile if is_compiling: # Make the transients array persistents if config.is_gpu_backend(): @@ -212,31 +208,16 @@ def _build_sdfg( ) exit(0) elif config.get_orchestrate() == DaCeOrchestration.BuildAndRun: - MPI.COMM_WORLD.Barrier() - if is_compiling: - if not DEACTIVATE_DISTRIBUTED_DACE_COMPILE: - unblock_waiting_tiles(MPI.COMM_WORLD, sdfg.build_folder) - DaCeProgress.log( - DaCeProgress.default_prefix(config), "Build folder exchanged." - ) - csdfg, _ = daceprog.load_precompiled_sdfg( - sdfg.build_folder, *args, **kwargs - ) - config.loaded_precompiled_SDFG[daceprog] = FrozenCompiledSDFG( - daceprog, csdfg, args, kwargs - ) - - else: - source_rank = config.target_rank - # wait for compilation to be done + if not is_compiling: DaCeProgress.log( DaCeProgress.default_prefix(config), - "Rank is not compiling. Waiting for build dir...", - ) - sdfg_path = MPI.COMM_WORLD.recv(source=source_rank) - DaCeProgress.log( - DaCeProgress.default_prefix(config), "Build dir received, loading .so." + "Rank is not compiling. 
" + "Waiting for compilation to end on all other ranks...", ) + MPI.COMM_WORLD.Barrier() + + with DaCeProgress(config, "Loading"): + sdfg_path = get_sdfg_path(daceprog.name, config, override_run_only=True) csdfg, _ = daceprog.load_precompiled_sdfg(sdfg_path, *args, **kwargs) config.loaded_precompiled_SDFG[daceprog] = FrozenCompiledSDFG( daceprog, csdfg, args, kwargs @@ -267,6 +248,7 @@ def _call_sdfg( config.get_orchestrate() == DaCeOrchestration.Build or config.get_orchestrate() == DaCeOrchestration.BuildAndRun ): + pace_log.info("Building DaCe orchestration") res = _build_sdfg(daceprog, sdfg, config, args, kwargs) elif config.get_orchestrate() == DaCeOrchestration.Run: # We should never hit this, it should be caught by the @@ -302,7 +284,7 @@ def _parse_sdfg( if DEACTIVATE_DISTRIBUTED_DACE_COMPILE: is_compiling = True else: - is_compiling = determine_compiling_ranks(config) + is_compiling = config.do_compile if not is_compiling: # We can not parse the SDFG since we will load the proper # compiled SDFG from the compiling rank @@ -448,7 +430,6 @@ def __get__(self, obj, objtype=None) -> SDFGEnabledCallable: """Return SDFGEnabledCallable wrapping original obj.method from cache. 
Update cache first if need be""" if (id(obj), id(self.func)) not in _LazyComputepathMethod.bound_callables: - _LazyComputepathMethod.bound_callables[ (id(obj), id(self.func)) ] = _LazyComputepathMethod.SDFGEnabledCallable(self, obj) diff --git a/tests/main/dsl/test_caches.py b/tests/main/dsl/test_caches.py new file mode 100644 index 00000000..d5318493 --- /dev/null +++ b/tests/main/dsl/test_caches.py @@ -0,0 +1,179 @@ +import pytest +from gt4py.cartesian.gtscript import PARALLEL, Field, computation, interval +from gt4py.storage import empty, ones + +import pace.dsl +from pace.dsl.dace import orchestrate +from pace.dsl.dace.dace_config import DaceConfig, DaCeOrchestration +from pace.dsl.stencil import CompilationConfig, GridIndexing + + +def _make_storage( + func, + grid_indexing, + stencil_config: pace.dsl.StencilConfig, + *, + dtype=float, + aligned_index=(0, 0, 0), +): + return func( + backend=stencil_config.compilation_config.backend, + shape=grid_indexing.domain, + dtype=dtype, + aligned_index=aligned_index, + ) + + +def _stencil(inp: Field[float], out: Field[float], scalar: float): + with computation(PARALLEL), interval(...): + out = inp + + +def _build_stencil(backend, orchestrated: DaCeOrchestration): + # Make stencil and verify it ran + grid_indexing = GridIndexing( + domain=(5, 5, 5), + n_halo=2, + south_edge=True, + north_edge=True, + west_edge=True, + east_edge=True, + ) + + stencil_config = pace.dsl.StencilConfig( + compilation_config=CompilationConfig(backend=backend, rebuild=True), + dace_config=DaceConfig(None, backend, 5, 5, orchestrated), + ) + + stencil_factory = pace.dsl.StencilFactory(stencil_config, grid_indexing) + + built_stencil = stencil_factory.from_origin_domain( + _stencil, (0, 0, 0), domain=grid_indexing.domain + ) + + return built_stencil, grid_indexing, stencil_config + + +class OrchestratedProgam: + def __init__(self, backend, orchestration): + self.stencil, grid_indexing, stencil_config = _build_stencil( + backend, orchestration + ) 
+ orchestrate(obj=self, config=stencil_config.dace_config) + self.inp = _make_storage(ones, grid_indexing, stencil_config, dtype=float) + self.out = _make_storage(empty, grid_indexing, stencil_config, dtype=float) + + def __call__(self): + self.stencil(self.inp, self.out, self.inp[0, 0, 0]) + + +@pytest.mark.parametrize( + "backend", + [ + pytest.param("dace:cpu"), + ], +) +def test_relocatability_orchestration(backend): + import os + import shutil + + from gt4py.cartesian import config as gt_config + + original_root_directory = gt_config.cache_settings["root_path"] + working_dir = str(os.getcwd()) + + # Compile on default + p0 = OrchestratedProgam(backend, DaCeOrchestration.BuildAndRun) + p0() + assert os.path.exists( + f"{working_dir}/.gt_cache_FV3_A/dacecache/" + "test_caches_OrchestratedProgam___call__", + ) or os.path.exists( + f"{working_dir}/.gt_cache_FV3_A/dacecache/OrchestratedProgam___call__", + ) + + # Compile in another directory + + custom_path = f"{working_dir}/.my_cache_path" + gt_config.cache_settings["root_path"] = custom_path + p1 = OrchestratedProgam(backend, DaCeOrchestration.BuildAndRun) + p1() + assert os.path.exists( + f"{custom_path}/.gt_cache_FV3_A/dacecache/" + "test_caches_OrchestratedProgam___call__", + ) or os.path.exists( + f"{working_dir}/.gt_cache_FV3_A/dacecache/OrchestratedProgam___call__", + ) + + # Check relocability by copying the second cache directory, + # changing the path of gt_config.cache_settings and trying to Run on it + relocated_path = f"{working_dir}/.my_relocated_cache_path" + shutil.copytree(custom_path, relocated_path, dirs_exist_ok=True) + gt_config.cache_settings["root_path"] = relocated_path + p2 = OrchestratedProgam(backend, DaCeOrchestration.Run) + p2() + + # Generate a file exists error to check for bad path + bogus_path = "./nope/notatall/nothappening" + gt_config.cache_settings["root_path"] = bogus_path + with pytest.raises(RuntimeError): + OrchestratedProgam(backend, DaCeOrchestration.Run) + + # Restore 
cache settings + gt_config.cache_settings["root_path"] = original_root_directory + + +@pytest.mark.parametrize( + "backend", + [ + pytest.param("gt:cpu_ifirst"), + pytest.param("dace:cpu"), + ], +) +def test_relocatability(backend): + # TODO: test work - but crashes when chained with other + # see https://github.com/GEOS-ESM/pace/issues/16 + pass + # import os + # import shutil + + # working_dir = os.getcwd() + + # # Compile on default + # p0 = OrchestratedProgam(backend, DaCeOrchestration.Python) + # p0() + # assert os.path.exists( + # f"{working_dir}/.gt_cache_000000/py38_1013/gtcpu_ifirst/__main__/_stencil/" + # ) + + # # Compile in another directory + # from gt4py.cartesian import config as gt_config + + # custom_path = f"{working_dir}/.my_cache_path" + # gt_config.cache_settings["root_path"] = custom_path + # p1 = OrchestratedProgam(backend, DaCeOrchestration.Python) + # p1() + # assert os.path.exists( + # f"{custom_path}/.gt_cache_000000/py38_1013/gtcpu_ifirst/__main__/_stencil/" + # ) + + # # Check relocability by copying the second cache directory, + # # changing the path of gt_config.cache_settings and trying to Run on it + # relocated_path = f"{working_dir}/.my_relocated_cache_path" + # shutil.copytree(custom_path, relocated_path, dirs_exist_ok=True) + # gt_config.cache_settings["root_path"] = relocated_path + # p2 = OrchestratedProgam(backend, DaCeOrchestration.Python) + # p2() + # assert os.path.exists( + # f"{relocated_path}/.gt_cache_000000/py38_1013/gtcpu_ifirst/__main__/_stencil/" + # ) + + +if __name__ == "__main__": + # TODO: test can be merged once gt4py also generates in the _FV3_X format + print("\n|> test_relocatability_orchestration('dace:cpu')\n") + test_relocatability_orchestration("dace:cpu") + print("\n|> test_relocatability('gt:cpu_ifirst')\n") + test_relocatability("gt:cpu_ifirst") + print("\n|> test_relocatability('dace:cpu')\n") + test_relocatability("dace:cpu") diff --git a/tests/main/dsl/test_dace_config.py 
b/tests/main/dsl/test_dace_config.py index 78553278..cb3566dd 100644 --- a/tests/main/dsl/test_dace_config.py +++ b/tests/main/dsl/test_dace_config.py @@ -1,11 +1,12 @@ import unittest.mock -from pace.dsl.dace.dace_config import DaceConfig +from pace.dsl.dace.dace_config import DaceConfig, _determine_compiling_ranks from pace.dsl.dace.orchestration import ( DaCeOrchestration, orchestrate, orchestrate_function, ) +from pace.util.communicator import CubedSpherePartitioner, TilePartitioner """ @@ -91,3 +92,68 @@ def foo(self): a = A() a.foo() assert not mock_call_sdfg.called + + +def test_orchestrate_distributed_build(): + dummy_dace_config = DaceConfig( + communicator=None, + backend="gtc:dace", + orchestration=DaCeOrchestration.BuildAndRun, + ) + + def _does_compile(rank, partitioner) -> bool: + dummy_dace_config.layout = partitioner.layout + dummy_dace_config.rank_size = partitioner.layout[0] * partitioner.layout[1] * 6 + dummy_dace_config.my_rank = rank + return _determine_compiling_ranks(dummy_dace_config, partitioner) + + # (1, 1) layout, one rank which compiles + cube_partitioner_11 = CubedSpherePartitioner(TilePartitioner((1, 1))) + assert _does_compile(0, cube_partitioner_11) + assert not _does_compile(1, cube_partitioner_11) # not compiling face + + # (2, 2) layout, 4 ranks, all compiling + cube_partitioner_22 = CubedSpherePartitioner(TilePartitioner((2, 2))) + assert _does_compile(0, cube_partitioner_22) + assert _does_compile(1, cube_partitioner_22) + assert _does_compile(2, cube_partitioner_22) + assert _does_compile(3, cube_partitioner_22) + assert not _does_compile(4, cube_partitioner_22) # not compiling face + + # (3, 3) layout, 9 ranks, all compiling + cube_partitioner_33 = CubedSpherePartitioner(TilePartitioner((3, 3))) + assert _does_compile(0, cube_partitioner_33) + assert _does_compile(1, cube_partitioner_33) + assert _does_compile(2, cube_partitioner_33) + assert _does_compile(3, cube_partitioner_33) + assert _does_compile(4, cube_partitioner_33) 
+ assert _does_compile(5, cube_partitioner_33) + assert _does_compile(6, cube_partitioner_33) + assert _does_compile(7, cube_partitioner_33) + assert _does_compile(8, cube_partitioner_33) + assert not _does_compile(9, cube_partitioner_33) # not compiling face + + # (4, 4) layout, 16 ranks, + # expecting compiling:0, 1, 2, 3, 4, 5, 7, 12, 13, 15 + cube_partitioner_44 = CubedSpherePartitioner(TilePartitioner((4, 4))) + assert _does_compile(0, cube_partitioner_44) + assert _does_compile(1, cube_partitioner_44) + assert _does_compile(4, cube_partitioner_44) + assert _does_compile(5, cube_partitioner_44) + assert _does_compile(7, cube_partitioner_44) + assert _does_compile(12, cube_partitioner_44) + assert _does_compile(13, cube_partitioner_44) + assert _does_compile(15, cube_partitioner_44) + assert not _does_compile(2, cube_partitioner_44) # same code path as 3 + assert not _does_compile(6, cube_partitioner_44) # same code path as 5 + assert not _does_compile(8, cube_partitioner_44) # same code path as 4 + assert not _does_compile(11, cube_partitioner_44) # same code path as 7 + assert not _does_compile(16, cube_partitioner_44) # not compiling face + + # For a few other layouts, we check that we always have 9 compiling ranks + for layout in [(5, 5), (10, 10), (20, 20)]: + partition = CubedSpherePartitioner(TilePartitioner(layout)) + compiling = 0 + for i in range(layout[0] * layout[1] * 6): + compiling += 1 if _does_compile(i, partition) else 0 + assert compiling == 9 From 286ad0003d2d7e92878122995419d91c115c4cc5 Mon Sep 17 00:00:00 2001 From: Purnendu Chakraborty Date: Sun, 25 Jun 2023 19:38:29 -0400 Subject: [PATCH 33/57] Added ak, bk for 137 levels in eta.py --- util/pace/util/grid/eta.py | 291 ++++++++++++++++++++++++++++++++++++- 1 file changed, 290 insertions(+), 1 deletion(-) diff --git a/util/pace/util/grid/eta.py b/util/pace/util/grid/eta.py index 50afaadb..075bc920 100644 --- a/util/pace/util/grid/eta.py +++ b/util/pace/util/grid/eta.py @@ -559,10 +559,299 
@@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: ] ) + elif km == 137: + + ak = np.array( + [ + 1.00000000, + 1.82500005, + 3.00000000, + 4.63000011, + 6.82797718, + 9.74696636, + 13.6054239, + 18.6089306, + 24.9857178, + 32.9857101, + 42.8792419, + 54.9554634, + 69.5205765, + 86.8958817, + 107.415741, + 131.425507, + 159.279404, + 191.338562, + 227.968948, + 269.539581, + 316.420746, + 368.982361, + 427.592499, + 492.616028, + 564.413452, + 643.339905, + 729.744141, + 823.967834, + 926.344910, + 1037.20117, + 1156.85364, + 1285.61035, + 1423.77014, + 1571.62292, + 1729.44897, + 1897.51929, + 2076.09595, + 2265.43164, + 2465.77051, + 2677.34814, + 2900.39136, + 3135.11938, + 3381.74365, + 3640.46826, + 3911.49048, + 4194.93066, + 4490.81738, + 4799.14941, + 5119.89502, + 5452.99072, + 5798.34473, + 6156.07422, + 6526.94678, + 6911.87061, + 7311.86914, + 7727.41211, + 8159.35400, + 8608.52539, + 9076.40039, + 9562.68262, + 10065.9785, + 10584.6318, + 11116.6621, + 11660.0674, + 12211.5479, + 12766.8730, + 13324.6689, + 13881.3311, + 14432.1396, + 14975.6152, + 15508.2568, + 16026.1152, + 16527.3223, + 17008.7891, + 17467.6133, + 17901.6211, + 18308.4336, + 18685.7188, + 19031.2891, + 19343.5117, + 19620.0430, + 19859.3906, + 20059.9316, + 20219.6641, + 20337.8633, + 20412.3086, + 20442.0781, + 20425.7188, + 20361.8164, + 20249.5117, + 20087.0859, + 19874.0254, + 19608.5723, + 19290.2266, + 18917.4609, + 18489.7070, + 18006.9258, + 17471.8398, + 16888.6875, + 16262.0469, + 15596.6953, + 14898.4531, + 14173.3242, + 13427.7695, + 12668.2578, + 11901.3398, + 11133.3047, + 10370.1758, + 9617.51562, + 8880.45312, + 8163.37500, + 7470.34375, + 6804.42188, + 6168.53125, + 5564.38281, + 4993.79688, + 4457.37500, + 3955.96094, + 3489.23438, + 3057.26562, + 2659.14062, + 2294.24219, + 1961.50000, + 1659.47656, + 1387.54688, + 1143.25000, + 926.507812, + 734.992188, + 568.062500, + 424.414062, + 302.476562, + 202.484375, + 122.101562, + 
62.7812500, + 22.8359375, + 3.75781298, + 0.00000000, + 0.00000000, + ] + ) + + bk = np.array( + [ + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 7.00000010E-06, + 2.40000008E-05, + 5.90000018E-05, + 1.12000002E-04, + 1.99000002E-04, + 3.39999999E-04, + 5.61999972E-04, + 8.90000025E-04, + 1.35300006E-03, + 1.99200003E-03, + 2.85700010E-03, + 3.97100020E-03, + 5.37799997E-03, + 7.13300006E-03, + 9.26099997E-03, + 1.18060000E-02, + 1.48160001E-02, + 1.83179993E-02, + 2.23549996E-02, + 2.69639995E-02, + 3.21759991E-02, + 3.80260013E-02, + 4.45480011E-02, + 5.17730005E-02, + 5.97280003E-02, + 6.84479997E-02, + 7.79580027E-02, + 8.82859975E-02, + 9.94620025E-02, + 0.111505002, + 0.124448001, + 0.138312995, + 0.153125003, + 0.168909997, + 0.185689002, + 0.203491002, + 0.222332999, + 0.242244005, + 0.263242006, + 0.285353988, + 0.308598012, + 0.332938999, + 0.358253986, + 0.384362996, + 0.411125004, + 0.438391000, + 0.466003001, + 0.493800014, + 0.521619022, + 0.549301028, + 0.576691985, + 0.603648007, + 0.630035996, + 0.655736029, + 0.680643022, + 0.704668999, + 0.727738976, + 0.749796987, + 0.770798028, + 0.790717006, + 0.809535980, + 0.827256024, + 0.843881011, + 0.859431982, + 0.873929024, + 0.887408018, + 0.899900019, + 0.911448002, + 0.922096014, + 
0.931881011, + 0.940859973, + 0.949064016, + 0.956550002, + 0.963352025, + 0.969512999, + 0.975077987, + 0.980072021, + 0.984542012, + 0.988499999, + 0.991984010, + 0.995002985, + 0.997630000, + 1.00000000, + ] + ) + else: raise NotImplementedError( - "Only grids with 72, 79, or 91 vertical levels have been implemented so far" + "Only grids with 72, 79, 91 or 137 vertical levels have been implemented so far" ) + if 0.0 in bk: ks = 0 if km == 91 else np.where(bk == 0)[0][-1] ptop = ak[0] From c1e011cf1693d02491db910f33bab7ae027bfa11 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Thu, 6 Jul 2023 13:05:53 -0400 Subject: [PATCH 34/57] Add floating point precision to GEOS bridge init --- dsl/pace/dsl/typing.py | 6 +++++- fv3core/pace/fv3core/initialization/geos_wrapper.py | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/dsl/pace/dsl/typing.py b/dsl/pace/dsl/typing.py index 05b255ce..d67dd7b6 100644 --- a/dsl/pace/dsl/typing.py +++ b/dsl/pace/dsl/typing.py @@ -22,11 +22,15 @@ DTypes = Union[bool, np.bool_, int, np.int32, np.int64, float, np.float32, np.float64] +def floating_point_precision() -> int: + return int(os.getenv("PACE_FLOAT_PRECISION", "64")) + + def global_set_floating_point_precision(): """Set the global floating point precision for all reference to Float in the codebase. 
Defaults to 64 bit.""" global Float - precision_in_bit = int(os.getenv("PACE_FLOAT_PRECISION", "64")) + precision_in_bit = floating_point_precision() if precision_in_bit == 64: return np.float64 elif precision_in_bit == 32: diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 2835e77e..9fbb98ab 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -10,6 +10,7 @@ from pace import fv3core from pace.driver.performance.collector import PerformanceCollector from pace.dsl.dace import DaceConfig, orchestrate +from pace.dsl.typing import floating_point_precision from pace.dsl.gt4py_utils import is_gpu_backend from pace.util.logging import pace_log @@ -136,6 +137,7 @@ def __init__( f" dt : {self.dycore_state.bdt}\n" f" bridge : {self._fortran_mem_space} > {self._pace_mem_space}\n" f" backend: {backend}\n" + f" float : {floating_point_precision()}bit" f" orchestration: {self._is_orchestrated}\n" f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})" ) From c58a2a126c83cc0a1014bfa824a7b78f7fd1cd19 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Thu, 6 Jul 2023 14:28:24 -0400 Subject: [PATCH 35/57] lint --- .../fv3core/initialization/geos_wrapper.py | 2 +- util/pace/util/grid/eta.py | 64 +++++++++---------- 2 files changed, 32 insertions(+), 34 deletions(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 9fbb98ab..8ca5c890 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -10,8 +10,8 @@ from pace import fv3core from pace.driver.performance.collector import PerformanceCollector from pace.dsl.dace import DaceConfig, orchestrate -from pace.dsl.typing import floating_point_precision from pace.dsl.gt4py_utils import is_gpu_backend +from pace.dsl.typing import 
floating_point_precision from pace.util.logging import pace_log diff --git a/util/pace/util/grid/eta.py b/util/pace/util/grid/eta.py index 075bc920..dc37aaa2 100644 --- a/util/pace/util/grid/eta.py +++ b/util/pace/util/grid/eta.py @@ -206,7 +206,6 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: ) elif km == 91: - ak = np.array( [ 1.00000000, @@ -402,7 +401,6 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: ) elif km == 72: - ak = np.array( [ 1.00000000, @@ -560,7 +558,6 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: ) elif km == 137: - ak = np.array( [ 1.00000000, @@ -761,35 +758,35 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: 0.00000000, 0.00000000, 0.00000000, - 7.00000010E-06, - 2.40000008E-05, - 5.90000018E-05, - 1.12000002E-04, - 1.99000002E-04, - 3.39999999E-04, - 5.61999972E-04, - 8.90000025E-04, - 1.35300006E-03, - 1.99200003E-03, - 2.85700010E-03, - 3.97100020E-03, - 5.37799997E-03, - 7.13300006E-03, - 9.26099997E-03, - 1.18060000E-02, - 1.48160001E-02, - 1.83179993E-02, - 2.23549996E-02, - 2.69639995E-02, - 3.21759991E-02, - 3.80260013E-02, - 4.45480011E-02, - 5.17730005E-02, - 5.97280003E-02, - 6.84479997E-02, - 7.79580027E-02, - 8.82859975E-02, - 9.94620025E-02, + 7.00000010e-06, + 2.40000008e-05, + 5.90000018e-05, + 1.12000002e-04, + 1.99000002e-04, + 3.39999999e-04, + 5.61999972e-04, + 8.90000025e-04, + 1.35300006e-03, + 1.99200003e-03, + 2.85700010e-03, + 3.97100020e-03, + 5.37799997e-03, + 7.13300006e-03, + 9.26099997e-03, + 1.18060000e-02, + 1.48160001e-02, + 1.83179993e-02, + 2.23549996e-02, + 2.69639995e-02, + 3.21759991e-02, + 3.80260013e-02, + 4.45480011e-02, + 5.17730005e-02, + 5.97280003e-02, + 6.84479997e-02, + 7.79580027e-02, + 8.82859975e-02, + 9.94620025e-02, 0.111505002, 0.124448001, 0.138312995, @@ -849,7 +846,8 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: else: raise 
NotImplementedError( - "Only grids with 72, 79, 91 or 137 vertical levels have been implemented so far" + "Only grids with 72, 79, 91 or 137 vertical levels" + "have been implemented so far" ) if 0.0 in bk: From 8e362a2913360c6998a402c11c7132b4add3f3bb Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Fri, 7 Jul 2023 09:50:04 -0400 Subject: [PATCH 36/57] Add device PCI bus id (for MPS debug) --- fv3core/pace/fv3core/initialization/geos_wrapper.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 8ca5c890..87ce0193 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -5,6 +5,7 @@ import f90nml import numpy as np +from pace.util._optional_imports import cupy as cp import pace.util from pace import fv3core @@ -132,6 +133,11 @@ def __init__( self.output_dict: Dict[str, np.ndarray] = {} self._allocate_output_dir() + device_ordinal_info = "" + if is_gpu_backend(): + device_ordinal_info = ( + f" Device PCI bus id: {cp.cuda.Device(0).pci_bus_id}\n" + ) pace_log.info( "Pace GEOS wrapper initialized: \n" f" dt : {self.dycore_state.bdt}\n" @@ -139,7 +145,8 @@ def __init__( f" backend: {backend}\n" f" float : {floating_point_precision()}bit" f" orchestration: {self._is_orchestrated}\n" - f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})" + f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})\n" + f" {device_ordinal_info}" ) def _critical_path(self): From adc5ee501a0b36b8478cf751012fc619ee832b44 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Fri, 7 Jul 2023 09:59:24 -0400 Subject: [PATCH 37/57] Typo + lint --- fv3core/pace/fv3core/initialization/geos_wrapper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py 
b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 87ce0193..7f8f05d3 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -5,7 +5,6 @@ import f90nml import numpy as np -from pace.util._optional_imports import cupy as cp import pace.util from pace import fv3core @@ -13,6 +12,7 @@ from pace.dsl.dace import DaceConfig, orchestrate from pace.dsl.gt4py_utils import is_gpu_backend from pace.dsl.typing import floating_point_precision +from pace.util._optional_imports import cupy as cp from pace.util.logging import pace_log @@ -134,7 +134,7 @@ def __init__( self._allocate_output_dir() device_ordinal_info = "" - if is_gpu_backend(): + if is_gpu_backend(backend): device_ordinal_info = ( f" Device PCI bus id: {cp.cuda.Device(0).pci_bus_id}\n" ) From 39ff8ead23a35754b455c89781f1413a087bfc6a Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Thu, 3 Aug 2023 15:31:14 -0400 Subject: [PATCH 38/57] Try to detect MPS reading the "log" pipe --- .../fv3core/initialization/geos_wrapper.py | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 7f8f05d3..8143da2b 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -133,20 +133,30 @@ def __init__( self.output_dict: Dict[str, np.ndarray] = {} self._allocate_output_dir() - device_ordinal_info = "" - if is_gpu_backend(backend): - device_ordinal_info = ( - f" Device PCI bus id: {cp.cuda.Device(0).pci_bus_id}\n" - ) + # Feedback information + device_ordinal_info = ( + f" Device PCI bus id: {cp.cuda.Device(0).pci_bus_id}\n" + if is_gpu_backend(backend) + else "N/A" + ) + MPS_pipe_directory = os.getenv("CUDA_MPS_PIPE_DIRECTORY", None) + MPS_is_on = ( + True + if MPS_pipe_directory + and is_gpu_backend(backend) + and 
os.path.exists(f"{MPS_pipe_directory}/log") + else False + ) pace_log.info( "Pace GEOS wrapper initialized: \n" - f" dt : {self.dycore_state.bdt}\n" - f" bridge : {self._fortran_mem_space} > {self._pace_mem_space}\n" - f" backend: {backend}\n" - f" float : {floating_point_precision()}bit" - f" orchestration: {self._is_orchestrated}\n" - f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})\n" + f" dt : {self.dycore_state.bdt}\n" + f" bridge : {self._fortran_mem_space} > {self._pace_mem_space}\n" + f" backend : {backend}\n" + f" float : {floating_point_precision()}bit" + f" orchestration : {self._is_orchestrated}\n" + f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})\n" f" {device_ordinal_info}" + f" Nvidia MPS : {MPS_is_on}" ) def _critical_path(self): From f2d171dc5903560c991932c5a14ccf15536012c8 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 8 Aug 2023 14:15:14 -0400 Subject: [PATCH 39/57] Lint --- fv3core/pace/fv3core/initialization/geos_wrapper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 8143da2b..de0b944c 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -154,7 +154,8 @@ def __init__( f" backend : {backend}\n" f" float : {floating_point_precision()}bit" f" orchestration : {self._is_orchestrated}\n" - f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})\n" + f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz}" + f"(halo: {sizer.n_halo})\n" f" {device_ordinal_info}" f" Nvidia MPS : {MPS_is_on}" ) From ac70398415a3c7dcff9fa56a9385618a1113f8d9 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 8 Aug 2023 14:16:06 -0400 Subject: [PATCH 40/57] Clean up --- fv3core/pace/fv3core/initialization/geos_wrapper.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git 
a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index de0b944c..f7133543 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -141,11 +141,9 @@ def __init__( ) MPS_pipe_directory = os.getenv("CUDA_MPS_PIPE_DIRECTORY", None) MPS_is_on = ( - True - if MPS_pipe_directory + MPS_pipe_directory and is_gpu_backend(backend) and os.path.exists(f"{MPS_pipe_directory}/log") - else False ) pace_log.info( "Pace GEOS wrapper initialized: \n" From 0a4163f742dff8af21f734802ab6d7bdafaba0ca Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 8 Aug 2023 14:21:51 -0400 Subject: [PATCH 41/57] Log info GEOS bridge (#18) * Add floating point precision to GEOS bridge init * lint * Add device PCI bus id (for MPS debug) * Typo + lint * Try to detect MPS reading the "log" pipe * Lint * Clean up --- dsl/pace/dsl/typing.py | 6 +- .../fv3core/initialization/geos_wrapper.py | 28 ++++++-- util/pace/util/grid/eta.py | 64 +++++++++---------- 3 files changed, 59 insertions(+), 39 deletions(-) diff --git a/dsl/pace/dsl/typing.py b/dsl/pace/dsl/typing.py index 05b255ce..d67dd7b6 100644 --- a/dsl/pace/dsl/typing.py +++ b/dsl/pace/dsl/typing.py @@ -22,11 +22,15 @@ DTypes = Union[bool, np.bool_, int, np.int32, np.int64, float, np.float32, np.float64] +def floating_point_precision() -> int: + return int(os.getenv("PACE_FLOAT_PRECISION", "64")) + + def global_set_floating_point_precision(): """Set the global floating point precision for all reference to Float in the codebase. 
Defaults to 64 bit.""" global Float - precision_in_bit = int(os.getenv("PACE_FLOAT_PRECISION", "64")) + precision_in_bit = floating_point_precision() if precision_in_bit == 64: return np.float64 elif precision_in_bit == 32: diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 2835e77e..f7133543 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -11,6 +11,8 @@ from pace.driver.performance.collector import PerformanceCollector from pace.dsl.dace import DaceConfig, orchestrate from pace.dsl.gt4py_utils import is_gpu_backend +from pace.dsl.typing import floating_point_precision +from pace.util._optional_imports import cupy as cp from pace.util.logging import pace_log @@ -131,13 +133,29 @@ def __init__( self.output_dict: Dict[str, np.ndarray] = {} self._allocate_output_dir() + # Feedback information + device_ordinal_info = ( + f" Device PCI bus id: {cp.cuda.Device(0).pci_bus_id}\n" + if is_gpu_backend(backend) + else "N/A" + ) + MPS_pipe_directory = os.getenv("CUDA_MPS_PIPE_DIRECTORY", None) + MPS_is_on = ( + MPS_pipe_directory + and is_gpu_backend(backend) + and os.path.exists(f"{MPS_pipe_directory}/log") + ) pace_log.info( "Pace GEOS wrapper initialized: \n" - f" dt : {self.dycore_state.bdt}\n" - f" bridge : {self._fortran_mem_space} > {self._pace_mem_space}\n" - f" backend: {backend}\n" - f" orchestration: {self._is_orchestrated}\n" - f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})" + f" dt : {self.dycore_state.bdt}\n" + f" bridge : {self._fortran_mem_space} > {self._pace_mem_space}\n" + f" backend : {backend}\n" + f" float : {floating_point_precision()}bit" + f" orchestration : {self._is_orchestrated}\n" + f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz}" + f"(halo: {sizer.n_halo})\n" + f" {device_ordinal_info}" + f" Nvidia MPS : {MPS_is_on}" ) def _critical_path(self): diff --git 
a/util/pace/util/grid/eta.py b/util/pace/util/grid/eta.py index 075bc920..dc37aaa2 100644 --- a/util/pace/util/grid/eta.py +++ b/util/pace/util/grid/eta.py @@ -206,7 +206,6 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: ) elif km == 91: - ak = np.array( [ 1.00000000, @@ -402,7 +401,6 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: ) elif km == 72: - ak = np.array( [ 1.00000000, @@ -560,7 +558,6 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: ) elif km == 137: - ak = np.array( [ 1.00000000, @@ -761,35 +758,35 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: 0.00000000, 0.00000000, 0.00000000, - 7.00000010E-06, - 2.40000008E-05, - 5.90000018E-05, - 1.12000002E-04, - 1.99000002E-04, - 3.39999999E-04, - 5.61999972E-04, - 8.90000025E-04, - 1.35300006E-03, - 1.99200003E-03, - 2.85700010E-03, - 3.97100020E-03, - 5.37799997E-03, - 7.13300006E-03, - 9.26099997E-03, - 1.18060000E-02, - 1.48160001E-02, - 1.83179993E-02, - 2.23549996E-02, - 2.69639995E-02, - 3.21759991E-02, - 3.80260013E-02, - 4.45480011E-02, - 5.17730005E-02, - 5.97280003E-02, - 6.84479997E-02, - 7.79580027E-02, - 8.82859975E-02, - 9.94620025E-02, + 7.00000010e-06, + 2.40000008e-05, + 5.90000018e-05, + 1.12000002e-04, + 1.99000002e-04, + 3.39999999e-04, + 5.61999972e-04, + 8.90000025e-04, + 1.35300006e-03, + 1.99200003e-03, + 2.85700010e-03, + 3.97100020e-03, + 5.37799997e-03, + 7.13300006e-03, + 9.26099997e-03, + 1.18060000e-02, + 1.48160001e-02, + 1.83179993e-02, + 2.23549996e-02, + 2.69639995e-02, + 3.21759991e-02, + 3.80260013e-02, + 4.45480011e-02, + 5.17730005e-02, + 5.97280003e-02, + 6.84479997e-02, + 7.79580027e-02, + 8.82859975e-02, + 9.94620025e-02, 0.111505002, 0.124448001, 0.138312995, @@ -849,7 +846,8 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: else: raise NotImplementedError( - "Only grids with 72, 79, 91 or 137 vertical levels have 
been implemented so far" + "Only grids with 72, 79, 91 or 137 vertical levels" + "have been implemented so far" ) if 0.0 in bk: From 9d6d2f9d1dd281b873033cf1d818c1359f9c845b Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 15 Aug 2023 16:45:15 -0400 Subject: [PATCH 42/57] Update geos/develop to grab NOAA PR9 results (#21) * Verbose choice of block/grid size * added build script for c5 * updated repo to NOAA * GEOS integration (#9) * Initialize GeosDycoreWrapper with bdt (timestep) * Use GEOS version of constants * 1. Add qcld to the list of tracers beings advected 2. Made GEOS specific changes to thresholds in saturation adjustment * Accumulate diss_est * Allow GEOS_WRAPPER to process device data * Add clear to collector for 3rd party use. GEOS pass down timings to caller * Make kernel analysis run a copy stencil to compute local bandwith Parametrize tool with backend, output format * Move constant on a env var Add saturation adjustement threshold to const * Remove unused if leading to empty code block * Restrict dace to 0.14.1 due to a parsing bug * Add guard for bdt==0 Fix bad merge for bdt with GEOS_Wrapper * Remove unused code * Fix theroritical timings * Fixed a bug where pkz was being calculated twice, and the second calc was wrong * Downgrade DaCe to 0.14.0 pending array aliasing fix * Set default cache path for orchestrated DaCe to respect GT_CACHE_* env * Remove previous per stencil override of default_build_folder * Revert "Set default cache path for orchestrated DaCe to respect GT_CACHE_* env" * Revert "Remove previous per stencil override of default_build_folder" * Read cache_root in default dace backend * Document faulty behavior with GT_CACHE_DIR_NAME * Fix bad requirements syntax * Check for the string value of CONST_VERSION directly instead of enum * Protect constant selection more rigorusly. 
Clean abort on unknown constant given * Log constants selection * Refactor NQ to constants.py * Fix or explain inlined import * Verbose runtime error when bad dt_atmos * Verbose warm up * re-initialize heat_source and diss_est each call, add do_skeb check to accumulation --------- Co-authored-by: Purnendu Chakraborty Co-authored-by: Oliver Elbert --------- Co-authored-by: Rusty Benson <6594772+bensonr@users.noreply.github.com> Co-authored-by: Oliver Elbert Co-authored-by: Purnendu Chakraborty Co-authored-by: Oliver Elbert --- dsl/pace/dsl/dace/dace_config.py | 4 +++ dsl/pace/dsl/dace/utils.py | 8 ++++-- .../{build_gaea.sh => build_gaea_c4.sh} | 2 +- examples/build_scripts/build_gaea_c5.sh | 27 +++++++++++++++++++ fv3core/pace/fv3core/stencils/d_sw.py | 24 +++++++++++------ fv3core/pace/fv3core/stencils/fv_dynamics.py | 9 ++++--- 6 files changed, 60 insertions(+), 14 deletions(-) rename examples/build_scripts/{build_gaea.sh => build_gaea_c4.sh} (91%) create mode 100644 examples/build_scripts/build_gaea_c5.sh diff --git a/dsl/pace/dsl/dace/dace_config.py b/dsl/pace/dsl/dace/dace_config.py index 5e78c6bc..6f2befff 100644 --- a/dsl/pace/dsl/dace/dace_config.py +++ b/dsl/pace/dsl/dace/dace_config.py @@ -213,6 +213,10 @@ def __init__( if cp: cuda_sm = cp.cuda.Device(0).compute_capability dace.config.Config.set("compiler", "cuda", "cuda_arch", value=f"{cuda_sm}") + # Block size/thread count is defaulted to an average value for recent + # hardware (Pascal and upward). The problem of setting an optimized + # block/thread is both hardware and problem dependant. Fine tuners + # available in DaCe should be relied on for futher tuning of this value. 
dace.config.Config.set( "compiler", "cuda", "default_block_size", value="64,8,1" ) diff --git a/dsl/pace/dsl/dace/utils.py b/dsl/pace/dsl/dace/utils.py index 5c9f63ec..40ac3c12 100644 --- a/dsl/pace/dsl/dace/utils.py +++ b/dsl/pace/dsl/dace/utils.py @@ -1,3 +1,4 @@ +import json import time from dataclasses import dataclass, field from typing import Dict, List, Optional @@ -242,6 +243,8 @@ def kernel_theoretical_timing( n = 1000 m = 4 dt = [] + # Warm up run (build, allocation) + # to remove from timing the common runtime bench(A, B, n) # Time for _ in range(m): @@ -296,6 +299,9 @@ def kernel_theoretical_timing( except TypeError: newresult_in_us = (alldata_in_bytes / bandwidth_in_bytes_s) * in_us + # We keep sympy import here because sympy is known to be a problematic + # import and an heavy module which should be avoided if possible. + # TODO: refactor it out by shadow-coding the sympy.Max/Eval functions import sympy if node.label in result: @@ -333,8 +339,6 @@ def report_kernel_theoretical_timing( with open("kernel_theoretical_timing.csv", "w") as f: f.write(csv_string) elif out_format == "json": - import json - with open("kernel_theoretical_timing.json", "w") as f: json.dump(timings, f, indent=2) diff --git a/examples/build_scripts/build_gaea.sh b/examples/build_scripts/build_gaea_c4.sh similarity index 91% rename from examples/build_scripts/build_gaea.sh rename to examples/build_scripts/build_gaea_c4.sh index b2f9d770..def7af3f 100644 --- a/examples/build_scripts/build_gaea.sh +++ b/examples/build_scripts/build_gaea_c4.sh @@ -13,7 +13,7 @@ module load boost/1.72.0 module load python/3.9 # clone Pace and update submodules -git clone --recursive https://github.com/ai2cm/pace +git clone --recursive https://github.com/NOAA-GFDL/pace cd pace # create a conda environment for pace diff --git a/examples/build_scripts/build_gaea_c5.sh b/examples/build_scripts/build_gaea_c5.sh new file mode 100644 index 00000000..94ad5611 --- /dev/null +++ 
b/examples/build_scripts/build_gaea_c5.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +# Example bash script to install Pace to run bare-metal on Gaea's c4 cluster + +set -e -x + +# module load necessary system software +module rm PrgEnv-intel +module load PrgEnv-gnu +module rm gcc +module load gcc/12.2.0 +module load boost/1.79.0 +module load python/3.9 + +# clone Pace and update submodules +git clone --recursive https://github.com/NOAA-GFDL/pace +cd pace + +# create a conda environment for pace +conda create -y --name my_name python=3.8 + +# enter the environment and update it +conda activate my_name +pip3 install --upgrade pip setuptools wheel + +# install the Pace dependencies, GT4Py, and Pace +pip3 install -r requirements_dev.txt -c constraints.txt diff --git a/fv3core/pace/fv3core/stencils/d_sw.py b/fv3core/pace/fv3core/stencils/d_sw.py index 02ce9887..14155da4 100644 --- a/fv3core/pace/fv3core/stencils/d_sw.py +++ b/fv3core/pace/fv3core/stencils/d_sw.py @@ -751,6 +751,8 @@ def __init__( orchestrate(obj=self, config=stencil_factory.config.dace_config) self.grid_data = grid_data self._f0 = self.grid_data.fC_agrid + self._d_con = config.d_con + self._do_stochastic_ke_backscatter = config.do_skeb self.grid_indexing = stencil_factory.grid_indexing assert config.grid_type < 3, "ubke and vbke only implemented for grid_type < 3" @@ -927,12 +929,15 @@ def make_quantity(): }, ) ) - self._accumulate_heat_source_and_dissipation_estimate_stencil = ( - stencil_factory.from_dims_halo( - func=accumulate_heat_source_and_dissipation_estimate, - compute_dims=[X_DIM, Y_DIM, Z_DIM], + + if (self._d_con > 1.0e-5) or (self._do_stochastic_ke_backscatter): + self._accumulate_heat_source_and_dissipation_estimate_stencil = ( + stencil_factory.from_dims_halo( + func=accumulate_heat_source_and_dissipation_estimate, + compute_dims=[X_DIM, Y_DIM, Z_DIM], + ) ) - ) + self._compute_vorticity_stencil = stencil_factory.from_dims_halo( compute_vorticity, compute_dims=[X_DIM, Y_DIM, Z_DIM], @@ 
-1246,9 +1251,12 @@ def __call__( self._tmp_diss_e, self._column_namelist["d_con"], ) - self._accumulate_heat_source_and_dissipation_estimate_stencil( - self._tmp_heat_s, heat_source, self._tmp_diss_e, diss_est - ) + + if (self._d_con > 1.0e-5) or (self._do_stochastic_ke_backscatter): + self._accumulate_heat_source_and_dissipation_estimate_stencil( + self._tmp_heat_s, heat_source, self._tmp_diss_e, diss_est + ) + self._update_u_and_v_stencil( self._tmp_ut, self._tmp_vt, diff --git a/fv3core/pace/fv3core/stencils/fv_dynamics.py b/fv3core/pace/fv3core/stencils/fv_dynamics.py index 80b78e12..5f3de73a 100644 --- a/fv3core/pace/fv3core/stencils/fv_dynamics.py +++ b/fv3core/pace/fv3core/stencils/fv_dynamics.py @@ -172,7 +172,10 @@ def __init__( dace_compiletime_args=["state"], ) if timestep == timedelta(seconds=0): - raise RuntimeError("Bad dynamical core configuration: bdt is 0") + raise RuntimeError( + "Bad dynamical core configuration:" + " the atmospheric timestep is 0 seconds!" + ) # nested and stretched_grid are options in the Fortran code which we # have not implemented, so they are hard-coded here. self.call_checkpointer = checkpointer is not None @@ -543,8 +546,8 @@ def _compute(self, state: DycoreState, timer: pace.util.Timer): # TODO: When NQ=9, we shouldn't need to pass qcld explicitly # since it's in self.tracers. It should not be an issue since - # we don't have self.tracers & qcld computation at - # the same time. 
+ # we don't have self.tracers & qcld computation at the same + # time # When NQ=8, we do need qcld passed explicitely self._lagrangian_to_eulerian_obj( self.tracers, From 2031b9e6e9222ce5914a579d9713d37d301f106f Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Wed, 23 Aug 2023 11:15:05 -0400 Subject: [PATCH 43/57] [NOAA:Update] Bring back #15 & doubly periodic domain (#25) * Feature/dp driver (#13) * initial commit * adding test config * adding the rest of driver and util code * updating history.md * move u_max to dycore config * uncomment assert * added comment explaining the copy of grid type to dycore config * Turn main unit test & lint on PR, logger clean up [NASA:Update] (#15) * Initialize GeosDycoreWrapper with bdt (timestep) * Use GEOS version of constants * 1. Add qcld to the list of tracers beings advected 2. Made GEOS specific changes to thresholds in saturation adjustment * Accumulate diss_est * Allow GEOS_WRAPPER to process device data * Add clear to collector for 3rd party use. GEOS pass down timings to caller * Make kernel analysis run a copy stencil to compute local bandwith Parametrize tool with backend, output format * Move constant on a env var Add saturation adjustement threshold to const * Restrict dace to 0.14.1 due to a parsing bug * Add guard for bdt==0 * Fix theroritical timings * Fixed a bug where pkz was being calculated twice, and the second calc was wrong * Downgrade DaCe to 0.14.0 pending array aliasing fix * Set default cache path for orchestrated DaCe to respect GT_CACHE_* env * Remove previous per stencil override of default_build_folder * Revert "Set default cache path for orchestrated DaCe to respect GT_CACHE_* env" * Read cache_root in default dace backend * Document faulty behavior with GT_CACHE_DIR_NAME * Check for the string value of CONST_VERSION directly instead of enum * Protect constant selection more rigorusly. 
Clean abort on unknown constant given * Log constants selection * Refactor NQ to constants.py * Introduce PACE_LOGLEVEL to control log level from outside * Code guidelines clean up * Devops/GitHub actions on (#15) * Linting on PR * Run main unit test * Update python to available 3.8.12 * Fix unit tests (remove dxa, dya rely on halo ex) * Update HISTORY.md * Adapt log_level in driver.run * Verbose the PACE_CONSTANTS * Doc log level hierarchical nature --------- Co-authored-by: Purnendu Chakraborty Co-authored-by: Purnendu Chakraborty * Lint --------- Co-authored-by: Oliver Elbert Co-authored-by: Purnendu Chakraborty Co-authored-by: Purnendu Chakraborty --- README.md | 14 ++- .../examples/configs/baroclinic_c12_dp.yaml | 102 ++++++++++++++++++ driver/pace/driver/driver.py | 4 + driver/pace/driver/grid.py | 15 ++- driver/pace/driver/run.py | 49 +-------- fv3core/pace/fv3core/_config.py | 2 + tests/main/driver/test_example_configs.py | 1 + util/HISTORY.md | 3 + util/pace/util/__init__.py | 2 +- util/pace/util/grid/generation.py | 9 ++ util/pace/util/logging.py | 10 ++ util/pace/util/namelist.py | 8 ++ 12 files changed, 170 insertions(+), 49 deletions(-) create mode 100644 driver/examples/configs/baroclinic_c12_dp.yaml diff --git a/README.md b/README.md index 7753fa73..5884cee8 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Pace is an implementation of the FV3GFS / SHiELD atmospheric model developed by Full Sphinx documentation can be found at [https://ai2cm.github.io/pace/](https://ai2cm.github.io/pace/). **WARNING** This repo is under active development - supported features and procedures can change rapidly and without notice. 
+ ## Quickstart - bare metal ### Build @@ -27,10 +28,13 @@ export BOOST_ROOT=BOOST/ROOT/boost_1_79_0 ``` When cloning Pace you will need to update the repository's submodules as well: + ```shell git clone --recursive https://github.com/ai2cm/pace.git ``` + or if you have already cloned the repository: + ``` git submodule update --init --recursive ``` @@ -43,6 +47,7 @@ source venv_name/bin/activate ``` Inside of your pace `venv` or conda environment pip install the Python requirements, GT4Py, and Pace: + ```shell pip3 install -r requirements_dev.txt -c constraints.txt ``` @@ -52,6 +57,7 @@ Shell scripts to install Pace on specific machines such as Gaea can be found in ### Run With the environment activated, you can run an example baroclinic test case with the following command: + ```shell mpirun -n 6 python3 -m pace.driver.run driver/examples/configs/baroclinic_c12.yaml @@ -64,23 +70,30 @@ After the run completes, you will see an output direcotry `output.zarr`. An exam ### Environment variable configuration - `PACE_CONSTANTS`: Pace is bundled with various constants (see _util/pace/util/constants.py_). + - `FV3DYCORE` NOAA's FV3 dynamical core constants (original port) + - `GFS` Constant as defined in NOAA GFS + - `GEOS` Constant as defined in GEOS v13 - `PACE_FLOAT_PRECISION`: default precision of the field & scalars in the numerics. Default to 64. - `PACE_LOGLEVEL`: logging level to display (DEBUG, INFO, WARNING, ERROR, CRITICAL). Default to INFO. ## Quickstart - Docker + ### Build While it is possible to install and build pace bare-metal, we can ensure all system libraries are installed with the correct versions by using a Docker container to test and develop pace. First, you will need to update the git submodules so that any dependencies are cloned and at the correct version: + ```shell git submodule update --init --recursive ``` Then build the `pace` docker image at the top level. 
+ ```shell make build ``` + ### Run ```shell @@ -100,7 +113,6 @@ This git repository is laid out as a mono-repo, containing multiple independent ![Graph of interdependencies of Pace modules, generated from dependences.dot](./dependencies.svg) - ## ML emulation An example of integration of an ML model replacing the microphysics parametrization is available on the `feature/microphysics-emulator` branch. diff --git a/driver/examples/configs/baroclinic_c12_dp.yaml b/driver/examples/configs/baroclinic_c12_dp.yaml new file mode 100644 index 00000000..029767ca --- /dev/null +++ b/driver/examples/configs/baroclinic_c12_dp.yaml @@ -0,0 +1,102 @@ +stencil_config: + compilation_config: + backend: numpy + rebuild: false + validate_args: true + format_source: false + device_sync: false +grid_config: + type: generated + config: + grid_type: 4 + dx_const: 3000.0 + dy_const: 3000.0 + deglat: 10.0 +initialization: + type: baroclinic +performance_config: + collect_performance: true + experiment_name: c12_baroclinic +nx_tile: 12 +nz: 79 +dt_atmos: 225 +minutes: 15 +layout: + - 1 + - 1 +diagnostics_config: + path: output + output_format: netcdf + names: + - u + - v + - ua + - va + - pt + - delp + - qvapor + - qliquid + - qice + - qrain + - qsnow + - qgraupel + z_select: + - level: 65 + names: + - pt +dycore_config: + a_imp: 1.0 + beta: 0. + consv_te: 0. + d2_bg: 0. + d2_bg_k1: 0.2 + d2_bg_k2: 0.1 + d4_bg: 0.15 + d_con: 1.0 + d_ext: 0.0 + dddmp: 0.5 + delt_max: 0.002 + do_sat_adj: true + do_vort_damp: true + fill: true + hord_dp: 6 + hord_mt: 6 + hord_tm: 6 + hord_tr: 8 + hord_vt: 6 + hydrostatic: false + k_split: 1 + ke_bg: 0. + kord_mt: 9 + kord_tm: -9 + kord_tr: 9 + kord_wz: 9 + n_split: 1 + nord: 3 + nwat: 6 + p_fac: 0.05 + rf_cutoff: 3000. + rf_fast: true + tau: 10. + vtdm4: 0.06 + z_tracer: true + do_qa: true + tau_i2s: 1000. + tau_g2v: 1200. + ql_gen: 0.001 + ql_mlt: 0.002 + qs_mlt: 0.000001 + qi_lim: 1.0 + dw_ocean: 0.1 + dw_land: 0.15 + icloud_f: 0 + tau_l2v: 300. 
+ tau_v2l: 90. + fv_sg_adj: 0 + n_sponge: 48 + u_max: 355.0 + +physics_config: + hydrostatic: false + nwat: 6 + do_qa: true diff --git a/driver/pace/driver/driver.py b/driver/pace/driver/driver.py index 07317415..284acaca 100644 --- a/driver/pace/driver/driver.py +++ b/driver/pace/driver/driver.py @@ -273,6 +273,10 @@ def from_dict(cls, kwargs: Dict[str, Any]) -> "DriverConfig": kwargs["grid_config"] = GridInitializerSelector.from_dict( kwargs["grid_config"] ) + grid_type = kwargs["grid_config"].config.grid_type + # Copy grid_type to the DycoreConfig if it's not the default value + if grid_type != 0: + kwargs["dycore_config"].grid_type = grid_type if ( isinstance(kwargs["stencil_config"], dict) diff --git a/driver/pace/driver/grid.py b/driver/pace/driver/grid.py index 4817869c..c184d566 100644 --- a/driver/pace/driver/grid.py +++ b/driver/pace/driver/grid.py @@ -85,12 +85,20 @@ class GeneratedGridConfig(GridInitializer): lon_target: desired center longitude for refined tile (deg) lat_target: desired center latitude for refined tile (deg) restart_path: if given, load vertical grid from restart file + grid_type: type of grid, 0 is a gnomonic cubed-sphere, 4 is doubly-periodic + dx_const: constant x-width of grid cells on a dp-grid + dy_const: constant y-width of grid cells on a dp-grid + deglat: latitude to use for coriolis calculations on a dp-grid """ stretch_factor: Optional[float] = 1.0 lon_target: Optional[float] = 350.0 lat_target: Optional[float] = -90.0 restart_path: Optional[str] = None + grid_type: Optional[int] = 0 + dx_const: Optional[float] = 1000.0 + dy_const: Optional[float] = 1000.0 + deglat: Optional[float] = 15.0 def get_grid( self, @@ -99,7 +107,12 @@ def get_grid( ) -> Tuple[DampingCoefficients, DriverGridData, GridData]: metric_terms = MetricTerms( - quantity_factory=quantity_factory, communicator=communicator + quantity_factory=quantity_factory, + communicator=communicator, + grid_type=self.grid_type, + dx_const=self.dx_const, + 
dy_const=self.dy_const, + deglat=self.deglat, ) if self.stretch_factor != 1: # do horizontal grid transformation _transform_horizontal_grid( diff --git a/driver/pace/driver/run.py b/driver/pace/driver/run.py index c8532ebd..df70eb14 100644 --- a/driver/pace/driver/run.py +++ b/driver/pace/driver/run.py @@ -1,59 +1,15 @@ import dataclasses import gc -import logging from typing import Optional import click import yaml -from pace.util import pace_log -from pace.util.mpi import MPI +from pace.util import AVAILABLE_LOG_LEVELS, pace_log from .driver import Driver, DriverConfig -logger = logging.getLogger(__name__) - - -log_levels = { - "info": logging.INFO, - "debug": logging.DEBUG, - "warning": logging.WARNING, - "error": logging.ERROR, - "critical": logging.CRITICAL, -} - - -def configure_logging(log_rank: Optional[int], log_level: str): - """ - Configure logging for the driver. - - Args: - log_rank: rank to log from, or 'all' to log to all ranks, - forced to 'all' if running without MPI - log_level: log level to use - """ - level = log_levels[log_level.lower()] - if MPI is None: - logging.basicConfig( - level=level, - format="%(asctime)s [%(levelname)s] %(name)s:%(message)s", - handlers=[logging.StreamHandler()], - datefmt="%Y-%m-%d %H:%M:%S", - ) - else: - if log_rank is None or int(log_rank) == MPI.COMM_WORLD.Get_rank(): - logging.basicConfig( - level=level, - format=( - f"%(asctime)s [%(levelname)s] (rank {MPI.COMM_WORLD.Get_rank()}) " - "%(name)s:%(message)s" - ), - handlers=[logging.StreamHandler()], - datefmt="%Y-%m-%d %H:%M:%S", - ) - - @click.command() @click.argument( "CONFIG_PATH", @@ -76,7 +32,8 @@ def command_line(config_path: str, log_rank: Optional[int], log_level: str): CONFIG_PATH is the path to a DriverConfig yaml file. 
""" - configure_logging(log_rank=log_rank, log_level=log_level) + level = AVAILABLE_LOG_LEVELS[log_level.lower()] + pace_log.setLevel(level) pace_log.info("loading DriverConfig from yaml") with open(config_path, "r") as f: config = yaml.safe_load(f) diff --git a/fv3core/pace/fv3core/_config.py b/fv3core/pace/fv3core/_config.py index 17609b7c..51fb609f 100644 --- a/fv3core/pace/fv3core/_config.py +++ b/fv3core/pace/fv3core/_config.py @@ -195,6 +195,7 @@ class DynamicalCoreConfig: do_qa: bool = DEFAULT_BOOL layout: Tuple[int, int] = NamelistDefaults.layout grid_type: int = NamelistDefaults.grid_type + u_max: float = NamelistDefaults.u_max # max windspeed for dp config do_f3d: bool = NamelistDefaults.do_f3d inline_q: bool = NamelistDefaults.inline_q do_skeb: bool = NamelistDefaults.do_skeb # save dissipation estimate @@ -334,6 +335,7 @@ def from_namelist(cls, namelist: Namelist) -> "DynamicalCoreConfig": do_qa=namelist.do_qa, layout=namelist.layout, grid_type=namelist.grid_type, + u_max=namelist.u_max, do_f3d=namelist.do_f3d, inline_q=namelist.inline_q, do_skeb=namelist.do_skeb, diff --git a/tests/main/driver/test_example_configs.py b/tests/main/driver/test_example_configs.py index 14d74ce0..e62276d1 100644 --- a/tests/main/driver/test_example_configs.py +++ b/tests/main/driver/test_example_configs.py @@ -13,6 +13,7 @@ TESTED_CONFIGS: List[str] = [ "baroclinic_c12.yaml", + "baroclinic_c12_dp.yaml", "baroclinic_c12_comm_read.yaml", "baroclinic_c12_comm_write.yaml", "baroclinic_c12_null_comm.yaml", diff --git a/util/HISTORY.md b/util/HISTORY.md index e07ed317..0b0a42b6 100644 --- a/util/HISTORY.md +++ b/util/HISTORY.md @@ -4,7 +4,10 @@ History latest ------ +- Added `dx_const`, `dy_const`, `deglat`, and `u_max` namelist settings for doubly-periodic grids +- Added `dx_const`, `dy_const`, and `deglat` to grid generation code for doubly-periodic grids - Added f32 support to halo exchange data transformation +- Use one single logger, from logging.py v0.10.0 ------- diff 
--git a/util/pace/util/__init__.py b/util/pace/util/__init__.py index 4911f2cf..58a7c2a5 100644 --- a/util/pace/util/__init__.py +++ b/util/pace/util/__init__.py @@ -54,7 +54,7 @@ from .initialization import GridSizer, QuantityFactory, SubtileGridSizer from .io import read_state, write_state from .local_comm import LocalComm -from .logging import pace_log +from .logging import AVAILABLE_LOG_LEVELS, pace_log from .monitor import Monitor, NetCDFMonitor, ZarrMonitor from .mpi import MPIComm from .namelist import Namelist, NamelistDefaults diff --git a/util/pace/util/grid/generation.py b/util/pace/util/grid/generation.py index 7c7ad98c..b78a7059 100644 --- a/util/pace/util/grid/generation.py +++ b/util/pace/util/grid/generation.py @@ -222,6 +222,9 @@ def __init__( quantity_factory: util.QuantityFactory, communicator: util.CubedSphereCommunicator, grid_type: int = 0, + dx_const: float = 1000.0, + dy_const: float = 1000.0, + deglat: float = 15.0, ): assert grid_type < 3 self._grid_type = grid_type @@ -375,6 +378,9 @@ def from_tile_sizing( communicator: util.CubedSphereCommunicator, backend: str, grid_type: int = 0, + dx_const: float = 1000.0, + dy_const: float = 1000.0, + deglat: float = 15.0, ) -> "MetricTerms": sizer = util.SubtileGridSizer.from_tile_params( nx_tile=npx - 1, @@ -393,6 +399,9 @@ def from_tile_sizing( quantity_factory=quantity_factory, communicator=communicator, grid_type=grid_type, + dx_const=dx_const, + dy_const=dy_const, + deglat=deglat, ) @property diff --git a/util/pace/util/logging.py b/util/pace/util/logging.py index c0e9d0d7..1f9142fe 100644 --- a/util/pace/util/logging.py +++ b/util/pace/util/logging.py @@ -7,6 +7,16 @@ LOGLEVEL = os.environ.get("PACE_LOGLEVEL", "INFO").upper() +# Python log levels are hierarchical, therefore setting INFO +# means DEBUG and everything lower will be logged. 
+AVAILABLE_LOG_LEVELS = { + "info": logging.INFO, + "debug": logging.DEBUG, + "warning": logging.WARNING, + "error": logging.ERROR, + "critical": logging.CRITICAL, +} + def _pace_logger(): name_log = logging.getLogger(__name__) diff --git a/util/pace/util/namelist.py b/util/pace/util/namelist.py index ff082736..0133e3f6 100644 --- a/util/pace/util/namelist.py +++ b/util/pace/util/namelist.py @@ -12,6 +12,10 @@ class NamelistDefaults: layout = (1, 1) grid_type = 0 + dx_const = 1000.0 + dy_const = 1000.0 + deglat = 15.0 + u_max = 350.0 do_f3d = False inline_q = False do_skeb = False # save dissipation estimate @@ -372,6 +376,10 @@ class Namelist: # fvmxl: Any # ldebug: Any grid_type: int = NamelistDefaults.grid_type + dx_const: float = NamelistDefaults.dx_const + dy_const: float = NamelistDefaults.dy_const + deglat: float = NamelistDefaults.deglat + u_max: float = NamelistDefaults.u_max do_f3d: bool = NamelistDefaults.do_f3d inline_q: bool = NamelistDefaults.inline_q do_skeb: bool = NamelistDefaults.do_skeb # save dissipation estimate From 2f9bbe9bb4fcd2ac40340cad5622fe18e1d5bc7f Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Wed, 23 Aug 2023 11:40:43 -0400 Subject: [PATCH 44/57] lint --- driver/pace/driver/driver.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/driver/pace/driver/driver.py b/driver/pace/driver/driver.py index 97615c64..284acaca 100644 --- a/driver/pace/driver/driver.py +++ b/driver/pace/driver/driver.py @@ -455,11 +455,7 @@ def exit_instead_of_build(self): stencil_compare_comm=stencil_compare_comm, ) pace_log.info("setting up grid started") - ( - damping_coefficients, - driver_grid_data, - grid_data, - ) = self.config.get_grid( + (damping_coefficients, driver_grid_data, grid_data,) = self.config.get_grid( quantity_factory=self.quantity_factory, communicator=communicator, ) From 8f6ba7cdb80337e963c564a6e08c7c6e62db0c82 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Wed, 23 Aug 2023 12:57:27 -0400 Subject: 
[PATCH 45/57] Fix non-deterministic temporaries by using `zeros` everywhere instead of `empty` --- driver/pace/driver/grid.py | 4 +--- driver/pace/driver/initialization.py | 5 +---- fv3core/pace/fv3core/testing/translate_dyncore.py | 4 ++-- .../tests/savepoint/translate/translate_remapping.py | 2 +- stencils/pace/stencils/testing/parallel_translate.py | 3 +-- stencils/pace/stencils/testing/temporaries.py | 7 +++---- util/pace/util/communicator.py | 10 +++++----- util/pace/util/grid/generation.py | 5 +---- util/pace/util/grid/gnomonic.py | 6 +++--- util/pace/util/grid/helper.py | 6 ++---- util/pace/util/halo_data_transformer.py | 4 +--- util/pace/util/initialization/allocator.py | 2 +- 12 files changed, 22 insertions(+), 36 deletions(-) diff --git a/driver/pace/driver/grid.py b/driver/pace/driver/grid.py index c184d566..9fa97a06 100644 --- a/driver/pace/driver/grid.py +++ b/driver/pace/driver/grid.py @@ -105,7 +105,6 @@ def get_grid( quantity_factory: QuantityFactory, communicator: CubedSphereCommunicator, ) -> Tuple[DampingCoefficients, DriverGridData, GridData]: - metric_terms = MetricTerms( quantity_factory=quantity_factory, communicator=communicator, @@ -184,8 +183,7 @@ def get_grid( quantity_factory: QuantityFactory, communicator: CubedSphereCommunicator, ) -> Tuple[DampingCoefficients, DriverGridData, GridData]: - - backend = quantity_factory.empty( + backend = quantity_factory.zeros( dims=[pace.util.X_DIM, pace.util.Y_DIM], units="unknown" ).gt4py_backend diff --git a/driver/pace/driver/initialization.py b/driver/pace/driver/initialization.py index 2b6471a8..bd6d96ea 100644 --- a/driver/pace/driver/initialization.py +++ b/driver/pace/driver/initialization.py @@ -154,7 +154,6 @@ def get_driver_state( driver_grid_data: pace.util.grid.DriverGridData, grid_data: pace.util.grid.GridData, ) -> DriverState: - dycore_state = tc_init.init_tc_state( grid_data=grid_data, quantity_factory=quantity_factory, @@ -323,7 +322,7 @@ def get_driver_state( driver_grid_data: 
pace.util.grid.DriverGridData, grid_data: pace.util.grid.GridData, ) -> DriverState: - backend = quantity_factory.empty( + backend = quantity_factory.zeros( dims=[pace.util.X_DIM, pace.util.Y_DIM], units="unknown" ).gt4py_backend @@ -348,7 +347,6 @@ def _initialize_dycore_state( communicator: pace.util.CubedSphereCommunicator, backend: str, ) -> fv3core.DycoreState: - grid = self._get_serialized_grid(communicator=communicator, backend=backend) ser = self._serializer(communicator) @@ -401,7 +399,6 @@ def get_driver_state( driver_grid_data: pace.util.grid.DriverGridData, grid_data: pace.util.grid.GridData, ) -> DriverState: - return DriverState( dycore_state=self.dycore_state, physics_state=self.physics_state, diff --git a/fv3core/pace/fv3core/testing/translate_dyncore.py b/fv3core/pace/fv3core/testing/translate_dyncore.py index 510e299f..6c7da4b7 100644 --- a/fv3core/pace/fv3core/testing/translate_dyncore.py +++ b/fv3core/pace/fv3core/testing/translate_dyncore.py @@ -140,7 +140,7 @@ def compute_parallel(self, inputs, communicator): grid_data.ptop = inputs["ptop"] self._base.make_storage_data_input_vars(inputs) state = DycoreState.init_zeros(quantity_factory=self.grid.quantity_factory) - wsd: pace.util.Quantity = self.grid.quantity_factory.empty( + wsd: pace.util.Quantity = self.grid.quantity_factory.zeros( dims=[pace.util.X_DIM, pace.util.Y_DIM], units="unknown", ) @@ -152,7 +152,7 @@ def compute_parallel(self, inputs, communicator): state[name].data[selection] = value else: setattr(state, name, value) - phis: pace.util.Quantity = self.grid.quantity_factory.empty( + phis: pace.util.Quantity = self.grid.quantity_factory.zeros( dims=[pace.util.X_DIM, pace.util.Y_DIM], units="m", ) diff --git a/fv3core/tests/savepoint/translate/translate_remapping.py b/fv3core/tests/savepoint/translate/translate_remapping.py index fc9196b0..9a2e1f84 100644 --- a/fv3core/tests/savepoint/translate/translate_remapping.py +++ b/fv3core/tests/savepoint/translate/translate_remapping.py @@ 
-107,7 +107,7 @@ def compute_from_storage(self, inputs): inputs["wsd"] = wsd_2d inputs["q_cld"] = inputs["tracers"]["qcld"] inputs["last_step"] = bool(inputs["last_step"]) - pfull = self.grid.quantity_factory.empty([Z_DIM], units="Pa") + pfull = self.grid.quantity_factory.zeros([Z_DIM], units="Pa") pfull.data[:] = pfull.np.asarray(inputs.pop("pfull")) l_to_e_obj = LagrangianToEulerian( self.stencil_factory, diff --git a/stencils/pace/stencils/testing/parallel_translate.py b/stencils/pace/stencils/testing/parallel_translate.py index dcc1e64d..f10b9b27 100644 --- a/stencils/pace/stencils/testing/parallel_translate.py +++ b/stencils/pace/stencils/testing/parallel_translate.py @@ -12,7 +12,6 @@ class ParallelTranslate: - max_error = TranslateFortranData2Py.max_error near_zero = TranslateFortranData2Py.near_zero compute_grid_option = False @@ -192,7 +191,7 @@ def state_from_inputs(self, inputs: dict, grid=None) -> dict: for name, properties in self.inputs.items(): standard_name = properties.get("name", name) if len(properties["dims"]) > 0: - state[standard_name] = grid.quantity_factory.empty( + state[standard_name] = grid.quantity_factory.zeros( properties["dims"], properties["units"], dtype=inputs[name].dtype ) input_slice = _serialize_slice( diff --git a/stencils/pace/stencils/testing/temporaries.py b/stencils/pace/stencils/testing/temporaries.py index 581387f6..2dd46663 100644 --- a/stencils/pace/stencils/testing/temporaries.py +++ b/stencils/pace/stencils/testing/temporaries.py @@ -40,10 +40,9 @@ def _assert_same_temporaries(dict1: dict, dict2: dict) -> List[str]: attr2 = dict2[attr] if isinstance(attr1, np.ndarray): try: - np.testing.assert_almost_equal( - attr1, attr2, err_msg=f"{attr} not equal" - ) - except AssertionError: + assert np.allclose(attr1, attr2, equal_nan=True) + except AssertionError as e: + print(e) differences.append(attr) else: sub_differences = _assert_same_temporaries(attr1, attr2) diff --git a/util/pace/util/communicator.py 
b/util/pace/util/communicator.py index 938469bd..d2577d8c 100644 --- a/util/pace/util/communicator.py +++ b/util/pace/util/communicator.py @@ -167,7 +167,7 @@ def _get_gather_recv_quantity( ) -> Quantity: """Initialize a Quantity for use when receiving global data during gather""" recv_quantity = Quantity( - send_metadata.np.empty(global_extent, dtype=send_metadata.dtype), + send_metadata.np.zeros(global_extent, dtype=send_metadata.dtype), dims=send_metadata.dims, units=send_metadata.units, origin=tuple([0 for dim in send_metadata.dims]), @@ -182,7 +182,7 @@ def _get_scatter_recv_quantity( ) -> Quantity: """Initialize a Quantity for use when receiving subtile data during scatter""" recv_quantity = Quantity( - send_metadata.np.empty(shape, dtype=send_metadata.dtype), + send_metadata.np.zeros(shape, dtype=send_metadata.dtype), dims=send_metadata.dims, units=send_metadata.units, gt4py_backend=send_metadata.gt4py_backend, @@ -206,7 +206,7 @@ def gather( result: Optional[Quantity] if self.rank == constants.ROOT_RANK: with array_buffer( - send_quantity.np.empty, + send_quantity.np.zeros, (self.partitioner.total_ranks,) + tuple(send_quantity.extent), dtype=send_quantity.data.dtype, ) as recvbuf: @@ -745,7 +745,7 @@ def _get_gather_recv_quantity( # needs to change the quantity dimensions since we add a "tile" dimension, # unlike for tile scatter/gather which retains the same dimensions recv_quantity = Quantity( - metadata.np.empty(global_extent, dtype=metadata.dtype), + metadata.np.zeros(global_extent, dtype=metadata.dtype), dims=(constants.TILE_DIM,) + metadata.dims, units=metadata.units, origin=(0,) + tuple([0 for dim in metadata.dims]), @@ -767,7 +767,7 @@ def _get_scatter_recv_quantity( # needs to change the quantity dimensions since we remove a "tile" dimension, # unlike for tile scatter/gather which retains the same dimensions recv_quantity = Quantity( - metadata.np.empty(shape, dtype=metadata.dtype), + metadata.np.zeros(shape, dtype=metadata.dtype), 
dims=metadata.dims[1:], units=metadata.units, gt4py_backend=metadata.gt4py_backend, diff --git a/util/pace/util/grid/generation.py b/util/pace/util/grid/generation.py index b78a7059..679b9449 100644 --- a/util/pace/util/grid/generation.py +++ b/util/pace/util/grid/generation.py @@ -75,7 +75,7 @@ def quantity_cast_to_model_float( quantity_factory: util.QuantityFactory, qty_64: util.Quantity ) -> util.Quantity: """Copy & cast from 64-bit float to model precision if need be""" - qty = quantity_factory.empty(qty_64.dims, qty_64.units, dtype=Float) + qty = quantity_factory.zeros(qty_64.dims, qty_64.units, dtype=Float) qty.data[:] = qty_64.data[:] return qty @@ -1530,7 +1530,6 @@ def rdyc(self) -> util.Quantity: ) def _init_dgrid(self): - grid_mirror_ew = self.quantity_factory.zeros( self._grid_dims, "radians", @@ -1751,7 +1750,6 @@ def _compute_dxdy(self): return dx, dy def _compute_dxdy_agrid(self): - dx_agrid_64 = self.quantity_factory.zeros( [util.X_DIM, util.Y_DIM], "m", @@ -2149,7 +2147,6 @@ def _calculate_more_trig_terms(self, cos_sg, sin_sg): ) def _init_cell_trigonometry(self): - cosa_u_64 = self.quantity_factory.zeros( [util.X_INTERFACE_DIM, util.Y_DIM], "", diff --git a/util/pace/util/grid/gnomonic.py b/util/pace/util/grid/gnomonic.py index 705014e4..f26af0f2 100644 --- a/util/pace/util/grid/gnomonic.py +++ b/util/pace/util/grid/gnomonic.py @@ -303,9 +303,9 @@ def _mirror_latlon(lon1, lat1, lon2, lat2, lon0, lat0, np): pdot = p0[0] * nb[0] + p0[1] * nb[1] + p0[2] * nb[2] pp = p0 - np.multiply(2.0, pdot) * nb - lon3 = np.empty((1, 1)) - lat3 = np.empty((1, 1)) - pp3 = np.empty((3, 1, 1)) + lon3 = np.zeros((1, 1)) + lat3 = np.zeros((1, 1)) + pp3 = np.zeros((3, 1, 1)) pp3[:, 0, 0] = pp _cart_to_latlon(1, pp3, lon3, lat3, np) diff --git a/util/pace/util/grid/helper.py b/util/pace/util/grid/helper.py index 673e484d..1b977ad8 100644 --- a/util/pace/util/grid/helper.py +++ b/util/pace/util/grid/helper.py @@ -166,8 +166,8 @@ def from_restart( but no fv_core.res.nc in 
restart data file.""" ) - ak = quantity_factory.empty([Z_INTERFACE_DIM], units="Pa") - bk = quantity_factory.empty([Z_INTERFACE_DIM], units="") + ak = quantity_factory.zeros([Z_INTERFACE_DIM], units="Pa") + bk = quantity_factory.zeros([Z_INTERFACE_DIM], units="") with fs.open(ak_bk_data_file, "rb") as f: ds = xr.open_dataset(f).isel(Time=0).drop_vars("Time") ak.view[:] = ds["ak"].values @@ -322,7 +322,6 @@ def __init__( @classmethod def new_from_metric_terms(cls, metric_terms: MetricTerms): - horizontal_data = HorizontalGridData.new_from_metric_terms(metric_terms) vertical_data = VerticalGridData.new_from_metric_terms(metric_terms) contravariant_data = ContravariantGridData.new_from_metric_terms(metric_terms) @@ -701,7 +700,6 @@ def new_from_grid_variables( es1: pace.util.Quantity, ew2: pace.util.Quantity, ) -> "DriverGridData": - try: vlon1, vlon2, vlon3 = split_quantity_along_last_dim(vlon) vlat1, vlat2, vlat3 = split_quantity_along_last_dim(vlat) diff --git a/util/pace/util/halo_data_transformer.py b/util/pace/util/halo_data_transformer.py index 00a547d6..e97bb97a 100644 --- a/util/pace/util/halo_data_transformer.py +++ b/util/pace/util/halo_data_transformer.py @@ -70,7 +70,7 @@ def _build_flatten_indices( """ # Have to go down to numpy to leverage indices calculation - arr_indices = np.empty(shape, dtype=np.int32, order="C")[slices] + arr_indices = np.zeros(shape, dtype=np.int32, order="C")[slices] # Get offset from first index offset_dims = [] @@ -875,7 +875,6 @@ def _opt_unpack_scalar(self, quantities: List[Quantity]): # Use private stream with self._get_stream(cu_kernel_args.stream): - # Launch kernel blocks = 128 grid_x = (info_x._unpack_buffer_size // blocks) + 1 @@ -942,7 +941,6 @@ def _opt_unpack_vector( # Use private stream with self._get_stream(cu_kernel_args.stream): - # Buffer sizes edge_size = info_x._unpack_buffer_size + info_y._unpack_buffer_size diff --git a/util/pace/util/initialization/allocator.py b/util/pace/util/initialization/allocator.py 
index c865cbbf..1a68495e 100644 --- a/util/pace/util/initialization/allocator.py +++ b/util/pace/util/initialization/allocator.py @@ -102,7 +102,7 @@ def from_array( That numpy array must correspond to the correct shape and extent for the given dims. """ - base = self.empty( + base = self.zeros( dims=dims, units=units, dtype=data.dtype, From 31c484455b4c389fe83b75baaed26445d4486965 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Wed, 23 Aug 2023 13:12:52 -0400 Subject: [PATCH 46/57] Missed commit --- stencils/pace/stencils/testing/grid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stencils/pace/stencils/testing/grid.py b/stencils/pace/stencils/testing/grid.py index 65ad8870..4cf623b1 100644 --- a/stencils/pace/stencils/testing/grid.py +++ b/stencils/pace/stencils/testing/grid.py @@ -504,7 +504,7 @@ def grid_data(self) -> "GridData": data = getattr(self, name) assert data is not None - quantity = self.quantity_factory.empty(dims=dims, units=units) + quantity = self.quantity_factory.zeros(dims=dims, units=units) if len(quantity.shape) == 3: quantity.data[:] = data[:, :, : quantity.shape[2]] elif len(quantity.shape) == 2: From 08f3033df36b0c730a393e94e14310f2a1875ec3 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Fri, 25 Aug 2023 15:45:37 -0400 Subject: [PATCH 47/57] Update dsl/pace/dsl/caches/codepath.py Co-authored-by: Oliver Elbert --- dsl/pace/dsl/caches/codepath.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dsl/pace/dsl/caches/codepath.py b/dsl/pace/dsl/caches/codepath.py index cb8327b5..c80bac48 100644 --- a/dsl/pace/dsl/caches/codepath.py +++ b/dsl/pace/dsl/caches/codepath.py @@ -2,12 +2,11 @@ class FV3CodePath(enum.Enum): - """Enum listing all possible code path on a cube sphere. - For any layout the cube sphere has up to 9 different code path, 10 - when counting the 1,1 layout which aggregates all 9. 
Those are related to - the positioning of the rank on the tile and which of the edge/corner case - it has to handle. - Since the framework inline code to optimize, we _cannot_ pre-suppose of the code + """Enum listing all possible code paths on a cube sphere. + For any layout the cube sphere has up to 9 different code paths depending on + the positioning of the rank on the tile and which of the edge/corner cases + it has to handle, as well as the possibility for all boundary computations in the 1x1 layout case. + Since the framework inlines code to optimize, we _cannot_ pre-suppose which code being kept and/or ejected. This enum serves as the ground truth to map rank to the proper generated code. """ From d63a0f01679c3f22f7d1053bfaed31b49693a543 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Fri, 25 Aug 2023 15:52:57 -0400 Subject: [PATCH 48/57] Lint --- dsl/pace/dsl/caches/codepath.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dsl/pace/dsl/caches/codepath.py b/dsl/pace/dsl/caches/codepath.py index c80bac48..8ebf9492 100644 --- a/dsl/pace/dsl/caches/codepath.py +++ b/dsl/pace/dsl/caches/codepath.py @@ -5,7 +5,8 @@ class FV3CodePath(enum.Enum): """Enum listing all possible code paths on a cube sphere. For any layout the cube sphere has up to 9 different code paths depending on the positioning of the rank on the tile and which of the edge/corner cases - it has to handle, as well as the possibility for all boundary computations in the 1x1 layout case. + it has to handle, as well as the possibility for all boundary computations in + the 1x1 layout case. Since the framework inlines code to optimize, we _cannot_ pre-suppose which code being kept and/or ejected. This enum serves as the ground truth to map rank to the proper generated code. 
From 6de1b3cf4733ee86a1e8880fd37c0a4f9cf40d14 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Fri, 25 Aug 2023 15:55:49 -0400 Subject: [PATCH 49/57] Restore zero-ing out the fields --- fv3core/pace/fv3core/stencils/d_sw.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fv3core/pace/fv3core/stencils/d_sw.py b/fv3core/pace/fv3core/stencils/d_sw.py index 14155da4..51c9ee6e 100644 --- a/fv3core/pace/fv3core/stencils/d_sw.py +++ b/fv3core/pace/fv3core/stencils/d_sw.py @@ -94,6 +94,8 @@ def heat_diss( ke_bg (in): """ with computation(PARALLEL), interval(...): + heat_source = 0.0 + diss_est = 0.0 if damp_w > 1e-5: dd8 = ke_bg * abs(dt) dw = (fx2 - fx2[1, 0, 0] + fy2 - fy2[0, 1, 0]) * rarea From 33ac533e7298032c86e659e305c3636e9311f3ae Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Fri, 25 Aug 2023 15:57:07 -0400 Subject: [PATCH 50/57] Fix formatting in geos logger --- fv3core/pace/fv3core/initialization/geos_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index f7133543..a7a526ee 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -154,7 +154,7 @@ def __init__( f" orchestration : {self._is_orchestrated}\n" f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz}" f"(halo: {sizer.n_halo})\n" - f" {device_ordinal_info}" + f" Device ord: {device_ordinal_info}\n" f" Nvidia MPS : {MPS_is_on}" ) From 79556957337bfce11bfc8b94ae64c779340023b6 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Fri, 25 Aug 2023 16:01:55 -0400 Subject: [PATCH 51/57] Clean up --- dsl/pace/dsl/dace/dace_config.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/dsl/pace/dsl/dace/dace_config.py b/dsl/pace/dsl/dace/dace_config.py index 6f2befff..1bb0939e 100644 --- a/dsl/pace/dsl/dace/dace_config.py +++ b/dsl/pace/dsl/dace/dace_config.py @@ -1,4 +1,5 @@ import 
enum +import os from typing import Any, Dict, Optional, Tuple import dace.config @@ -162,8 +163,6 @@ def __init__( # Temporary. This is a bit too out of the ordinary for the common user. # We should refactor the architecture to allow for a `gtc:orchestrated:dace:X` # backend that would signify both the `CPU|GPU` split and the orchestration mode - import os - if orchestration is None: fv3_dacemode_env_var = os.getenv("FV3_DACEMODE", "Python") # The below condition guard against defining empty FV3_DACEMODE and @@ -266,8 +265,6 @@ def __init__( # attempt to kill the dace.conf to avoid confusion if dace.config.Config._cfg_filename: try: - import os - os.remove(dace.config.Config._cfg_filename) except OSError: pass @@ -317,7 +314,7 @@ def get_orchestrate(self) -> DaCeOrchestration: return self._orchestrate def get_sync_debug(self) -> bool: - return dace.config.Config.get("compiler", "cuda", "syncdebug") + return dace.config.Config.get_bool("compiler", "cuda", "syncdebug") def as_dict(self) -> Dict[str, Any]: return { From 12527362ba741bec183f99d5abdb4562eb748b43 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 29 Aug 2023 15:42:05 -0400 Subject: [PATCH 52/57] Refactor the test to go around so reload bug --- tests/main/dsl/test_caches.py | 97 ++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 48 deletions(-) diff --git a/tests/main/dsl/test_caches.py b/tests/main/dsl/test_caches.py index d5318493..d5088e4b 100644 --- a/tests/main/dsl/test_caches.py +++ b/tests/main/dsl/test_caches.py @@ -126,54 +126,55 @@ def test_relocatability_orchestration(backend): @pytest.mark.parametrize( "backend", [ - pytest.param("gt:cpu_ifirst"), pytest.param("dace:cpu"), ], ) -def test_relocatability(backend): - # TODO: test work - but crashes when chained with other - # see https://github.com/GEOS-ESM/pace/issues/16 - pass - # import os - # import shutil - - # working_dir = os.getcwd() - - # # Compile on default - # p0 = OrchestratedProgam(backend, 
DaCeOrchestration.Python) - # p0() - # assert os.path.exists( - # f"{working_dir}/.gt_cache_000000/py38_1013/gtcpu_ifirst/__main__/_stencil/" - # ) - - # # Compile in another directory - # from gt4py.cartesian import config as gt_config - - # custom_path = f"{working_dir}/.my_cache_path" - # gt_config.cache_settings["root_path"] = custom_path - # p1 = OrchestratedProgam(backend, DaCeOrchestration.Python) - # p1() - # assert os.path.exists( - # f"{custom_path}/.gt_cache_000000/py38_1013/gtcpu_ifirst/__main__/_stencil/" - # ) - - # # Check relocability by copying the second cache directory, - # # changing the path of gt_config.cache_settings and trying to Run on it - # relocated_path = f"{working_dir}/.my_relocated_cache_path" - # shutil.copytree(custom_path, relocated_path, dirs_exist_ok=True) - # gt_config.cache_settings["root_path"] = relocated_path - # p2 = OrchestratedProgam(backend, DaCeOrchestration.Python) - # p2() - # assert os.path.exists( - # f"{relocated_path}/.gt_cache_000000/py38_1013/gtcpu_ifirst/__main__/_stencil/" - # ) - - -if __name__ == "__main__": - # TODO: test can be merged once gt4py also generates in the _FV3_X format - print("\n|> test_relocatability_orchestration('dace:cpu')\n") - test_relocatability_orchestration("dace:cpu") - print("\n|> test_relocatability('gt:cpu_ifirst')\n") - test_relocatability("gt:cpu_ifirst") - print("\n|> test_relocatability('dace:cpu')\n") - test_relocatability("dace:cpu") +def test_relocatability(backend: str): + import os + import shutil + + import gt4py + from gt4py.cartesian import config as gt_config + + from pace.util.mpi import MPI + + # Restore original dir name + gt4py.cartesian.config.cache_settings["dir_name"] = os.environ.get( + "GT_CACHE_DIR_NAME", f".gt_cache_{MPI.COMM_WORLD.Get_rank():06}" + ) + + backend_sanitized = backend.replace(":", "") + working_dir = os.getcwd() + + # Compile on default + p0 = OrchestratedProgam(backend, DaCeOrchestration.Python) + p0() + assert os.path.exists( + 
f"{working_dir}/.gt_cache_000000/py38_1013/{backend_sanitized}" + "/__main__/_stencil/" + ) + + # Compile in another directory + + custom_path = f"{working_dir}/.my_cache_path" + gt_config.cache_settings["root_path"] = custom_path + p1 = OrchestratedProgam(backend, DaCeOrchestration.Python) + p1() + assert os.path.exists( + f"{custom_path}/.gt_cache_000000/py38_1013/{backend_sanitized}" + "/__main__/_stencil/" + ) + + # Check relocability by copying the second cache directory, + # changing the path of gt_config.cache_settings and trying to Run on it + relocated_path = f"{working_dir}/.my_relocated_cache_path" + shutil.copytree( + f"{working_dir}/.gt_cache_000000", relocated_path, dirs_exist_ok=True + ) + gt_config.cache_settings["root_path"] = relocated_path + p2 = OrchestratedProgam(backend, DaCeOrchestration.Python) + p2() + assert os.path.exists( + f"{relocated_path}/.gt_cache_000000/py38_1013/{backend_sanitized}" + "/__main__/_stencil/" + ) From 8de32bc52aa37f091d7d13c3ac9626d7b62f9a6f Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 29 Aug 2023 16:14:09 -0400 Subject: [PATCH 53/57] Update requirements to include external/dace Include boost into main test --- .github/workflows/main_unit_tests.yml | 4 ++-- requirements_dev.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main_unit_tests.yml b/.github/workflows/main_unit_tests.yml index 5dbf4a1f..5800baa6 100644 --- a/.github/workflows/main_unit_tests.yml +++ b/.github/workflows/main_unit_tests.yml @@ -15,9 +15,9 @@ jobs: uses: actions/setup-python@v4.6.0 with: python-version: '3.8.12' - - name: Install OpenMPI for gt4py + - name: Install OpenMPI & Boost for gt4py run: | - sudo apt-get install libopenmpi-dev + sudo apt-get install libopenmpi-dev libboost1.74-dev - name: Install Python packages run: | python -m pip install --upgrade pip diff --git a/requirements_dev.txt b/requirements_dev.txt index 052bf5c3..59853798 100644 --- a/requirements_dev.txt +++ 
b/requirements_dev.txt @@ -15,7 +15,7 @@ dace==0.14.0 f90nml>=1.1.0 numpy>=1.15 -e external/gt4py --e util[dace] +-e external/dace -e stencils -e dsl -e physics From 6ef8b6081d005f14c13c70eed363f7f5d411094a Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 29 Aug 2023 16:21:37 -0400 Subject: [PATCH 54/57] Typo --- requirements_dev.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index 59853798..ef46c36c 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -11,7 +11,6 @@ dask>=2021.10.0 netCDF4 cftime fv3config>=0.9.0 -dace==0.14.0 f90nml>=1.1.0 numpy>=1.15 -e external/gt4py From 51fca6ed788cee5b0091d443d3ddc5014e231f17 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 29 Aug 2023 16:29:18 -0400 Subject: [PATCH 55/57] Revert wrong branch changes --- requirements_dev.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index ef46c36c..484c4948 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -10,11 +10,12 @@ zarr dask>=2021.10.0 netCDF4 cftime +dace==0.14.0 fv3config>=0.9.0 f90nml>=1.1.0 numpy>=1.15 -e external/gt4py --e external/dace +-e util[dace] -e stencils -e dsl -e physics From 132e2c4d184caab174f19dbf01adf06a72ea95a4 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Wed, 30 Aug 2023 09:32:29 -0400 Subject: [PATCH 56/57] Fix utest called from pytest --- tests/main/dsl/test_caches.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tests/main/dsl/test_caches.py b/tests/main/dsl/test_caches.py index d5088e4b..c1f01303 100644 --- a/tests/main/dsl/test_caches.py +++ b/tests/main/dsl/test_caches.py @@ -144,37 +144,33 @@ def test_relocatability(backend: str): ) backend_sanitized = backend.replace(":", "") - working_dir = os.getcwd() # Compile on default p0 = OrchestratedProgam(backend, DaCeOrchestration.Python) p0() assert os.path.exists( - 
f"{working_dir}/.gt_cache_000000/py38_1013/{backend_sanitized}" - "/__main__/_stencil/" + f"./.gt_cache_000000/py38_1013/{backend_sanitized}/test_caches/_stencil/" ) # Compile in another directory - custom_path = f"{working_dir}/.my_cache_path" + custom_path = "./.my_cache_path" gt_config.cache_settings["root_path"] = custom_path p1 = OrchestratedProgam(backend, DaCeOrchestration.Python) p1() assert os.path.exists( f"{custom_path}/.gt_cache_000000/py38_1013/{backend_sanitized}" - "/__main__/_stencil/" + "/test_caches/_stencil/" ) # Check relocability by copying the second cache directory, # changing the path of gt_config.cache_settings and trying to Run on it - relocated_path = f"{working_dir}/.my_relocated_cache_path" - shutil.copytree( - f"{working_dir}/.gt_cache_000000", relocated_path, dirs_exist_ok=True - ) + relocated_path = "./.my_relocated_cache_path" + shutil.copytree("./.gt_cache_000000", relocated_path, dirs_exist_ok=True) gt_config.cache_settings["root_path"] = relocated_path p2 = OrchestratedProgam(backend, DaCeOrchestration.Python) p2() assert os.path.exists( f"{relocated_path}/.gt_cache_000000/py38_1013/{backend_sanitized}" - "/__main__/_stencil/" + "/test_caches/_stencil/" ) From 689f4b0811e836adad0f330ed59ee19a5f082fdd Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Wed, 30 Aug 2023 10:19:56 -0400 Subject: [PATCH 57/57] Update comment --- dsl/pace/dsl/dace/orchestration.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dsl/pace/dsl/dace/orchestration.py b/dsl/pace/dsl/dace/orchestration.py index 1feca341..7858381a 100644 --- a/dsl/pace/dsl/dace/orchestration.py +++ b/dsl/pace/dsl/dace/orchestration.py @@ -194,12 +194,13 @@ def _build_sdfg( ), ) - # Compilation done, either exit or scatter/gather and run + # Compilation done. + # On Build: all ranks sync, then exit. + # On BuildAndRun: all ranks sync, then load the SDFG from + # the expected path (made available by build). 
+ # We use a "FrozenCompiledSDFG" to minimize re-entry cost at call time # DEV NOTE: we explicitly use MPI.COMM_WORLD here because it is # a true multi-machine sync, outside of our own communicator class. - # Also this code is protected in the case of running on one machine by the fact - # that 0 is _always_ a compiling rank & unblock_waiting_tiles is protected - # against scattering when no other ranks are present. if config.get_orchestrate() == DaCeOrchestration.Build: MPI.COMM_WORLD.Barrier() # Protect against early exist which kill SLURM jobs DaCeProgress.log(