From 3afbeefb951c0c6d16ac27605f71c14ffe7d1ea4 Mon Sep 17 00:00:00 2001 From: Oliver Elbert Date: Wed, 16 Aug 2023 15:25:18 -0400 Subject: [PATCH 1/3] Feature/dp driver (#13) * initial commit * adding test config * adding the rest of driver and util code * updating history.md * move u_max to dycore config * uncomment assert * added comment explaining the copy of grid type to dycore config --- .../examples/configs/baroclinic_c12_dp.yaml | 102 ++++++++++++++++++ driver/pace/driver/driver.py | 4 + driver/pace/driver/grid.py | 15 ++- fv3core/pace/fv3core/_config.py | 2 + tests/main/driver/test_example_configs.py | 1 + util/HISTORY.md | 2 + util/pace/util/grid/generation.py | 9 ++ util/pace/util/namelist.py | 8 ++ 8 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 driver/examples/configs/baroclinic_c12_dp.yaml diff --git a/driver/examples/configs/baroclinic_c12_dp.yaml b/driver/examples/configs/baroclinic_c12_dp.yaml new file mode 100644 index 00000000..029767ca --- /dev/null +++ b/driver/examples/configs/baroclinic_c12_dp.yaml @@ -0,0 +1,102 @@ +stencil_config: + compilation_config: + backend: numpy + rebuild: false + validate_args: true + format_source: false + device_sync: false +grid_config: + type: generated + config: + grid_type: 4 + dx_const: 3000.0 + dy_const: 3000.0 + deglat: 10.0 +initialization: + type: baroclinic +performance_config: + collect_performance: true + experiment_name: c12_baroclinic +nx_tile: 12 +nz: 79 +dt_atmos: 225 +minutes: 15 +layout: + - 1 + - 1 +diagnostics_config: + path: output + output_format: netcdf + names: + - u + - v + - ua + - va + - pt + - delp + - qvapor + - qliquid + - qice + - qrain + - qsnow + - qgraupel + z_select: + - level: 65 + names: + - pt +dycore_config: + a_imp: 1.0 + beta: 0. + consv_te: 0. + d2_bg: 0. + d2_bg_k1: 0.2 + d2_bg_k2: 0.1 + d4_bg: 0.15 + d_con: 1.0 + d_ext: 0.0 + dddmp: 0.5 + delt_max: 0.002 + do_sat_adj: true + do_vort_damp: true + fill: true + hord_dp: 6 + hord_mt: 6 + hord_tm: 6 + hord_tr: 8 + hord_vt: 6 + hydrostatic: false + k_split: 1 + ke_bg: 0. + kord_mt: 9 + kord_tm: -9 + kord_tr: 9 + kord_wz: 9 + n_split: 1 + nord: 3 + nwat: 6 + p_fac: 0.05 + rf_cutoff: 3000. + rf_fast: true + tau: 10. + vtdm4: 0.06 + z_tracer: true + do_qa: true + tau_i2s: 1000. + tau_g2v: 1200. + ql_gen: 0.001 + ql_mlt: 0.002 + qs_mlt: 0.000001 + qi_lim: 1.0 + dw_ocean: 0.1 + dw_land: 0.15 + icloud_f: 0 + tau_l2v: 300. + tau_v2l: 90. 
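+  # Note (annotation, not in the original file): u_max below is new with this
+  # PR's doubly-periodic (dp) grid support; it is forwarded to the dycore
+  # config as DynamicalCoreConfig.u_max (see util/pace/util/namelist.py).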
+ fv_sg_adj: 0 + n_sponge: 48 + u_max: 355.0 + +physics_config: + hydrostatic: false + nwat: 6 + do_qa: true diff --git a/driver/pace/driver/driver.py b/driver/pace/driver/driver.py index 07317415..0ef3263b 100644 --- a/driver/pace/driver/driver.py +++ b/driver/pace/driver/driver.py @@ -273,6 +273,10 @@ def from_dict(cls, kwargs: Dict[str, Any]) -> "DriverConfig": kwargs["grid_config"] = GridInitializerSelector.from_dict( kwargs["grid_config"] ) + grid_type = kwargs["grid_config"].config.grid_type + # Copy grid_type to the DycoreConfig if it's not the default value + if grid_type != 0: + kwargs["dycore_config"].grid_type = grid_type if ( isinstance(kwargs["stencil_config"], dict) diff --git a/driver/pace/driver/grid.py b/driver/pace/driver/grid.py index 4817869c..c184d566 100644 --- a/driver/pace/driver/grid.py +++ b/driver/pace/driver/grid.py @@ -85,12 +85,20 @@ class GeneratedGridConfig(GridInitializer): lon_target: desired center longitude for refined tile (deg) lat_target: desired center latitude for refined tile (deg) restart_path: if given, load vertical grid from restart file + grid_type: type of grid, 0 is a gnomonic cubed-sphere, 4 is doubly-periodic + dx_const: constant x-width of grid cells on a dp-grid + dy_const: constant y-width of grid cells on a dp-grid + deglat: latitude to use for coriolis calculations on a dp-grid """ stretch_factor: Optional[float] = 1.0 lon_target: Optional[float] = 350.0 lat_target: Optional[float] = -90.0 restart_path: Optional[str] = None + grid_type: Optional[int] = 0 + dx_const: Optional[float] = 1000.0 + dy_const: Optional[float] = 1000.0 + deglat: Optional[float] = 15.0 def get_grid( self, @@ -99,7 +107,12 @@ def get_grid( ) -> Tuple[DampingCoefficients, DriverGridData, GridData]: metric_terms = MetricTerms( - quantity_factory=quantity_factory, communicator=communicator + quantity_factory=quantity_factory, + communicator=communicator, + grid_type=self.grid_type, + dx_const=self.dx_const, + dy_const=self.dy_const, + deglat=self.deglat, ) if self.stretch_factor != 1: # do horizontal grid transformation _transform_horizontal_grid( diff --git a/fv3core/pace/fv3core/_config.py b/fv3core/pace/fv3core/_config.py index 17609b7c..51fb609f 100644 --- a/fv3core/pace/fv3core/_config.py +++ b/fv3core/pace/fv3core/_config.py @@ -195,6 +195,7 @@ class DynamicalCoreConfig: do_qa: bool = DEFAULT_BOOL layout: Tuple[int, int] = NamelistDefaults.layout grid_type: int = NamelistDefaults.grid_type + u_max: float = NamelistDefaults.u_max # max windspeed for dp config do_f3d: bool = NamelistDefaults.do_f3d inline_q: bool = NamelistDefaults.inline_q do_skeb: bool = NamelistDefaults.do_skeb # save dissipation estimate @@ -334,6 +335,7 @@ def from_namelist(cls, namelist: Namelist) -> "DynamicalCoreConfig": do_qa=namelist.do_qa, layout=namelist.layout, grid_type=namelist.grid_type, + u_max=namelist.u_max, do_f3d=namelist.do_f3d, inline_q=namelist.inline_q, do_skeb=namelist.do_skeb, diff --git a/tests/main/driver/test_example_configs.py b/tests/main/driver/test_example_configs.py index 14d74ce0..e62276d1 100644 --- a/tests/main/driver/test_example_configs.py +++ b/tests/main/driver/test_example_configs.py @@ -13,6 +13,7 @@ TESTED_CONFIGS: List[str] = [ "baroclinic_c12.yaml", + "baroclinic_c12_dp.yaml", "baroclinic_c12_comm_read.yaml", "baroclinic_c12_comm_write.yaml", "baroclinic_c12_null_comm.yaml", diff --git a/util/HISTORY.md b/util/HISTORY.md index e07ed317..26aac41c 100644 --- a/util/HISTORY.md +++ b/util/HISTORY.md @@ -4,6 +4,8 @@ History latest ------ +- Added 
`dx_const`, `dy_const`, `deglat`, and `u_max` namelist settings for doubly-periodic grids
+- Added `dx_const`, `dy_const`, and `deglat` to grid generation code for doubly-periodic grids
 - Added f32 support to halo exchange data transformation

 v0.10.0
diff --git a/util/pace/util/grid/generation.py b/util/pace/util/grid/generation.py
index 7c7ad98c..b78a7059 100644
--- a/util/pace/util/grid/generation.py
+++ b/util/pace/util/grid/generation.py
@@ -222,6 +222,9 @@ def __init__(
         quantity_factory: util.QuantityFactory,
         communicator: util.CubedSphereCommunicator,
         grid_type: int = 0,
+        dx_const: float = 1000.0,
+        dy_const: float = 1000.0,
+        deglat: float = 15.0,
     ):
         assert grid_type < 3
         self._grid_type = grid_type
@@ -375,6 +378,9 @@ def from_tile_sizing(
         communicator: util.CubedSphereCommunicator,
         backend: str,
         grid_type: int = 0,
+        dx_const: float = 1000.0,
+        dy_const: float = 1000.0,
+        deglat: float = 15.0,
     ) -> "MetricTerms":
         sizer = util.SubtileGridSizer.from_tile_params(
             nx_tile=npx - 1,
@@ -393,6 +399,9 @@ def from_tile_sizing(
             quantity_factory=quantity_factory,
             communicator=communicator,
             grid_type=grid_type,
+            dx_const=dx_const,
+            dy_const=dy_const,
+            deglat=deglat,
         )

     @property
diff --git a/util/pace/util/namelist.py b/util/pace/util/namelist.py
index ff082736..0133e3f6 100644
--- a/util/pace/util/namelist.py
+++ b/util/pace/util/namelist.py
@@ -12,6 +12,10 @@ class NamelistDefaults:
     layout = (1, 1)
     grid_type = 0
+    dx_const = 1000.0
+    dy_const = 1000.0
+    deglat = 15.0
+    u_max = 350.0
     do_f3d = False
     inline_q = False
     do_skeb = False  # save dissipation estimate
@@ -372,6 +376,10 @@ class Namelist:
     # fvmxl: Any
     # ldebug: Any
     grid_type: int = NamelistDefaults.grid_type
+    dx_const: float = NamelistDefaults.dx_const
+    dy_const: float = NamelistDefaults.dy_const
+    deglat: float = NamelistDefaults.deglat
+    u_max: float = NamelistDefaults.u_max
     do_f3d: bool = NamelistDefaults.do_f3d
     inline_q: bool = NamelistDefaults.inline_q
     do_skeb: bool = NamelistDefaults.do_skeb  # save dissipation estimate

From d8ebc398a6b2dfe21e6adfb81f6232214b7b25ae Mon Sep 17 00:00:00 2001
From: Florian Deconinck
Date: Fri, 18 Aug 2023 08:25:24 -0400
Subject: [PATCH 2/3] Turn main unit test & lint on PR, logger clean up
 [NASA:Update] (#15)

* Initialize GeosDycoreWrapper with bdt (timestep)
* Use GEOS version of constants
* 1. Add qcld to the list of tracers being advected
  2. Made GEOS-specific changes to thresholds in saturation adjustment
* Accumulate diss_est
* Allow GEOS_WRAPPER to process device data
* Add clear to collector for 3rd party use. GEOS passes down timings to caller
* Make kernel analysis run a copy stencil to compute local bandwidth
  Parametrize tool with backend, output format
* Move constants onto an env var
  Add saturation adjustment threshold to const
* Restrict dace to 0.14.1 due to a parsing bug
* Add guard for bdt==0
* Fix theoretical timings
* Fixed a bug where pkz was being calculated twice, and the second calc was wrong
* Downgrade DaCe to 0.14.0 pending array aliasing fix
* Set default cache path for orchestrated DaCe to respect GT_CACHE_* env
* Remove previous per stencil override of default_build_folder
* Revert "Set default cache path for orchestrated DaCe to respect GT_CACHE_* env"
* Read cache_root in default dace backend
* Document faulty behavior with GT_CACHE_DIR_NAME
* Check for the string value of CONST_VERSION directly instead of enum
* Protect constant selection more rigorously.
Clean abort on unknown constant given * Log constants selection * Refactor NQ to constants.py * Introduce PACE_LOGLEVEL to control log level from outside * Code guidelines clean up * Devops/GitHub actions on (#15) * Linting on PR * Run main unit test * Update python to available 3.8.12 * Fix unit tests (remove dxa, dya rely on halo ex) * Update HISTORY.md * Adapt log_level in driver.run * Verbose the PACE_CONSTANTS * Doc log level hierarchical nature --------- Co-authored-by: Purnendu Chakraborty Co-authored-by: Purnendu Chakraborty --- .github/workflows/lint.yml | 27 +++++++++ .github/workflows/main_unit_tests.yml | 27 +++++++++ README.md | 20 ++++++- driver/pace/driver/run.py | 54 +++-------------- dsl/pace/dsl/dace/utils.py | 2 +- .../fv3core/initialization/geos_wrapper.py | 8 +-- fv3core/pace/fv3core/stencils/d_sw.py | 6 +- fv3core/pace/fv3core/stencils/fv_dynamics.py | 3 +- tests/main/fv3core/test_init_from_geos.py | 60 ++++++++++--------- util/HISTORY.md | 1 + util/pace/util/__init__.py | 2 +- util/pace/util/communicator.py | 3 - util/pace/util/local_comm.py | 7 +-- util/pace/util/logging.py | 18 +++++- util/pace/util/monitor/netcdf_monitor.py | 10 +--- util/pace/util/monitor/zarr_monitor.py | 9 +-- util/pace/util/mpi.py | 30 +++++----- 17 files changed, 163 insertions(+), 124 deletions(-) create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/main_unit_tests.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..0cc08080 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,27 @@ +name: "Lint" +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Checkout Pace repository + uses: actions/checkout@v3.5.2 + with: + submodules: 'recursive' + - name: Step Python 3.8.12 + uses: actions/setup-python@v4.6.0 + with: + python-version: '3.8.12' + - name: Install OpenMPI for gt4py + run: | + sudo apt-get install libopenmpi-dev + - name: Install Python packages + run: | + python -m pip install --upgrade pip + pip install -r requirements_dev.txt -r requirements_lint.txt + - name: Run lint via pre-commit + run: | + pre-commit run --all-files diff --git a/.github/workflows/main_unit_tests.yml b/.github/workflows/main_unit_tests.yml new file mode 100644 index 00000000..5dbf4a1f --- /dev/null +++ b/.github/workflows/main_unit_tests.yml @@ -0,0 +1,27 @@ +name: "Main unit tests" +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled] + +jobs: + main_unit_tests: + runs-on: ubuntu-latest + steps: + - name: Checkout Pace repository + uses: actions/checkout@v3.5.2 + with: + submodules: 'recursive' + - name: Step Python 3.8.12 + uses: actions/setup-python@v4.6.0 + with: + python-version: '3.8.12' + - name: Install OpenMPI for gt4py + run: | + sudo apt-get install libopenmpi-dev + - name: Install Python packages + run: | + python -m pip install --upgrade pip + pip install -r requirements_dev.txt + - name: Run all main tests + run: | + pytest -x tests/main diff --git a/README.md b/README.md index 31980394..5884cee8 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Pace is an implementation of the FV3GFS / SHiELD atmospheric model developed by Full Sphinx documentation can be found at [https://ai2cm.github.io/pace/](https://ai2cm.github.io/pace/). **WARNING** This repo is under active development - supported features and procedures can change rapidly and without notice. 
+
## Quickstart - bare metal

### Build
@@ -27,10 +28,13 @@ export BOOST_ROOT=BOOST/ROOT/boost_1_79_0
 ```

 When cloning Pace you will need to update the repository's submodules as well:
+
 ```shell
 git clone --recursive https://github.com/ai2cm/pace.git
 ```
+
 or if you have already cloned the repository:
+
 ```
 git submodule update --init --recursive
 ```
@@ -43,6 +47,7 @@ source venv_name/bin/activate
 ```

 Inside of your pace `venv` or conda environment pip install the Python requirements, GT4Py, and Pace:
+
 ```shell
 pip3 install -r requirements_dev.txt -c constraints.txt
 ```
@@ -52,6 +57,7 @@ Shell scripts to install Pace on specific machines such as Gaea can be found in `examples/build_scripts/`.

 ### Run

 With the environment activated, you can run an example baroclinic test case with the following command:
+
 ```shell
 mpirun -n 6 python3 -m pace.driver.run driver/examples/configs/baroclinic_c12.yaml

 # or with oversubscribe if you do not have at least 6 cores
@@ -61,20 +67,33 @@ mpirun -n 6 --oversubscribe python3 -m pace.driver.run driver/examples/configs/b
 ```

 After the run completes, you will see an output directory `output.zarr`. An example to visualize the output is provided in `driver/examples/plot_output.py`. See the [driver example](driver/examples/README.md) section for more details.

+### Environment variable configuration
+
+- `PACE_CONSTANTS`: Pace is bundled with various constants (see _util/pace/util/constants.py_).
+  - `FV3DYCORE` NOAA's FV3 dynamical core constants (original port)
+  - `GFS` Constants as defined in NOAA GFS
+  - `GEOS` Constants as defined in GEOS v13
+- `PACE_FLOAT_PRECISION`: default precision of the fields & scalars in the numerics. Defaults to 64.
+- `PACE_LOGLEVEL`: logging level to display (DEBUG, INFO, WARNING, ERROR, CRITICAL). Defaults to INFO.
+
 ## Quickstart - Docker
+
 ### Build

 While it is possible to install and build pace bare-metal, we can ensure all system libraries are installed with the correct versions by using a Docker container to test and develop pace.

 First, you will need to update the git submodules so that any dependencies are cloned and at the correct version:
+
 ```shell
 git submodule update --init --recursive
 ```

 Then build the `pace` docker image at the top level.
+
 ```shell
 make build
 ```
+
 ### Run

 ```shell
 make dev
 mpirun --mca btl_vader_single_copy_mechanism none -n 6 python3 -m pace.driver.run /pace/driver/examples/configs/baroclinic_c12.yaml
 ```
@@ -94,7 +113,6 @@ This git repository is laid out as a mono-repo, containing multiple independent

 ![Graph of interdependencies of Pace modules, generated from dependences.dot](./dependencies.svg)

-
 ## ML emulation

 An example of integration of an ML model replacing the microphysics parametrization is available on the `feature/microphysics-emulator` branch.
diff --git a/driver/pace/driver/run.py b/driver/pace/driver/run.py
index 2d9160cd..e6c39982 100644
--- a/driver/pace/driver/run.py
+++ b/driver/pace/driver/run.py
@@ -1,58 +1,15 @@
 import dataclasses
 import gc
-import logging
 from typing import Optional

 import click
 import yaml

-from pace.util.mpi import MPI
+from pace.util import pace_log, AVAILABLE_LOG_LEVELS

 from .driver import Driver, DriverConfig

-logger = logging.getLogger(__name__)
-
-
-log_levels = {
-    "info": logging.INFO,
-    "debug": logging.DEBUG,
-    "warning": logging.WARNING,
-    "error": logging.ERROR,
-    "critical": logging.CRITICAL,
-}
-
-
-def configure_logging(log_rank: Optional[int], log_level: str):
-    """
-    Configure logging for the driver.
- - Args: - log_rank: rank to log from, or 'all' to log to all ranks, - forced to 'all' if running without MPI - log_level: log level to use - """ - level = log_levels[log_level.lower()] - if MPI is None: - logging.basicConfig( - level=level, - format="%(asctime)s [%(levelname)s] %(name)s:%(message)s", - handlers=[logging.StreamHandler()], - datefmt="%Y-%m-%d %H:%M:%S", - ) - else: - if log_rank is None or int(log_rank) == MPI.COMM_WORLD.Get_rank(): - logging.basicConfig( - level=level, - format=( - f"%(asctime)s [%(levelname)s] (rank {MPI.COMM_WORLD.Get_rank()}) " - "%(name)s:%(message)s" - ), - handlers=[logging.StreamHandler()], - datefmt="%Y-%m-%d %H:%M:%S", - ) - - @click.command() @click.argument( "CONFIG_PATH", @@ -75,12 +32,15 @@ def command_line(config_path: str, log_rank: Optional[int], log_level: str): CONFIG_PATH is the path to a DriverConfig yaml file. """ - configure_logging(log_rank=log_rank, log_level=log_level) - logger.info("loading DriverConfig from yaml") + level = AVAILABLE_LOG_LEVELS[log_level.lower()] + pace_log.setLevel(level) + pace_log.info("loading DriverConfig from yaml") with open(config_path, "r") as f: config = yaml.safe_load(f) driver_config = DriverConfig.from_dict(config) - logging.info(f"DriverConfig loaded: {yaml.dump(dataclasses.asdict(driver_config))}") + pace_log.info( + f"DriverConfig loaded: {yaml.dump(dataclasses.asdict(driver_config))}" + ) main(driver_config=driver_config) diff --git a/dsl/pace/dsl/dace/utils.py b/dsl/pace/dsl/dace/utils.py index 47eb61ad..40ac3c12 100644 --- a/dsl/pace/dsl/dace/utils.py +++ b/dsl/pace/dsl/dace/utils.py @@ -28,7 +28,7 @@ def __init__(self, config: DaceConfig, label: str): @classmethod def log(cls, prefix: str, message: str): - pace_log.info(f"{prefix} {message}") + pace_log.debug(f"{prefix} {message}") @classmethod def default_prefix(cls, config: DaceConfig) -> str: diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 4fc34052..2835e77e 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -132,9 +132,12 @@ def __init__( self._allocate_output_dir() pace_log.info( - "GEOS-Wrapper with: \n" + "Pace GEOS wrapper initialized: \n" f" dt : {self.dycore_state.bdt}\n" f" bridge : {self._fortran_mem_space} > {self._pace_mem_space}\n" + f" backend: {backend}\n" + f" orchestration: {self._is_orchestrated}\n" + f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})" ) def _critical_path(self): @@ -173,7 +176,6 @@ def __call__( cyd: np.ndarray, diss_estd: np.ndarray, ) -> Tuple[Dict[str, np.ndarray], Dict[str, List[float]]]: - with self.perf_collector.timestep_timer.clock("numpy-to-dycore"): self.dycore_state = self._put_fortran_data_in_dycore( u, @@ -246,7 +248,6 @@ def _put_fortran_data_in_dycore( cyd: np.ndarray, diss_estd: np.ndarray, ) -> fv3core.DycoreState: - isc = self._grid_indexing.isc jsc = self._grid_indexing.jsc iec = self._grid_indexing.iec + 1 @@ -315,7 +316,6 @@ def _put_fortran_data_in_dycore( return state def _prep_outputs_for_geos(self) -> Dict[str, np.ndarray]: - output_dict = self.output_dict isc = self._grid_indexing.isc jsc = self._grid_indexing.jsc diff --git a/fv3core/pace/fv3core/stencils/d_sw.py b/fv3core/pace/fv3core/stencils/d_sw.py index cc602f4e..51c9ee6e 100644 --- a/fv3core/pace/fv3core/stencils/d_sw.py +++ b/fv3core/pace/fv3core/stencils/d_sw.py @@ -932,7 +932,7 @@ def make_quantity(): ) ) - if (self._d_con > 1.e-5) or 
(self._do_stochastic_ke_backscatter): + if (self._d_con > 1.0e-5) or (self._do_stochastic_ke_backscatter): self._accumulate_heat_source_and_dissipation_estimate_stencil = ( stencil_factory.from_dims_halo( func=accumulate_heat_source_and_dissipation_estimate, @@ -1254,11 +1254,11 @@ def __call__( self._column_namelist["d_con"], ) - if (self._d_con > 1.e-5) or (self._do_stochastic_ke_backscatter): + if (self._d_con > 1.0e-5) or (self._do_stochastic_ke_backscatter): self._accumulate_heat_source_and_dissipation_estimate_stencil( self._tmp_heat_s, heat_source, self._tmp_diss_e, diss_est ) - + self._update_u_and_v_stencil( self._tmp_ut, self._tmp_vt, diff --git a/fv3core/pace/fv3core/stencils/fv_dynamics.py b/fv3core/pace/fv3core/stencils/fv_dynamics.py index 3299e501..5f3de73a 100644 --- a/fv3core/pace/fv3core/stencils/fv_dynamics.py +++ b/fv3core/pace/fv3core/stencils/fv_dynamics.py @@ -7,7 +7,6 @@ import pace.dsl.gt4py_utils as utils import pace.fv3core.stencils.moist_cv as moist_cv import pace.util -import pace.util.constants as constants from pace.dsl.dace.orchestration import dace_inhibitor, orchestrate from pace.dsl.dace.wrapped_halo_exchange import WrappedHaloUpdater from pace.dsl.stencil import StencilFactory @@ -21,7 +20,7 @@ from pace.fv3core.stencils.neg_adj3 import AdjustNegativeTracerMixingRatio from pace.fv3core.stencils.remapping import LagrangianToEulerian from pace.stencils.c2l_ord import CubedToLatLon -from pace.util import X_DIM, Y_DIM, Z_INTERFACE_DIM, Timer +from pace.util import X_DIM, Y_DIM, Z_INTERFACE_DIM, Timer, constants from pace.util.grid import DampingCoefficients, GridData from pace.util.logging import pace_log from pace.util.mpi import MPI diff --git a/tests/main/fv3core/test_init_from_geos.py b/tests/main/fv3core/test_init_from_geos.py index 252fe7d2..16efe9e9 100644 --- a/tests/main/fv3core/test_init_from_geos.py +++ b/tests/main/fv3core/test_init_from_geos.py @@ -7,7 +7,6 @@ def test_geos_wrapper(): - namelist_dict = { "stencil_config": { "compilation_config": { @@ -82,7 +81,12 @@ def test_geos_wrapper(): comm = NullComm(rank=0, total_ranks=6, fill_value=0.0) backend = "numpy" - wrapper = fv3core.GeosDycoreWrapper(namelist, comm, backend) + wrapper = fv3core.GeosDycoreWrapper( + namelist=namelist, + comm=comm, + backend=backend, + bdt=namelist_dict["dt_atmos"], + ) nhalo = 3 shape_centered = ( namelist["nx_tile"] + 2 * nhalo, @@ -191,31 +195,33 @@ def test_geos_wrapper(): ) diss_estd = np.ones(shape_centered) - output_dict = wrapper( - u, - v, - w, - delz, - pt, - delp, - q, - ps, - pe, - pk, - peln, - pkz, - phis, - q_con, - omga, - ua, - va, - uc, - vc, - mfxd, - mfyd, - cxd, - cyd, - diss_estd, + timings = {} + output_dict, timings = wrapper( + timings=timings, + u=u, + v=v, + w=w, + delz=delz, + pt=pt, + delp=delp, + q=q, + ps=ps, + pe=pe, + pk=pk, + peln=peln, + pkz=pkz, + phis=phis, + q_con=q_con, + omga=omga, + ua=ua, + va=va, + uc=uc, + vc=vc, + mfxd=mfxd, + mfyd=mfyd, + cxd=cxd, + cyd=cyd, + diss_estd=diss_estd, ) assert isinstance(output_dict["u"], np.ndarray) diff --git a/util/HISTORY.md b/util/HISTORY.md index 26aac41c..0b0a42b6 100644 --- a/util/HISTORY.md +++ b/util/HISTORY.md @@ -7,6 +7,7 @@ latest - Added `dx_const`, `dy_const`, `deglat`, and `u_max` namelist settings for doubly-periodic grids - Added `dx_const`, `dy_const`, and `deglat` to grid generation code for doubly-periodic grids - Added f32 support to halo exchange data transformation +- Use one single logger, from logging.py v0.10.0 ------- diff --git a/util/pace/util/__init__.py 
b/util/pace/util/__init__.py
index 4911f2cf..54f684d6 100644
--- a/util/pace/util/__init__.py
+++ b/util/pace/util/__init__.py
@@ -54,7 +54,7 @@
 from .initialization import GridSizer, QuantityFactory, SubtileGridSizer
 from .io import read_state, write_state
 from .local_comm import LocalComm
-from .logging import pace_log
+from .logging import pace_log, AVAILABLE_LOG_LEVELS
 from .monitor import Monitor, NetCDFMonitor, ZarrMonitor
 from .mpi import MPIComm
 from .namelist import Namelist, NamelistDefaults
diff --git a/util/pace/util/communicator.py b/util/pace/util/communicator.py
index 0611fa98..938469bd 100644
--- a/util/pace/util/communicator.py
+++ b/util/pace/util/communicator.py
@@ -1,5 +1,4 @@
 import abc
-import logging
 from typing import List, Mapping, Optional, Sequence, Tuple, Union, cast

 import numpy as np
@@ -15,8 +14,6 @@
 from .utils import device_synchronize

-logger = logging.getLogger("pace.util")
-
 try:
     import cupy
 except ImportError:
diff --git a/util/pace/util/local_comm.py b/util/pace/util/local_comm.py
index a289296a..32fd0fb4 100644
--- a/util/pace/util/local_comm.py
+++ b/util/pace/util/local_comm.py
@@ -1,14 +1,11 @@
 import copy
-import logging
 from typing import Any

 from .comm import Comm
+from .logging import pace_log
 from .utils import ensure_contiguous, safe_assign_array

-logger = logging.getLogger("pace.util")
-
-
 class ConcurrencyError(Exception):
     """Exception to denote that a rank cannot proceed
     because it is waiting on a call from another rank."""
@@ -104,7 +101,7 @@ def bcast(self, value, root=0):
                 "the bcast source"
             )
         value = self._get_buffer("bcast", value)
-        logger.debug(f"bcast {value} to rank {self.rank}")
+        pace_log.debug(f"bcast {value} to rank {self.rank}")
         return value

     def Barrier(self):
diff --git a/util/pace/util/logging.py b/util/pace/util/logging.py
index 81b5a7b1..1f9142fe 100644
--- a/util/pace/util/logging.py
+++ b/util/pace/util/logging.py
@@ -1,15 +1,29 @@
 import logging
+import os
 import sys

 from mpi4py import MPI

+LOGLEVEL = os.environ.get("PACE_LOGLEVEL", "INFO").upper()
+
+# Python log levels are hierarchical: setting the level to INFO means INFO and
+# everything more severe (WARNING, ERROR, CRITICAL) will be logged, while
+# DEBUG messages are filtered out.
+AVAILABLE_LOG_LEVELS = {
+    "info": logging.INFO,
+    "debug": logging.DEBUG,
+    "warning": logging.WARNING,
+    "error": logging.ERROR,
+    "critical": logging.CRITICAL,
+}
+
+
 def _pace_logger():
     name_log = logging.getLogger(__name__)
-    name_log.setLevel(logging.DEBUG)
+    name_log.setLevel(LOGLEVEL)

     handler = logging.StreamHandler(sys.stdout)
-    handler.setLevel(logging.DEBUG)
+    handler.setLevel(LOGLEVEL)
     formatter = logging.Formatter(
         fmt=(
             f"%(asctime)s|%(levelname)s|rank {MPI.COMM_WORLD.Get_rank()}|"
diff --git a/util/pace/util/monitor/netcdf_monitor.py b/util/pace/util/monitor/netcdf_monitor.py
index 18687216..0b39da60 100644
--- a/util/pace/util/monitor/netcdf_monitor.py
+++ b/util/pace/util/monitor/netcdf_monitor.py
@@ -1,4 +1,3 @@
-import logging
 import os
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Set

 from .. 
import _xarray as xr from ..filesystem import get_fs +from ..logging import pace_log from ..quantity import Quantity from .convert import to_numpy -logger = logging.getLogger(__name__) - - class _TimeChunkedVariable: def __init__(self, initial: Quantity, time_chunk_size: int): self._data = np.zeros( @@ -46,7 +43,6 @@ def data(self) -> Quantity: class _ChunkedNetCDFWriter: - FILENAME_FORMAT = "state_{chunk:04d}_tile{tile}.nc" def __init__( @@ -62,7 +58,7 @@ def __init__( self._time_units: Optional[str] = None def append(self, state): - logger.debug("appending at time %d", self._i_time) + pace_log.debug("appending at time %d", self._i_time) state = {**state} # copy so we don't mutate the input time = state.pop("time", None) if self._chunked is None: @@ -75,7 +71,7 @@ def append(self, state): self._chunked[name].append(quantity) self._times.append(time) if (self._i_time + 1) % self._time_chunk_size == 0: - logger.debug("flushing on append at time %d", self._i_time) + pace_log.debug("flushing on append at time %d", self._i_time) self.flush() self._i_time += 1 diff --git a/util/pace/util/monitor/zarr_monitor.py b/util/pace/util/monitor/zarr_monitor.py index 12186799..5d7729b9 100644 --- a/util/pace/util/monitor/zarr_monitor.py +++ b/util/pace/util/monitor/zarr_monitor.py @@ -1,4 +1,3 @@ -import logging from datetime import datetime, timedelta from typing import List, Tuple, Union @@ -7,12 +6,11 @@ from .. import _xarray as xr from .. import constants, utils from .._optional_imports import cupy, zarr +from ..logging import pace_log from ..partitioner import Partitioner, subtile_slice from .convert import to_numpy -logger = logging.getLogger("pace.util") - __all__ = ["ZarrMonitor"] @@ -238,7 +236,7 @@ def append(self, quantity): ) from_slice = _get_from_slice(target_slice) - logger.debug( + pace_log.debug( f"assigning data from subtile slice {from_slice} to " f"target slice {target_slice}" ) @@ -310,7 +308,7 @@ def append(self, quantity): ) from_slice = _get_from_slice(target_slice) - logger.debug( + pace_log.debug( f"assigning data from subtile slice {from_slice} to " f"target slice {target_slice}" ) @@ -332,7 +330,6 @@ def append(self, quantity): class _ZarrTimeWriter(_ZarrVariableWriter): - _TIME_CHUNK_SIZE = 1024 def __init__(self, *args, **kwargs): diff --git a/util/pace/util/mpi.py b/util/pace/util/mpi.py index d03b7937..5acc2b00 100644 --- a/util/pace/util/mpi.py +++ b/util/pace/util/mpi.py @@ -2,16 +2,14 @@ from mpi4py import MPI except ImportError: MPI = None -import logging from typing import List, Optional, TypeVar, cast from .comm import Comm, Request +from .logging import pace_log T = TypeVar("T") -logger = logging.getLogger(__name__) - class MPIComm(Comm): def __init__(self): @@ -26,54 +24,56 @@ def Get_size(self) -> int: return self._comm.Get_size() def bcast(self, value: Optional[T], root=0) -> T: - logger.debug("bcast from root %s on rank %s", root, self._comm.Get_rank()) + pace_log.debug("bcast from root %s on rank %s", root, self._comm.Get_rank()) return self._comm.bcast(value, root=root) def barrier(self): - logger.debug("barrier on rank %s", self._comm.Get_rank()) + pace_log.debug("barrier on rank %s", self._comm.Get_rank()) self._comm.barrier() def Barrier(self): pass def Scatter(self, sendbuf, recvbuf, root=0, **kwargs): - logger.debug("Scatter on rank %s with root %s", self._comm.Get_rank(), root) + pace_log.debug("Scatter on rank %s with root %s", self._comm.Get_rank(), root) self._comm.Scatter(sendbuf, recvbuf, root=root, **kwargs) def Gather(self, sendbuf, recvbuf, 
root=0, **kwargs):
-        logger.debug("Gather on rank %s with root %s", self._comm.Get_rank(), root)
+        pace_log.debug("Gather on rank %s with root %s", self._comm.Get_rank(), root)
         self._comm.Gather(sendbuf, recvbuf, root=root, **kwargs)

     def allgather(self, sendobj: T) -> List[T]:
-        logger.debug("allgather on rank %s", self._comm.Get_rank())
+        pace_log.debug("allgather on rank %s", self._comm.Get_rank())
         return self._comm.allgather(sendobj)

     def Send(self, sendbuf, dest, tag: int = 0, **kwargs):
-        logger.debug("Send on rank %s with dest %s", self._comm.Get_rank(), dest)
+        pace_log.debug("Send on rank %s with dest %s", self._comm.Get_rank(), dest)
         self._comm.Send(sendbuf, dest, tag=tag, **kwargs)

     def sendrecv(self, sendbuf, dest, **kwargs):
-        logger.debug("sendrecv on rank %s with dest %s", self._comm.Get_rank(), dest)
+        pace_log.debug("sendrecv on rank %s with dest %s", self._comm.Get_rank(), dest)
         return self._comm.sendrecv(sendbuf, dest, **kwargs)

     def Isend(self, sendbuf, dest, tag: int = 0, **kwargs) -> Request:
-        logger.debug("Isend on rank %s with dest %s", self._comm.Get_rank(), dest)
+        pace_log.debug("Isend on rank %s with dest %s", self._comm.Get_rank(), dest)
         return self._comm.Isend(sendbuf, dest, tag=tag, **kwargs)

     def Recv(self, recvbuf, source, tag: int = 0, **kwargs):
-        logger.debug("Recv on rank %s with source %s", self._comm.Get_rank(), source)
+        pace_log.debug("Recv on rank %s with source %s", self._comm.Get_rank(), source)
         self._comm.Recv(recvbuf, source, tag=tag, **kwargs)

     def Irecv(self, recvbuf, source, tag: int = 0, **kwargs) -> Request:
-        logger.debug("Irecv on rank %s with source %s", self._comm.Get_rank(), source)
+        pace_log.debug("Irecv on rank %s with source %s", self._comm.Get_rank(), source)
         return self._comm.Irecv(recvbuf, source, tag=tag, **kwargs)

     def Split(self, color, key) -> "Comm":
-        logger.debug(
+        pace_log.debug(
             "Split on rank %s with color %s, key %s", self._comm.Get_rank(), color, key
         )
         return self._comm.Split(color, key)

     def allreduce(self, sendobj: T, op=None) -> T:
-        logger.debug("allreduce on rank %s with operator %s", self._comm.Get_rank(), op)
+        pace_log.debug(
+            "allreduce on rank %s with operator %s", self._comm.Get_rank(), op
+        )
         return self._comm.allreduce(sendobj, op)

From b1ef6b56f1f041c9e8f8478cd218f807122d6ea5 Mon Sep 17 00:00:00 2001
From: Florian Deconinck
Date: Mon, 11 Sep 2023 13:20:49 -0400
Subject: [PATCH 3/3] [NASA:Update] Distributed dace cache (rework) (#16)

* Initialize GeosDycoreWrapper with bdt (timestep)
* Use GEOS version of constants
* 1. Add qcld to the list of tracers being advected
  2. Made GEOS-specific changes to thresholds in saturation adjustment
* Accumulate diss_est
* Allow GEOS_WRAPPER to process device data
* Add clear to collector for 3rd party use. GEOS passes down timings to caller
* Make kernel analysis run a copy stencil to compute local bandwidth
  Parametrize tool with backend, output format
* Move constants onto an env var
  Add saturation adjustment threshold to const
* Remove unused if leading to empty code block
* Add guard for bdt==0
* Fix theoretical timings
* Fixed a bug where pkz was being calculated twice, and the second calc was wrong
* Downgrade DaCe to 0.14.0 pending array aliasing fix
* Read cache_root in default dace backend
* Document faulty behavior with GT_CACHE_DIR_NAME
* Fix bad requirements syntax
* Check for the string value of CONST_VERSION directly instead of enum
* Protect constant selection more rigorously.
  Clean abort on unknown constant given
* Log constants selection
* Refactor NQ to constants.py
* Replace all logger with pace_log
  Introduce PACE_LOGLEVEL to control log level from outside
* Code guidelines clean up
* Devops/GitHub actions on (#15)
* Add openmpi to the image
* Fix unit tests (remove dxa, dya rely on halo ex)
* Distributed compilation on orchestrated backend for NxN layouts (#14)
* Adapt orchestration distributed compile for NxN layout
* Add a more descriptive string base postfix for cache naming
  Identify the code path for all cases
  Consistent reload post-compile
  Create a central space for all caches generation logic
  No more original layout check required
* Add a test on caches relocatability
* Deactivate relocatability test due to Python crash
  Logged as issue 16
* Raise for 1,X and X,1 layouts which require a new descriptor
* Added ak, bk for 137 levels in eta.py
* Add floating point precision to GEOS bridge init
* Log info GEOS bridge (#18)
* Add floating point precision to GEOS bridge init
* Update geos/develop to grab NOAA PR9 results (#21)
* Verbose choice of block/grid size
* GEOS integration (#9)
* Initialize GeosDycoreWrapper with bdt (timestep)
* Use GEOS version of constants
* 1. 
Add qcld to the list of tracers being advected
  2. Made GEOS-specific changes to thresholds in saturation adjustment
* Accumulate diss_est
* Allow GEOS_WRAPPER to process device data
* Add clear to collector for 3rd party use. GEOS passes down timings to caller
* Make kernel analysis run a copy stencil to compute local bandwidth
  Parametrize tool with backend, output format
* Move constants onto an env var
  Add saturation adjustment threshold to const
* Restrict dace to 0.14.1 due to a parsing bug
* Add guard for bdt==0
  Fix bad merge for bdt with GEOS_Wrapper
* Remove unused code
* Fix theoretical timings
* Fixed a bug where pkz was being calculated twice, and the second calc was wrong
* Downgrade DaCe to 0.14.0 pending array aliasing fix
* Set default cache path for orchestrated DaCe to respect GT_CACHE_* env
* Remove previous per stencil override of default_build_folder
* Revert "Set default cache path for orchestrated DaCe to respect GT_CACHE_* env"
* Revert "Remove previous per stencil override of default_build_folder"
* Read cache_root in default dace backend
* Document faulty behavior with GT_CACHE_DIR_NAME
* Fix bad requirements syntax
* Check for the string value of CONST_VERSION directly instead of enum
* Protect constant selection more rigorously.
  Clean abort on unknown constant given
* Log constants selection
* Refactor NQ to constants.py
* Fix or explain inlined import
* Verbose runtime error when bad dt_atmos
* Verbose warm up
* re-initialize heat_source and diss_est each call, add do_skeb check to accumulation

---------

Co-authored-by: Purnendu Chakraborty
Co-authored-by: Oliver Elbert

---------

Co-authored-by: Rusty Benson <6594772+bensonr@users.noreply.github.com>
Co-authored-by: Oliver Elbert
Co-authored-by: Purnendu Chakraborty
Co-authored-by: Oliver Elbert

* [NOAA:Update] Bring back #15 & doubly periodic domain (#25)
* Feature/dp driver (#13)
* initial commit
* adding test config
* adding the rest of driver and util code
* updating history.md
* move u_max to dycore config
* uncomment assert
* added comment explaining the copy of grid type to dycore config
* Turn main unit test & lint on PR, logger clean up [NASA:Update] (#15)
* Initialize GeosDycoreWrapper with bdt (timestep)
* Use GEOS version of constants
* 1. Add qcld to the list of tracers being advected
  2. Made GEOS-specific changes to thresholds in saturation adjustment
* Accumulate diss_est
* Allow GEOS_WRAPPER to process device data
* Add clear to collector for 3rd party use. GEOS passes down timings to caller
* Make kernel analysis run a copy stencil to compute local bandwidth
  Parametrize tool with backend, output format
* Move constants onto an env var
  Add saturation adjustment threshold to const
* Restrict dace to 0.14.1 due to a parsing bug
* Add guard for bdt==0
* Fix theoretical timings
* Fixed a bug where pkz was being calculated twice, and the second calc was wrong
* Downgrade DaCe to 0.14.0 pending array aliasing fix
* Set default cache path for orchestrated DaCe to respect GT_CACHE_* env
* Remove previous per stencil override of default_build_folder
* Revert "Set default cache path for orchestrated DaCe to respect GT_CACHE_* env"
* Read cache_root in default dace backend
* Document faulty behavior with GT_CACHE_DIR_NAME
* Check for the string value of CONST_VERSION directly instead of enum
* Protect constant selection more rigorously.
  Clean abort on unknown constant given
* Log constants selection
* Refactor NQ to constants.py
* Introduce PACE_LOGLEVEL to control log level from outside
* Devops/GitHub actions on (#15)
* Update python to available 3.8.12
* Fix unit tests (remove dxa, dya rely on halo ex)
* Update HISTORY.md
* Adapt log_level in driver.run
* Verbose the PACE_CONSTANTS
* Doc log level hierarchical nature

---------

Co-authored-by: Purnendu Chakraborty
Co-authored-by: Purnendu Chakraborty

* Fix non-deterministic temporaries by using `zeros` everywhere instead of `empty`
* Update dsl/pace/dsl/caches/codepath.py

Co-authored-by: Oliver Elbert

* Refactor the test to go around the reload bug

---------

Co-authored-by: Purnendu Chakraborty
Co-authored-by: Purnendu Chakraborty
Co-authored-by: Rusty Benson <6594772+bensonr@users.noreply.github.com>
Co-authored-by: Oliver Elbert
Co-authored-by: Oliver Elbert
---
 .github/workflows/main_unit_tests.yml         |   4 +-
 driver/pace/driver/driver.py                  |   2 +-
 driver/pace/driver/grid.py                    |   4 +-
 driver/pace/driver/initialization.py          |   5 +-
 driver/pace/driver/run.py                     |   2 +-
 dsl/pace/dsl/caches/cache_location.py         |  46 +++
 dsl/pace/dsl/caches/codepath.py               |  33 ++
 dsl/pace/dsl/dace/build.py                    | 136 ++------
 dsl/pace/dsl/dace/dace_config.py              | 118 ++++++-
 dsl/pace/dsl/dace/orchestration.py            |  52 +---
 dsl/pace/dsl/typing.py                        |   6 +-
 .../fv3core/initialization/geos_wrapper.py    |  28 +-
 .../pace/fv3core/testing/translate_dyncore.py |   4 +-
 .../translate/translate_remapping.py          |   2 +-
 requirements_dev.txt                          |   2 +-
 stencils/pace/stencils/testing/grid.py        |   2 +-
 .../stencils/testing/parallel_translate.py    |   3 +-
 stencils/pace/stencils/testing/temporaries.py |   7 +-
 tests/main/dsl/test_caches.py                 | 176 +++++++++++
 tests/main/dsl/test_dace_config.py            |  68 +++-
 tests/main/test_grid_init.py                  |   2 -
 util/pace/util/__init__.py                    |   2 +-
 util/pace/util/communicator.py                |  10 +-
 util/pace/util/grid/eta.py                    | 293 +++++++++++++++++-
 util/pace/util/grid/generation.py             |   5 +-
 util/pace/util/grid/gnomonic.py               |   6 +-
 util/pace/util/grid/helper.py                 |   6 +-
 util/pace/util/halo_data_transformer.py       |   4 +-
 util/pace/util/initialization/allocator.py    |   2 +-
 29 files changed, 817 insertions(+), 213 deletions(-)
 create mode 100644 dsl/pace/dsl/caches/cache_location.py
 create mode 100644 dsl/pace/dsl/caches/codepath.py
 create mode 100644 tests/main/dsl/test_caches.py

diff --git a/.github/workflows/main_unit_tests.yml b/.github/workflows/main_unit_tests.yml
index 
5dbf4a1f..5800baa6 100644 --- a/.github/workflows/main_unit_tests.yml +++ b/.github/workflows/main_unit_tests.yml @@ -15,9 +15,9 @@ jobs: uses: actions/setup-python@v4.6.0 with: python-version: '3.8.12' - - name: Install OpenMPI for gt4py + - name: Install OpenMPI & Boost for gt4py run: | - sudo apt-get install libopenmpi-dev + sudo apt-get install libopenmpi-dev libboost1.74-dev - name: Install Python packages run: | python -m pip install --upgrade pip diff --git a/driver/pace/driver/driver.py b/driver/pace/driver/driver.py index 0ef3263b..284acaca 100644 --- a/driver/pace/driver/driver.py +++ b/driver/pace/driver/driver.py @@ -275,7 +275,7 @@ def from_dict(cls, kwargs: Dict[str, Any]) -> "DriverConfig": ) grid_type = kwargs["grid_config"].config.grid_type # Copy grid_type to the DycoreConfig if it's not the default value - if grid_type != 0: + if grid_type != 0: kwargs["dycore_config"].grid_type = grid_type if ( diff --git a/driver/pace/driver/grid.py b/driver/pace/driver/grid.py index c184d566..9fa97a06 100644 --- a/driver/pace/driver/grid.py +++ b/driver/pace/driver/grid.py @@ -105,7 +105,6 @@ def get_grid( quantity_factory: QuantityFactory, communicator: CubedSphereCommunicator, ) -> Tuple[DampingCoefficients, DriverGridData, GridData]: - metric_terms = MetricTerms( quantity_factory=quantity_factory, communicator=communicator, @@ -184,8 +183,7 @@ def get_grid( quantity_factory: QuantityFactory, communicator: CubedSphereCommunicator, ) -> Tuple[DampingCoefficients, DriverGridData, GridData]: - - backend = quantity_factory.empty( + backend = quantity_factory.zeros( dims=[pace.util.X_DIM, pace.util.Y_DIM], units="unknown" ).gt4py_backend diff --git a/driver/pace/driver/initialization.py b/driver/pace/driver/initialization.py index 2b6471a8..bd6d96ea 100644 --- a/driver/pace/driver/initialization.py +++ b/driver/pace/driver/initialization.py @@ -154,7 +154,6 @@ def get_driver_state( driver_grid_data: pace.util.grid.DriverGridData, grid_data: pace.util.grid.GridData, ) -> DriverState: - dycore_state = tc_init.init_tc_state( grid_data=grid_data, quantity_factory=quantity_factory, @@ -323,7 +322,7 @@ def get_driver_state( driver_grid_data: pace.util.grid.DriverGridData, grid_data: pace.util.grid.GridData, ) -> DriverState: - backend = quantity_factory.empty( + backend = quantity_factory.zeros( dims=[pace.util.X_DIM, pace.util.Y_DIM], units="unknown" ).gt4py_backend @@ -348,7 +347,6 @@ def _initialize_dycore_state( communicator: pace.util.CubedSphereCommunicator, backend: str, ) -> fv3core.DycoreState: - grid = self._get_serialized_grid(communicator=communicator, backend=backend) ser = self._serializer(communicator) @@ -401,7 +399,6 @@ def get_driver_state( driver_grid_data: pace.util.grid.DriverGridData, grid_data: pace.util.grid.GridData, ) -> DriverState: - return DriverState( dycore_state=self.dycore_state, physics_state=self.physics_state, diff --git a/driver/pace/driver/run.py b/driver/pace/driver/run.py index e6c39982..df70eb14 100644 --- a/driver/pace/driver/run.py +++ b/driver/pace/driver/run.py @@ -5,7 +5,7 @@ import click import yaml -from pace.util import pace_log, AVAILABLE_LOG_LEVELS +from pace.util import AVAILABLE_LOG_LEVELS, pace_log from .driver import Driver, DriverConfig diff --git a/dsl/pace/dsl/caches/cache_location.py b/dsl/pace/dsl/caches/cache_location.py new file mode 100644 index 00000000..ab57a60b --- /dev/null +++ b/dsl/pace/dsl/caches/cache_location.py @@ -0,0 +1,46 @@ +from pace.dsl.caches.codepath import FV3CodePath +from pace.util import CubedSpherePartitioner 
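+# Illustration (derived from identify_code_path below and the rank diagram in
+# dace_config.py): on a 3x3 layout, tile-local ranks resolve to code paths as
+#
+#   6 7 8      TopLeft    Top     TopRight
+#   3 4 5  ->  Left       Center  Right
+#   0 1 2      BottomLeft Bottom  BottomRight
+#
+# identify_code_path is the general implementation of this mapping.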
+
+
+def identify_code_path(
+    rank: int,
+    partitioner: CubedSpherePartitioner,
+) -> FV3CodePath:
+    if partitioner.layout == (1, 1) or partitioner.layout == [1, 1]:
+        return FV3CodePath.All
+    elif partitioner.layout[0] == 1 or partitioner.layout[1] == 1:
+        raise NotImplementedError(
+            f"Build for layout {partitioner.layout} is not handled"
+        )
+    else:
+        if partitioner.tile.on_tile_bottom(rank):
+            if partitioner.tile.on_tile_left(rank):
+                return FV3CodePath.BottomLeft
+            if partitioner.tile.on_tile_right(rank):
+                return FV3CodePath.BottomRight
+            else:
+                return FV3CodePath.Bottom
+        if partitioner.tile.on_tile_top(rank):
+            if partitioner.tile.on_tile_left(rank):
+                return FV3CodePath.TopLeft
+            if partitioner.tile.on_tile_right(rank):
+                return FV3CodePath.TopRight
+            else:
+                return FV3CodePath.Top
+        else:
+            if partitioner.tile.on_tile_left(rank):
+                return FV3CodePath.Left
+            if partitioner.tile.on_tile_right(rank):
+                return FV3CodePath.Right
+            else:
+                return FV3CodePath.Center
+
+
+def get_cache_fullpath(code_path: FV3CodePath) -> str:
+    from gt4py.cartesian import config as gt_config
+
+    return f"{gt_config.cache_settings['root_path']}/.gt_cache_{code_path}"
+
+
+def get_cache_directory(code_path: FV3CodePath) -> str:
+    return f".gt_cache_{code_path}"
diff --git a/dsl/pace/dsl/caches/codepath.py b/dsl/pace/dsl/caches/codepath.py
new file mode 100644
index 00000000..8ebf9492
--- /dev/null
+++ b/dsl/pace/dsl/caches/codepath.py
@@ -0,0 +1,33 @@
+import enum
+
+
+class FV3CodePath(enum.Enum):
+    """Enum listing all possible code paths on a cube sphere.
+    For any layout the cube sphere has up to 9 different code paths depending on
+    the positioning of the rank on the tile and which of the edge/corner cases
+    it has to handle, as well as the possibility for all boundary computations in
+    the 1x1 layout case.
+    Since the framework inlines code to optimize, we _cannot_ presuppose which
+    code is kept and/or ejected. This enum serves as the ground truth to map rank
+    to the proper generated code.
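+    For example, on a 2x2 layout every rank sits on two tile edges at once, so
+    all four ranks take the corner paths (BottomLeft, BottomRight, TopLeft,
+    TopRight); layouts of 3x3 and larger also exercise the edge and Center
+    paths.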
+ """ + + All = "FV3_A" + BottomLeft = "FV3_BL" + Left = "FV3_L" + TopLeft = "FV3_TL" + Top = "FV3_T" + TopRight = "FV3_TR" + Right = "FV3_R" + BottomRight = "FV3_BR" + Bottom = "FV3_B" + Center = "FV3_C" + + def __str__(self): + return self.value + + def __repr__(self): + return self.value + + def __format__(self, format_spec: str) -> str: + return self.value diff --git a/dsl/pace/dsl/dace/build.py b/dsl/pace/dsl/dace/build.py index 7d8f3db2..b134f569 100644 --- a/dsl/pace/dsl/dace/build.py +++ b/dsl/pace/dsl/dace/build.py @@ -1,9 +1,9 @@ from typing import List, Optional, Tuple -from warnings import warn from dace.sdfg import SDFG import pace.util +from pace.dsl.caches.cache_location import get_cache_directory, get_cache_fullpath from pace.dsl.dace.dace_config import DaceConfig, DaCeOrchestration @@ -11,19 +11,6 @@ # Distributed compilation -def determine_compiling_ranks(config: DaceConfig) -> bool: - is_compiling = False - rank = config.my_rank - size = config.rank_size - - if int(size / 6) == 0: - is_compiling = True - elif rank % int(size / 6) == rank: - is_compiling = True - - return is_compiling - - def unblock_waiting_tiles(comm, sdfg_path: str) -> None: if comm and comm.Get_size() > 1: for tile in range(1, 6): @@ -31,48 +18,6 @@ def unblock_waiting_tiles(comm, sdfg_path: str) -> None: comm.send(sdfg_path, dest=tile * tilesize + comm.Get_rank()) -def get_target_rank(rank: int, partitioner: pace.util.CubedSpherePartitioner): - """From my rank & the current partitioner we determine which - rank we should read from. - For all layout >= 3,3 this presumes build has been done on a - 3,3 layout.""" - if partitioner.layout == (1, 1): - return 0 - if partitioner.layout == (2, 2): - if partitioner.tile.on_tile_bottom(rank): - if partitioner.tile.on_tile_left(rank): - return 0 # "00" - if partitioner.tile.on_tile_right(rank): - return 1 # "10" - if partitioner.tile.on_tile_top(rank): - if partitioner.tile.on_tile_left(rank): - return 2 # "01" - if partitioner.tile.on_tile_right(rank): - return 3 # "11" - else: - if partitioner.tile.on_tile_bottom(rank): - if partitioner.tile.on_tile_left(rank): - return 0 # "00" - if partitioner.tile.on_tile_right(rank): - return 2 # "20" - else: - return 1 # "10" - if partitioner.tile.on_tile_top(rank): - if partitioner.tile.on_tile_left(rank): - return 6 # "02" - if partitioner.tile.on_tile_right(rank): - return 8 # "22" - else: - return 7 # "12" - else: - if partitioner.tile.on_tile_left(rank): - return 3 # "01" - if partitioner.tile.on_tile_right(rank): - return 5 # "21" - else: - return 4 # "11" - - def build_info_filepath() -> str: return "build_info.txt" @@ -101,7 +46,10 @@ def write_build_info( def get_sdfg_path( - daceprog_name: str, config: DaceConfig, sdfg_file_path: Optional[str] = None + daceprog_name: str, + config: DaceConfig, + sdfg_file_path: Optional[str] = None, + override_run_only=False, ) -> Optional[str]: """Build an SDFG path from the qualified program name or it's direct path to .sdfg @@ -113,7 +61,7 @@ def get_sdfg_path( # TODO: check DaceConfig for cache.strategy == name # Guarding against bad usage of this function - if config.get_orchestrate() != DaCeOrchestration.Run: + if not override_run_only and config.get_orchestrate() != DaCeOrchestration.Run: return None # Case of a .sdfg file given by the user to be compiled @@ -125,19 +73,8 @@ def get_sdfg_path( return sdfg_file_path # Case of loading a precompiled .so - lookup using GT_CACHE - from gt4py.cartesian import config as gt_config - - if config.rank_size > 1: - rank = 
config.my_rank - rank_str = f"_{config.target_rank:06d}" - else: - rank = 0 - rank_str = f"_{rank:06d}" - - sdfg_dir_path = ( - f"{gt_config.cache_settings['root_path']}" - f"/.gt_cache{rank_str}/dacecache/{daceprog_name}" - ) + cache_fullpath = get_cache_fullpath(config.code_path) + sdfg_dir_path = f"{cache_fullpath}/dacecache/{daceprog_name}" if not os.path.isdir(sdfg_dir_path): raise RuntimeError(f"Precompiled SDFG is missing at {sdfg_dir_path}") @@ -153,23 +90,8 @@ def get_sdfg_path( raise RuntimeError( f"SDFG build for {build_backend}, {config._backend} has been asked" ) - # Check layout - build_layout = ast.literal_eval(build_info_file.readline()) - can_read = True - if config.layout == (1, 1) and config.layout != build_layout: - can_read = False - elif config.layout == (2, 2) and config.layout != build_layout: - can_read = False - elif ( - build_layout != (1, 1) and build_layout != (2, 2) and build_layout != (3, 3) - ): - can_read = False - if not can_read: - warn( - f"SDFG build for layout {build_layout}, " - f"cannot be run with current layout {config.layout}, bad layout?" - ) # Check resolution per tile + build_layout = ast.literal_eval(build_info_file.readline()) build_resolution = ast.literal_eval(build_info_file.readline()) if (config.tile_resolution[0] / config.layout[0]) != ( build_resolution[0] / build_layout[0] @@ -179,7 +101,7 @@ def get_sdfg_path( f"cannot be run with current resolution {config.tile_resolution}" ) - print(f"[DaCe Config] Rank {rank} loading SDFG {sdfg_dir_path}") + print(f"[DaCe Config] Rank {config.my_rank} loading SDFG {sdfg_dir_path}") return sdfg_dir_path @@ -189,33 +111,31 @@ def set_distributed_caches(config: "DaceConfig"): # Execute specific initialization per orchestration state orchestration_mode = config.get_orchestrate() + if orchestration_mode == DaCeOrchestration.Python: + return # Check that we have all the file we need to early out in case # of issues. 
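    # (Illustration, assuming a 1x1 layout: the pre-built cache is then expected
    # at <cache_root>/.gt_cache_FV3_A, with the compiled SDFG for a given program
    # under .gt_cache_FV3_A/dacecache/<program_name>, as resolved by
    # get_cache_fullpath above.)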
    if orchestration_mode == DaCeOrchestration.Run:
        import os

-        from gt4py.cartesian import config as gt_config
-
-        # Check our cache exist
-        if config.rank_size > 1:
-            rank = config.my_rank
-            target_rank_str = f"_{config.target_rank:06d}"
-        else:
-            rank = 0
-            target_rank_str = f"_{rank:06d}"
-        cache_filepath = (
-            f"{gt_config.cache_settings['root_path']}/.gt_cache{target_rank_str}"
-        )
-        if not os.path.exists(cache_filepath):
+        cache_directory = get_cache_fullpath(config.code_path)
+        if not os.path.exists(cache_directory):
             raise RuntimeError(
                 f"{orchestration_mode} error: Could not find caches for rank "
-                f"{rank} at {cache_filepath}"
+                f"{config.my_rank} at {cache_directory}"
             )

-        # All, good set this rank cache to the source cache
-        gt_config.cache_settings["dir_name"] = f".gt_cache{target_rank_str}"
-        print(
-            f"[{orchestration_mode}] Rank {rank} "
-            f"reading cache {gt_config.cache_settings['dir_name']}"
-        )
+        # Set read/write caches to the target rank
+        from gt4py.cartesian import config as gt_config
+
+        if config.do_compile:
+            verb = "reading/writing"
+        else:
+            verb = "reading"
+
+        gt_config.cache_settings["dir_name"] = get_cache_directory(config.code_path)
+        pace.util.pace_log.critical(
+            f"[{orchestration_mode}] Rank {config.my_rank} "
+            f"{verb} cache {gt_config.cache_settings['dir_name']}"
+        )
diff --git a/dsl/pace/dsl/dace/dace_config.py b/dsl/pace/dsl/dace/dace_config.py
index e6f3e7df..1bb0939e 100644
--- a/dsl/pace/dsl/dace/dace_config.py
+++ b/dsl/pace/dsl/dace/dace_config.py
@@ -1,13 +1,16 @@
 import enum
-from typing import Any, Dict, Optional
+import os
+from typing import Any, Dict, Optional, Tuple

 import dace.config
 from dace.codegen.compiled_sdfg import CompiledSDFG
 from dace.frontend.python.parser import DaceProgram

+from pace.dsl.caches.cache_location import identify_code_path
+from pace.dsl.caches.codepath import FV3CodePath
 from pace.dsl.gt4py_utils import is_gpu_backend
 from pace.util._optional_imports import cupy as cp
-from pace.util.communicator import CubedSphereCommunicator
+from pace.util.communicator import CubedSphereCommunicator, CubedSpherePartitioner

 # This can be turned on to revert compilation for orchestration
 # on a rank-by-rank basis (for debug purposes)
 DEACTIVATE_DISTRIBUTED_DACE_COMPILE = False

+def _is_corner(rank: int, partitioner: CubedSpherePartitioner) -> bool:
+    if partitioner.tile.on_tile_bottom(rank):
+        if partitioner.tile.on_tile_left(rank):
+            return True
+        if partitioner.tile.on_tile_right(rank):
+            return True
+    if partitioner.tile.on_tile_top(rank):
+        if partitioner.tile.on_tile_left(rank):
+            return True
+        if partitioner.tile.on_tile_right(rank):
+            return True
+    return False
+
+
+def _smallest_rank_bottom(x: int, y: int, layout: Tuple[int, int]):
+    return y == 0 and x == 1
+
+
+def _smallest_rank_top(x: int, y: int, layout: Tuple[int, int]):
+    return y == layout[1] - 1 and x == 1
+
+
+def _smallest_rank_left(x: int, y: int, layout: Tuple[int, int]):
+    return x == 0 and y == 1
+
+
+def _smallest_rank_right(x: int, y: int, layout: Tuple[int, int]):
+    return x == layout[0] - 1 and y == 1
+
+
+def _smallest_rank_middle(x: int, y: int, layout: Tuple[int, int]):
+    return layout[0] > 1 and layout[1] > 1 and x == 1 and y == 1
+
+
+def _determine_compiling_ranks(
+    config: "DaceConfig",
+    partitioner: CubedSpherePartitioner,
+) -> bool:
+    """
+    We try to map every layout to a 3x3 layout whose MPI ranks
+    look like
+        6 7 8
+        3 4 5
+        0 1 2
+    Using the partitioner we find a mapping of the given layout
+    onto all of those. 
For example on 4x4 layout + 12 13 14 15 + 8 9 10 11 + 4 5 6 7 + 0 1 2 3 + therefore we map + 0 -> 0 + 1 -> 1 + 2 -> NOT COMPILING + 3 -> 2 + 4 -> 3 + 5 -> 4 + 6 -> NOT COMPILING + 7 -> 5 + 8 -> NOT COMPILING + 9 -> NOT COMPILING + 10 -> NOT COMPILING + 11 -> NOT COMPILING + 12 -> 6 + 13 -> 7 + 14 -> NOT COMPILING + 15 -> 8 + """ + + # Tile 0 compiles + if partitioner.tile_index(config.my_rank) != 0: + return False + + # Corners compile + if _is_corner(config.my_rank, partitioner): + return True + + y, x = partitioner.tile.subtile_index(config.my_rank) + + # If edge or center tile, we give way to the smallest rank + return ( + _smallest_rank_left(x, y, config.layout) + or _smallest_rank_bottom(x, y, config.layout) + or _smallest_rank_middle(x, y, config.layout) + or _smallest_rank_right(x, y, config.layout) + or _smallest_rank_top(x, y, config.layout) + ) + + class DaCeOrchestration(enum.Enum): """ Orchestration mode for DaCe @@ -71,8 +163,6 @@ def __init__( # Temporary. This is a bit too out of the ordinary for the common user. # We should refactor the architecture to allow for a `gtc:orchestrated:dace:X` # backend that would signify both the `CPU|GPU` split and the orchestration mode - import os - if orchestration is None: fv3_dacemode_env_var = os.getenv("FV3_DACEMODE", "Python") # The below condition guard against defining empty FV3_DACEMODE and @@ -175,32 +265,30 @@ def __init__( # attempt to kill the dace.conf to avoid confusion if dace.config.Config._cfg_filename: try: - import os - os.remove(dace.config.Config._cfg_filename) except OSError: pass self._backend = backend self.tile_resolution = [tile_nx, tile_nx, tile_nz] - from pace.dsl.dace.build import get_target_rank, set_distributed_caches + from pace.dsl.dace.build import set_distributed_caches # Distributed build required info if communicator: self.my_rank = communicator.rank self.rank_size = communicator.comm.Get_size() - if DEACTIVATE_DISTRIBUTED_DACE_COMPILE: - self.target_rank = communicator.rank - else: - self.target_rank = get_target_rank( - self.my_rank, communicator.partitioner - ) + self.code_path = identify_code_path(self.my_rank, communicator.partitioner) self.layout = communicator.partitioner.layout + self.do_compile = ( + DEACTIVATE_DISTRIBUTED_DACE_COMPILE + or _determine_compiling_ranks(self, communicator.partitioner) + ) else: self.my_rank = 0 self.rank_size = 1 - self.target_rank = 0 + self.code_path = FV3CodePath.All self.layout = (1, 1) + self.do_compile = True set_distributed_caches(self) @@ -226,7 +314,7 @@ def get_orchestrate(self) -> DaCeOrchestration: return self._orchestrate def get_sync_debug(self) -> bool: - return dace.config.Config.get("compiler", "cuda", "syncdebug") + return dace.config.Config.get_bool("compiler", "cuda", "syncdebug") def as_dict(self) -> Dict[str, Any]: return { diff --git a/dsl/pace/dsl/dace/orchestration.py b/dsl/pace/dsl/dace/orchestration.py index 2bd9df5b..7858381a 100644 --- a/dsl/pace/dsl/dace/orchestration.py +++ b/dsl/pace/dsl/dace/orchestration.py @@ -11,12 +11,7 @@ from dace.transformation.auto.auto_optimize import make_transients_persistent from dace.transformation.helpers import get_parent_map -from pace.dsl.dace.build import ( - determine_compiling_ranks, - get_sdfg_path, - unblock_waiting_tiles, - write_build_info, -) +from pace.dsl.dace.build import get_sdfg_path, write_build_info from pace.dsl.dace.dace_config import ( DEACTIVATE_DISTRIBUTED_DACE_COMPILE, DaceConfig, @@ -34,6 +29,7 @@ memory_static_analysis, report_memory_static_analysis, ) +from pace.util import 
@@ -122,7 +118,7 @@ def _build_sdfg(
     if DEACTIVATE_DISTRIBUTED_DACE_COMPILE:
         is_compiling = True
     else:
-        is_compiling = determine_compiling_ranks(config)
+        is_compiling = config.do_compile
     if is_compiling:
         # Make the transients array persistents
         if config.is_gpu_backend():
@@ -198,12 +194,13 @@ def _build_sdfg(
             ),
         )
 
-        # Compilation done, either exit or scatter/gather and run
+        # Compilation done.
+        # On Build: all ranks sync, then exit.
+        # On BuildAndRun: all ranks sync, then load the SDFG from
+        # the expected path (made available by build).
+        # We use a "FrozenCompiledSDFG" to minimize re-entry cost at call time.
         # DEV NOTE: we explicitly use MPI.COMM_WORLD here because it is
         # a true multi-machine sync, outside of our own communicator class.
-        # Also this code is protected in the case of running on one machine by the fact
-        # that 0 is _always_ a compiling rank & unblock_waiting_tiles is protected
-        # against scattering when no other ranks are present.
         if config.get_orchestrate() == DaCeOrchestration.Build:
             MPI.COMM_WORLD.Barrier()  # Protect against early exist which kill SLURM jobs
             DaCeProgress.log(
@@ -212,31 +209,16 @@ def _build_sdfg(
             )
             exit(0)
         elif config.get_orchestrate() == DaCeOrchestration.BuildAndRun:
-            MPI.COMM_WORLD.Barrier()
-            if is_compiling:
-                if not DEACTIVATE_DISTRIBUTED_DACE_COMPILE:
-                    unblock_waiting_tiles(MPI.COMM_WORLD, sdfg.build_folder)
-                    DaCeProgress.log(
-                        DaCeProgress.default_prefix(config), "Build folder exchanged."
-                    )
-                csdfg, _ = daceprog.load_precompiled_sdfg(
-                    sdfg.build_folder, *args, **kwargs
-                )
-                config.loaded_precompiled_SDFG[daceprog] = FrozenCompiledSDFG(
-                    daceprog, csdfg, args, kwargs
-                )
-
-            else:
-                source_rank = config.target_rank
-                # wait for compilation to be done
+            if not is_compiling:
                 DaCeProgress.log(
                     DaCeProgress.default_prefix(config),
-                    "Rank is not compiling. Waiting for build dir...",
-                )
-                sdfg_path = MPI.COMM_WORLD.recv(source=source_rank)
-                DaCeProgress.log(
-                    DaCeProgress.default_prefix(config), "Build dir received, loading .so."
+                    "Rank is not compiling. "
+                    "Waiting for compilation to end on all other ranks...",
                 )
+            MPI.COMM_WORLD.Barrier()
+
+            with DaCeProgress(config, "Loading"):
+                sdfg_path = get_sdfg_path(daceprog.name, config, override_run_only=True)
                 csdfg, _ = daceprog.load_precompiled_sdfg(sdfg_path, *args, **kwargs)
                 config.loaded_precompiled_SDFG[daceprog] = FrozenCompiledSDFG(
                     daceprog, csdfg, args, kwargs
@@ -267,6 +249,7 @@ def _call_sdfg(
         config.get_orchestrate() == DaCeOrchestration.Build
         or config.get_orchestrate() == DaCeOrchestration.BuildAndRun
     ):
+        pace_log.info("Building DaCe orchestration")
         res = _build_sdfg(daceprog, sdfg, config, args, kwargs)
     elif config.get_orchestrate() == DaCeOrchestration.Run:
         # We should never hit this, it should be caught by the
@@ -302,7 +285,7 @@ def _parse_sdfg(
     if DEACTIVATE_DISTRIBUTED_DACE_COMPILE:
         is_compiling = True
     else:
-        is_compiling = determine_compiling_ranks(config)
+        is_compiling = config.do_compile
     if not is_compiling:
         # We can not parse the SDFG since we will load the proper
         # compiled SDFG from the compiling rank
@@ -448,7 +431,6 @@ def __get__(self, obj, objtype=None) -> SDFGEnabledCallable:
         """Return SDFGEnabledCallable wrapping original obj.method from cache.
         Update cache first if need be"""
         if (id(obj), id(self.func)) not in _LazyComputepathMethod.bound_callables:
-
             _LazyComputepathMethod.bound_callables[
                 (id(obj), id(self.func))
             ] = _LazyComputepathMethod.SDFGEnabledCallable(self, obj)
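
# [Editor's note] Net effect of the orchestration change above, condensed:
# the old point-to-point send of build folders is replaced by one world
# barrier plus a deterministic path lookup. A sketch under the names this
# patch uses (the helper function itself is illustrative, not part of the
# patch):
#
#     from pace.dsl.dace.build import get_sdfg_path
#     from pace.util.mpi import MPI
#
#     def _load_after_build(daceprog, config, args, kwargs):
#         # Compiling and non-compiling ranks all meet at the same barrier
#         # once the compiled caches have been written to disk.
#         MPI.COMM_WORLD.Barrier()
#         # Every rank then resolves the cache location itself instead of
#         # receiving it from a designated compiling rank.
#         sdfg_path = get_sdfg_path(daceprog.name, config, override_run_only=True)
#         return daceprog.load_precompiled_sdfg(sdfg_path, *args, **kwargs)
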
diff --git a/dsl/pace/dsl/typing.py b/dsl/pace/dsl/typing.py
index 05b255ce..d67dd7b6 100644
--- a/dsl/pace/dsl/typing.py
+++ b/dsl/pace/dsl/typing.py
@@ -22,11 +22,15 @@
 DTypes = Union[bool, np.bool_, int, np.int32, np.int64, float, np.float32, np.float64]
 
 
+def floating_point_precision() -> int:
+    return int(os.getenv("PACE_FLOAT_PRECISION", "64"))
+
+
 def global_set_floating_point_precision():
     """Set the global floating point precision for all reference
     to Float in the codebase. Defaults to 64 bit."""
     global Float
-    precision_in_bit = int(os.getenv("PACE_FLOAT_PRECISION", "64"))
+    precision_in_bit = floating_point_precision()
     if precision_in_bit == 64:
         return np.float64
     elif precision_in_bit == 32:
diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py
index 2835e77e..a7a526ee 100644
--- a/fv3core/pace/fv3core/initialization/geos_wrapper.py
+++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py
@@ -11,6 +11,8 @@
 from pace.driver.performance.collector import PerformanceCollector
 from pace.dsl.dace import DaceConfig, orchestrate
 from pace.dsl.gt4py_utils import is_gpu_backend
+from pace.dsl.typing import floating_point_precision
+from pace.util._optional_imports import cupy as cp
 from pace.util.logging import pace_log
 
 
@@ -131,13 +133,29 @@ def __init__(
         self.output_dict: Dict[str, np.ndarray] = {}
         self._allocate_output_dir()
 
+        # Feedback information
+        device_ordinal_info = (
+            f" Device PCI bus id: {cp.cuda.Device(0).pci_bus_id}\n"
+            if is_gpu_backend(backend)
+            else "N/A"
+        )
+        MPS_pipe_directory = os.getenv("CUDA_MPS_PIPE_DIRECTORY", None)
+        MPS_is_on = (
+            MPS_pipe_directory
+            and is_gpu_backend(backend)
+            and os.path.exists(f"{MPS_pipe_directory}/log")
+        )
         pace_log.info(
             "Pace GEOS wrapper initialized: \n"
-            f"            dt : {self.dycore_state.bdt}\n"
-            f"        bridge : {self._fortran_mem_space} > {self._pace_mem_space}\n"
-            f"       backend: {backend}\n"
-            f"       orchestration: {self._is_orchestrated}\n"
-            f"       sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})"
+            f"            dt : {self.dycore_state.bdt}\n"
+            f"        bridge : {self._fortran_mem_space} > {self._pace_mem_space}\n"
+            f"       backend : {backend}\n"
+            f"         float : {floating_point_precision()}bit\n"
+            f" orchestration : {self._is_orchestrated}\n"
+            f"         sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} "
+            f"(halo: {sizer.n_halo})\n"
+            f"    Device ord : {device_ordinal_info}\n"
+            f"    Nvidia MPS : {MPS_is_on}"
         )
 
     def _critical_path(self):
diff --git a/fv3core/pace/fv3core/testing/translate_dyncore.py b/fv3core/pace/fv3core/testing/translate_dyncore.py
index 510e299f..6c7da4b7 100644
--- a/fv3core/pace/fv3core/testing/translate_dyncore.py
+++ b/fv3core/pace/fv3core/testing/translate_dyncore.py
@@ -140,7 +140,7 @@ def compute_parallel(self, inputs, communicator):
         grid_data.ptop = inputs["ptop"]
         self._base.make_storage_data_input_vars(inputs)
         state = DycoreState.init_zeros(quantity_factory=self.grid.quantity_factory)
-        wsd: pace.util.Quantity = self.grid.quantity_factory.empty(
+        wsd: pace.util.Quantity = self.grid.quantity_factory.zeros(
             dims=[pace.util.X_DIM, pace.util.Y_DIM],
             units="unknown",
         )
@@ -152,7 +152,7 @@ def compute_parallel(self, inputs, communicator):
                 state[name].data[selection] = value
             else:
                 setattr(state, name, value)
-        phis: pace.util.Quantity = 
self.grid.quantity_factory.empty( + phis: pace.util.Quantity = self.grid.quantity_factory.zeros( dims=[pace.util.X_DIM, pace.util.Y_DIM], units="m", ) diff --git a/fv3core/tests/savepoint/translate/translate_remapping.py b/fv3core/tests/savepoint/translate/translate_remapping.py index fc9196b0..9a2e1f84 100644 --- a/fv3core/tests/savepoint/translate/translate_remapping.py +++ b/fv3core/tests/savepoint/translate/translate_remapping.py @@ -107,7 +107,7 @@ def compute_from_storage(self, inputs): inputs["wsd"] = wsd_2d inputs["q_cld"] = inputs["tracers"]["qcld"] inputs["last_step"] = bool(inputs["last_step"]) - pfull = self.grid.quantity_factory.empty([Z_DIM], units="Pa") + pfull = self.grid.quantity_factory.zeros([Z_DIM], units="Pa") pfull.data[:] = pfull.np.asarray(inputs.pop("pfull")) l_to_e_obj = LagrangianToEulerian( self.stencil_factory, diff --git a/requirements_dev.txt b/requirements_dev.txt index 052bf5c3..484c4948 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -10,8 +10,8 @@ zarr dask>=2021.10.0 netCDF4 cftime -fv3config>=0.9.0 dace==0.14.0 +fv3config>=0.9.0 f90nml>=1.1.0 numpy>=1.15 -e external/gt4py diff --git a/stencils/pace/stencils/testing/grid.py b/stencils/pace/stencils/testing/grid.py index 65ad8870..4cf623b1 100644 --- a/stencils/pace/stencils/testing/grid.py +++ b/stencils/pace/stencils/testing/grid.py @@ -504,7 +504,7 @@ def grid_data(self) -> "GridData": data = getattr(self, name) assert data is not None - quantity = self.quantity_factory.empty(dims=dims, units=units) + quantity = self.quantity_factory.zeros(dims=dims, units=units) if len(quantity.shape) == 3: quantity.data[:] = data[:, :, : quantity.shape[2]] elif len(quantity.shape) == 2: diff --git a/stencils/pace/stencils/testing/parallel_translate.py b/stencils/pace/stencils/testing/parallel_translate.py index dcc1e64d..f10b9b27 100644 --- a/stencils/pace/stencils/testing/parallel_translate.py +++ b/stencils/pace/stencils/testing/parallel_translate.py @@ -12,7 +12,6 @@ class ParallelTranslate: - max_error = TranslateFortranData2Py.max_error near_zero = TranslateFortranData2Py.near_zero compute_grid_option = False @@ -192,7 +191,7 @@ def state_from_inputs(self, inputs: dict, grid=None) -> dict: for name, properties in self.inputs.items(): standard_name = properties.get("name", name) if len(properties["dims"]) > 0: - state[standard_name] = grid.quantity_factory.empty( + state[standard_name] = grid.quantity_factory.zeros( properties["dims"], properties["units"], dtype=inputs[name].dtype ) input_slice = _serialize_slice( diff --git a/stencils/pace/stencils/testing/temporaries.py b/stencils/pace/stencils/testing/temporaries.py index 581387f6..2dd46663 100644 --- a/stencils/pace/stencils/testing/temporaries.py +++ b/stencils/pace/stencils/testing/temporaries.py @@ -40,10 +40,9 @@ def _assert_same_temporaries(dict1: dict, dict2: dict) -> List[str]: attr2 = dict2[attr] if isinstance(attr1, np.ndarray): try: - np.testing.assert_almost_equal( - attr1, attr2, err_msg=f"{attr} not equal" - ) - except AssertionError: + assert np.allclose(attr1, attr2, equal_nan=True) + except AssertionError as e: + print(e) differences.append(attr) else: sub_differences = _assert_same_temporaries(attr1, attr2) diff --git a/tests/main/dsl/test_caches.py b/tests/main/dsl/test_caches.py new file mode 100644 index 00000000..c1f01303 --- /dev/null +++ b/tests/main/dsl/test_caches.py @@ -0,0 +1,176 @@ +import pytest +from gt4py.cartesian.gtscript import PARALLEL, Field, computation, interval +from gt4py.storage import empty, ones + 
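
# [Editor's note] Several hunks above swap quantity_factory.empty() and
# np.empty() for zeros(). empty() returns uninitialized memory, so any read
# of never-written halo points was nondeterministic across runs; zeros()
# makes comparisons and serialized output reproducible at a small init cost.
# A quick numpy-only illustration (shapes hypothetical):
#
#     import numpy as np
#
#     a = np.empty((3, 3))  # arbitrary garbage; contents vary run to run
#     b = np.zeros((3, 3))  # deterministic initialization
#     assert (b == 0.0).all()  # always true; no equivalent holds for `a`
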
+import pace.dsl +from pace.dsl.dace import orchestrate +from pace.dsl.dace.dace_config import DaceConfig, DaCeOrchestration +from pace.dsl.stencil import CompilationConfig, GridIndexing + + +def _make_storage( + func, + grid_indexing, + stencil_config: pace.dsl.StencilConfig, + *, + dtype=float, + aligned_index=(0, 0, 0), +): + return func( + backend=stencil_config.compilation_config.backend, + shape=grid_indexing.domain, + dtype=dtype, + aligned_index=aligned_index, + ) + + +def _stencil(inp: Field[float], out: Field[float], scalar: float): + with computation(PARALLEL), interval(...): + out = inp + + +def _build_stencil(backend, orchestrated: DaCeOrchestration): + # Make stencil and verify it ran + grid_indexing = GridIndexing( + domain=(5, 5, 5), + n_halo=2, + south_edge=True, + north_edge=True, + west_edge=True, + east_edge=True, + ) + + stencil_config = pace.dsl.StencilConfig( + compilation_config=CompilationConfig(backend=backend, rebuild=True), + dace_config=DaceConfig(None, backend, 5, 5, orchestrated), + ) + + stencil_factory = pace.dsl.StencilFactory(stencil_config, grid_indexing) + + built_stencil = stencil_factory.from_origin_domain( + _stencil, (0, 0, 0), domain=grid_indexing.domain + ) + + return built_stencil, grid_indexing, stencil_config + + +class OrchestratedProgam: + def __init__(self, backend, orchestration): + self.stencil, grid_indexing, stencil_config = _build_stencil( + backend, orchestration + ) + orchestrate(obj=self, config=stencil_config.dace_config) + self.inp = _make_storage(ones, grid_indexing, stencil_config, dtype=float) + self.out = _make_storage(empty, grid_indexing, stencil_config, dtype=float) + + def __call__(self): + self.stencil(self.inp, self.out, self.inp[0, 0, 0]) + + +@pytest.mark.parametrize( + "backend", + [ + pytest.param("dace:cpu"), + ], +) +def test_relocatability_orchestration(backend): + import os + import shutil + + from gt4py.cartesian import config as gt_config + + original_root_directory = gt_config.cache_settings["root_path"] + working_dir = str(os.getcwd()) + + # Compile on default + p0 = OrchestratedProgam(backend, DaCeOrchestration.BuildAndRun) + p0() + assert os.path.exists( + f"{working_dir}/.gt_cache_FV3_A/dacecache/" + "test_caches_OrchestratedProgam___call__", + ) or os.path.exists( + f"{working_dir}/.gt_cache_FV3_A/dacecache/OrchestratedProgam___call__", + ) + + # Compile in another directory + + custom_path = f"{working_dir}/.my_cache_path" + gt_config.cache_settings["root_path"] = custom_path + p1 = OrchestratedProgam(backend, DaCeOrchestration.BuildAndRun) + p1() + assert os.path.exists( + f"{custom_path}/.gt_cache_FV3_A/dacecache/" + "test_caches_OrchestratedProgam___call__", + ) or os.path.exists( + f"{working_dir}/.gt_cache_FV3_A/dacecache/OrchestratedProgam___call__", + ) + + # Check relocability by copying the second cache directory, + # changing the path of gt_config.cache_settings and trying to Run on it + relocated_path = f"{working_dir}/.my_relocated_cache_path" + shutil.copytree(custom_path, relocated_path, dirs_exist_ok=True) + gt_config.cache_settings["root_path"] = relocated_path + p2 = OrchestratedProgam(backend, DaCeOrchestration.Run) + p2() + + # Generate a file exists error to check for bad path + bogus_path = "./nope/notatall/nothappening" + gt_config.cache_settings["root_path"] = bogus_path + with pytest.raises(RuntimeError): + OrchestratedProgam(backend, DaCeOrchestration.Run) + + # Restore cache settings + gt_config.cache_settings["root_path"] = original_root_directory + + +@pytest.mark.parametrize( 
+ "backend", + [ + pytest.param("dace:cpu"), + ], +) +def test_relocatability(backend: str): + import os + import shutil + + import gt4py + from gt4py.cartesian import config as gt_config + + from pace.util.mpi import MPI + + # Restore original dir name + gt4py.cartesian.config.cache_settings["dir_name"] = os.environ.get( + "GT_CACHE_DIR_NAME", f".gt_cache_{MPI.COMM_WORLD.Get_rank():06}" + ) + + backend_sanitized = backend.replace(":", "") + + # Compile on default + p0 = OrchestratedProgam(backend, DaCeOrchestration.Python) + p0() + assert os.path.exists( + f"./.gt_cache_000000/py38_1013/{backend_sanitized}/test_caches/_stencil/" + ) + + # Compile in another directory + + custom_path = "./.my_cache_path" + gt_config.cache_settings["root_path"] = custom_path + p1 = OrchestratedProgam(backend, DaCeOrchestration.Python) + p1() + assert os.path.exists( + f"{custom_path}/.gt_cache_000000/py38_1013/{backend_sanitized}" + "/test_caches/_stencil/" + ) + + # Check relocability by copying the second cache directory, + # changing the path of gt_config.cache_settings and trying to Run on it + relocated_path = "./.my_relocated_cache_path" + shutil.copytree("./.gt_cache_000000", relocated_path, dirs_exist_ok=True) + gt_config.cache_settings["root_path"] = relocated_path + p2 = OrchestratedProgam(backend, DaCeOrchestration.Python) + p2() + assert os.path.exists( + f"{relocated_path}/.gt_cache_000000/py38_1013/{backend_sanitized}" + "/test_caches/_stencil/" + ) diff --git a/tests/main/dsl/test_dace_config.py b/tests/main/dsl/test_dace_config.py index 78553278..cb3566dd 100644 --- a/tests/main/dsl/test_dace_config.py +++ b/tests/main/dsl/test_dace_config.py @@ -1,11 +1,12 @@ import unittest.mock -from pace.dsl.dace.dace_config import DaceConfig +from pace.dsl.dace.dace_config import DaceConfig, _determine_compiling_ranks from pace.dsl.dace.orchestration import ( DaCeOrchestration, orchestrate, orchestrate_function, ) +from pace.util.communicator import CubedSpherePartitioner, TilePartitioner """ @@ -91,3 +92,68 @@ def foo(self): a = A() a.foo() assert not mock_call_sdfg.called + + +def test_orchestrate_distributed_build(): + dummy_dace_config = DaceConfig( + communicator=None, + backend="gtc:dace", + orchestration=DaCeOrchestration.BuildAndRun, + ) + + def _does_compile(rank, partitioner) -> bool: + dummy_dace_config.layout = partitioner.layout + dummy_dace_config.rank_size = partitioner.layout[0] * partitioner.layout[1] * 6 + dummy_dace_config.my_rank = rank + return _determine_compiling_ranks(dummy_dace_config, partitioner) + + # (1, 1) layout, one rank which compiles + cube_partitioner_11 = CubedSpherePartitioner(TilePartitioner((1, 1))) + assert _does_compile(0, cube_partitioner_11) + assert not _does_compile(1, cube_partitioner_11) # not compiling face + + # (2, 2) layout, 4 ranks, all compiling + cube_partitioner_22 = CubedSpherePartitioner(TilePartitioner((2, 2))) + assert _does_compile(0, cube_partitioner_22) + assert _does_compile(1, cube_partitioner_22) + assert _does_compile(2, cube_partitioner_22) + assert _does_compile(3, cube_partitioner_22) + assert not _does_compile(4, cube_partitioner_22) # not compiling face + + # (3, 3) layout, 9 ranks, all compiling + cube_partitioner_33 = CubedSpherePartitioner(TilePartitioner((3, 3))) + assert _does_compile(0, cube_partitioner_33) + assert _does_compile(1, cube_partitioner_33) + assert _does_compile(2, cube_partitioner_33) + assert _does_compile(3, cube_partitioner_33) + assert _does_compile(4, cube_partitioner_33) + assert _does_compile(5, 
cube_partitioner_33) + assert _does_compile(6, cube_partitioner_33) + assert _does_compile(7, cube_partitioner_33) + assert _does_compile(8, cube_partitioner_33) + assert not _does_compile(9, cube_partitioner_33) # not compiling face + + # (4, 4) layout, 16 ranks, + # expecting compiling:0, 1, 2, 3, 4, 5, 7, 12, 13, 15 + cube_partitioner_44 = CubedSpherePartitioner(TilePartitioner((4, 4))) + assert _does_compile(0, cube_partitioner_44) + assert _does_compile(1, cube_partitioner_44) + assert _does_compile(4, cube_partitioner_44) + assert _does_compile(5, cube_partitioner_44) + assert _does_compile(7, cube_partitioner_44) + assert _does_compile(12, cube_partitioner_44) + assert _does_compile(13, cube_partitioner_44) + assert _does_compile(15, cube_partitioner_44) + assert not _does_compile(2, cube_partitioner_44) # same code path as 3 + assert not _does_compile(6, cube_partitioner_44) # same code path as 5 + assert not _does_compile(8, cube_partitioner_44) # same code path as 4 + assert not _does_compile(11, cube_partitioner_44) # same code path as 7 + assert not _does_compile(16, cube_partitioner_44) # not compiling face + + # For a few other layouts, we check that we always have 9 compiling ranks + for layout in [(5, 5), (10, 10), (20, 20)]: + partition = CubedSpherePartitioner(TilePartitioner(layout)) + compiling = 0 + for i in range(layout[0] * layout[1] * 6): + compiling += 1 if _does_compile(i, partition) else 0 + assert compiling == 9 diff --git a/tests/main/test_grid_init.py b/tests/main/test_grid_init.py index 942dcfd3..4c0e5e2b 100644 --- a/tests/main/test_grid_init.py +++ b/tests/main/test_grid_init.py @@ -51,8 +51,6 @@ def test_grid_init_not_decomposition_dependent(rank: int): assert allclose(metric_terms_1by1.area, metric_terms_3by3.area, partitioner, rank) assert allclose(metric_terms_1by1.dx, metric_terms_3by3.dx, partitioner, rank) assert allclose(metric_terms_1by1.dy, metric_terms_3by3.dy, partitioner, rank) - assert allclose(metric_terms_1by1.dxa, metric_terms_3by3.dxa, partitioner, rank) - assert allclose(metric_terms_1by1.dya, metric_terms_3by3.dya, partitioner, rank) assert allclose( metric_terms_1by1.cos_sg1, metric_terms_3by3.cos_sg1, partitioner, rank ) diff --git a/util/pace/util/__init__.py b/util/pace/util/__init__.py index 54f684d6..58a7c2a5 100644 --- a/util/pace/util/__init__.py +++ b/util/pace/util/__init__.py @@ -54,7 +54,7 @@ from .initialization import GridSizer, QuantityFactory, SubtileGridSizer from .io import read_state, write_state from .local_comm import LocalComm -from .logging import pace_log, AVAILABLE_LOG_LEVELS +from .logging import AVAILABLE_LOG_LEVELS, pace_log from .monitor import Monitor, NetCDFMonitor, ZarrMonitor from .mpi import MPIComm from .namelist import Namelist, NamelistDefaults diff --git a/util/pace/util/communicator.py b/util/pace/util/communicator.py index 938469bd..d2577d8c 100644 --- a/util/pace/util/communicator.py +++ b/util/pace/util/communicator.py @@ -167,7 +167,7 @@ def _get_gather_recv_quantity( ) -> Quantity: """Initialize a Quantity for use when receiving global data during gather""" recv_quantity = Quantity( - send_metadata.np.empty(global_extent, dtype=send_metadata.dtype), + send_metadata.np.zeros(global_extent, dtype=send_metadata.dtype), dims=send_metadata.dims, units=send_metadata.units, origin=tuple([0 for dim in send_metadata.dims]), @@ -182,7 +182,7 @@ def _get_scatter_recv_quantity( ) -> Quantity: """Initialize a Quantity for use when receiving subtile data during scatter""" recv_quantity = Quantity( - 
send_metadata.np.empty(shape, dtype=send_metadata.dtype), + send_metadata.np.zeros(shape, dtype=send_metadata.dtype), dims=send_metadata.dims, units=send_metadata.units, gt4py_backend=send_metadata.gt4py_backend, @@ -206,7 +206,7 @@ def gather( result: Optional[Quantity] if self.rank == constants.ROOT_RANK: with array_buffer( - send_quantity.np.empty, + send_quantity.np.zeros, (self.partitioner.total_ranks,) + tuple(send_quantity.extent), dtype=send_quantity.data.dtype, ) as recvbuf: @@ -745,7 +745,7 @@ def _get_gather_recv_quantity( # needs to change the quantity dimensions since we add a "tile" dimension, # unlike for tile scatter/gather which retains the same dimensions recv_quantity = Quantity( - metadata.np.empty(global_extent, dtype=metadata.dtype), + metadata.np.zeros(global_extent, dtype=metadata.dtype), dims=(constants.TILE_DIM,) + metadata.dims, units=metadata.units, origin=(0,) + tuple([0 for dim in metadata.dims]), @@ -767,7 +767,7 @@ def _get_scatter_recv_quantity( # needs to change the quantity dimensions since we remove a "tile" dimension, # unlike for tile scatter/gather which retains the same dimensions recv_quantity = Quantity( - metadata.np.empty(shape, dtype=metadata.dtype), + metadata.np.zeros(shape, dtype=metadata.dtype), dims=metadata.dims[1:], units=metadata.units, gt4py_backend=metadata.gt4py_backend, diff --git a/util/pace/util/grid/eta.py b/util/pace/util/grid/eta.py index 50afaadb..dc37aaa2 100644 --- a/util/pace/util/grid/eta.py +++ b/util/pace/util/grid/eta.py @@ -206,7 +206,6 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: ) elif km == 91: - ak = np.array( [ 1.00000000, @@ -402,7 +401,6 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: ) elif km == 72: - ak = np.array( [ 1.00000000, @@ -559,10 +557,299 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: ] ) + elif km == 137: + ak = np.array( + [ + 1.00000000, + 1.82500005, + 3.00000000, + 4.63000011, + 6.82797718, + 9.74696636, + 13.6054239, + 18.6089306, + 24.9857178, + 32.9857101, + 42.8792419, + 54.9554634, + 69.5205765, + 86.8958817, + 107.415741, + 131.425507, + 159.279404, + 191.338562, + 227.968948, + 269.539581, + 316.420746, + 368.982361, + 427.592499, + 492.616028, + 564.413452, + 643.339905, + 729.744141, + 823.967834, + 926.344910, + 1037.20117, + 1156.85364, + 1285.61035, + 1423.77014, + 1571.62292, + 1729.44897, + 1897.51929, + 2076.09595, + 2265.43164, + 2465.77051, + 2677.34814, + 2900.39136, + 3135.11938, + 3381.74365, + 3640.46826, + 3911.49048, + 4194.93066, + 4490.81738, + 4799.14941, + 5119.89502, + 5452.99072, + 5798.34473, + 6156.07422, + 6526.94678, + 6911.87061, + 7311.86914, + 7727.41211, + 8159.35400, + 8608.52539, + 9076.40039, + 9562.68262, + 10065.9785, + 10584.6318, + 11116.6621, + 11660.0674, + 12211.5479, + 12766.8730, + 13324.6689, + 13881.3311, + 14432.1396, + 14975.6152, + 15508.2568, + 16026.1152, + 16527.3223, + 17008.7891, + 17467.6133, + 17901.6211, + 18308.4336, + 18685.7188, + 19031.2891, + 19343.5117, + 19620.0430, + 19859.3906, + 20059.9316, + 20219.6641, + 20337.8633, + 20412.3086, + 20442.0781, + 20425.7188, + 20361.8164, + 20249.5117, + 20087.0859, + 19874.0254, + 19608.5723, + 19290.2266, + 18917.4609, + 18489.7070, + 18006.9258, + 17471.8398, + 16888.6875, + 16262.0469, + 15596.6953, + 14898.4531, + 14173.3242, + 13427.7695, + 12668.2578, + 11901.3398, + 11133.3047, + 10370.1758, + 9617.51562, + 8880.45312, + 8163.37500, + 7470.34375, + 6804.42188, + 6168.53125, + 
5564.38281, + 4993.79688, + 4457.37500, + 3955.96094, + 3489.23438, + 3057.26562, + 2659.14062, + 2294.24219, + 1961.50000, + 1659.47656, + 1387.54688, + 1143.25000, + 926.507812, + 734.992188, + 568.062500, + 424.414062, + 302.476562, + 202.484375, + 122.101562, + 62.7812500, + 22.8359375, + 3.75781298, + 0.00000000, + 0.00000000, + ] + ) + + bk = np.array( + [ + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 0.00000000, + 7.00000010e-06, + 2.40000008e-05, + 5.90000018e-05, + 1.12000002e-04, + 1.99000002e-04, + 3.39999999e-04, + 5.61999972e-04, + 8.90000025e-04, + 1.35300006e-03, + 1.99200003e-03, + 2.85700010e-03, + 3.97100020e-03, + 5.37799997e-03, + 7.13300006e-03, + 9.26099997e-03, + 1.18060000e-02, + 1.48160001e-02, + 1.83179993e-02, + 2.23549996e-02, + 2.69639995e-02, + 3.21759991e-02, + 3.80260013e-02, + 4.45480011e-02, + 5.17730005e-02, + 5.97280003e-02, + 6.84479997e-02, + 7.79580027e-02, + 8.82859975e-02, + 9.94620025e-02, + 0.111505002, + 0.124448001, + 0.138312995, + 0.153125003, + 0.168909997, + 0.185689002, + 0.203491002, + 0.222332999, + 0.242244005, + 0.263242006, + 0.285353988, + 0.308598012, + 0.332938999, + 0.358253986, + 0.384362996, + 0.411125004, + 0.438391000, + 0.466003001, + 0.493800014, + 0.521619022, + 0.549301028, + 0.576691985, + 0.603648007, + 0.630035996, + 0.655736029, + 0.680643022, + 0.704668999, + 0.727738976, + 0.749796987, + 0.770798028, + 0.790717006, + 0.809535980, + 0.827256024, + 0.843881011, + 0.859431982, + 0.873929024, + 0.887408018, + 0.899900019, + 0.911448002, + 0.922096014, + 0.931881011, + 0.940859973, + 0.949064016, + 0.956550002, + 0.963352025, + 0.969512999, + 0.975077987, + 0.980072021, + 0.984542012, + 0.988499999, + 0.991984010, + 0.995002985, + 0.997630000, + 1.00000000, + ] + ) + else: raise NotImplementedError( - "Only grids with 72, 79, or 91 vertical levels have been implemented so far" + "Only grids with 72, 79, 91 or 137 vertical levels" + "have been implemented so far" ) + if 0.0 in bk: ks = 0 if km == 91 else np.where(bk == 0)[0][-1] ptop = ak[0] diff --git a/util/pace/util/grid/generation.py b/util/pace/util/grid/generation.py index b78a7059..679b9449 100644 --- a/util/pace/util/grid/generation.py +++ b/util/pace/util/grid/generation.py @@ -75,7 +75,7 @@ def quantity_cast_to_model_float( quantity_factory: util.QuantityFactory, qty_64: util.Quantity ) -> util.Quantity: """Copy & cast from 64-bit float to model precision if need be""" - qty = quantity_factory.empty(qty_64.dims, qty_64.units, dtype=Float) + qty = quantity_factory.zeros(qty_64.dims, qty_64.units, dtype=Float) qty.data[:] = qty_64.data[:] return qty @@ -1530,7 +1530,6 @@ def rdyc(self) -> util.Quantity: ) def _init_dgrid(self): - grid_mirror_ew = self.quantity_factory.zeros( self._grid_dims, "radians", @@ -1751,7 +1750,6 @@ def _compute_dxdy(self): return dx, dy def 
_compute_dxdy_agrid(self): - dx_agrid_64 = self.quantity_factory.zeros( [util.X_DIM, util.Y_DIM], "m", @@ -2149,7 +2147,6 @@ def _calculate_more_trig_terms(self, cos_sg, sin_sg): ) def _init_cell_trigonometry(self): - cosa_u_64 = self.quantity_factory.zeros( [util.X_INTERFACE_DIM, util.Y_DIM], "", diff --git a/util/pace/util/grid/gnomonic.py b/util/pace/util/grid/gnomonic.py index 705014e4..f26af0f2 100644 --- a/util/pace/util/grid/gnomonic.py +++ b/util/pace/util/grid/gnomonic.py @@ -303,9 +303,9 @@ def _mirror_latlon(lon1, lat1, lon2, lat2, lon0, lat0, np): pdot = p0[0] * nb[0] + p0[1] * nb[1] + p0[2] * nb[2] pp = p0 - np.multiply(2.0, pdot) * nb - lon3 = np.empty((1, 1)) - lat3 = np.empty((1, 1)) - pp3 = np.empty((3, 1, 1)) + lon3 = np.zeros((1, 1)) + lat3 = np.zeros((1, 1)) + pp3 = np.zeros((3, 1, 1)) pp3[:, 0, 0] = pp _cart_to_latlon(1, pp3, lon3, lat3, np) diff --git a/util/pace/util/grid/helper.py b/util/pace/util/grid/helper.py index 673e484d..1b977ad8 100644 --- a/util/pace/util/grid/helper.py +++ b/util/pace/util/grid/helper.py @@ -166,8 +166,8 @@ def from_restart( but no fv_core.res.nc in restart data file.""" ) - ak = quantity_factory.empty([Z_INTERFACE_DIM], units="Pa") - bk = quantity_factory.empty([Z_INTERFACE_DIM], units="") + ak = quantity_factory.zeros([Z_INTERFACE_DIM], units="Pa") + bk = quantity_factory.zeros([Z_INTERFACE_DIM], units="") with fs.open(ak_bk_data_file, "rb") as f: ds = xr.open_dataset(f).isel(Time=0).drop_vars("Time") ak.view[:] = ds["ak"].values @@ -322,7 +322,6 @@ def __init__( @classmethod def new_from_metric_terms(cls, metric_terms: MetricTerms): - horizontal_data = HorizontalGridData.new_from_metric_terms(metric_terms) vertical_data = VerticalGridData.new_from_metric_terms(metric_terms) contravariant_data = ContravariantGridData.new_from_metric_terms(metric_terms) @@ -701,7 +700,6 @@ def new_from_grid_variables( es1: pace.util.Quantity, ew2: pace.util.Quantity, ) -> "DriverGridData": - try: vlon1, vlon2, vlon3 = split_quantity_along_last_dim(vlon) vlat1, vlat2, vlat3 = split_quantity_along_last_dim(vlat) diff --git a/util/pace/util/halo_data_transformer.py b/util/pace/util/halo_data_transformer.py index 00a547d6..e97bb97a 100644 --- a/util/pace/util/halo_data_transformer.py +++ b/util/pace/util/halo_data_transformer.py @@ -70,7 +70,7 @@ def _build_flatten_indices( """ # Have to go down to numpy to leverage indices calculation - arr_indices = np.empty(shape, dtype=np.int32, order="C")[slices] + arr_indices = np.zeros(shape, dtype=np.int32, order="C")[slices] # Get offset from first index offset_dims = [] @@ -875,7 +875,6 @@ def _opt_unpack_scalar(self, quantities: List[Quantity]): # Use private stream with self._get_stream(cu_kernel_args.stream): - # Launch kernel blocks = 128 grid_x = (info_x._unpack_buffer_size // blocks) + 1 @@ -942,7 +941,6 @@ def _opt_unpack_vector( # Use private stream with self._get_stream(cu_kernel_args.stream): - # Buffer sizes edge_size = info_x._unpack_buffer_size + info_y._unpack_buffer_size diff --git a/util/pace/util/initialization/allocator.py b/util/pace/util/initialization/allocator.py index c865cbbf..1a68495e 100644 --- a/util/pace/util/initialization/allocator.py +++ b/util/pace/util/initialization/allocator.py @@ -102,7 +102,7 @@ def from_array( That numpy array must correspond to the correct shape and extent for the given dims. """ - base = self.empty( + base = self.zeros( dims=dims, units=units, dtype=data.dtype,
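
# [Editor's note] With the allocator change above, every user-facing
# construction path now starts from deterministic memory. A condensed sketch
# of what QuantityFactory.from_array does after this patch -- assuming, as
# the hunk suggests, that the caller's values are copied over the compute
# domain (the body beyond the allocation is paraphrased, not quoted):
#
#     def from_array(self, data, dims, units):
#         base = self.zeros(dims=dims, units=units, dtype=data.dtype)
#         # halos start at zero instead of whatever was in memory;
#         # the caller-provided array fills the compute domain
#         base.view[:] = data
#         return base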