From 0cdba14782dca7bf54e6e7da6a4bf50ddcb94032 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Fri, 15 Sep 2023 10:01:10 -0400 Subject: [PATCH] [NASA:Update] Update GT4Py and DaCe + submoduling of DaCe (#21) * Initialize GeosDycoreWrapper with bdt (timestep) * Use GEOS version of constants * 1. Add qcld to the list of tracers beings advected 2. Made GEOS specific changes to thresholds in saturation adjustment * Accumulate diss_est * Allow GEOS_WRAPPER to process device data * Add clear to collector for 3rd party use. GEOS pass down timings to caller * Make kernel analysis run a copy stencil to compute local bandwith Parametrize tool with backend, output format * Move constant on a env var Add saturation adjustement threshold to const * lint * More linting * Remove unused if leading to empty code block * Restrict dace to 0.14.1 due to a parsing bug * Add guard for bdt==0 Fix bad merge for bdt with GEOS_Wrapper * Remove unused code * Fix theroritical timings Lint * Fixed a bug where pkz was being calculated twice, and the second calc was wrong * Downgrade DaCe to 0.14.0 pending array aliasing fix * Set default cache path for orchestrated DaCe to respect GT_CACHE_* env * Remove previous per stencil override of default_build_folder * Revert "Set default cache path for orchestrated DaCe to respect GT_CACHE_* env" This reverts commit 4fc5b4dfb82c5e702b66881e3c39f8f49878a0a1. * Revert "Remove previous per stencil override of default_build_folder" This reverts commit 22450279075e746f44a1b6afce7a05b437b1f35d. * Read cache_root in default dace backend * Document faulty behavior with GT_CACHE_DIR_NAME * Fix bad requirements syntax * Check for the string value of CONST_VERSION directly instead of enum * Protect constant selection more rigorusly. Clean abort on unknown constant given * Log constants selection * Refactor NQ to constants.py * Replace all logger with pace_log Introduce PACE_LOGLEVEL to control log level from outside * Code guidelines clean up * Devops/GitHub actions on (#15) * Linting on PR * Run main unit test * Update python to available 3.8.12 * Remove cd to pace * Lint: git submodule recursive * Typo * Add openmpi to the image * Linting * Fix unit tests (remove dxa, dya rely on halo ex) * typo * Change name of jobs * Distributed compilation on orchestrated backend for NxN layouts (#14) * Adapt orchestration distribute compile for NxN layout * Remove debug code * Add a more descriptive string base postfix for cache naming Identify the code path for all cases Consistent reload post-compile Create a central space for all caches generation logic No more original layout check required * Add a test on caches relocatability * Verbose todo * Linting on PR * Run main unit test * Update python to available 3.8.12 * Remove cd to pace * Lint: git submodule recursive * Typo * Add openmpi to the image * Linting * Fix unit tests (remove dxa, dya rely on halo ex) * typo * Change name of jobs * Missing enum * Lint imports * Fix unit tests * Deactivate relocability test due to Python crash Logged as issyue 16 * Typo * Raise for 1,X and X,1 layouts which requires a new descriptor * Added ak, bk for 137 levels in eta.py * Add floating point precision to GEOS bridge init * lint * Add device PCI bus id (for MPS debug) * Typo + lint * Try to detect MPS reading the "log" pipe * Lint * Clean up * Log info GEOS bridge (#18) * Add floating point precision to GEOS bridge init * lint * Add device PCI bus id (for MPS debug) * Typo + lint * Try to detect MPS reading the "log" pipe * Lint * Clean up * Update geos/develop to grab NOAA PR9 results (#21) * Verbose choice of block/grid size * added build script for c5 * updated repo to NOAA * GEOS integration (#9) * Initialize GeosDycoreWrapper with bdt (timestep) * Use GEOS version of constants * 1. Add qcld to the list of tracers beings advected 2. Made GEOS specific changes to thresholds in saturation adjustment * Accumulate diss_est * Allow GEOS_WRAPPER to process device data * Add clear to collector for 3rd party use. GEOS pass down timings to caller * Make kernel analysis run a copy stencil to compute local bandwith Parametrize tool with backend, output format * Move constant on a env var Add saturation adjustement threshold to const * Remove unused if leading to empty code block * Restrict dace to 0.14.1 due to a parsing bug * Add guard for bdt==0 Fix bad merge for bdt with GEOS_Wrapper * Remove unused code * Fix theroritical timings * Fixed a bug where pkz was being calculated twice, and the second calc was wrong * Downgrade DaCe to 0.14.0 pending array aliasing fix * Set default cache path for orchestrated DaCe to respect GT_CACHE_* env * Remove previous per stencil override of default_build_folder * Revert "Set default cache path for orchestrated DaCe to respect GT_CACHE_* env" * Revert "Remove previous per stencil override of default_build_folder" * Read cache_root in default dace backend * Document faulty behavior with GT_CACHE_DIR_NAME * Fix bad requirements syntax * Check for the string value of CONST_VERSION directly instead of enum * Protect constant selection more rigorusly. Clean abort on unknown constant given * Log constants selection * Refactor NQ to constants.py * Fix or explain inlined import * Verbose runtime error when bad dt_atmos * Verbose warm up * re-initialize heat_source and diss_est each call, add do_skeb check to accumulation --------- Co-authored-by: Purnendu Chakraborty Co-authored-by: Oliver Elbert --------- Co-authored-by: Rusty Benson <6594772+bensonr@users.noreply.github.com> Co-authored-by: Oliver Elbert Co-authored-by: Purnendu Chakraborty Co-authored-by: Oliver Elbert * [NOAA:Update] Bring back #15 & doubly periodic domain (#25) * Feature/dp driver (#13) * initial commit * adding test config * adding the rest of driver and util code * updating history.md * move u_max to dycore config * uncomment assert * added comment explaining the copy of grid type to dycore config * Turn main unit test & lint on PR, logger clean up [NASA:Update] (#15) * Initialize GeosDycoreWrapper with bdt (timestep) * Use GEOS version of constants * 1. Add qcld to the list of tracers beings advected 2. Made GEOS specific changes to thresholds in saturation adjustment * Accumulate diss_est * Allow GEOS_WRAPPER to process device data * Add clear to collector for 3rd party use. GEOS pass down timings to caller * Make kernel analysis run a copy stencil to compute local bandwith Parametrize tool with backend, output format * Move constant on a env var Add saturation adjustement threshold to const * Restrict dace to 0.14.1 due to a parsing bug * Add guard for bdt==0 * Fix theroritical timings * Fixed a bug where pkz was being calculated twice, and the second calc was wrong * Downgrade DaCe to 0.14.0 pending array aliasing fix * Set default cache path for orchestrated DaCe to respect GT_CACHE_* env * Remove previous per stencil override of default_build_folder * Revert "Set default cache path for orchestrated DaCe to respect GT_CACHE_* env" * Read cache_root in default dace backend * Document faulty behavior with GT_CACHE_DIR_NAME * Check for the string value of CONST_VERSION directly instead of enum * Protect constant selection more rigorusly. Clean abort on unknown constant given * Log constants selection * Refactor NQ to constants.py * Introduce PACE_LOGLEVEL to control log level from outside * Code guidelines clean up * Devops/GitHub actions on (#15) * Linting on PR * Run main unit test * Update python to available 3.8.12 * Fix unit tests (remove dxa, dya rely on halo ex) * Update HISTORY.md * Adapt log_level in driver.run * Verbose the PACE_CONSTANTS * Doc log level hierarchical nature --------- Co-authored-by: Purnendu Chakraborty Co-authored-by: Purnendu Chakraborty * Lint --------- Co-authored-by: Oliver Elbert Co-authored-by: Purnendu Chakraborty Co-authored-by: Purnendu Chakraborty * Update gt4py, dace, cleanup (#19) * Update gt4py to top of master on June 21 * Update DaCe to 0.14.2 Workaround aliasing issue in FiniteVolumeTransport * Fix to gt4py storage * Downgrade to dace 0.14.1 * DaCe to 0.14.4 Orchestrating NonHydrostaticPressureGradient Adptating code to newer gt4py * Regenerate constraints.txt * Default constants to GFS Fix snapshot for GPU runs Lint on ETA Fix log level * Remove `daint_venv` submodule * Adding dace as a submodule Removing buildenv as a submodule * Update gt4py to latest master * Skip ConstantPropagation during `Simplify` * Remove buidlenv * Update requirements_dev.txt * Add editable util to requirements_dev.txt * lint * scipy for tests is now needed * Pin `DaCe` to pace-fixes-0 merge * Remove logging setup in test_translate * Make cupy import robust to device not being available * Fix to GEOS bridge MPS detection * Up gt4py to August 14th EOD: - Hip/ROCm - New allocators * DaCE module: swap SSH for HTTPS (#26) * GEOS GridTools stencils build override (#27) * Stencil build override for GEOS * Deactivate warnings if PACE_LOGLEVEL is > WARNING * Better log level * Bad merge (again) * Update fv3core/pace/fv3core/initialization/geos_wrapper.py Co-authored-by: Oliver Elbert * FVTP2D: somewhat better workaround --------- Co-authored-by: Purnendu Chakraborty Co-authored-by: Purnendu Chakraborty Co-authored-by: Rusty Benson <6594772+bensonr@users.noreply.github.com> Co-authored-by: Oliver Elbert Co-authored-by: Oliver Elbert --- .gitmodules | 6 +- buildenv | 1 - constraints.txt | 107 ++++++--------- dsl/pace/dsl/dace/build.py | 2 +- dsl/pace/dsl/dace/orchestration.py | 22 ++- dsl/pace/dsl/dace/sdfg_opt_passes.py | 2 +- external/dace | 1 + external/daint_venv/.gitignore | 129 ------------------ external/daint_venv/LICENSE.txt | 25 ---- external/daint_venv/README.md | 6 - external/daint_venv/install.sh | 50 ------- external/gt4py | 2 +- .../fv3core/initialization/geos_wrapper.py | 84 ++++++++++-- fv3core/pace/fv3core/stencils/fvtp2d.py | 56 +++++--- fv3core/pace/fv3core/stencils/nh_p_grad.py | 6 + requirements_dev.txt | 5 +- .../pace/stencils/testing/test_translate.py | 3 - util/pace/util/_optional_imports.py | 8 ++ util/pace/util/checkpointer/snapshots.py | 3 + util/pace/util/constants.py | 2 +- 20 files changed, 189 insertions(+), 331 deletions(-) delete mode 160000 buildenv create mode 160000 external/dace delete mode 100644 external/daint_venv/.gitignore delete mode 100644 external/daint_venv/LICENSE.txt delete mode 100644 external/daint_venv/README.md delete mode 100755 external/daint_venv/install.sh diff --git a/.gitmodules b/.gitmodules index 8a910e07..60de021d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "external/gt4py"] path = external/gt4py url = https://github.com/gridtools/gt4py.git -[submodule "buildenv"] - path = buildenv - url = https://github.com/ai2cm/buildenv.git +[submodule "external/dace"] + path = external/dace + url = https://github.com/spcl/dace.git diff --git a/buildenv b/buildenv deleted file mode 160000 index ab796639..00000000 --- a/buildenv +++ /dev/null @@ -1 +0,0 @@ -Subproject commit ab7966398258ba924761558c87c94ea8f55fb496 diff --git a/constraints.txt b/constraints.txt index dd122d08..4fafcc59 100644 --- a/constraints.txt +++ b/constraints.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.8 # by the following command: # -# pip-compile --output-file=constraints.txt driver/setup.py dsl/setup.py external/gt4py/setup.cfg fv3core/setup.py physics/setup.py requirements_dev.txt requirements_docs.txt requirements_lint.txt stencils/setup.py util/requirements.txt util/setup.py +# pip-compile --output-file=constraints.txt driver/setup.py dsl/setup.py fv3core/setup.py physics/setup.py requirements_dev.txt requirements_docs.txt requirements_lint.txt stencils/setup.py util/requirements.txt util/setup.py # aenum==3.1.11 # via dace @@ -21,36 +21,31 @@ asttokens==2.0.5 # devtools # stack-data astunparse==1.6.3 - # via dace + # via + # dace + # gt4py async-timeout==3.0.1 # via aiohttp attrs==22.1.0 # via # aiohttp # gt4py - # gt4py (external/gt4py/setup.cfg) # jsonschema # pytest babel==2.9.1 # via sphinx backcall==0.2.0 # via ipython -backports.entry-points-selectable==1.1.1 +backports-entry-points-selectable==1.1.1 # via virtualenv black==22.3.0 - # via - # gt4py - # gt4py (external/gt4py/setup.cfg) + # via gt4py boltons==21.0.0 - # via - # gt4py - # gt4py (external/gt4py/setup.cfg) + # via gt4py bump2version==1.0.1 # via -r util/requirements.txt cached-property==1.5.2 - # via - # gt4py - # gt4py (external/gt4py/setup.cfg) + # via gt4py cachetools==4.2.2 # via google-auth certifi==2021.5.30 @@ -74,20 +69,19 @@ click==8.0.1 # black # flask # gt4py - # gt4py (external/gt4py/setup.cfg) cloudpickle==2.0.0 # via dask +cmake==3.26.4 + # via gt4py commonmark==0.9.1 # via recommonmark coverage==5.5 # via # -r util/requirements.txt # pytest-cov -cytoolz==0.11.2 - # via - # gt4py - # gt4py (external/gt4py/setup.cfg) -dace==0.14.0 +cytoolz==0.12.1 + # via gt4py +dace==0.14.4 # via # -r requirements_dev.txt # pace-dsl @@ -109,13 +103,9 @@ decorator==5.0.9 # gcsfs # ipython deepdiff==6.2.1 - # via - # gt4py - # gt4py (external/gt4py/setup.cfg) + # via gt4py devtools==0.8.0 - # via - # gt4py - # gt4py (external/gt4py/setup.cfg) + # via gt4py dill==0.3.5.1 # via dace distlib==0.3.2 @@ -155,9 +145,7 @@ flake8==3.8.4 flask==2.1.2 # via dace frozendict==2.3.4 - # via - # gt4py - # gt4py (external/gt4py/setup.cfg) + # via gt4py fsspec==2021.7.0 # via # dask @@ -196,10 +184,8 @@ googleapis-common-protos==1.53.0 # via google-api-core gprof2dot==2021.2.21 # via pytest-profiling -gridtools-cpp==2.2.2 - # via - # gt4py - # gt4py (external/gt4py/setup.cfg) +gridtools-cpp==2.3.0 + # via gt4py h5netcdf==0.11.0 # via -r util/requirements.txt h5py==2.10.0 @@ -217,7 +203,9 @@ imagesize==1.2.0 importlib-metadata==4.11.3 # via flask importlib-resources==5.10.0 - # via jsonschema + # via + # gt4py + # jsonschema iniconfig==1.1.1 # via pytest ipykernel==6.16.2 @@ -232,7 +220,6 @@ jinja2==3.0.1 # via # flask # gt4py - # gt4py (external/gt4py/setup.cfg) # sphinx jsonschema==4.16.0 # via nbformat @@ -244,12 +231,12 @@ jupyter-core==4.11.2 # via # jupyter-client # nbformat +lark==1.1.5 + # via gt4py locket==0.2.1 # via partd mako==1.1.6 - # via - # gt4py - # gt4py (external/gt4py/setup.cfg) + # via gt4py markupsafe==2.0.1 # via # jinja2 @@ -260,6 +247,11 @@ matplotlib-inline==0.1.6 # ipython mccabe==0.6.1 # via flake8 +mpi4py==3.1.4 + # via + # -r requirements_dev.txt + # pace-driver + # pace-driver (driver/setup.py) mpmath==1.2.1 # via sympy multidict==5.1.0 @@ -272,7 +264,6 @@ mypy-extensions==0.4.3 # via # black # mypy - # typing-inspect nbclient==0.6.8 # via nbmake nbformat==5.7.0 @@ -292,10 +283,9 @@ netcdf4==1.5.7 # pace-driver # pace-driver (driver/setup.py) networkx==2.6.3 - # via - # dace - # gt4py - # gt4py (external/gt4py/setup.cfg) + # via dace +ninja==1.11.1 + # via gt4py nodeenv==1.6.0 # via pre-commit numcodecs==0.7.2 @@ -309,7 +299,6 @@ numpy==1.21.2 # cftime # dace # gt4py - # gt4py (external/gt4py/setup.cfg) # h5py # netcdf4 # numcodecs @@ -322,7 +311,6 @@ numpy==1.21.2 # pace-util # pace-util (util/setup.py) # pandas - # scipy # xarray # zarr oauthlib==3.1.1 @@ -333,7 +321,6 @@ packaging==21.0 # via # dask # gt4py - # gt4py (external/gt4py/setup.cfg) # ipykernel # pytest # sphinx @@ -387,9 +374,7 @@ pyasn1==0.4.8 pyasn1-modules==0.2.8 # via google-auth pybind11==2.8.1 - # via - # gt4py - # gt4py (external/gt4py/setup.cfg) + # via gt4py pycodestyle==2.6.0 # via flake8 pycparser==2.20 @@ -465,10 +450,6 @@ requests-oauthlib==1.3.0 # via google-auth-oauthlib rsa==4.7.2 # via google-auth -scipy==1.7.1 - # via - # gt4py - # gt4py (external/gt4py/setup.cfg) six==1.16.0 # via # asttokens @@ -513,10 +494,8 @@ stack-data==0.5.1 # via ipython sympy==1.9 # via dace -tabulate==0.8.9 - # via - # gt4py - # gt4py (external/gt4py/setup.cfg) +tabulate==0.9.0 + # via gt4py toml==0.10.2 # via # pre-commit @@ -530,8 +509,6 @@ toolz==0.11.1 # -r util/requirements.txt # cytoolz # dask - # gt4py - # gt4py (external/gt4py/setup.cfg) # partd tornado==6.2 # via @@ -555,15 +532,9 @@ typing-extensions==4.3.0 # aiohttp # black # gt4py - # gt4py (external/gt4py/setup.cfg) # mypy # pace-util # pace-util (util/setup.py) - # typing-inspect -typing-inspect==0.7.1 - # via - # gt4py - # gt4py (external/gt4py/setup.cfg) urllib3==1.26.6 # via requests virtualenv==20.7.2 @@ -590,9 +561,7 @@ xarray==0.19.0 # pace-physics # pace-physics (physics/setup.py) xxhash==2.0.2 - # via - # gt4py - # gt4py (external/gt4py/setup.cfg) + # via gt4py yarl==1.6.3 # via aiohttp zarr==2.9.2 @@ -601,7 +570,9 @@ zarr==2.9.2 # pace-driver # pace-driver (driver/setup.py) zipp==3.8.0 - # via importlib-metadata + # via + # importlib-metadata + # importlib-resources # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/dsl/pace/dsl/dace/build.py b/dsl/pace/dsl/dace/build.py index b134f569..999d9e8e 100644 --- a/dsl/pace/dsl/dace/build.py +++ b/dsl/pace/dsl/dace/build.py @@ -135,7 +135,7 @@ def set_distributed_caches(config: "DaceConfig"): verb = "reading" gt_config.cache_settings["dir_name"] = get_cache_directory(config.code_path) - pace.util.pace_log.critical( + pace.util.pace_log.info( f"[{orchestration_mode}] Rank {config.my_rank} " f"{verb} cache {gt_config.cache_settings['dir_name']}" ) diff --git a/dsl/pace/dsl/dace/orchestration.py b/dsl/pace/dsl/dace/orchestration.py index 7858381a..eece8847 100644 --- a/dsl/pace/dsl/dace/orchestration.py +++ b/dsl/pace/dsl/dace/orchestration.py @@ -10,6 +10,7 @@ from dace.frontend.python.parser import DaceProgram from dace.transformation.auto.auto_optimize import make_transients_persistent from dace.transformation.helpers import get_parent_map +from dace.transformation.passes.simplify import SimplifyPass from pace.dsl.dace.build import get_sdfg_path, write_build_info from pace.dsl.dace.dace_config import ( @@ -65,17 +66,13 @@ def _download_results_from_dace( gt4py_results = [ gt4py.storage.from_array( r, - default_origin=(0, 0, 0), backend=config.get_backend(), - managed_memory=True, ) for r in dace_result ] else: gt4py_results = [ - gt4py.storage.from_array( - r, default_origin=(0, 0, 0), backend=config.get_backend() - ) + gt4py.storage.from_array(r, backend=config.get_backend()) for r in dace_result ] return gt4py_results @@ -111,6 +108,17 @@ def _to_gpu(sdfg: dace.SDFG): sd.openmp_sections = False +def _simplify(sdfg: dace.SDFG, validate=True, verbose=False): + """Override of sdfg.simplify to skip failing transformation + per https://github.com/spcl/dace/issues/1328 + """ + return SimplifyPass( + validate=validate, + verbose=verbose, + skip=["ConstantPropagation"], + ).apply_pass(sdfg, {}) + + def _build_sdfg( daceprog: DaceProgram, sdfg: dace.SDFG, config: DaceConfig, args, kwargs ): @@ -144,7 +152,7 @@ def _build_sdfg( del sdfg_kwargs[k] with DaCeProgress(config, "Simplify (1/2)"): - sdfg.simplify(validate=False, verbose=True) + _simplify(sdfg, validate=False, verbose=True) # Perform pre-expansion fine tuning with DaCeProgress(config, "Split regions"): @@ -155,7 +163,7 @@ def _build_sdfg( sdfg.expand_library_nodes() with DaCeProgress(config, "Simplify (2/2)"): - sdfg.simplify(validate=False, verbose=True) + _simplify(sdfg, validate=False, verbose=True) # Move all memory that can be into a pool to lower memory pressure. # Change Persistent memory (sub-SDFG) into Scope and flag it. diff --git a/dsl/pace/dsl/dace/sdfg_opt_passes.py b/dsl/pace/dsl/dace/sdfg_opt_passes.py index 2ef97dc8..17ec2180 100644 --- a/dsl/pace/dsl/dace/sdfg_opt_passes.py +++ b/dsl/pace/dsl/dace/sdfg_opt_passes.py @@ -21,4 +21,4 @@ def splittable_region_expansion(sdfg: dace.SDFG, verbose: bool = False): "K", ] if verbose: - pace_log.info(f"Reordered schedule for {node.label}") + pace_log.debug(f"Reordered schedule for {node.label}") diff --git a/external/dace b/external/dace new file mode 160000 index 00000000..892d6147 --- /dev/null +++ b/external/dace @@ -0,0 +1 @@ +Subproject commit 892d61478165a9a5ccf226732dadb54b7b50f1f3 diff --git a/external/daint_venv/.gitignore b/external/daint_venv/.gitignore deleted file mode 100644 index b6e47617..00000000 --- a/external/daint_venv/.gitignore +++ /dev/null @@ -1,129 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ diff --git a/external/daint_venv/LICENSE.txt b/external/daint_venv/LICENSE.txt deleted file mode 100644 index c091b607..00000000 --- a/external/daint_venv/LICENSE.txt +++ /dev/null @@ -1,25 +0,0 @@ -BSD License - -Copyright (c) 2019, Vulcan Technologies LLC -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/external/daint_venv/README.md b/external/daint_venv/README.md deleted file mode 100644 index c9382c8e..00000000 --- a/external/daint_venv/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# Standard Python environment on Daint - -This repo contains the definition of the standard Python virtual environment -on Piz Daint. - -Note: This repo is most probably of little general relevance if you are not working on the climate modeling team or on the Piz Daint supercomputer at CSCS in Switzerland. If you happen to want to use this repo for some purpose, it's probably best to reach out to someone on the climate modeling team to get help for achieving what you'd like to achieve. diff --git a/external/daint_venv/install.sh b/external/daint_venv/install.sh deleted file mode 100755 index f252abac..00000000 --- a/external/daint_venv/install.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash - -SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -BUILDENV_DIR=$SCRIPT_DIR/../../buildenv - -VERSION=vcm_1.0 -env_file=env.daint.sh -src_dir=$(pwd) - -# module environment -source ${BUILDENV_DIR}/machineEnvironment.sh -source ${BUILDENV_DIR}/${env_file} - -# echo commands and stop on error -set -e -set -x - -dst_dir=${1:-${installdir}/venv/${VERSION}} -wheeldir=${2:-${installdir}/wheeldir} -save_wheel=${3: false} - -# delete any pre-existing venv directories -if [ -d ${dst_dir} ] ; then - /bin/rm -rf ${dst_dir} -fi - -# setup virtual env -python3 -m venv ${dst_dir} -source ${dst_dir}/bin/activate -python3 -m pip install --upgrade pip -python3 -m pip install --upgrade wheel - -# installation of standard packages that are backend specific -if [ $save_wheel ]; then - python3 -m pip wheel --wheel-dir=$wheeldir cupy Cython clang-format -fi -python3 -m pip install --find-links=$wheeldir cupy Cython clang-format - -python3 -m pip install ${installdir}/mpi4py/mpi4py-3.1.0a0-cp38-cp38-linux_x86_64.whl - -# deactivate virtual environment -deactivate - -# echo module environment -echo "Note: this virtual env has been created on `hostname`." -cat ${BUILDENV_DIR}/${env_file} ${dst_dir}/bin/activate > ${dst_dir}/bin/activate~ -mv ${dst_dir}/bin/activate~ ${dst_dir}/bin/activate - - -exit 0 diff --git a/external/gt4py b/external/gt4py index 4be34ec1..1717d3c2 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit 4be34ec1ec98a02ce500eed73ab739fda258ce40 +Subproject commit 1717d3c24dee33ff4f7ede7aacb7513283d40fce diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index a7a526ee..abcb0632 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -1,21 +1,76 @@ import enum +import logging import os from datetime import timedelta from typing import Dict, List, Tuple import f90nml import numpy as np +from gt4py.cartesian.config import build_settings as gt_build_settings +from mpi4py import MPI import pace.util from pace import fv3core from pace.driver.performance.collector import PerformanceCollector -from pace.dsl.dace import DaceConfig, orchestrate +from pace.dsl.dace import orchestrate +from pace.dsl.dace.build import set_distributed_caches +from pace.dsl.dace.dace_config import DaceConfig, DaCeOrchestration from pace.dsl.gt4py_utils import is_gpu_backend from pace.dsl.typing import floating_point_precision from pace.util._optional_imports import cupy as cp from pace.util.logging import pace_log +class StencilBackendCompilerOverride: + """Override the Pace global stencil JIT to allow for 9-rank build + on any setup. + + This is a workaround that requires to know _exactly_ when build is happening. + Using this as a context manager, we leverage the DaCe build system to override + the name and build the 9 codepaths required- while every other rank wait. + + This should be removed when we refactor the GT JIT to distribute building + much more efficiently + """ + + def __init__(self, comm: MPI.Intracomm, config: DaceConfig): + self.comm = comm + self.config = config + + # Orchestration or mono-node is not concerned + self.no_op = self.config.is_dace_orchestrated() or self.comm.Get_size() == 1 + + # We abuse the DaCe build system + if not self.no_op: + config._orchestrate = DaCeOrchestration.Build + set_distributed_caches(config) + config._orchestrate = DaCeOrchestration.Python + + # We remove warnings from the stencils compiling when in critical and/or + # error + if pace_log.level > logging.WARNING: + gt_build_settings["extra_compile_args"]["cxx"].append("-w") + gt_build_settings["extra_compile_args"]["cuda"].append("-w") + + def __enter__(self): + if self.no_op: + return + if self.config.do_compile: + pace_log.info(f"Stencil backend compiles on {self.comm.Get_rank()}") + else: + pace_log.info(f"Stencil backend waits on {self.comm.Get_rank()}") + self.comm.Barrier() + + def __exit__(self, type, value, traceback): + if self.no_op: + return + if not self.config.do_compile: + pace_log.info(f"Stencil backend read cache on {self.comm.Get_rank()}") + else: + pace_log.info(f"Stencil backend compiled on {self.comm.Get_rank()}") + self.comm.Barrier() + + @enum.unique class MemorySpace(enum.Enum): HOST = 0 @@ -113,17 +168,18 @@ def __init__( metric_terms ) - self.dynamical_core = fv3core.DynamicalCore( - comm=self.communicator, - grid_data=grid_data, - stencil_factory=stencil_factory, - quantity_factory=quantity_factory, - damping_coefficients=damping_coefficients, - config=self.dycore_config, - timestep=timedelta(seconds=self.dycore_state.bdt), - phis=self.dycore_state.phis, - state=self.dycore_state, - ) + with StencilBackendCompilerOverride(MPI.COMM_WORLD, stencil_config.dace_config): + self.dynamical_core = fv3core.DynamicalCore( + comm=self.communicator, + grid_data=grid_data, + stencil_factory=stencil_factory, + quantity_factory=quantity_factory, + damping_coefficients=damping_coefficients, + config=self.dycore_config, + timestep=timedelta(seconds=self.dycore_state.bdt), + phis=self.dycore_state.phis, + state=self.dycore_state, + ) self._fortran_mem_space = fortran_mem_space self._pace_mem_space = ( @@ -141,7 +197,7 @@ def __init__( ) MPS_pipe_directory = os.getenv("CUDA_MPS_PIPE_DIRECTORY", None) MPS_is_on = ( - MPS_pipe_directory + MPS_pipe_directory is not None and is_gpu_backend(backend) and os.path.exists(f"{MPS_pipe_directory}/log") ) @@ -154,7 +210,7 @@ def __init__( f" orchestration : {self._is_orchestrated}\n" f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz}" f"(halo: {sizer.n_halo})\n" - f" Device ord: {device_ordinal_info}\n" + f" Device ord : {device_ordinal_info}\n" f" Nvidia MPS : {MPS_is_on}" ) diff --git a/fv3core/pace/fv3core/stencils/fvtp2d.py b/fv3core/pace/fv3core/stencils/fvtp2d.py index 8cf06c94..3369bcab 100644 --- a/fv3core/pace/fv3core/stencils/fvtp2d.py +++ b/fv3core/pace/fv3core/stencils/fvtp2d.py @@ -236,6 +236,18 @@ def make_quantity(): domain=idx.domain_compute(add=(1, 1, 1)), ) + def _transport_flux(self, x_unit_flux, y_unit_flux, q_x_flux, q_y_flux): + self.stencil_transport_flux( + self._q_advected_y_x_advected_mean, + self._q_x_advected_mean, + self._q_advected_x_y_advected_mean, + self._q_y_advected_mean, + x_unit_flux, + y_unit_flux, + q_x_flux, + q_y_flux, + ) + def __call__( self, q, @@ -283,7 +295,8 @@ def __call__( (as opposed to per-area) then this must be provided for damping to be correct """ - # [DaCe] dace.frontend.python.common.DaceSyntaxError: Keyword "Raise" disallowed + # TODO [DaCe] dace.frontend.python.common.DaceSyntaxError: + # Keyword "Raise" disallowed # if ( # self.delnflux is not None # and mass is None @@ -292,14 +305,16 @@ def __call__( # raise ValueError( # "when damping is enabled, mass must be given if mass flux is given" # ) - if x_mass_flux is None: - x_unit_flux = x_area_flux - else: - x_unit_flux = x_mass_flux - if y_mass_flux is None: - y_unit_flux = y_area_flux - else: - y_unit_flux = y_mass_flux + + # TODO [DaCe] Original aliasing failing code + # if x_mass_flux is None: + # x_unit_flux = x_area_flux + # else: + # x_unit_flux = x_mass_flux + # if y_mass_flux is None: + # y_unit_flux = y_area_flux + # else: + # y_unit_flux = y_mass_flux # TODO: consider whether to refactor xppm/yppm to output fluxes by also taking # y_area_flux as an input (flux = area_flux * advected_mean), since a flux is @@ -336,15 +351,18 @@ def __call__( self._q_advected_x, cry, self._q_advected_x_y_advected_mean ) - self.stencil_transport_flux( - self._q_advected_y_x_advected_mean, - self._q_x_advected_mean, - self._q_advected_x_y_advected_mean, - self._q_y_advected_mean, - x_unit_flux, - y_unit_flux, - q_x_flux, - q_y_flux, - ) + # TODO [DACE]: due to an aliiasing issue (see above for original code) + # we duplicate the code here + if x_mass_flux is None: + if y_mass_flux is None: + self._transport_flux(x_area_flux, y_area_flux, q_x_flux, q_y_flux) + else: + self._transport_flux(x_area_flux, y_mass_flux, q_x_flux, q_y_flux) + else: + if y_mass_flux is None: + self._transport_flux(x_mass_flux, y_area_flux, q_x_flux, q_y_flux) + else: + self._transport_flux(x_mass_flux, y_mass_flux, q_x_flux, q_y_flux) + if self._do_delnflux: self.delnflux(q, q_x_flux, q_y_flux, mass=mass) diff --git a/fv3core/pace/fv3core/stencils/nh_p_grad.py b/fv3core/pace/fv3core/stencils/nh_p_grad.py index 9e657fe7..b9f9e1ff 100644 --- a/fv3core/pace/fv3core/stencils/nh_p_grad.py +++ b/fv3core/pace/fv3core/stencils/nh_p_grad.py @@ -1,6 +1,7 @@ from gt4py.cartesian.gtscript import PARALLEL, computation, interval import pace.util +from pace.dsl.dace import orchestrate from pace.dsl.stencil import StencilFactory from pace.dsl.typing import Float, FloatField, FloatFieldIJ from pace.fv3core.stencils.a2b_ord4 import AGrid2BGridFourthOrder @@ -130,6 +131,11 @@ def __init__( grid_data: GridData, grid_type, ): + orchestrate( + obj=self, + config=stencil_factory.config.dace_config, + ) + grid_indexing = stencil_factory.grid_indexing self.orig = grid_indexing.origin_compute() domain_full_k = grid_indexing.domain_compute(add=(1, 1, 0)) diff --git a/requirements_dev.txt b/requirements_dev.txt index 484c4948..4978ff36 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -3,6 +3,7 @@ pytest-subtests pytest-regressions pytest-profiling pytest-cov +scipy nbmake mpi4py xarray @@ -10,14 +11,14 @@ zarr dask>=2021.10.0 netCDF4 cftime -dace==0.14.0 fv3config>=0.9.0 f90nml>=1.1.0 numpy>=1.15 -e external/gt4py --e util[dace] +-e external/dace -e stencils -e dsl -e physics -e fv3core -e driver +-e util diff --git a/stencils/pace/stencils/testing/test_translate.py b/stencils/pace/stencils/testing/test_translate.py index 14e8cef8..2f4e11c0 100644 --- a/stencils/pace/stencils/testing/test_translate.py +++ b/stencils/pace/stencils/testing/test_translate.py @@ -1,6 +1,5 @@ # type: ignore import copy -import logging import os from typing import Any, Dict, List @@ -227,7 +226,6 @@ def test_sequential_savepoint( threshold_overrides, xy_indices=True, ): - caplog.set_level(logging.DEBUG, logger="fv3core") if case.testobj is None: pytest.xfail( f"no translate object available for savepoint {case.savepoint_name}" @@ -348,7 +346,6 @@ def test_parallel_savepoint( int((MPI.COMM_WORLD.Get_size() // 6) ** 0.5), ) communicator = get_communicator(MPI.COMM_WORLD, layout) - caplog.set_level(logging.DEBUG, logger="fv3core") if case.testobj is None: pytest.xfail( f"no translate object available for savepoint {case.savepoint_name}" diff --git a/util/pace/util/_optional_imports.py b/util/pace/util/_optional_imports.py index 1f35e20a..990a954d 100644 --- a/util/pace/util/_optional_imports.py +++ b/util/pace/util/_optional_imports.py @@ -24,6 +24,14 @@ def __call__(self, *args, **kwargs): except ImportError: cupy = None +if cupy is not None: + # Cupy might be available - but not the device + try: + cupy.cuda.runtime.deviceSynchronize() + except cupy.cuda.runtime.CUDARuntimeError: + cupy = None + + try: import gt4py except ImportError: diff --git a/util/pace/util/checkpointer/snapshots.py b/util/pace/util/checkpointer/snapshots.py index 37db4bcd..97912f6e 100644 --- a/util/pace/util/checkpointer/snapshots.py +++ b/util/pace/util/checkpointer/snapshots.py @@ -2,6 +2,7 @@ import numpy as np +from pace.util._optional_imports import cupy as cp from pace.util._optional_imports import xarray as xr from .base import Checkpointer @@ -16,6 +17,8 @@ def make_dims(savepoint_dim, label, data_list): """ data = np.concatenate([array[None, :] for array in data_list], axis=0) dims = [savepoint_dim] + [f"{label}_dim{i}" for i in range(len(data.shape[1:]))] + if cp and isinstance(data, cp.ndarray): + data = data.get() return dims, data diff --git a/util/pace/util/constants.py b/util/pace/util/constants.py index ef57ed18..d5485aea 100644 --- a/util/pace/util/constants.py +++ b/util/pace/util/constants.py @@ -13,7 +13,7 @@ class ConstantVersions(Enum): GEOS = "GEOS" # Constant as defined in GEOS v13 -CONST_VERSION_AS_STR = os.environ.get("PACE_CONSTANTS", "FV3DYCORE") +CONST_VERSION_AS_STR = os.environ.get("PACE_CONSTANTS", "GFS") try: CONST_VERSION = ConstantVersions[CONST_VERSION_AS_STR]