From 9cc544d2cb04416f106a4392cdbe49545e616197 Mon Sep 17 00:00:00 2001 From: Stefan Verhoeven Date: Wed, 20 Sep 2023 10:48:58 +0200 Subject: [PATCH] Forcing builder (#365) * Make generic distributed esmvaltool recipe * New API for forcing generator * Use fluent builder pattern to construct esmvaltool recipes * Added builder.add_variables(['pr']) * Working GenericDistributedForcing.generate * Move all esmvaltool specific stuff to own package This make base package much cleaner * Move builder test * Fix mypy errors * Use builder for LisfloodForcing * Dont use name of Python built-in package as module name Got Traceback (most recent call last): File "/home/verhoes/git/eWaterCycle/ewatercycle/src/ewatercycle/esmvaltool/diagnostic/copy.py", line 7, in from esmvaltool.diag_scripts.shared import ( File "/home/verhoes/mambaforge/envs/ewatercycle/lib/python3.10/site-packages/esmvaltool/diag_scripts/shared/__init__.py", line 2, in from . import io, iris_helpers, names, plot File "/home/verhoes/mambaforge/envs/ewatercycle/lib/python3.10/site-packages/esmvaltool/diag_scripts/shared/io.py", line 5, in from pprint import pformat File "/home/verhoes/mambaforge/envs/ewatercycle/lib/python3.10/pprint.py", line 38, in import dataclasses as _dataclasses File "/home/verhoes/mambaforge/envs/ewatercycle/lib/python3.10/dataclasses.py", line 3, in import copy File "/home/verhoes/git/eWaterCycle/ewatercycle/src/ewatercycle/esmvaltool/diagnostic/copy.py", line 7, in from esmvaltool.diag_scripts.shared import ( ImportError: cannot import name 'ProvenanceLogger' from partially initialized module 'esmvaltool.diag_scripts.shared' (most likely due to a circular import) (/home/verhoes/mambaforge/envs/ewatercycle/lib/python3.10/site-packages/esmvaltool/diag_scripts/shared/__init__.py) * Added GenericLumpedForcing, untested as dcache is curently down * Use recipe builder for MarrmotForcing * Use builder in WflowForcing * Use builder for ForcingHype * Make mip required in dataset * More docs * Make 
expected recipe string consistent * Parse esmvaltool recipe output when non netcdf files are produced + more docs + fix tests * Got LisfloodForcing.generate(lisvap=None) working Had to make Dataset.mip optional otherwise tdps var with mip=day throws esmvaltool error * Make MarrmotForcing.generate work * Fix tests for wflow and pcrglobwb * Add support for variable without temporal selection * give right dataset to lisvap * Correct expected orog var * Make mypy happy * Dont copy line number, >>> and ... from code blocks * More tests, flake8 fixes and docs * Tests for DefaultForcing + Remove discriminator from forcing classes As you now have to specify which class you want to use to load a forcing. * Replaced with tests/src/base/test_forcing.py * Remove discriminator from forcing classes from tests * Make flake8 happier * Model validator should return otherwise object is set to None * Add test for MarrmotForcing.to_xarray() * Add generic forcing to user guide + custom forcing instructions + prevent circular deps with generic forcings in entry points * More docs * Use Dataset object instead of dict in pre defined datasets * Tested GenericDistributedForcing with non-era dataset * Allow extra attributes in Dataset '*' in forcing files was caused by version attribute in Dataset being silently forgotten. By allowing extra attributes the generate file has version value instead of `*`. * Tested CMIP6 * Add dataset.version to expected recipes * Add changes to CHANGELOG + more docs + more todos * Fix tests with updated model names * Disable lisflood model tests as downloading parameter set too slow * Implement PR suggestions * Rename build_recipe() to build__recipe() * Centralize esmvaltool config for download * Rename esmvaltool.models to esmvaltool.schema When we talk about models we mean hydrological models. 
--------- Co-authored-by: Peter Kalverla --- CHANGELOG.md | 8 + docs/adding_models.rst | 82 ++ docs/conf.py | 14 +- docs/user_guide.ipynb | 216 +++++- src/ewatercycle/base/forcing.py | 421 ++++++++--- src/ewatercycle/base/model.py | 4 + src/ewatercycle/esmvaltool/__init__.py | 1 + src/ewatercycle/esmvaltool/builder.py | 433 +++++++++++ src/ewatercycle/esmvaltool/datasets.py | 39 + .../esmvaltool/diagnostic/__init__.py | 0 .../esmvaltool/diagnostic/copier.py | 52 ++ src/ewatercycle/esmvaltool/run.py | 155 ++++ src/ewatercycle/esmvaltool/schema.py | 150 ++++ src/ewatercycle/forcing.py | 25 +- src/ewatercycle/plugins/hype/forcing.py | 117 ++- src/ewatercycle/plugins/lisflood/forcing.py | 222 +++--- src/ewatercycle/plugins/marrmot/forcing.py | 198 ++--- src/ewatercycle/plugins/pcrglobwb/forcing.py | 240 ++++-- src/ewatercycle/plugins/wflow/forcing.py | 161 ++-- src/ewatercycle/testing/fixtures.py | 10 +- src/ewatercycle/testing/helpers.py | 51 ++ src/ewatercycle/util.py | 27 - tests/plugins/hype/test_forcing.py | 262 +++---- tests/plugins/lisflood/test_forcing.py | 712 ++++++++++++------ tests/plugins/lisflood/test_model.py | 11 +- tests/plugins/marrmot/test_forcing.py | 201 +++-- tests/plugins/marrmot/test_model_m01.py | 4 +- tests/plugins/pcrglobwb/test_forcing.py | 261 ++++++- tests/plugins/wflow/test_forcing.py | 141 +++- tests/src/base/test_forcing.py | 151 ++++ tests/src/esmvaltool/__init__.py | 0 tests/src/esmvaltool/test_builder.py | 251 ++++++ tests/src/esmvaltool/test_run.py | 53 ++ tests/src/esmvaltool/test_schema.py | 65 ++ tests/src/forcing/test_default.py | 174 ----- tests/src/models/test_abstract.py | 40 +- 36 files changed, 3659 insertions(+), 1293 deletions(-) create mode 100644 src/ewatercycle/esmvaltool/__init__.py create mode 100644 src/ewatercycle/esmvaltool/builder.py create mode 100644 src/ewatercycle/esmvaltool/datasets.py create mode 100644 src/ewatercycle/esmvaltool/diagnostic/__init__.py create mode 100644 
src/ewatercycle/esmvaltool/diagnostic/copier.py create mode 100644 src/ewatercycle/esmvaltool/run.py create mode 100644 src/ewatercycle/esmvaltool/schema.py create mode 100644 src/ewatercycle/testing/helpers.py create mode 100644 tests/src/base/test_forcing.py create mode 100644 tests/src/esmvaltool/__init__.py create mode 100644 tests/src/esmvaltool/test_builder.py create mode 100644 tests/src/esmvaltool/test_run.py create mode 100644 tests/src/esmvaltool/test_schema.py delete mode 100644 tests/src/forcing/test_default.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 61974c29..3db9779e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,11 @@ Formatted as described on [https://keepachangelog.com](https://keepachangelog.co ### Added - Apptainer support ([#290](https://github.com/eWaterCycle/ewatercycle/issues/290)) +- Forcing ((#365)[https://github.com/eWaterCycle/ewatercycle/pull/365]): + - GenericDistributedForcing class + - GenericLumpedForcing class + - Generate from not just ERA5 or ERA-Interim dataset, but any ESMvalTool supported dataset +- Testing helpers for plugins ((#365)[https://github.com/eWaterCycle/ewatercycle/pull/365]) ### Changed @@ -18,6 +23,9 @@ Formatted as described on [https://keepachangelog.com](https://keepachangelog.co - Functions of a model inside a container that return the same result each call are cached with [MemoizedBmi](https://grpc4bmi.readthedocs.io/en/latest/api/grpc4bmi.bmi_memoized.html#grpc4bmi.bmi_memoized.MemoizedBmi) ([#339](https://github.com/eWaterCycle/ewatercycle/pull/339)) - Moved CaseConfig to src/utils.py - forcing.load_foreign has been superceded by using sources.model(...) 
+- Forcing ((#365)[https://github.com/eWaterCycle/ewatercycle/pull/365]): + - Instead of modifying an existing recipe now builds a ESMValTool recipe from scratch using a fluent interface + - DefaultForcing has overridable class methods for each step of the forcing generation process (build_recipe, run_recipe, recipe_output_to_forcing_arguments). ### Deprecated diff --git a/docs/adding_models.rst b/docs/adding_models.rst index af11d6e1..ddeb2c4f 100644 --- a/docs/adding_models.rst +++ b/docs/adding_models.rst @@ -21,6 +21,7 @@ There are roughly five steps to adding a model to eWaterCycle: 3. :ref:`Make recipe` 4. :ref:`Add to Python package` 5. :ref:`Add to platform` +6. :ref:`Custom forcing` If you want to add a new version of a model the procedure is roughly the same, but you can skip several steps. If you are already familiar with the @@ -238,3 +239,84 @@ Adding a new version of a model involves the following code changes: * Create new release of Python package. Done by package maintainers .. _scripts: https://github.com/eWaterCycle/infra/tree/main/roles/prep_shared_data + +.. _Custom forcing: + +Custom forcing +============== + +If your model can use generic forcing data +(:py:class:`~ewatercycle.base.forcing.GenericDistributedForcing` or :py:class:`~ewatercycle.base.forcing.GenericLumpedForcing`), you can skip this section. + +If your model needs custom forcing data, you need to create your own forcing class. + +The forcing class should sub class :py:class:`~ewatercycle.base.forcing.DefaultForcing`. + +In the class you have to define attributes for the forcing files your model will need. + +To use a ESMValTool recipe you have to implement the :py:meth:`~ewatercycle.base.forcing.DefaultForcing._build_recipe` method. +It should return a :py:class:`~ewatercycle.esmvaltool.models.Recipe` object which can be build using the +:py:class:`~ewatercycle.esmvaltool.builder.RecipeBuilder` class. 
+For example if your model only needs precipitation you can implement the method like this: + +.. code-block:: python + + from ewatercycle.forcing import RecipeBuilder + + ... + + @classmethod + def _build_recipe(cls, + start_time: datetime, + end_time: datetime, + shape: Path, + dataset: Dataset | str | dict = "ERA5", + ): + return ( + RecipeBuilder() + .start(start_time.year) + .end(end_time.year) + .shape(shape) + .dataset(dataset) + .add_variable("pr") + .build() + ) + +If your ESMValTool recipe needs additional arguments you can add and document them by implementing the :py:meth:`~ewatercycle.base.forcing.DefaultForcing.generate` method like +so + +.. code-block:: python + + @classmethod + def generate( + cls, + , + my_argument: str, + ): + """Generate forcing data for my model. + + Args: + + my_argument: My argument + """ + return super().generate( + , + my_argument=my_argument, + ) + + +The recipe output is mapped to the forcing class arguments with the :py:meth:`~ewatercycle.base.forcing.DefaultForcing._recipe_output_to_forcing_arguments` method. +If you want to change the mapping you can override this method. + +If you do not want to use ESMValTool to generate recipes you can override the :py:meth:`~ewatercycle.base.forcing.DefaultForcing.generate` method. + +To list your forcing class in :py:const:`ewatercycle.forcing.sources` you have to register in the `ewatercycle.forcings` entry point group. +It can then be imported with + +.. code-block:: python + + from ewatercycle.forcings import sources + + forcing = source['MyForcing']( + ... + ) diff --git a/docs/conf.py b/docs/conf.py index 7ae6fc48..5a27cedf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,17 +13,6 @@ # All configuration values have a default; values that are commented out # serve to show the default. -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. 
If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -# import sys -# from pathlib import Path - -# src = Path(__file__).parent / ".." / "src" -# sys.path.insert(0, str(src.absolute())) - - # -- General configuration ------------------------------------------------ # Add any Sphinx extension module names here, as strings. They can be @@ -201,3 +190,6 @@ "sklearn": ("https://scikit-learn.org/stable", None), "xarray": ("https://docs.xarray.dev/en/stable/", None), } + +# Dont copy line number, >>> and ... from code blocks +copybutton_exclude = ".linenos, .gp" diff --git a/docs/user_guide.ipynb b/docs/user_guide.ipynb index b7fc9eac..828a1668 100644 --- a/docs/user_guide.ipynb +++ b/docs/user_guide.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "bd2d9bd5", "metadata": {}, "outputs": [], @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "id": "570e6d66", "metadata": {}, "outputs": [ @@ -58,47 +58,137 @@ "data": { "text/html": [ "
Configuration(\n",
-       "    grdc_location=PosixPath('/mnt/data/observation/grdc/dailies'),\n",
+       "    grdc_location=PosixPath('.'),\n",
        "    container_engine='apptainer',\n",
-       "    apptainer_dir=PosixPath('/mnt/data/singularity-images'),\n",
+       "    apptainer_dir=PosixPath('/home/verhoes/dcache/singularity-images'),\n",
        "    singularity_dir=None,\n",
        "    output_dir=PosixPath('.'),\n",
-       "    parameterset_dir=PosixPath('/mnt/data/parameter-sets'),\n",
+       "    parameterset_dir=PosixPath('/home/verhoes/git/eWaterCycle/parameter-sets'),\n",
        "    parameter_sets={\n",
+       "        'lisflood_fraser': ParameterSet(\n",
+       "            name='lisflood_fraser',\n",
+       "            directory=PosixPath('/home/verhoes/git/eWaterCycle/parameter-sets/lisflood_fraser'),\n",
+       "            config=PosixPath('/home/verhoes/git/eWaterCycle/parameter-sets/lisflood_fraser/settings_lat_lon-Run.xml\n",
+       "'),\n",
+       "            doi='N/A',\n",
+       "            target_model='lisflood',\n",
+       "            supported_model_versions={'20.10'},\n",
+       "            downloader=GitHubDownloader(\n",
+       "                org='ec-jrc',\n",
+       "                repo='lisflood-usecases',\n",
+       "                branch='master',\n",
+       "                subfolder='LF_lat_lon_UseCase'\n",
+       "            )\n",
+       "        ),\n",
+       "        'pcrglobwb_rhinemeuse_30min': ParameterSet(\n",
+       "            name='pcrglobwb_rhinemeuse_30min',\n",
+       "            directory=PosixPath('/home/verhoes/git/eWaterCycle/parameter-sets/pcrglobwb_rhinemeuse_30min'),\n",
+       "            config=PosixPath('/home/verhoes/git/eWaterCycle/parameter-sets/pcrglobwb_rhinemeuse_30min/ini_and_batch\n",
+       "_files/deltares_laptop/setup_natural_test.ini'),\n",
+       "            doi='https://doi.org/10.5281/zenodo.1045339',\n",
+       "            target_model='pcrglobwb',\n",
+       "            supported_model_versions={'setters'},\n",
+       "            downloader=GitHubDownloader(\n",
+       "                org='UU-Hydro',\n",
+       "                repo='PCR-GLOBWB_input_example',\n",
+       "                branch='master',\n",
+       "                subfolder='RhineMeuse30min'\n",
+       "            )\n",
+       "        ),\n",
        "        'wflow_rhine_sbm_nc': ParameterSet(\n",
        "            name='wflow_rhine_sbm_nc',\n",
-       "            directory=PosixPath('/mnt/data/parameter-sets/wflow_rhine_sbm_nc'),\n",
-       "            config=PosixPath('/mnt/data/parameter-sets/wflow_rhine_sbm_nc/wflow_sbm_NC.ini'),\n",
+       "            directory=PosixPath('/home/verhoes/git/eWaterCycle/parameter-sets/wflow_rhine_sbm_nc'),\n",
+       "            config=PosixPath('/home/verhoes/git/eWaterCycle/parameter-sets/wflow_rhine_sbm_nc/wflow_sbm_NC.ini'),\n",
        "            doi='N/A',\n",
        "            target_model='wflow',\n",
-       "            supported_model_versions={'2020.1.1', '2020.1.3', '2020.1.2'},\n",
+       "            supported_model_versions={'2020.1.2', '2020.1.3', '2020.1.1'},\n",
+       "            downloader=GitHubDownloader(\n",
+       "                org='openstreams',\n",
+       "                repo='wflow',\n",
+       "                branch='master',\n",
+       "                subfolder='examples/wflow_rhine_sbm_nc'\n",
+       "            )\n",
+       "        ),\n",
+       "        'lisflood_global-masked_01degree_ERA5': ParameterSet(\n",
+       "            name='lisflood_global-masked_01degree_ERA5',\n",
+       "            directory=PosixPath('/home/verhoes/dcache/parameter-sets/lisflood_global-masked_01degree'),\n",
+       "            config=PosixPath('/home/verhoes/dcache/parameter-sets/lisflood_global-masked_01degree/settings_lisflood\n",
+       "_ERA5.xml'),\n",
+       "            doi='N/A',\n",
+       "            target_model='lisflood',\n",
+       "            supported_model_versions={'20.10'},\n",
        "            downloader=None\n",
        "        )\n",
        "    },\n",
-       "    ewatercycle_config=PosixPath('/etc/ewatercycle.yaml')\n",
+       "    ewatercycle_config=PosixPath('/home/verhoes/.config/ewatercycle/ewatercycle.yaml')\n",
        ")\n",
        "
\n" ], "text/plain": [ "\u001b[1;35mConfiguration\u001b[0m\u001b[1m(\u001b[0m\n", - " \u001b[33mgrdc_location\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/mnt/data/observation/grdc/dailies'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[33mgrdc_location\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'.'\u001b[0m\u001b[1m)\u001b[0m,\n", " \u001b[33mcontainer_engine\u001b[0m=\u001b[32m'apptainer'\u001b[0m,\n", - " \u001b[33mapptainer_dir\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/mnt/data/singularity-images'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[33mapptainer_dir\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/verhoes/dcache/singularity-images'\u001b[0m\u001b[1m)\u001b[0m,\n", " \u001b[33msingularity_dir\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", " \u001b[33moutput_dir\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'.'\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[33mparameterset_dir\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/mnt/data/parameter-sets'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[33mparameterset_dir\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/verhoes/git/eWaterCycle/parameter-sets'\u001b[0m\u001b[1m)\u001b[0m,\n", " \u001b[33mparameter_sets\u001b[0m=\u001b[1m{\u001b[0m\n", + " \u001b[32m'lisflood_fraser'\u001b[0m: \u001b[1;35mParameterSet\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33mname\u001b[0m=\u001b[32m'lisflood_fraser'\u001b[0m,\n", + " \u001b[33mdirectory\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/verhoes/git/eWaterCycle/parameter-sets/lisflood_fraser'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[33mconfig\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/verhoes/git/eWaterCycle/parameter-sets/lisflood_fraser/settings_lat_lon-Run.xml\u001b[0m\n", + "\u001b[32m'\u001b[0m\u001b[1m)\u001b[0m,\n", + " 
\u001b[33mdoi\u001b[0m=\u001b[32m'N/A'\u001b[0m,\n", + " \u001b[33mtarget_model\u001b[0m=\u001b[32m'lisflood'\u001b[0m,\n", + " \u001b[33msupported_model_versions\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'20.10'\u001b[0m\u001b[1m}\u001b[0m,\n", + " \u001b[33mdownloader\u001b[0m=\u001b[1;35mGitHubDownloader\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33morg\u001b[0m=\u001b[32m'ec-jrc'\u001b[0m,\n", + " \u001b[33mrepo\u001b[0m=\u001b[32m'lisflood-usecases'\u001b[0m,\n", + " \u001b[33mbranch\u001b[0m=\u001b[32m'master'\u001b[0m,\n", + " \u001b[33msubfolder\u001b[0m=\u001b[32m'LF_lat_lon_UseCase'\u001b[0m\n", + " \u001b[1m)\u001b[0m\n", + " \u001b[1m)\u001b[0m,\n", + " \u001b[32m'pcrglobwb_rhinemeuse_30min'\u001b[0m: \u001b[1;35mParameterSet\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33mname\u001b[0m=\u001b[32m'pcrglobwb_rhinemeuse_30min'\u001b[0m,\n", + " \u001b[33mdirectory\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/verhoes/git/eWaterCycle/parameter-sets/pcrglobwb_rhinemeuse_30min'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[33mconfig\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/verhoes/git/eWaterCycle/parameter-sets/pcrglobwb_rhinemeuse_30min/ini_and_batch\u001b[0m\n", + "\u001b[32m_files/deltares_laptop/setup_natural_test.ini'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[33mdoi\u001b[0m=\u001b[32m'https://doi.org/10.5281/zenodo.1045339'\u001b[0m,\n", + " \u001b[33mtarget_model\u001b[0m=\u001b[32m'pcrglobwb'\u001b[0m,\n", + " \u001b[33msupported_model_versions\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'setters'\u001b[0m\u001b[1m}\u001b[0m,\n", + " \u001b[33mdownloader\u001b[0m=\u001b[1;35mGitHubDownloader\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33morg\u001b[0m=\u001b[32m'UU-Hydro'\u001b[0m,\n", + " \u001b[33mrepo\u001b[0m=\u001b[32m'PCR-GLOBWB_input_example'\u001b[0m,\n", + " \u001b[33mbranch\u001b[0m=\u001b[32m'master'\u001b[0m,\n", + " \u001b[33msubfolder\u001b[0m=\u001b[32m'RhineMeuse30min'\u001b[0m\n", + " 
\u001b[1m)\u001b[0m\n", + " \u001b[1m)\u001b[0m,\n", " \u001b[32m'wflow_rhine_sbm_nc'\u001b[0m: \u001b[1;35mParameterSet\u001b[0m\u001b[1m(\u001b[0m\n", " \u001b[33mname\u001b[0m=\u001b[32m'wflow_rhine_sbm_nc'\u001b[0m,\n", - " \u001b[33mdirectory\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/mnt/data/parameter-sets/wflow_rhine_sbm_nc'\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[33mconfig\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/mnt/data/parameter-sets/wflow_rhine_sbm_nc/wflow_sbm_NC.ini'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[33mdirectory\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/verhoes/git/eWaterCycle/parameter-sets/wflow_rhine_sbm_nc'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[33mconfig\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/verhoes/git/eWaterCycle/parameter-sets/wflow_rhine_sbm_nc/wflow_sbm_NC.ini'\u001b[0m\u001b[1m)\u001b[0m,\n", " \u001b[33mdoi\u001b[0m=\u001b[32m'N/A'\u001b[0m,\n", " \u001b[33mtarget_model\u001b[0m=\u001b[32m'wflow'\u001b[0m,\n", - " \u001b[33msupported_model_versions\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'2020.1.1'\u001b[0m, \u001b[32m'2020.1.3'\u001b[0m, \u001b[32m'2020.1.2'\u001b[0m\u001b[1m}\u001b[0m,\n", + " \u001b[33msupported_model_versions\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'2020.1.2'\u001b[0m, \u001b[32m'2020.1.3'\u001b[0m, \u001b[32m'2020.1.1'\u001b[0m\u001b[1m}\u001b[0m,\n", + " \u001b[33mdownloader\u001b[0m=\u001b[1;35mGitHubDownloader\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33morg\u001b[0m=\u001b[32m'openstreams'\u001b[0m,\n", + " \u001b[33mrepo\u001b[0m=\u001b[32m'wflow'\u001b[0m,\n", + " \u001b[33mbranch\u001b[0m=\u001b[32m'master'\u001b[0m,\n", + " \u001b[33msubfolder\u001b[0m=\u001b[32m'examples/wflow_rhine_sbm_nc'\u001b[0m\n", + " \u001b[1m)\u001b[0m\n", + " \u001b[1m)\u001b[0m,\n", + " \u001b[32m'lisflood_global-masked_01degree_ERA5'\u001b[0m: 
\u001b[1;35mParameterSet\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33mname\u001b[0m=\u001b[32m'lisflood_global-masked_01degree_ERA5'\u001b[0m,\n", + " \u001b[33mdirectory\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/verhoes/dcache/parameter-sets/lisflood_global-masked_01degree'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[33mconfig\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/verhoes/dcache/parameter-sets/lisflood_global-masked_01degree/settings_lisflood\u001b[0m\n", + "\u001b[32m_ERA5.xml'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[33mdoi\u001b[0m=\u001b[32m'N/A'\u001b[0m,\n", + " \u001b[33mtarget_model\u001b[0m=\u001b[32m'lisflood'\u001b[0m,\n", + " \u001b[33msupported_model_versions\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'20.10'\u001b[0m\u001b[1m}\u001b[0m,\n", " \u001b[33mdownloader\u001b[0m=\u001b[3;35mNone\u001b[0m\n", " \u001b[1m)\u001b[0m\n", " \u001b[1m}\u001b[0m,\n", - " \u001b[33mewatercycle_config\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/etc/ewatercycle.yaml'\u001b[0m\u001b[1m)\u001b[0m\n", + " \u001b[33mewatercycle_config\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/verhoes/.config/ewatercycle/ewatercycle.yaml'\u001b[0m\u001b[1m)\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] }, @@ -388,7 +478,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 3, "id": "6b7a256d", "metadata": {}, "outputs": [], @@ -407,20 +497,20 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 6, "id": "9624c99d", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
ForcingSources['HypeForcing', 'LisfloodForcing', 'MarrmotForcing', 'PCRGlobWBForcing', 'WflowForcing', \n",
-       "'DefaultForcing']\n",
+       "
ForcingSources['GenericDistributedForcing', 'GenericLumpedForcing', 'HypeForcing', 'LisfloodForcing', \n",
+       "'MarrmotForcing', 'PCRGlobWBForcing', 'WflowForcing']\n",
        "
\n" ], "text/plain": [ - "ForcingSources\u001b[1m[\u001b[0m\u001b[32m'HypeForcing'\u001b[0m, \u001b[32m'LisfloodForcing'\u001b[0m, \u001b[32m'MarrmotForcing'\u001b[0m, \u001b[32m'PCRGlobWBForcing'\u001b[0m, \u001b[32m'WflowForcing'\u001b[0m, \n", - "\u001b[32m'DefaultForcing'\u001b[0m\u001b[1m]\u001b[0m\n" + "ForcingSources\u001b[1m[\u001b[0m\u001b[32m'GenericDistributedForcing'\u001b[0m, \u001b[32m'GenericLumpedForcing'\u001b[0m, \u001b[32m'HypeForcing'\u001b[0m, \u001b[32m'LisfloodForcing'\u001b[0m, \n", + "\u001b[32m'MarrmotForcing'\u001b[0m, \u001b[32m'PCRGlobWBForcing'\u001b[0m, \u001b[32m'WflowForcing'\u001b[0m\u001b[1m]\u001b[0m\n" ] }, "metadata": {}, @@ -509,16 +599,81 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "00fe9118", + "id": "db31b882", "metadata": {}, "source": [ "### Generating forcing data\n", "\n", - "In most cases, you will not have access to tailor-made forcing data, and manually pre-processing existing datasets can be quite a pain. eWaterCycle includes a forcing generator that can do all the required steps to go from the available datasets (ERA5, ERA-Interim, etc) to whatever format the models require. This is done through [ESMValTool recipes](https://docs.esmvaltool.org/en/latest/recipes/recipe_hydrology.html). For some models (e.g. lisflood) additional computations are done, as some steps require data and/or code that is not available to ESMValTool.\n", + "In most cases, you will not have access to tailor-made forcing data, and manually pre-processing existing datasets can be quite a pain. eWaterCycle includes a forcing generator that can do all the required steps to go from the available datasets (ERA5, ERA-Interim, etc) to whatever format the models require. 
This is done through [ESMValTool recipes](https://docs.esmvaltool.org/en/latest/recipes/recipe_hydrology.html).\n", "\n", - "Apart from some standard parameters (start time, datasets, etc.), the forcing generator sometimes requires additional model-specific options. For our wflow example case, we need to pass the DEM file to the ESMValTool recipe as well. All model-specific options are listed in the [API documentation](https://ewatercycle.readthedocs.io/en/latest/apidocs/ewatercycle.forcing.html#ewatercycle.forcing.generate)." + "If your model needs NetCDF files for precipitation, air temperature, minimum air temperature and maximum air temperature variables you can use the generic forcing generators:\n", + "\n", + "* [GenericDistributedForcing](autoapi/ewatercycle/base/forcing/index.html#ewatercycle.base.forcing.GenericDistributedForcing) for distributed models aka grid based models\n", + "* [GenericLumpedForcing](autoapi/ewatercycle/base/forcing/index.html#ewatercycle.base.forcing.GenericLumpedForcing) for lumped models aka point based models\n", + "\n", + "To generate forcing for the Rhine river basin for 2001 and 2002 you can use the following code:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d52b0a76", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
GenericDistributedForcing(\n",
+       "    start_time='2000-01-01T00:00:00Z',\n",
+       "    end_time='2001-01-01T00:00:00Z',\n",
+       "    directory=PosixPath('/home/verhoes/git/eWaterCycle/ewatercycle/docs/esmvaltool_output/ewcrepnjk26_5n_20230904_0\n",
+       "91856/work/diagnostic/script'),\n",
+       "    shape=PosixPath('/home/verhoes/git/eWaterCycle/ewatercycle/src/ewatercycle/testing/data/Rhine/Rhine.shp'),\n",
+       "    pr='OBS6_ERA5_reanaly_1_day_pr_2000-2001.nc',\n",
+       "    tas='OBS6_ERA5_reanaly_1_day_tas_2000-2001.nc',\n",
+       "    tasmin='OBS6_ERA5_reanaly_1_day_tasmin_2000-2001.nc',\n",
+       "    tasmax='OBS6_ERA5_reanaly_1_day_tasmax_2000-2001.nc'\n",
+       ")\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mGenericDistributedForcing\u001b[0m\u001b[1m(\u001b[0m\n", + " \u001b[33mstart_time\u001b[0m=\u001b[32m'2000-01-01T00:00:00Z'\u001b[0m,\n", + " \u001b[33mend_time\u001b[0m=\u001b[32m'2001-01-01T00:00:00Z'\u001b[0m,\n", + " \u001b[33mdirectory\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/verhoes/git/eWaterCycle/ewatercycle/docs/esmvaltool_output/ewcrepnjk26_5n_20230904_0\u001b[0m\n", + "\u001b[32m91856/work/diagnostic/script'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[33mshape\u001b[0m=\u001b[1;35mPosixPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/verhoes/git/eWaterCycle/ewatercycle/src/ewatercycle/testing/data/Rhine/Rhine.shp'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[33mpr\u001b[0m=\u001b[32m'OBS6_ERA5_reanaly_1_day_pr_2000-2001.nc'\u001b[0m,\n", + " \u001b[33mtas\u001b[0m=\u001b[32m'OBS6_ERA5_reanaly_1_day_tas_2000-2001.nc'\u001b[0m,\n", + " \u001b[33mtasmin\u001b[0m=\u001b[32m'OBS6_ERA5_reanaly_1_day_tasmin_2000-2001.nc'\u001b[0m,\n", + " \u001b[33mtasmax\u001b[0m=\u001b[32m'OBS6_ERA5_reanaly_1_day_tasmax_2000-2001.nc'\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from ewatercycle.testing.fixtures import rhine_shape\n", + "\n", + "shape = rhine_shape()\n", + "forcing = ewatercycle.forcing.sources[\"GenericDistributedForcing\"].generate(\n", + " dataset=\"ERA5\",\n", + " start_time=\"2000-01-01T00:00:00Z\",\n", + " end_time=\"2001-01-01T00:00:00Z\",\n", + " shape=shape.absolute(),\n", + ")\n", + "print(forcing)" + ] + }, + { + "cell_type": "markdown", + "id": "6805235d", + "metadata": {}, + "source": [ + "For some models (e.g. lisflood) additional computations are done, as some steps require data and/or code that is not available to ESMValTool. Apart from some standard parameters (start time, datasets, etc.), the forcing generator sometimes requires additional model-specific options. 
For our wflow example case, we need to pass the DEM file to the ESMValTool recipe as well. All model-specific options are listed in their [documentation](https://ewatercycle.readthedocs.io/en/latest/plugins.html)." ] }, { @@ -528,13 +683,10 @@ "metadata": {}, "source": [ "
\n", - "

Current limitations

\n", + "

Current forcing generation dataset sources

\n", " \n", - "

eWaterCycle uses ESMValTool to generate forcing data. As the Python interface of ESMValTool is still under development, the options to modify ESMValTool settings from within eWaterCycle are still limited. Currently we support changing the most basic settings such as start time, end time, and region. We implemented support for ERA5 and ERA-Interim data.

\n", - "\n", - "

In principle, ESMValTool can work for a wide range of input datasets. Adding support for more datasets in eWaterCycle is on the horizon. If you have a use case and are willing to contribute, please don't hesitate to reach out to us, for example by opening an issue.

\n", - "
\n", - "\n" + "

eWaterCycle uses ESMValTool to generate forcing data. You can generate forcing from pre-defined `ERA5` and `ERA-Interim` datasets. There is also support for generating forcing from datasets on [ESGF](https://esgf.llnl.gov/) by supplying a [Dataset object](autoapi/ewatercycle/esmvaltool/models/index.html#ewatercycle.esmvaltool.models.Dataset) to the :py:meth:`ewatercycle.base.forcing.DefaultForcing.generate` method.

\n", + "" ] }, { diff --git a/src/ewatercycle/base/forcing.py b/src/ewatercycle/base/forcing.py index 795ea789..e53afcbf 100644 --- a/src/ewatercycle/base/forcing.py +++ b/src/ewatercycle/base/forcing.py @@ -1,26 +1,60 @@ +"""Base classes for eWaterCycle forcings. + +Configuring ESMValTool +---------------------- + +.. _esmvaltool-configuring: + +To download data from ESFG via ESMValTool you will need a ~/.esmvaltool/config-user.yml file with something like: + +.. code-block:: yaml + + search_esgf: when_missing + download_dir: ~/climate_data + rootpath: + CMIP6: ~/climate_data/CMIP6 + drs: + CMIP6: ESGF + +A config file can be generated with: + +.. code-block:: bash + + esmvaltool config get-config-user + +See `ESMValTool configuring docs `_ +for more information. +""" import logging +from datetime import datetime from pathlib import Path -from tempfile import NamedTemporaryFile -from typing import Annotated, Literal, Optional, Union - -from esmvalcore.config import Session -from esmvalcore.experimental import CFG, Recipe -from esmvalcore.experimental.recipe_output import RecipeOutput -from pydantic import BaseModel, field_validator -from pydantic.functional_validators import AfterValidator +from typing import Annotated, Optional, TypeVar, Union + +from pydantic import BaseModel +from pydantic.functional_validators import AfterValidator, model_validator from ruamel.yaml import YAML -from ewatercycle.util import to_absolute_path +from ewatercycle.esmvaltool.builder import ( + build_generic_distributed_forcing_recipe, + build_generic_lumped_forcing_recipe, +) +from ewatercycle.esmvaltool.run import run_recipe +from ewatercycle.esmvaltool.schema import Dataset, Recipe +from ewatercycle.util import get_time, to_absolute_path logger = logging.getLogger(__name__) FORCING_YAML = "ewatercycle_forcing.yaml" def _to_absolute_path(v: Union[str, Path]): - """Wraps to_absolute_path to a single-arg function, to use as Pydantic validator.""" + """Absolute path validator.""" 
return to_absolute_path(v) +# Needed so subclass.generate() can return type of subclass instead of base class. +AnyForcing = TypeVar("AnyForcing", bound="DefaultForcing") + + class DefaultForcing(BaseModel): """Container for forcing data. @@ -31,40 +65,46 @@ class DefaultForcing(BaseModel): end_time: End time of forcing in UTC and ISO format string e.g. 'YYYY-MM-DDTHH:MM:SSZ'. shape: Path to a shape file. Used for spatial selection. + If relative then it is relative to the given directory. """ - model: Literal["default"] = "default" + # TODO add validation for start_time and end_time + # using https://docs.pydantic.dev/latest/usage/types/datetime/ + # TODO make sure start_time < end_time start_time: str end_time: str directory: Optional[Annotated[Path, AfterValidator(_to_absolute_path)]] = None shape: Optional[Path] = None - @field_validator("shape") - @classmethod - def _absolute_shape(cls, v, info): - if v is None: - return v - return to_absolute_path( - v, parent=info.data["directory"], must_be_in_parent=False - ) + @model_validator(mode="after") + def _absolute_shape(self): + if self.shape is not None and self.directory is not None: + self.shape = to_absolute_path( + self.shape, parent=self.directory, must_be_in_parent=False + ) + return self @classmethod def generate( - cls, - dataset: str, + cls: type[AnyForcing], + dataset: str | Dataset | dict, start_time: str, end_time: str, shape: str, directory: Optional[str] = None, **model_specific_options, - ) -> "DefaultForcing": + ) -> AnyForcing: """Generate forcings for a model. The forcing is generated with help of `ESMValTool `_. Args: - dataset: Name of the source dataset. See :py:const:`~ewatercycle.base.forcing.DATASETS`. + dataset: Dataset to get forcing data from. + When string is given a predefined dataset is looked up in + :py:const:`ewatercycle.esmvaltool.datasets.DATASETS`. + When dict given it is passed to + :py:class:`ewatercycle.esmvaltool.models.Dataset` constructor. 
start_time: Start time of forcing in UTC and ISO format string e.g. 'YYYY-MM-DDTHH:MM:SSZ'. end_time: nd time of forcing in UTC and ISO format string e.g. @@ -72,8 +112,59 @@ def generate( shape: Path to a shape file. Used for spatial selection. directory: Directory in which forcing should be written. If not given will create timestamped directory. + """ - raise NotImplementedError("No default forcing generator available.") + recipe = cls._build_recipe( + dataset=dataset, + start_time=get_time(start_time), + end_time=get_time(end_time), + shape=Path(shape), + **model_specific_options, + ) + recipe_output = cls._run_recipe( + recipe, directory=Path(directory) if directory else None + ) + directory = recipe_output.pop("directory") + arguments = cls._recipe_output_to_forcing_arguments( + recipe_output, model_specific_options + ) + forcing = cls( + directory=Path(directory), + start_time=start_time, + end_time=end_time, + shape=shape, + **arguments, + ) + forcing.save() + return forcing + + @classmethod + def _recipe_output_to_forcing_arguments(cls, recipe_output, model_specific_options): + return { + **recipe_output, + **model_specific_options, + } + + @classmethod + def _build_recipe( + cls, + start_time: datetime, + end_time: datetime, + shape: Path, + dataset: Dataset | str | dict, + **model_specific_options, + ): + # TODO do we want an implementation here? + # If so how is it different from GenericDistributedForcing? 
+ raise NotImplementedError("No default recipe available.") + + @classmethod + def _run_recipe( + cls, + recipe: Recipe, + directory: Optional[Path] = None, + ) -> dict[str, str]: + return run_recipe(recipe, directory) def save(self): """Export forcing data for later use.""" @@ -82,7 +173,7 @@ def save(self): raise ValueError("Cannot save forcing without directory.") target = self.directory / FORCING_YAML # We want to make the yaml and its parent movable, - # so the directory and shape should not be included in the yaml file + # so the directory should not be included in the yaml file clone = self.model_copy() # TODO: directory should not be optional, can we remove the directory @@ -98,7 +189,7 @@ def save(self): ) fdict = clone.model_dump(exclude={"directory"}, exclude_none=True, mode="json") - with open(target, "w") as f: + with target.open("w") as f: yaml.dump(fdict, f) return target @@ -123,107 +214,223 @@ def load(cls, directory: str | Path): ) metadata = meta.read_text() # Workaround for legacy forcing files having !PythonClass tag. - # Get model name of non-initialized BaseModel with Pydantic class property: - modelname = cls.model_fields["model"].default # type: ignore - metadata = metadata.replace(f"!{cls.__name__}", f"model: {modelname}") + # Remove it so ewatercycle.forcing.source[].load(dir) works. + metadata = metadata.replace(f"!{cls.__name__}", "") fdict = yaml.load(metadata) fdict["directory"] = data_source return cls(**fdict) - @classmethod - def plot(cls): - raise NotImplementedError("No generic plotting method available.") - def __eq__(self, other): return self.__dict__ == other.__dict__ -def _session(directory: Optional[str] = None) -> Optional[Session]: - """When directory is set return a ESMValTool session that will write recipe output to that directory.""" - if directory is None: - return None +class GenericDistributedForcing(DefaultForcing): + """Generic forcing data for a distributed model. 
+ + Attributes: + pr: Path to NetCDF file with precipitation data. + tas: Path to NetCDF file with air temperature data. + tasmin: Path to NetCDF file with minimum air temperature data. + tasmax: Path to NetCDF file with maximum air temperature data. - class TimeLessSession(Session): - def __init__(self, output_dir: Path): - super().__init__(CFG.copy()) - self.output_dir = output_dir + Examples: - @property - def session_dir(self): - return self.output_dir + To generate forcing from ERA5 for the Rhine catchment for 2000-2001: - return TimeLessSession(Path(directory).absolute()) + .. code-block:: python + from pathlib import Path + from rich import print + from ewatercycle.base.forcing import GenericDistributedForcing + + shape = Path("./src/ewatercycle/testing/data/Rhine/Rhine.shp") + forcing = GenericDistributedForcing.generate( + dataset='ERA5', + start_time='2000-01-01T00:00:00Z', + end_time='2001-01-01T00:00:00Z', + shape=shape.absolute(), + ) + print(forcing) + + Gives something like: + + .. code-block:: python + + GenericDistributedForcing( + model='generic_distributed', + start_time='2000-01-01T00:00:00Z', + end_time='2001-01-01T00:00:00Z', + directory=PosixPath('/home/verhoes/git/eWaterCycle/ewatercycle/esmvaltool_output/tmp05upitxoewcrep_20230815_154640/work/diagnostic/script'), + shape=PosixPath('/home/verhoes/git/eWaterCycle/ewatercycle/src/ewatercycle/testing/data/Rhine/Rhine.shp'), + pr='OBS6_ERA5_reanaly_1_day_pr_2000-2001.nc', + tas='OBS6_ERA5_reanaly_1_day_tas_2000-2001.nc', + tasmin='OBS6_ERA5_reanaly_1_day_tasmin_2000-2001.nc', + tasmax='OBS6_ERA5_reanaly_1_day_tasmax_2000-2001.nc' + ) -def run_esmvaltool_recipe(recipe: Recipe, output_dir: str | None) -> RecipeOutput: - """Run an ESMValTool recipe. + To generate forcing from CMIP6 for the Rhine catchment for 2000-2001 + (make sure :ref:`ESMValTool is configured ` correctly): + + .. 
code-block:: python + + from pathlib import Path + from rich import print + from ewatercycle.base.forcing import GenericDistributedForcing + + shape = Path("./src/ewatercycle/testing/data/Rhine/Rhine.shp") + cmip_dataset = { + "dataset": "EC-Earth3", + "project": "CMIP6", + "grid": "gr", + "exp": ["historical",], + "ensemble": "r6i1p1f1", + } + + forcing = GenericDistributedForcing.generate( + dataset=cmip_dataset, + start_time="2000-01-01T00:00:00Z", + end_time="2001-01-01T00:00:00Z", + shape=shape.absolute(), + ) + print(forcing) - The recipe.data dictionary can be modified before running the recipe. + Gives something like: - During run the recipe.path is overwritten with a temporary file containing the updated recipe. + .. code-block:: python - Args: - recipe: ESMValTool recipe - output_dir: Directory where output should be written to. - If None then output is written to generated timestamped directory. + GenericDistributedForcing( + start_time='2000-01-01T00:00:00Z', + end_time='2001-01-01T00:00:00Z', + directory=PosixPath('/home/verhoes/git/eWaterCycle/ewatercycle/esmvaltool_output/ewcrep0ibzlds__20230904_082748/work/diagnostic/script'), + shape=PosixPath('/home/verhoes/git/eWaterCycle/ewatercycle/src/ewatercycle/testing/data/Rhine/Rhine.shp'), + pr='CMIP6_EC-Earth3_day_historical_r6i1p1f1_pr_gr_2000-2001.nc', + tas='CMIP6_EC-Earth3_day_historical_r6i1p1f1_tas_gr_2000-2001.nc', + tasmin='CMIP6_EC-Earth3_day_historical_r6i1p1f1_tasmin_gr_2000-2001.nc', + tasmax='CMIP6_EC-Earth3_day_historical_r6i1p1f1_tasmax_gr_2000-2001.nc' + ) + """ + + pr: str + tas: str + tasmin: str + tasmax: str + + @classmethod + def _build_recipe( + cls, + start_time: datetime, + end_time: datetime, + shape: Path, + dataset: Dataset | str | dict = "ERA5", + **model_specific_options, + ): + return build_generic_distributed_forcing_recipe( + # TODO allow finer selection then a whole year. 
+ # using ISO 8601 str as type or timerange attribute see + # https://docs.esmvaltool.org/projects/ESMValCore/en/latest/recipe/overview.html#recipe-section-datasets + start_year=start_time.year, + end_year=end_time.year, + shape=shape, + dataset=dataset, + # TODO which variables are needed for a generic forcing? + # As they are stored as object attributes + # we can not have a customizable list + variables=("pr", "tas", "tasmin", "tasmax"), + ) + + # TODO add helper method to get forcing data as xarray.Dataset? - Returns: - ESMValTool recipe output + +class GenericLumpedForcing(GenericDistributedForcing): + """Generic forcing data for a lumped model. + + Attributes: + pr: Path to NetCDF file with precipitation data. + tas: Path to NetCDF file with air temperature data. + tasmin: Path to NetCDF file with minimum air temperature data. + tasmax: Path to NetCDF file with maximum air temperature data. Example: - >>> from ewatercycle.forcing import run_esmvaltool_recipe - >>> from esmvalcore.experimental.recipe import get_recipe - >>> recipe = get_recipe('hydrology/recipe_wflow.yml') - >>> recipe.data['scripts']['script']['dem_file'] = 'my_dem.nc' - >>> output_dir = Path('./output_dir') - >>> output = run_esmvaltool_recipe(recipe, output_dir) + To generate forcing from ERA5 for the Rhine catchment for 2000-2001: + + .. code-block:: python + + from pathlib import Path + from rich import print + from ewatercycle.base.forcing import GenericLumpedForcing + + shape = Path("./src/ewatercycle/testing/data/Rhine/Rhine.shp") + forcing = GenericLumpedForcing.generate( + dataset='ERA5', + start_time='2000-01-01T00:00:00Z', + end_time='2001-01-01T00:00:00Z', + shape=shape.absolute(), + ) + print(forcing) + + Gives something like: + + .. 
code-block:: python + + GenericLumpedForcing( + model='generic_distributed', + start_time='2000-01-01T00:00:00Z', + end_time='2001-01-01T00:00:00Z', + directory=PosixPath('/home/verhoes/git/eWaterCycle/ewatercycle/esmvaltool_output/ewcrep90hmnvat_20230816_124951/work/diagnostic/script'), + shape=PosixPath('/home/verhoes/git/eWaterCycle/ewatercycle/src/ewatercycle/testing/data/Rhine/Rhine.shp'), + pr='OBS6_ERA5_reanaly_1_day_pr_2000-2001.nc', + tas='OBS6_ERA5_reanaly_1_day_tas_2000-2001.nc', + tasmin='OBS6_ERA5_reanaly_1_day_tasmin_2000-2001.nc', + tasmax='OBS6_ERA5_reanaly_1_day_tasmax_2000-2001.nc' + ) + + To generate forcing from CMIP6 for the Rhine catchment for 2000-2001 + (make sure :ref:`ESMValTool is configured ` correctly): + + .. code-block:: python + + from pathlib import Path + from rich import print + from ewatercycle.base.forcing import GenericLumpedForcing + + shape = Path("./src/ewatercycle/testing/data/Rhine/Rhine.shp") + cmip_dataset = { + "dataset": "EC-Earth3", + "project": "CMIP6", + "grid": "gr", + "exp": ["historical",], + "ensemble": "r6i1p1f1", + } + + forcing = GenericLumpedForcing.generate( + dataset=cmip_dataset, + start_time="2000-01-01T00:00:00Z", + end_time="2001-01-01T00:00:00Z", + shape=shape.absolute(), + ) + print(forcing) """ - # ESMVALCore 2.8.1 always runs original recipe, - # write updated recipe to disk and use - recipe.path = _write_recipe(recipe) - # TODO write recipe in output_dir? - # TODO fix in esmvalcore and wait for new version? 
- - session = _session(output_dir) - output = recipe.run(session=session) - - # remove updated recipe file - recipe.path.unlink() - - return output - - -def _write_recipe(recipe: Recipe) -> Path: - updated_recipe_file = NamedTemporaryFile( - suffix=recipe.path.name, mode="w", delete=False - ) - yaml = YAML(typ="safe") - yaml.dump(recipe.data, updated_recipe_file) - updated_recipe_file.close() - return Path(updated_recipe_file.name) - - -DATASETS = { - "ERA5": { - "dataset": "ERA5", - "project": "OBS6", - "tier": 3, - "type": "reanaly", - "version": 1, - }, - "ERA-Interim": { - "dataset": "ERA-Interim", - "project": "OBS6", - "tier": 3, - "type": "reanaly", - "version": 1, - }, -} -"""Dictionary of allowed forcing datasets. - -Where key is the name of the dataset and -value is an `ESMValTool dataset section `_. -""" + + # files returned by generate() have only time coordinate and zero lons/lats. + # TODO inject centroid of shape as single lon/lat into files? + # use diagnostic script or overwrite generate() + + @classmethod + def _build_recipe( + cls, + start_time: datetime, + end_time: datetime, + shape: Path, + dataset: Dataset | str | dict = "ERA5", + **model_specific_options, + ): + return build_generic_lumped_forcing_recipe( + start_year=start_time.year, + end_year=end_time.year, + shape=shape, + dataset=dataset, + variables=("pr", "tas", "tasmin", "tasmax"), + ) diff --git a/src/ewatercycle/base/model.py b/src/ewatercycle/base/model.py index 8334b44b..a2c62bb4 100644 --- a/src/ewatercycle/base/model.py +++ b/src/ewatercycle/base/model.py @@ -83,6 +83,10 @@ def _check_parameter_set(self): def _make_bmi_instance(self) -> OptionalDestBmi: """Attach a BMI instance to self._bmi.""" + # this has different signature than pymt see + # https://github.com/csdms/pymt/blob/9ef61a0010b4997d5a2b09e5d434371598291261/pymt/framework/bmi_setup.py#L77C21-L77C32 + # where it is {}.items() + # TODO is this OK? 
@property def parameters(self) -> dict[str, Any]: """Display the model's parameters and their values.""" diff --git a/src/ewatercycle/esmvaltool/__init__.py b/src/ewatercycle/esmvaltool/__init__.py new file mode 100644 index 00000000..2f58224d --- /dev/null +++ b/src/ewatercycle/esmvaltool/__init__.py @@ -0,0 +1 @@ +"""Module to generate forcings using ESMValTool.""" diff --git a/src/ewatercycle/esmvaltool/builder.py b/src/ewatercycle/esmvaltool/builder.py new file mode 100644 index 00000000..382126be --- /dev/null +++ b/src/ewatercycle/esmvaltool/builder.py @@ -0,0 +1,433 @@ +"""Builder and runner for ESMValTool recipes. + +The recipes can be used to generate forcings. +""" +import logging +from pathlib import Path +from typing import Literal, Sequence + +from ewatercycle.esmvaltool.datasets import DATASETS +from ewatercycle.esmvaltool.diagnostic import copier +from ewatercycle.esmvaltool.schema import ( + ClimateStatistics, + Dataset, + Diagnostic, + Documentation, + Recipe, + Script, + TargetGrid, + Variable, +) +from ewatercycle.util import get_extents + +DIAGNOSTIC_NAME = "diagnostic" +SPATIAL_PREPROCESSOR_NAME = "spatial" +SCRIPT_NAME = "script" +DEFAULT_DIAGNOSTIC_SCRIPT = copier.__file__ + +logger = logging.getLogger(__name__) + + +class RecipeBuilder: + """Builder for ESMValTool recipes tailored to generate forcings. + + Example: + + To create a recipe from ERA5 dataset and the Rhine basin: + + .. code-block:: python + + >>> from ewatercycle.testing.fixtures import rhine_shape + >>> from ewatercycle.forcing import RecipeBuilder + >>> recipe = ( + ... RecipeBuilder() + ... .title("Generic distributed forcing recipe") + ... .dataset("ERA5") + ... .start(2000) + ... .end(2001) + ... .shape(rhine_shape()) + ... .add_variable("pr") + ... .build() + ... ) + >>> recipe.save("recipe.yml") + + To run the recipe: + + .. 
code-block:: bash + + esmvaltool recipe.yml + + Order in which methods are called matters in the following cases: + + * regrid before adding variables + * lump after spatial selection and before adding variables + * temporal selection before adding variables + * spatial selection before adding variables + + """ + + _recipe: Recipe + _start_year: int = 0 + _end_year: int = 10000 + _mip: str = "day" + + def __init__(self) -> None: + self._recipe = Recipe( + documentation=Documentation( + description="", + title="", + authors=["unmaintained"], + projects=["ewatercycle"], + ), + preprocessors={ + SPATIAL_PREPROCESSOR_NAME: {}, + }, + diagnostics={ + DIAGNOSTIC_NAME: Diagnostic( + variables={}, + scripts={SCRIPT_NAME: {"script": DEFAULT_DIAGNOSTIC_SCRIPT}}, + ) + }, + ) + + def build(self) -> Recipe: + """Build the recipe. + + Should be called after all other methods. + """ + # TODO de-duplicate preprocessors + if self._recipe.datasets is None or len(self._recipe.datasets) == 0: + raise ValueError("Recipe has no dataset") + return self._recipe + + def description(self, description: str) -> "RecipeBuilder": + """Set the description of the recipe. + + Args: + description: Description of the recipe. + """ + self._recipe.documentation.description = description + return self + + def title(self, title: str) -> "RecipeBuilder": + """Set the title of the recipe. + + Args: + title: Title of the recipe. + """ + self._recipe.documentation.title = title + return self + + def dataset(self, dataset: Dataset | str | dict) -> "RecipeBuilder": + """Set the dataset of the recipe. + + Args: + dataset: Dataset to use for the recipe. + When string is given a predefined dataset is looked up in + :py:const:`ewatercycle.esmvaltool.datasets.DATASETS`. + When dict given it is passed to + :py:class:`ewatercycle.esmvaltool.models.Dataset` constructor. + + Only one dataset is allowed when generating eWaterCycle forcings. + Calling this method again will overwrite the previous dataset. 
+ """ + # Can only have one dataset + if isinstance(dataset, str): + dataset = DATASETS[dataset] + elif isinstance(dataset, dict): + dataset = Dataset(**dataset) + if not isinstance(dataset, Dataset): + raise ValueError( + f"dataset must be a Dataset, str or dict, got {type(dataset)}" + ) + self._recipe.datasets = [dataset] + return self + + def mip(self, value: str) -> "RecipeBuilder": + """Set the default time frequency for all later added variables. + + Args: + value: time frequency, e.g. 'day', 'Eday', 'CFday', 'fx'. + Defaults to 'day'. + """ + self._mip = value + return self + + def start(self, value: int) -> "RecipeBuilder": + """Set the start year of the recipe. + + Args: + value: Start year of the recipe. + """ + # TODO also accept datetime object + self._start_year = value + return self + + def end(self, value: int) -> "RecipeBuilder": + """Set the end year of the recipe. + + Args: + value: End year of the recipe. + """ + # TODO also accept datetime object + # TODO is end year inclusive or exclusive? + self._end_year = value + return self + + @property + def _preprocessors(self): + if self._recipe.preprocessors is None: + raise ValueError("Recipe has no preprocessors") + return self._recipe.preprocessors + + def regrid(self, scheme: str, target_grid: TargetGrid) -> "RecipeBuilder": + """Regrid the data from the dataset to a different grid. + + Args: + schema: Regridding scheme to use. See + https://docs.esmvaltool.org/projects/ESMValCore/en/latest/recipes/recipe_file.html#regrid + target_grid: Target grid to regrid to. + """ + self._preprocessors[SPATIAL_PREPROCESSOR_NAME]["regrid"] = { + "scheme": scheme, + "target_grid": target_grid, + } + return self + + def shape( + self, file: Path | str, crop: bool = True, decomposed: bool = False + ) -> "RecipeBuilder": + """Select data within a shapefile. + + Args: + file: Path to shapefile. + crop: Crop data to shapefile extent. Otherwise data outside shapefile extent is set to NaN. 
+ decomposed: Decompose shapefile into separate polygons. + """ + self._preprocessors[SPATIAL_PREPROCESSOR_NAME]["extract_shape"] = { + "shapefile": str(file), + "crop": crop, + "decomposed": decomposed, + } + return self + + def region( + self, + start_longitude: float, + end_longitude: float, + start_latitude: float, + end_latitude: float, + ) -> "RecipeBuilder": + """Select data within a region. + + Args: + start_longitude: Start longitude of the region. + end_longitude: End longitude of the region. + start_latitude: Start latitude of the region. + end_latitude: End latitude of the region. + """ + self._preprocessors[SPATIAL_PREPROCESSOR_NAME]["extract_region"] = { + "start_longitude": start_longitude, + "end_longitude": end_longitude, + "start_latitude": start_latitude, + "end_latitude": end_latitude, + } + return self + + def region_by_shape(self, shape: Path, pad=0) -> "RecipeBuilder": + """Select data within a region defined by extents of a shapefile. + + Args: + shape: Path to shapefile. + pad: Pad the region with this many degrees. + """ + extents = get_extents(shape, pad) + return self.region(**extents) + + def lump( + self, + operator: Literal[ + "mean", "median", "std_dev", "sum", "variance", "min", "max", "rms" + ] = "mean", + ) -> "RecipeBuilder": + """Lump gridded data into a single value spatially. + + See + https://docs.esmvaltool.org/projects/ESMValCore/en/latest/api/esmvalcore.preprocessor.html#esmvalcore.preprocessor.area_statistics + + Args: + operator: The operator to use for lumping. + """ + # TODO do we need different operator for different variables? + # TODO should lumping come after unit conversion? Or does it not matter? 
+ self._preprocessors[SPATIAL_PREPROCESSOR_NAME]["area_statistics"] = { + "operator": operator + } + return self + + @property + def _diagnostic(self) -> Diagnostic: + if self._recipe.diagnostics is None: + raise ValueError("Recipe has no diagnostics") + return self._recipe.diagnostics[DIAGNOSTIC_NAME] + + def add_variables(self, variables: Sequence[str]) -> "RecipeBuilder": + """Add variables to the recipe. + + Args: + variables: Names of variables to add to the recipe. + """ + for variable in variables: + self.add_variable(variable) + return self + + def add_variable( + self, + variable: str, + mip: str | None = None, + units: str | None = None, + stats: ClimateStatistics | None = None, + short_name: str | None = None, + start_year: int | None | Literal[False] = None, + end_year: int | None | Literal[False] = None, + ): + """Add a variable to the recipe. + + Args: + variable: The name of the variable to add. + mip: The MIP table to use for the variable. + If not given then defaults to what was set with self.mip(value). + units: The unit to convert the variable to. + Default no conversion. See + https://docs.esmvaltool.org/projects/ESMValCore/en/latest/recipes/recipe_file.html#convert-units + stats: The climate statistics to apply to the variable. + Defaults to not applying any statistics. + short_name: A short name for the variable. Defaults to variable name. + start_year: The start year of the variable. + Defaults to start year of dataset. + Use False to disable temporal selection. + end_year: The end year of the variable. Defaults to end year of dataset. + Use False to disable temporal selection. 
+ + """ + # TODO check variable is in dataset + # Each variable needs its own single preprocessor + preprocessor_name = self._add_preprocessor(variable, units, stats) + if self._diagnostic.variables is None: + raise ValueError("Recipe has no variables") + if mip is None: + mip = self._mip + if start_year is None: + start_year = self._start_year + if start_year is False: + start_year = None + if end_year is None: + end_year = self._end_year + if end_year is False: + end_year = None + self._diagnostic.variables[variable] = Variable( + mip=mip, + preprocessor=preprocessor_name, + start_year=start_year, + # TODO check if end_year is exclusive or inclusive + end_year=end_year, + short_name=short_name, + ) + return self + + def _add_preprocessor(self, preprocessor_name, units, stats): + if preprocessor_name not in self._preprocessors: + preprocessor = {} + # TODO allow spatial preprocessor to be configured after adding variables + if SPATIAL_PREPROCESSOR_NAME in self._preprocessors: + preprocessor = {**self._preprocessors[SPATIAL_PREPROCESSOR_NAME]} + if units is not None: + preprocessor["convert_units"] = {"units": units} + if stats is not None: + preprocessor["climate_statistics"] = { + "operator": stats.operator, + "period": stats.period, + } + self._preprocessors[preprocessor_name] = preprocessor + return preprocessor_name + + def script( + self, script: str, arguments: dict[str, str] | None = None + ) -> "RecipeBuilder": + """Set script of recipe. + + When script has not been set will default to copying + the ESMValTool preprocessed files to the output directory + using the :py:mod:`ewatercycle.esmvaltool.diagnostic.copier` script. + + Args: + script: Path to script to run. + arguments: Arguments to pass to the script. 
+ """ + if self._diagnostic.scripts is None: + raise ValueError("Recipe has no scripts") + self._diagnostic.scripts[SCRIPT_NAME] = Script(script=script, **arguments or {}) + return self + + +def build_generic_distributed_forcing_recipe( + start_year: int, + end_year: int, + shape: Path, + dataset: Dataset | str | dict = "ERA5", + variables: Sequence[str] = ("pr", "tas", "tasmin", "tasmax"), +): + """Build a generic distributed forcing recipe. + + Args: + start_year: Start year of the data to retrieve. + end_year: End year of the data to retrieve. + shape: Path to shapefile. Used for spatial selection. + dataset: Dataset to use for the recipe. + variables: Names of variables to add to the recipe. + + Recipe will return a NetCDF file for each variable. + """ + return ( + RecipeBuilder() + .title("Generic distributed forcing recipe") + .description("Generic distributed forcing recipe") + .dataset(dataset) + .start(start_year) + .end(end_year) + .shape(shape) + .add_variables(variables) + .build() + ) + + +def build_generic_lumped_forcing_recipe( + start_year: int, + end_year: int, + shape: Path, + dataset: Dataset | str | dict = "ERA5", + variables: Sequence[str] = ("pr", "tas", "tasmin", "tasmax"), +): + """Build a generic lumped forcing recipe. + + Args: + start_year: Start year of the data to retrieve. + end_year: End year of the data to retrieve. + shape: Path to shapefile. Used for spatial selection. + dataset: Dataset to use for the recipe. + variables: Names of variables to add to the recipe. + + Recipe will return a NetCDF file for each variable. 
+ """ + return ( + RecipeBuilder() + .title("Generic lumped forcing recipe") + .description("Generic lumped forcing recipe") + .dataset(dataset) + .start(start_year) + .end(end_year) + .shape(shape) + .lump() + .add_variables(variables) + .build() + ) diff --git a/src/ewatercycle/esmvaltool/datasets.py b/src/ewatercycle/esmvaltool/datasets.py new file mode 100644 index 00000000..2fb380f4 --- /dev/null +++ b/src/ewatercycle/esmvaltool/datasets.py @@ -0,0 +1,39 @@ +"""Forcing datasets.""" + +from ewatercycle.esmvaltool.schema import Dataset + +DATASETS = { + "ERA5": Dataset( + dataset="ERA5", + project="OBS6", + tier=3, + type="reanaly", + version=1, + ), + "ERA-Interim": Dataset( + dataset="ERA-Interim", + project="OBS6", + tier=3, + type="reanaly", + version=1, + ), +} +"""Dictionary of predefined forcing datasets. + +Where key is the name of the dataset and +value is an `ESMValTool dataset section +`_. + +.. code-block:: python + + >> from ewatercycle.forcing import DATASETS + >> list(DATASETS.keys()) + ['ERA5', 'ERA-Interim'] + +""" + +# TODO move predefined forcing datasets to ewatercycle.CFG +# would give more work for person setting up ewatercycle environment. +# but would make it easier for users to use the predefined datasets. +# during testing we must overwrite the predefined datasets in CFG +# with the datasets the tests need. 
diff --git a/src/ewatercycle/esmvaltool/diagnostic/__init__.py b/src/ewatercycle/esmvaltool/diagnostic/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/ewatercycle/esmvaltool/diagnostic/copier.py b/src/ewatercycle/esmvaltool/diagnostic/copier.py new file mode 100644 index 00000000..cd4c7d02 --- /dev/null +++ b/src/ewatercycle/esmvaltool/diagnostic/copier.py @@ -0,0 +1,52 @@ +"""ESMValTool diagnostic script that copies the preprocessing output to the diagnostic output.""" +import logging +import shutil +from pathlib import Path + +from esmvaltool.diag_scripts.shared import ( + ProvenanceLogger, + get_diagnostic_filename, + run_diagnostic, +) + +logger = logging.getLogger(Path(__file__).name) + + +def main(cfg): + """Copy input data to the output directory. + + Args: + cfg: Configuration dictionary. + """ + input_data = cfg["input_data"] + provenance = { + "caption": "Forcings for generic hydrological model", + "domains": ["global"], + "authors": [ + "unmaintained", + ], + "projects": [ + "ewatercycle", + ], + "references": [ + "acknow_project", + ], + } + for input_file in input_data.keys(): + input_path = Path(input_file) + outfile = get_diagnostic_filename(input_path.stem, cfg, input_path.suffix[1:]) + logger.info("Copying %s to %s", input_file, outfile) + shutil.copy(input_file, outfile) + with ProvenanceLogger(cfg) as provenance_logger: + provenance_logger.log( + outfile, + { + **provenance, + "ancestors": [input_file], + }, + ) + + +if __name__ == "__main__": + with run_diagnostic() as config: + main(config) diff --git a/src/ewatercycle/esmvaltool/run.py b/src/ewatercycle/esmvaltool/run.py new file mode 100644 index 00000000..0fbf5cb1 --- /dev/null +++ b/src/ewatercycle/esmvaltool/run.py @@ -0,0 +1,155 @@ +"""Run ESMValTool recipes.""" +import logging +from pathlib import Path +from tempfile import NamedTemporaryFile + +from esmvalcore.config import CFG, Session +from esmvalcore.experimental.recipe import Recipe as ESMValToolRecipe 
+from esmvalcore.experimental.recipe_output import DataFile, ImageFile, RecipeOutput + +from ewatercycle.esmvaltool.schema import Recipe + +logger = logging.getLogger(__name__) + + +class _TimeLessSession(Session): + """ESMValTool session that does not use time in session directory.""" + + def __init__(self, output_dir: Path): + super().__init__(CFG.copy()) + self.output_dir = output_dir + + @property + def session_dir(self): + return self.output_dir + + +def _session(directory: Path | str | None = None) -> Session | None: + """Make ESMValTool session with optional output directory. + + Args: + directory: Directory where output should be written to. + If None then output is written to generated timestamped directory. + """ + if directory is None: + return None + return _TimeLessSession(Path(directory).absolute()) + + +def run_recipe(recipe: Recipe, output_dir: Path | None = None) -> dict[str, str]: + """Run an ESMValTool recipe. + + Args: + recipe: ESMValTool recipe + output_dir: Directory where output should be written to. + If None then output is written to generated timestamped directory. + + Returns: + Dictionary with forcing data variables as keys and file names as values + and a key called directory with value the parent directory of the file names. + + Example: + + To run a recipe that generates a distributed forcing dataset: + + >>> from ewatercycle.testing.fixtures import rhine_shape + >>> from ewatercycle.esmvaltool.builder import ( + ... build_generic_distributed_forcing_recipe + ... ) + >>> from ewatercycle.esmvaltool.run import run_recipe + >>> shape = rhine_shape() + >>> recipe = build_generic_distributed_forcing_recipe( + ... start_year=2000, + ... end_year=2001, + ... shape=shape, + ... dataset='ERA5', + ... 
) + >>> output = run_recipe(recipe) + >>> output + diagnostic/script: + DataFile('OBS6_ERA5_reanaly_*_day_pr_2000-2001.nc') + DataFile('OBS6_ERA5_reanaly_*_day_tas_2000-2001.nc') + DataFile('OBS6_ERA5_reanaly_*_day_tasmax_2000-2001.nc') + DataFile('OBS6_ERA5_reanaly_*_day_tasmin_2000-2001.nc') + + diagnostic/pr: + DataFile('OBS6_ERA5_reanaly_*_day_pr_2000-2001.nc') + + diagnostic/tas: + DataFile('OBS6_ERA5_reanaly_*_day_tas_2000-2001.nc') + + diagnostic/tasmin: + DataFile('OBS6_ERA5_reanaly_*_day_tasmin_2000-2001.nc') + + diagnostic/tasmax: + DataFile('OBS6_ERA5_reanaly_*_day_tasmax_2000-2001.nc') + + """ + output = _save_and_run_recipe(recipe, output_dir) + return _parse_recipe_output(output) + + +def _save_and_run_recipe(recipe: Recipe, output_dir: Path | None) -> RecipeOutput: + """Save recipe to temporary file and run it with ESMValTool. + + Args: + recipe: ESMValTool recipe + output_dir: Directory where output should be written to. + If None then output is written to generated timestamped directory. + + Returns: + ESMValTool recipe output + """ + recipe_file = NamedTemporaryFile( + prefix="ewcrep", suffix=".yml", mode="w", delete=False + ) + recipe_path = Path(recipe_file.name) + + try: + recipe.save(recipe_path) + + logger.info("Running recipe %s with ESMValTool", recipe_path) + + # TODO don't like having to different Recipe classes, should fix upstream + esmlvaltool_recipe = ESMValToolRecipe(recipe_path) + session = _session(output_dir) + output = esmlvaltool_recipe.run(session=session) + finally: + recipe_path.unlink() + return output + + +def _parse_recipe_output(recipe_output: RecipeOutput) -> dict[str, str]: + """Parse ESMValTool recipe output into a dictionary. 
+ + This function assumes: + + * Recipe had at least one diagnostic + * Diagnostic produced at least one file + * All files are in the same directory + * The first variable name in a NetCDF file is the primary one + + Returns: + Dictionary with forcing data variables as keys and file names as values + and a key called directory with value the parent directory of the file names. + """ + first_diagnostic_output = list(recipe_output.values())[0] + output_files = first_diagnostic_output.files + if not output_files: + raise ValueError("No recipe output files found") + forcing_files = {} + for output_file in output_files: + var_name = output_file.path.stem + if isinstance(output_file, DataFile): + # Datafile means ends with .nc + # Use first variable name from inside file as key + dataset = output_file.load_xarray() + var_name = list(dataset.data_vars.keys())[0] + dataset.close() + elif isinstance(output_file, ImageFile): + # Skip image files + continue + # Assume all files are in the same directory + forcing_files[var_name] = output_file.path.name + directory = str(output_file.path.parent) + return {"directory": directory, **forcing_files} diff --git a/src/ewatercycle/esmvaltool/schema.py b/src/ewatercycle/esmvaltool/schema.py new file mode 100644 index 00000000..ea8bcff0 --- /dev/null +++ b/src/ewatercycle/esmvaltool/schema.py @@ -0,0 +1,150 @@ +"""ESMValTool recipe schema and preprocessor types. + +The classes and their attributes in this module are based +on the ESMValTool recipe schema at +https://github.com/ESMValGroup/ESMValCore/blob/main/esmvalcore/_recipe/recipe_schema.yml +. 
+""" +from io import StringIO +from pathlib import Path +from typing import Any, Literal, TypedDict + +from pydantic import BaseModel, ConfigDict +from ruamel.yaml import YAML + + +class Dataset(BaseModel): + """ESMValTool dataset section.""" + + dataset: str + project: str | None = None + # TODO add min max + start_year: int | None = None + end_year: int | None = None + ensemble: str | list[str] | None = None + exp: str | list[str] | None = None + mip: str | None = None + realm: str | None = None + shift: str | None = None + tier: Literal[1, 2, 3] | None = None + type: str | None = None + grid: str | None = None + # any more attributes are passed through + # to ESMValTool without validation + model_config = ConfigDict(extra="allow") + + +class Variable(BaseModel): + """ESMValTool variable section.""" + + project: str | None = None + # TODO add min max + start_year: int | None = None + end_year: int | None = None + ensemble: str | list[str] | None = None + timerange: str | None = None # note: not in yamale spec + exp: str | list[str] | None = None + mip: str | None = None + preprocessor: str | None = None + reference_dataset: str | None = None + alternative_dataset: str | None = None + fx_files: list[str] | None = None + additional_datasets: list[Dataset] | None = None + short_name: str | None = None + + +class Script(BaseModel): + """ESMValTool script section.""" + + model_config = ConfigDict(extra="allow") + script: str + + +class Diagnostic(BaseModel): + """ESMValTool diagnostic section.""" + + scripts: dict[str, Script] | None = None + additional_datasets: list[Dataset] | None = None + title: str | None = None + description: str | None = None + themes: list[str] | None = None + realms: list[str] | None = None + variables: dict[str, Variable] | None = None + + +class Documentation(BaseModel): + """ESMValTool documentation section.""" + + title: str + description: str + # TODO add min 1 + authors: list[str] + projects: list[str] | None = None + references: 
list[str] | None = None + + +class Recipe(BaseModel): + """ESMValTool recipe.""" + + documentation: Documentation + datasets: list[Dataset] | None = None + # value depends on the key which is the name of the preprocessor + # see https://docs.esmvaltool.org/projects/ESMValCore/en/v2.9.0/recipe/preprocessor.html + preprocessors: dict[str, dict[str, Any]] | None = None + diagnostics: dict[str, Diagnostic] | None = None + + @classmethod + def load(cls, path: Path) -> "Recipe": + """Load recipe from path.""" + with path.open(encoding="utf-8") as f: + return cls.from_yaml(f.read()) + + @classmethod + def from_yaml(cls, recipe_string: str) -> "Recipe": + """Load recipe from YAML string.""" + yaml = YAML(typ="rt") + raw_recipe = yaml.load(recipe_string) + return cls(**raw_recipe) + + def to_yaml(self) -> str: + """Return recipe as YAML string.""" + # use rt to preserve order of preprocessor keys + yaml = YAML(typ="rt") + stream = StringIO() + yaml.dump(self.model_dump(exclude_none=True), stream) + return stream.getvalue() + + def save(self, path: Path) -> None: + """Save recipe to path. + + Args: + path: Path to save recipe to. 
+ """ + with path.open("w") as f: + f.write(self.to_yaml()) + + +class ClimateStatistics(BaseModel): + """Arguments for the :py:func:`~esmvalcore.preprocessor.climate_statistics` preprocessor.""" + + operator: Literal["mean", "std", "min", "max", "median", "sum"] = "mean" + period: Literal["hour", "day", "month", "year"] = "day" + + +ExtractRegion = dict[ + Literal["start_longitude", "end_longitude", "start_latitude", "end_latitude"], float +] +"""Arguments for the :py:func:`~esmvalcore.preprocessor.extract_region` preprocessor.""" + +TargetGrid = TypedDict( + "TargetGrid", + { + "start_longitude": float, + "end_longitude": float, + "start_latitude": float, + "end_latitude": float, + "step_longitude": float, + "step_latitude": float, + }, +) +"""Type for target_grid argument for the :py:func:`~esmvalcore.preprocessor.regrid` preprocessor.""" diff --git a/src/ewatercycle/forcing.py b/src/ewatercycle/forcing.py index 8ab79e87..c6feba80 100644 --- a/src/ewatercycle/forcing.py +++ b/src/ewatercycle/forcing.py @@ -5,7 +5,11 @@ from importlib_metadata import EntryPoint -from ewatercycle.base.forcing import DefaultForcing +from ewatercycle.base.forcing import ( + DefaultForcing, + GenericDistributedForcing, + GenericLumpedForcing, +) class ForcingSources(Mapping): @@ -18,14 +22,14 @@ def __init__(self, *args, **kw): self._raw_dict = dict(*args, **kw) def __getitem__(self, key) -> Type[DefaultForcing]: - """Gets the entry point, loads it, and returns the Forcing object.""" + """Get the entry point, loads it, and returns the Forcing object.""" if isinstance(self._raw_dict[key], EntryPoint): return self._raw_dict[key].load() else: return self._raw_dict[key] def __getattr__(self, attr): - """Accesses the keys like attributes. E.g. sources.HypeForcing.""" + """Access the keys like attributes. E.g. 
sources.HypeForcing.""" if attr in self._raw_dict.keys(): return self.__getitem__(attr) else: @@ -42,10 +46,15 @@ def __repr__(self): _forcings: dict[str, Any] = { - entry_point.name: entry_point - for entry_point in entry_points(group="ewatercycle.forcings") # /NOSONAR + "GenericDistributedForcing": GenericDistributedForcing, + "GenericLumpedForcing": GenericLumpedForcing, } -_forcings["DefaultForcing"] = DefaultForcing +_forcings.update( + { + entry_point.name: entry_point + for entry_point in entry_points(group="ewatercycle.forcings") # /NOSONAR + } +) sources = ForcingSources(_forcings) """Dictionary filled with available forcing sources. @@ -78,6 +87,8 @@ def __repr__(self): >>> forcing = sources.DefaultForcing.load("path/to/forcing/directory") To get your own forcing source to be listed here it needs to be -registered in the :py:data:`ewatercycle.forcings` `entry point group `_. +registered in the `ewatercycle.forcings` +`entry point group `_ +. """ diff --git a/src/ewatercycle/plugins/hype/forcing.py b/src/ewatercycle/plugins/hype/forcing.py index 6d0f64ac..4eca1eac 100644 --- a/src/ewatercycle/plugins/hype/forcing.py +++ b/src/ewatercycle/plugins/hype/forcing.py @@ -1,13 +1,14 @@ -"""Forcing related functionality for hype""" +"""Forcing related functionality for hype.""" -from typing import Literal, Optional +from datetime import datetime +from pathlib import Path import pandas as pd import xarray as xr -from esmvalcore.experimental import get_recipe -from ewatercycle.base.forcing import DATASETS, DefaultForcing, run_esmvaltool_recipe -from ewatercycle.util import get_time, to_absolute_path +from ewatercycle.base.forcing import DefaultForcing +from ewatercycle.esmvaltool.builder import RecipeBuilder +from ewatercycle.esmvaltool.schema import Dataset class HypeForcing(DefaultForcing): @@ -28,68 +29,26 @@ class HypeForcing(DefaultForcing): Tobs (optional): Input file for temperature data. 
Defaults to 'Tobs.txt' """ - # type ignored because pydantic wants literal in base class while mypy does not - model: Literal["hype"] = "hype" # type: ignore Pobs: str = "Pobs.txt" TMAXobs: str = "TMAXobs.txt" TMINobs: str = "TMINobs.txt" Tobs: str = "Tobs.txt" @classmethod - def generate( # type: ignore + def _build_recipe( cls, - dataset: str, - start_time: str, - end_time: str, - shape: str, - directory: Optional[str] = None, - ) -> "HypeForcing": - # load the ESMValTool recipe - recipe_name = "hydrology/recipe_hype.yml" - recipe = get_recipe(recipe_name) - - # model-specific updates to the recipe - preproc_names = ("preprocessor", "temperature", "water") - - for preproc_name in preproc_names: - recipe.data["preprocessors"][preproc_name]["extract_shape"][ - "shapefile" - ] = str(to_absolute_path(shape)) - - recipe.data["datasets"] = [DATASETS[dataset]] - - variables = recipe.data["diagnostics"]["hype"]["variables"] - var_names = "tas", "tasmin", "tasmax", "pr" - - startyear = get_time(start_time).year - for var_name in var_names: - variables[var_name]["start_year"] = startyear - - endyear = get_time(end_time).year - for var_name in var_names: - variables[var_name]["end_year"] = endyear - - # generate forcing data and retreive useful information - recipe_output = run_esmvaltool_recipe(recipe, directory) - - # retrieve forcing files - recipe_files = list(recipe_output.values())[0].files - forcing_files = {f.path.stem: f.path for f in recipe_files} - directory = str(forcing_files["Pobs"].parent) - - # instantiate forcing object based on generated data - generated_forcing = HypeForcing( - directory=directory, - start_time=start_time, - end_time=end_time, + start_time: datetime, + end_time: datetime, + shape: Path, + dataset: Dataset | str | dict, + **model_specific_options + ): + return build_hype_recipe( + start_year=start_time.year, + end_year=end_time.year, shape=shape, - Pobs=forcing_files["Pobs"].name, - TMAXobs=forcing_files["TMAXobs"].name, - 
TMINobs=forcing_files["TMINobs"].name, - Tobs=forcing_files["Tobs"].name, + dataset=dataset, ) - generated_forcing.save() - return generated_forcing def to_xarray(self) -> xr.Dataset: """Load forcing files into a xarray Dataset. @@ -99,8 +58,8 @@ def to_xarray(self) -> xr.Dataset: """ assert self.directory is not None, "Forcing directory is not set" - # TODO add lats/lons to dataset - # maybe infer from centers of subbasins in shapefile in ewatercycle_forcing.yaml? + # TODO add lats/lons to dataset maybe infer + # from centers of subbasins in shapefile in ewatercycle_forcing.yaml? ds = xr.Dataset() ds["Pobs"] = pd.read_csv( self.directory / self.Pobs, sep=" ", index_col="DATE", parse_dates=True @@ -124,3 +83,41 @@ def to_xarray(self) -> xr.Dataset: "history": "Created by ewatercycle.plugins.hype.forcing.HypeForcing.to_xarray()", } return ds + + +def build_hype_recipe( + start_year: int, + end_year: int, + shape: Path, + dataset: Dataset | str | dict, +): + """Build an ESMValTool recipe for Hype forcing data. + + Args: + start_year: The start year of the recipe. + end_year: The end year of the recipe. + shape: The shape of the recipe. + dataset: Dataset to get forcing data from. + When string is given a predefined dataset is looked up in + :py:const:`ewatercycle.esmvaltool.datasets.DATASETS`. + When dict given it is passed to + :py:class:`ewatercycle.esmvaltool.models.Dataset` constructor. + + Returns: + The built recipe. 
+ """ + return ( + RecipeBuilder() + .title("Hype forcing data") + .dataset(dataset) + .start(start_year) + .end(end_year) + .shape(shape, decomposed=True) + .lump() + .add_variable("tas", units="degC") + .add_variable("tasmin", units="degC") + .add_variable("tasmax", units="degC") + .add_variable("pr", units="kg m-2 d-1") + .script("hydrology/hype.py") + .build() + ) diff --git a/src/ewatercycle/plugins/lisflood/forcing.py b/src/ewatercycle/plugins/lisflood/forcing.py index 207aa804..97b924d2 100644 --- a/src/ewatercycle/plugins/lisflood/forcing.py +++ b/src/ewatercycle/plugins/lisflood/forcing.py @@ -1,20 +1,14 @@ -"""Forcing related functionality for lisflood""" +"""Forcing related functionality for lisflood.""" import logging from pathlib import Path -from typing import Literal, Optional +from typing import Optional, cast -from esmvalcore.experimental import get_recipe - -from ewatercycle.base.forcing import ( - DATASETS, - DefaultForcing, - _session, - run_esmvaltool_recipe, -) +from ewatercycle.base.forcing import DefaultForcing +from ewatercycle.esmvaltool.builder import RecipeBuilder +from ewatercycle.esmvaltool.schema import Dataset, Recipe, TargetGrid from ewatercycle.plugins.lisflood.lisvap import create_lisvap_config, lisvap from ewatercycle.util import ( - data_files_from_recipe_output, fit_extents_to_grid, get_extents, get_time, @@ -46,24 +40,26 @@ class LisfloodForcing(DefaultForcing): PrefixET0: Path to a NetCDF or pcraster file with potential (reference) evapotranspiration rate data - .. code-block:: python + Example: - from ewatercycle.forcing import sources + To load forcing data from a directory: - forcing = sources.LisfloodForcing( - directory='/data/lisflood-forcings-case1', - start_time='1989-01-02T00:00:00Z', - end_time='1999-01-02T00:00:00Z', - PrefixPrecipitation='tp.nc', - PrefixTavg='ta.nc', - PrefixE0='e.nc', - PrefixES0='es.nc', - PrefixET0='et.nc' - ) + .. 
code-block:: python + + from ewatercycle.forcing import sources + + forcing = sources.LisfloodForcing( + directory='/data/lisflood-forcings-case1', + start_time='1989-01-02T00:00:00Z', + end_time='1999-01-02T00:00:00Z', + PrefixPrecipitation='tp.nc', + PrefixTavg='ta.nc', + PrefixE0='e.nc', + PrefixES0='es.nc', + PrefixET0='et.nc' + ) """ - # type ignored because pydantic wants literal in base class while mypy does not - model: Literal["lisflood"] = "lisflood" # type: ignore PrefixPrecipitation: str = "pr.nc" PrefixTavg: str = "tas.nc" PrefixE0: str = "e0.nc" @@ -74,7 +70,7 @@ class LisfloodForcing(DefaultForcing): @classmethod def generate( # type: ignore cls, - dataset: str, + dataset: Dataset | str | dict, start_time: str, end_time: str, shape: str, @@ -88,7 +84,11 @@ def generate( # type: ignore `ESMValTool `_. Args: - dataset: Name of the source dataset. See :py:const:`~ewatercycle.base.forcing.DATASETS`. + dataset: Dataset to get forcing data from. + When string is given a predefined dataset is looked up in + :py:const:`ewatercycle.esmvaltool.datasets.DATASETS`. + When dict given it is passed to + :py:class:`ewatercycle.esmvaltool.models.Dataset` constructor. start_time: Start time of forcing in UTC and ISO format string e.g. 'YYYY-MM-DDTHH:MM:SSZ'. end_time: nd time of forcing in UTC and ISO format string e.g. @@ -105,87 +105,41 @@ def generate( # type: ignore centers. - ``start_latitude``: latitude at the center of the first grid cell. - ``end_latitude``: longitude at the center of the last grid cell. - - ``step_latitude``: constant latitude distance between grid cell centers. + - ``step_latitude``: constant latitude distance between grid cell \ + centers. - Make sure the target grid matches up with the grid in the mask_map and files in parameterset_dir. + Make sure the target grid matches up with the grid in the mask_map + and files in parameterset_dir. Also the `shape` should be within the target grid. 
- If not given will guestimate target grid from `shape` using a 0.1x0.1 grid with 0.05 offset. - run_lisvap: Lisvap specification. Default is None. If lisvap should be run then + If not given will guestimate target grid from `shape` + using a 0.1x0.1 grid with 0.05 offset. + run_lisvap: Lisvap specification. Default is None. + If lisvap should be run then give a dictionary with following key/value pairs: - lisvap_config: Name of Lisvap configuration file. - mask_map: A mask for the spatial selection. - This file should have same extent and resolution as parameter-set. + This file should have same extent and resolution + as parameter-set. - parameterset_dir: Directory of the parameter set. - Directory should contains the Lisvap config file and files the config points to. + Directory should contain the Lisvap config file + and files the config points to. """ - # load the ESMValTool recipe - recipe_name = "hydrology/recipe_lisflood.yml" - recipe = get_recipe(recipe_name) - - # model-specific updates to the recipe - preproc_names = ( - "general", - "daily_water", - "daily_temperature", - "daily_radiation", - "daily_windspeed", + # Cannot call super as we want recipe_output not forcing object + start_year = get_time(start_time).year + end_year = get_time(end_time).year + recipe = build_lisflood_recipe( + start_year=start_year, + end_year=end_year, + shape=Path(shape), + dataset=dataset, + target_grid=target_grid, ) - - basin = to_absolute_path(shape).stem - for preproc_name in preproc_names: - recipe.data["preprocessors"][preproc_name]["extract_shape"][ - "shapefile" - ] = shape - recipe.data["diagnostics"]["diagnostic_daily"]["scripts"]["script"][ - "catchment" - ] = basin - - if target_grid is None: - logger.warning("target_grid was not given, guestimating from shape") - step = 0.1 - target_grid = fit_extents_to_grid(get_extents(shape), step=step) - target_grid.update( - { - "step_longitude": step, - "step_latitude": step, - } - ) - for preproc_name in preproc_names: 
- preproc = recipe.data["preprocessors"][preproc_name] - # Remove stuff from old version of ESMValTool recipe, as regrid preproccesor takes care of region extraction. - if "extract_region" in preproc: - del preproc["extract_region"] - del preproc["custom_order"] - if "lon_offset" in preproc["regrid"]: - del preproc["regrid"]["lon_offset"] - if "lat_offset" in preproc["regrid"]: - del preproc["regrid"]["lat_offset"] - preproc["regrid"]["target_grid"] = target_grid - - recipe.data["datasets"] = [DATASETS[dataset]] - - variables = recipe.data["diagnostics"]["diagnostic_daily"]["variables"] - var_names = "pr", "tas", "tasmax", "tasmin", "tdps", "uas", "vas", "rsds" - - startyear = get_time(start_time).year - for var_name in var_names: - variables[var_name]["start_year"] = startyear - - endyear = get_time(end_time).year - for var_name in var_names: - variables[var_name]["end_year"] = endyear - - # set crop to false to keep the entire globe (time consuming) - # because lisflood parameter set is global i.e. - # recipe.data["preprocessors"]["general"]["extract_shape"]["crop"] = False - # However, lisflood diagnostics line 144 gives error - # ValueError: The 'longitude' DimCoord points array must be strictly monotonic. 
- - # generate forcing data and retrieve useful information - recipe_output = run_esmvaltool_recipe(recipe, directory) - directory, forcing_files = data_files_from_recipe_output(recipe_output) + forcing_files = cls._run_recipe( + recipe, directory=Path(directory) if directory else None + ) + directory = forcing_files["directory"] if run_lisvap: # Get lisvap specific options and make paths absolute @@ -206,15 +160,22 @@ def generate( # type: ignore f"{reindexed_forcing_directory}/{forcing_files[var_name]}", ) # Add lisvap file names + basin = Path(shape).stem for var_name in {"e0", "es0", "et0"}: forcing_files[ var_name - ] = f"lisflood_{dataset}_{basin}_{var_name}_{startyear}_{endyear}.nc" + ] = f"lisflood_{dataset}_{basin}_{var_name}_{start_year}_{end_year}.nc" + if isinstance(dataset, Dataset): + lisvap_dataset = dataset.dataset + elif isinstance(dataset, dict): + lisvap_dataset = dataset["dataset"] + else: + lisvap_dataset = dataset config_file = create_lisvap_config( parameterset_dir, str(reindexed_forcing_directory), - dataset, + lisvap_dataset, lisvap_config, mask_map, start_time, @@ -230,7 +191,7 @@ def generate( # type: ignore # TODO add a logger message about the results of lisvap using # exit_code, stdout, stderr # Instantiate forcing object based on generated data - generated_forcing = LisfloodForcing( + generated_forcing = cls( directory=str(reindexed_forcing_directory), start_time=start_time, end_time=end_time, @@ -249,8 +210,8 @@ def generate( # type: ignore ) logger.warning("%s", message) # instantiate forcing object based on generated data - generated_forcing = LisfloodForcing( - directory=directory, + generated_forcing = cls( + directory=Path(directory), start_time=start_time, end_time=end_time, shape=shape, @@ -259,3 +220,58 @@ def generate( # type: ignore ) generated_forcing.save() return generated_forcing + + +def build_lisflood_recipe( + start_year: int, + end_year: int, + shape: Path, + dataset: Dataset | str | dict, + target_grid: 
Optional[dict] = None, +) -> Recipe: + """Build an ESMValTool recipe for lisflood forcing. + + Args: + start_year: Start year of forcing. + end_year: End year of forcing. + shape: Path to a shape file. Used for spatial selection. + dataset: Dataset to get forcing data from. + When string is given a predefined dataset is looked up in + :py:const:`ewatercycle.esmvaltool.datasets.DATASETS`. + When dict given it is passed to + :py:class:`ewatercycle.esmvaltool.models.Dataset` constructor. + target_grid: the ``target_grid`` should be a ``dict`` with the + following keys: + start_longitude, end_longitude, start_latitude, end_latitude + """ + if target_grid is None: + logger.warning("target_grid was not given, guestimating from shape") + step = 0.1 + target_grid = fit_extents_to_grid(get_extents(shape), step=step) + target_grid.update( + { + "step_longitude": step, + "step_latitude": step, + } + ) + + return ( + RecipeBuilder() + .title("Lisflood forcing recipe") + .description("Lisflood forcing recipe") + .dataset(dataset) + .start(start_year) + .end(end_year) + .regrid(target_grid=cast(TargetGrid, target_grid), scheme="linear") + .shape(shape, crop=True) + .add_variable("pr", units="kg m-2 d-1") + .add_variable("tas", units="degC") + # Rest of variables are inputs for lisvap + .add_variable("tasmin", units="degC") + .add_variable("tasmax", units="degC") + .add_variable("tdps", units="degC", mip="Eday") + .add_variables(["uas", "vas"]) + .add_variable("rsds", units="J m-2 day-1") + .script("hydrology/lisflood.py", {"catchment": shape.stem}) + .build() + ) diff --git a/src/ewatercycle/plugins/marrmot/forcing.py b/src/ewatercycle/plugins/marrmot/forcing.py index c3afcde5..44e08125 100644 --- a/src/ewatercycle/plugins/marrmot/forcing.py +++ b/src/ewatercycle/plugins/marrmot/forcing.py @@ -1,20 +1,20 @@ -"""Forcing related functionality for marrmot.""" +"""Forcing related functionality for MARRMoT.""" from datetime import datetime from pathlib import Path -from typing import 
Literal, Optional +from typing import Optional import pandas as pd import xarray as xr -from esmvalcore.experimental import get_recipe from scipy.io import loadmat -from ewatercycle.base.forcing import DATASETS, DefaultForcing, run_esmvaltool_recipe -from ewatercycle.util import get_time, to_absolute_path +from ewatercycle.base.forcing import DefaultForcing +from ewatercycle.esmvaltool.builder import RecipeBuilder +from ewatercycle.esmvaltool.schema import Dataset, Recipe class MarrmotForcing(DefaultForcing): - """Container for marrmot forcing data. + """Container for MARRMoT forcing data. Args: directory: Directory where forcing data files are stored. @@ -23,115 +23,86 @@ class MarrmotForcing(DefaultForcing): end_time: End time of forcing in UTC and ISO format string e.g. 'YYYY-MM-DDTHH:MM:SSZ'. shape: Path to a shape file. Used for spatial selection. - forcing_file: Matlab file that contains forcings for Marrmot + forcing_file: Matlab file that contains forcings for MARRMoT models. See format forcing file in `model implementation `_. - .. code-block:: python + Examples: - from ewatercycle.forcing import sources + From existing forcing data: - forcing = sources.MarrmotForcing( - 'marmot', - directory='/data/marrmot-forcings-case1', - start_time='1989-01-02T00:00:00Z', - end_time='1999-01-02T00:00:00Z', - forcing_file='marrmot-1989-1999.mat' - ) - """ + .. 
code-block:: python - # type ignored because pydantic wants literal in base class while mypy does not - model: Literal["marrmot"] = "marrmot" # type: ignore - forcing_file: Optional[str] = "marrmot.mat" + from ewatercycle.forcing import sources - @classmethod - def generate( # type: ignore - cls, - dataset: str, - start_time: str, - end_time: str, - shape: str, - directory: Optional[str] = None, - ) -> "MarrmotForcing": - # load the ESMValTool recipe - recipe_name = "hydrology/recipe_marrmot.yml" - recipe = get_recipe(recipe_name) - - # model-specific updates to the recipe - basin = to_absolute_path(shape).stem - recipe.data["preprocessors"]["daily"]["extract_shape"]["shapefile"] = shape - recipe.data["diagnostics"]["diagnostic_daily"]["scripts"]["script"][ - "basin" - ] = basin - - recipe.data["diagnostics"]["diagnostic_daily"]["additional_datasets"] = [ - DATASETS[dataset] - ] - - variables = recipe.data["diagnostics"]["diagnostic_daily"]["variables"] - var_names = "tas", "pr", "psl", "rsds", "rsdt" - - startyear = get_time(start_time).year - for var_name in var_names: - variables[var_name]["start_year"] = startyear - - endyear = get_time(end_time).year - for var_name in var_names: - variables[var_name]["end_year"] = endyear - - # generate forcing data and retrieve useful information - recipe_output = run_esmvaltool_recipe(recipe, directory) - task_output = recipe_output["diagnostic_daily/script"] - - # check that recipe output contains only one .mat file - matlab_files = [] - for datafile in task_output.files: - if datafile.path.suffix == ".mat": - matlab_files.append(datafile) - - if len(matlab_files) == 0: - raise FileNotFoundError( - "No .mat files found in output directory: " + str(directory) + forcing = sources.MarrmotForcing( + directory='/data/marrmot-forcings-case1', + start_time='1989-01-02T00:00:00Z', + end_time='1999-01-02T00:00:00Z', + forcing_file='marrmot-1989-1999.mat' ) - if len(matlab_files) > 1: - raise FileNotFoundError( - "More than one .mat 
files found in output directory: " + str(directory) + + Generate from ERA5 forcing dataset and Rhine. + + .. code-block:: python + + from ewatercycle.forcing import sources + from ewatercycle.testing.fixtures import rhine_shape + + shape = rhine_shape() + forcing = sources.MarrmotForcing.generate( + dataset='ERA5', + start_time='2000-01-01T00:00:00Z', + end_time='2001-01-01T00:00:00Z', + shape=shape, ) + """ - # everything ok so retreive paths - forcing_file: Path = matlab_files[0].path - directory = str(forcing_file.parent) + forcing_file: Optional[str] = "marrmot.mat" - # instantiate forcing object based on generated data - generated_forcing = MarrmotForcing( - directory=directory, - start_time=start_time, - end_time=end_time, + @classmethod + def _build_recipe( + cls, + start_time: datetime, + end_time: datetime, + shape: Path, + dataset: Dataset | str | dict, + **model_specific_options, + ): + return build_marrmot_recipe( + start_year=start_time.year, + end_year=end_time.year, shape=shape, - forcing_file=forcing_file.name, + dataset=dataset, ) - generated_forcing.save() - return generated_forcing + + @classmethod + def _recipe_output_to_forcing_arguments(cls, recipe_output, model_specific_options): + # key in recipe_output is concat of dataset, shape start year and end year + # for example 'marrmot_ERA5_Rhine_2000_2001.mat' + # instead of constructing key just use first and only value of dict + first_forcing_file = next(iter(recipe_output.values())) + return {"forcing_file": first_forcing_file} def to_xarray(self) -> xr.Dataset: - """Load forcing data from a matlab file into an xarray dataset. + """Load forcing data from a Matlab file into an xarray dataset. Returns: Dataset with forcing data. 
- - Example: - - >>> fn = forcing.directory / forcing.forcing_file - >>> ds = load_forcing_file(fn) - >>> ds - """ - dataset = loadmat(self.forcing_file, mat_dtype=True) - precip = dataset["forcing"]["precip"][0][0][0] - temp = dataset["forcing"]["temp"][0][0][0] - pet = dataset["forcing"]["pet"][0][0][0] - forcing_start = datetime(*map(int, dataset["time_start"][0][:3])) # type: ignore - forcing_end = datetime(*map(int, dataset["time_end"][0][:3])) # type: ignore + if self.directory is None or self.forcing_file is None: + raise ValueError("Directory or forcing_file is not set") + fn = self.directory / self.forcing_file + dataset = loadmat(fn, mat_dtype=True) + # Generated forcing with ewatercycle has shape (1, ) + # Mat files from elsewhere can have shape (, 1) + precip = dataset["forcing"]["precip"][0][0].flatten() + temp = dataset["forcing"]["temp"][0][0].flatten() + pet = dataset["forcing"]["pet"][0][0].flatten() + time_start = dataset["time_start"][0][:3] + forcing_start = datetime(*map(int, time_start)) # type: ignore + time_end = dataset["time_end"][0][:3] + forcing_end = datetime(*map(int, time_end)) # type: ignore # store data as a pandas Series (deliberately keep default time: 00:00) index = pd.date_range(forcing_start, forcing_end, name="time") lat, lon = dataset["data_origin"][0] @@ -164,3 +135,38 @@ def to_xarray(self) -> xr.Dataset: "history": "Created by ewatercycle.plugins.marrmot.forcing.MarrmotForcing.to_xarray()", }, ) + + +def build_marrmot_recipe( + start_year: int, + end_year: int, + shape: Path, + dataset: Dataset | str | dict, +) -> Recipe: + """Build an ESMValTool recipe for generating forcing for MARRMoT. + + Args: + start_year: Start year of forcing. + end_year: End year of forcing. + shape: Path to a shape file. Used for spatial selection. + dataset: Dataset to get forcing data from. + When string is given a predefined dataset is looked up in + :py:const:`ewatercycle.esmvaltool.datasets.DATASETS`. 
+ When dict given it is passed to + :py:class:`ewatercycle.esmvaltool.models.Dataset` constructor. + """ + return ( + RecipeBuilder() + .title("Generate forcing for the MARRMoT hydrological model") + .description("Generate forcing for the MARRMoT hydrological model") + .dataset(dataset) + .start(start_year) + .end(end_year) + .shape(shape) + # TODO do lumping in recipe preprocessor instead of in diagnostic script + # .lump() + .add_variables(("tas", "pr", "psl", "rsds")) + .add_variable("rsdt", mip="CFday") + .script("hydrology/marrmot.py", {"basin": shape.stem}) + .build() + ) diff --git a/src/ewatercycle/plugins/pcrglobwb/forcing.py b/src/ewatercycle/plugins/pcrglobwb/forcing.py index 7a05f5ea..cf5e5484 100644 --- a/src/ewatercycle/plugins/pcrglobwb/forcing.py +++ b/src/ewatercycle/plugins/pcrglobwb/forcing.py @@ -1,25 +1,17 @@ -"""Forcing related functionality for pcrglobwb""" +"""Forcing related functionality for PCR-GLOBWB.""" -from typing import Literal, Optional +from datetime import datetime +from pathlib import Path +from typing import Optional -from esmvalcore.experimental import get_recipe - -from ewatercycle.base.forcing import ( - DATASETS, - DefaultForcing, - _session, - run_esmvaltool_recipe, -) -from ewatercycle.util import ( - data_files_from_recipe_output, - get_extents, - get_time, - to_absolute_path, -) +from ewatercycle.base.forcing import DefaultForcing +from ewatercycle.esmvaltool.builder import RecipeBuilder +from ewatercycle.esmvaltool.schema import ClimateStatistics, Dataset, ExtractRegion +from ewatercycle.util import get_time class PCRGlobWBForcing(DefaultForcing): - """Container for pcrglobwb forcing data. + """Container for PCR-GLOBWB forcing data. Args: directory: Directory where forcing data files are stored. @@ -32,17 +24,53 @@ class PCRGlobWBForcing(DefaultForcing): 'precipitation.nc'. temperatureNC (optional): Input file for temperature data. 
Defaults to 'temperature.nc' + + Example: + + To generate forcing from ERA5 for the Rhine catchment for 2000-2001: + + .. code-block:: python + + from pathlib import Path + + from rich import print + + from ewatercycle.plugins.pcrglobwb.forcing import PCRGlobWBForcing + + shape = Path("./src/ewatercycle/testing/data/Rhine/Rhine.shp") + + forcing = PCRGlobWBForcing.generate( + dataset='ERA5', + start_time='2000-01-01T00:00:00Z', + end_time='2001-01-01T00:00:00Z', + shape=shape.absolute(), + start_time_climatology='2000-01-01T00:00:00Z', + end_time_climatology='2001-01-01T00:00:00Z', + ) + print(forcing) + + Gives something like: + + .. code-block:: python + + PCRGlobWBForcing( + model='pcrglobwb', + start_time='2000-01-01T00:00:00Z', + end_time='2001-01-01T00:00:00Z', + directory=PosixPath('/home/verhoes/git/eWaterCycle/ewatercycle/esmvaltool_output/ewcrephogjj0pt_20230816_095928/work/diagnostic/script'), + shape=PosixPath('/home/verhoes/git/eWaterCycle/ewatercycle/src/ewatercycle/testing/data/Rhine/Rhine.shp'), + precipitationNC='pcrglobwb_OBS6_ERA5_reanaly_*_day_pr_2000-2001_Rhine.nc', + temperatureNC='pcrglobwb_OBS6_ERA5_reanaly_*_day_tas_2000-2001_Rhine.nc' + ) """ - # type ignored because pydantic wants literal in base class while mypy does not - model: Literal["pcrglobwb"] = "pcrglobwb" # type: ignore precipitationNC: Optional[str] = "precipitation.nc" temperatureNC: Optional[str] = "temperature.nc" @classmethod def generate( # type: ignore cls, - dataset: str, + dataset: str | Dataset | dict, start_time: str, end_time: str, shape: str, @@ -57,7 +85,11 @@ def generate( # type: ignore `ESMValTool `_. Args: - dataset: Name of the source dataset. See :py:const:`~ewatercycle.base.forcing.DATASETS`. + dataset: Dataset to get forcing data from. + When string is given a predefined dataset is looked up in + :py:const:`ewatercycle.esmvaltool.datasets.DATASETS`. + When dict given it is passed to + :py:class:`ewatercycle.esmvaltool.models.Dataset` constructor. 
start_time: Start time of forcing in UTC and ISO format string e.g. 'YYYY-MM-DDTHH:MM:SSZ'. end_time: nd time of forcing in UTC and ISO format string e.g. @@ -71,70 +103,112 @@ def generate( # type: ignore contain `start_longitude`, `end_longitude`, `start_latitude`, `end_latitude` """ - # load the ESMValTool recipe - recipe_name = "hydrology/recipe_pcrglobwb.yml" - recipe = get_recipe(recipe_name) - - # model-specific updates to the recipe - preproc_names = ( - "crop_basin", - "preproc_pr", - "preproc_tas", - "preproc_pr_clim", - "preproc_tas_clim", + # method is replicated here to document the model specific options + return super(PCRGlobWBForcing, cls).generate( + dataset=dataset, + start_time=start_time, + end_time=end_time, + shape=shape, + start_time_climatology=start_time_climatology, + end_time_climatology=end_time_climatology, + extract_region=extract_region, + directory=directory, ) - if dataset is not None: - recipe.data["diagnostics"]["diagnostic_daily"]["additional_datasets"] = [ - DATASETS[dataset] - ] - - basin = to_absolute_path(shape).stem - recipe.data["diagnostics"]["diagnostic_daily"]["scripts"]["script"][ - "basin" - ] = basin - - if extract_region is None: - extract_region = get_extents(shape) - for preproc_name in preproc_names: - recipe.data["preprocessors"][preproc_name][ - "extract_region" - ] = extract_region - - variables = recipe.data["diagnostics"]["diagnostic_daily"]["variables"] - var_names = "tas", "pr" - - startyear = get_time(start_time).year - for var_name in var_names: - variables[var_name]["start_year"] = startyear - - endyear = get_time(end_time).year - for var_name in var_names: - variables[var_name]["end_year"] = endyear - - var_names_climatology = "pr_climatology", "tas_climatology" - - startyear_climatology = get_time(start_time_climatology).year - for var_name in var_names_climatology: - variables[var_name]["start_year"] = startyear_climatology - - endyear_climatology = get_time(end_time_climatology).year - for var_name 
in var_names_climatology: - variables[var_name]["end_year"] = endyear_climatology + @classmethod + def _build_recipe( + cls, + start_time: datetime, + end_time: datetime, + shape: Path, + dataset: Dataset | str | dict = "ERA5", + **model_specific_options, + ): + start_time_climatology = model_specific_options["start_time_climatology"] + end_time_climatology = model_specific_options["end_time_climatology"] + extract_region = model_specific_options["extract_region"] + return build_pcrglobwb_recipe( + start_year=start_time.year, + end_year=end_time.year, + shape=shape, + dataset=dataset, + start_year_climatology=get_time(start_time_climatology).year, + end_year_climatology=get_time(end_time_climatology).year, + extract_region=extract_region, + ) - # generate forcing data and retrieve useful information - recipe_output = run_esmvaltool_recipe(recipe, directory) - # TODO dont open recipe output, but use standard name from ESMValTool - directory, forcing_files = data_files_from_recipe_output(recipe_output) + @classmethod + def _recipe_output_to_forcing_arguments(cls, recipe_output, model_specific_options): + # TODO dont rename recipe output, but use standard name from ESMValTool + return { + "precipitationNC": recipe_output["pr"], + "temperatureNC": recipe_output["tas"], + } + + +def build_pcrglobwb_recipe( + start_year: int, + end_year: int, + shape: Path, + start_year_climatology: int, + end_year_climatology: int, + dataset: Dataset | str | dict, + extract_region: ExtractRegion | None = None, +): + """Build an ESMValTool recipe for PCR-GLOBWB forcing. - # instantiate forcing object based on generated data - generated_forcing = PCRGlobWBForcing( - directory=directory, - start_time=start_time, - end_time=end_time, - shape=shape, - precipitationNC=forcing_files["pr"], - temperatureNC=forcing_files["tas"], + Args: + start_year: The start year of the recipe. + end_year: The end year of the recipe. + shape: The shape of the region to extract. 
+ start_year_climatology: The start year of the climatology. + end_year_climatology: The end year of the climatology. + dataset: Dataset to use for the recipe. + When string is given a predefined dataset is looked up in + :py:const:`ewatercycle.esmvaltool.datasets.DATASETS`. + When dict given it is passed to + :py:class:`ewatercycle.esmvaltool.models.Dataset` constructor. + extract_region: The region to extract. + When not given uses extents of shape. + + Returns: + The recipe for PCR-GLOBWB forcing. + """ + partial = ( + RecipeBuilder() + .title("PCR-GLOBWB forcing recipe") + .description("PCR-GLOBWB forcing recipe") + .dataset(dataset) + .start(start_year) + .end(end_year) + ) + if extract_region is None: + partial = partial.region_by_shape(shape) + else: + partial = partial.region( + start_longitude=extract_region["start_longitude"], + end_longitude=extract_region["end_longitude"], + start_latitude=extract_region["start_latitude"], + end_latitude=extract_region["end_latitude"], + ) + return ( + partial.add_variable("pr", units="kg m-2 d-1") + .add_variable("tas") + .add_variable( + "pr_climatology", + units="kg m-2 d-1", + stats=ClimateStatistics(operator="mean", period="day"), + short_name="pr", + start_year=start_year_climatology, + end_year=end_year_climatology, + ) + .add_variable( + "tas_climatology", + stats=ClimateStatistics(operator="mean", period="day"), + short_name="tas", + start_year=start_year_climatology, + end_year=end_year_climatology, ) - generated_forcing.save() - return generated_forcing + .script("hydrology/pcrglobwb.py", {"basin": shape.stem}) + .build() + ) diff --git a/src/ewatercycle/plugins/wflow/forcing.py b/src/ewatercycle/plugins/wflow/forcing.py index 60bb9714..9536a07d 100644 --- a/src/ewatercycle/plugins/wflow/forcing.py +++ b/src/ewatercycle/plugins/wflow/forcing.py @@ -1,18 +1,11 @@ """Forcing related functionality for wflow.""" +from datetime import datetime from pathlib import Path -from tempfile import NamedTemporaryFile -from 
typing import Dict, Literal, Optional +from typing import Dict, Optional -from esmvalcore.experimental import get_recipe -from ruamel.yaml import YAML - -from ewatercycle.base.forcing import ( - DATASETS, - DefaultForcing, - _session, - run_esmvaltool_recipe, -) -from ewatercycle.util import get_extents, get_time, to_absolute_path +from ewatercycle.base.forcing import DefaultForcing +from ewatercycle.esmvaltool.builder import RecipeBuilder +from ewatercycle.esmvaltool.schema import Dataset class WflowForcing(DefaultForcing): @@ -35,8 +28,6 @@ class WflowForcing(DefaultForcing): Inflow (str) = None: Variable name of inflow data in input file. """ - # type ignored because pydantic wants literal in base class while mypy does not - model: Literal["wflow"] = "wflow" # type: ignore netcdfinput: str = "inmaps.nc" Precipitation: str = "/pr" # noqa: N803 EvapoTranspiration: str = "/pet" @@ -46,7 +37,7 @@ class WflowForcing(DefaultForcing): @classmethod def generate( # type: ignore cls, - dataset: str, + dataset: str | Dataset | dict, start_time: str, end_time: str, shape: str, @@ -60,7 +51,11 @@ def generate( # type: ignore `ESMValTool `_. Args: - dataset: Name of the source dataset. See :py:const:`~ewatercycle.base.forcing.DATASETS`. + dataset: Dataset to get forcing data from. + When string is given a predefined dataset is looked up in + :py:const:`ewatercycle.esmvaltool.datasets.DATASETS`. + When dict given it is passed to + :py:class:`ewatercycle.esmvaltool.models.Dataset` constructor. start_time: Start time of forcing in UTC and ISO format string e.g. 'YYYY-MM-DDTHH:MM:SSZ'. end_time: nd time of forcing in UTC and ISO format string e.g. @@ -68,56 +63,100 @@ def generate( # type: ignore shape: Path to a shape file. Used for spatial selection. directory: Directory in which forcing should be written. If not given will create timestamped directory. - dem_file: Name of the dem_file to use. Also defines the basin - param. + dem_file: Name of the dem_file to use. 
extract_region: Region specification, dictionary must contain `start_longitude`, `end_longitude`, `start_latitude`, `end_latitude` """ - # load the ESMValTool recipe - recipe_name = "hydrology/recipe_wflow.yml" - recipe = get_recipe(recipe_name) - - basin = to_absolute_path(shape).stem - recipe.data["diagnostics"]["wflow_daily"]["scripts"]["script"]["basin"] = basin - - # model-specific updates - script = recipe.data["diagnostics"]["wflow_daily"]["scripts"]["script"] - script["dem_file"] = dem_file - - if extract_region is None: - extract_region = get_extents(shape, pad=3) - recipe.data["preprocessors"]["rough_cutout"]["extract_region"] = extract_region - - recipe.data["diagnostics"]["wflow_daily"]["additional_datasets"] = [ - DATASETS[dataset] - ] - - variables = recipe.data["diagnostics"]["wflow_daily"]["variables"] - var_names = "tas", "pr", "psl", "rsds", "rsdt" - - startyear = get_time(start_time).year - for var_name in var_names: - variables[var_name]["start_year"] = startyear - - endyear = get_time(end_time).year - for var_name in var_names: - variables[var_name]["end_year"] = endyear - - # generate forcing data and retrieve useful information - recipe_output = run_esmvaltool_recipe(recipe, directory) - forcing_data = recipe_output["wflow_daily/script"].data_files[0] - - forcing_file = forcing_data.path - directory = str(forcing_file.parent) - - # instantiate forcing object based on generated data - generated_forcing = WflowForcing( - directory=directory, + return super(WflowForcing, cls).generate( + dataset=dataset, start_time=start_time, end_time=end_time, shape=shape, - netcdfinput=forcing_file.name, + dem_file=dem_file, + directory=directory, + extract_region=extract_region, + ) + + @classmethod + def _build_recipe( + cls, + start_time: datetime, + end_time: datetime, + shape: Path, + dataset: Dataset | str | dict, + **model_specific_options + ): + extract_region = model_specific_options["extract_region"] + return build_wflow_recipe( + 
start_year=start_time.year, + end_year=end_time.year, + shape=shape, + dataset=dataset, + dem_file=model_specific_options["dem_file"], + extract_region=extract_region, + ) + + @classmethod + def _recipe_output_to_forcing_arguments(cls, recipe_output, model_specific_options): + first_file = next(iter(recipe_output.values())) + return { + "netcdfinput": first_file, + } + + +def build_wflow_recipe( + start_year: int, + end_year: int, + shape: Path, + dataset: Dataset | str | dict, + dem_file: str, + extract_region: Optional[Dict[str, float]] = None, +): + """Build an ESMValTool recipe for the WFlow hydrological model. + + Args: + start_year: Start year of forcing. + end_year: End year of forcing. + shape: Path to a shape file. Used for spatial selection. + dataset: Dataset to get forcing data from. + When string is given a predefined dataset is looked up in + :py:const:`ewatercycle.esmvaltool.datasets.DATASETS`. + When dict given it is passed to + :py:class:`ewatercycle.esmvaltool.models.Dataset` constructor. + dem_file: Name of the dem_file to use. + extract_region: Region specification, dictionary must + contain `start_longitude`, `end_longitude`, `start_latitude`, + `end_latitude` + """ + partial = ( + RecipeBuilder() + .title("Generate forcing for the WFlow hydrological model") + .dataset(dataset) + .start(start_year) + .end(end_year) + ) + if extract_region is None: + magic_pad = 3 # TODO why 3? 
+ partial = partial.region_by_shape(shape, pad=magic_pad) + else: + partial = partial.region( + start_longitude=extract_region["start_longitude"], + end_longitude=extract_region["end_longitude"], + start_latitude=extract_region["start_latitude"], + end_latitude=extract_region["end_latitude"], + ) + return ( + partial.add_variables(["tas", "pr", "psl", "rsds"]) + .add_variable("orog", mip="fx", start_year=False, end_year=False) + .add_variable("rsdt", mip="CFday") + .script( + "hydrology/wflow.py", + { + "basin": shape.stem, + "dem_file": dem_file, + "regrid": "area_weighted", + }, ) - generated_forcing.save() - return generated_forcing + .build() + ) diff --git a/src/ewatercycle/testing/fixtures.py b/src/ewatercycle/testing/fixtures.py index fe27879a..2c3063c8 100644 --- a/src/ewatercycle/testing/fixtures.py +++ b/src/ewatercycle/testing/fixtures.py @@ -1,3 +1,7 @@ +"""Pytest fixtures for ewatercycle. + +To use fixtures importe them in `conftest.py` file. +""" from pathlib import Path import pytest @@ -6,9 +10,13 @@ from ewatercycle.config import Configuration +def rhine_shape() -> Path: + return Path(__file__).parent / "data" / "Rhine" / "Rhine.shp" + + @pytest.fixture def sample_shape(): - return str(Path(__file__).parent / "data" / "Rhine" / "Rhine.shp") + return str(rhine_shape()) @pytest.fixture diff --git a/src/ewatercycle/testing/helpers.py b/src/ewatercycle/testing/helpers.py new file mode 100644 index 00000000..2fcdad26 --- /dev/null +++ b/src/ewatercycle/testing/helpers.py @@ -0,0 +1,51 @@ +from io import StringIO +from pathlib import Path + +import numpy as np +import pandas as pd +import xarray as xr +from ruamel.yaml import YAML + + +def reyamlify(value: str) -> str: + """Convert value to yaml object and dump it again. + + recipy.to_yaml() can generate a slightly different yaml string + than the expected string. + Call this method on expected string to get consistent results. 
+ + Args: + value: yaml string + + Returns: + yaml string + """ + yaml = YAML(typ="rt") + stream = StringIO() + yaml.dump(yaml.load(value), stream=stream) + return stream.getvalue() + + +def create_netcdf(var_name: str, filename: Path): + """Create a netcdf file with random data. + + Args: + var_name: Variable name + filename: Path to file + + Returns: + Path to file + """ + var = 15 + 8 * np.random.randn(2, 2, 3) + lon = [[-99.83, -99.32], [-99.79, -99.23]] + lat = [[42.25, 42.21], [42.63, 42.59]] + ds = xr.Dataset( + {var_name: (["longitude", "latitude", "time"], var)}, + coords={ + "lon": (["longitude", "latitude"], lon), + "lat": (["longitude", "latitude"], lat), + "time": pd.date_range("2014-09-06", periods=3), + }, + ) + ds.to_netcdf(filename) + return filename diff --git a/src/ewatercycle/util.py b/src/ewatercycle/util.py index c53a91ca..2456fd41 100644 --- a/src/ewatercycle/util.py +++ b/src/ewatercycle/util.py @@ -7,7 +7,6 @@ import numpy as np import xarray as xr from dateutil.parser import parse -from esmvalcore.experimental.recipe_output import RecipeOutput from shapely import geometry @@ -138,32 +137,6 @@ def fit_extents_to_grid(extents, step=0.1, offset=0.05, ndigits=2) -> Dict[str, } -def data_files_from_recipe_output( - recipe_output: RecipeOutput, -) -> Tuple[str, Dict[str, str]]: - """Get data files from a ESMVaLTool recipe output - - Expects first diagnostic task to produce files with single var each. 
- - Args: - recipe_output: ESMVaLTool recipe output - - Returns: - Tuple with directory of files and a - dict where key is cmor short name and value is relative path to NetCDF file - """ - data_files = list(recipe_output.values())[0].data_files - forcing_files = {} - for data_file in data_files: - dataset = data_file.load_xarray() - var_name = list(dataset.data_vars.keys())[0] - dataset.close() - forcing_files[var_name] = data_file.path.name - # TODO simplify (recipe_output.location) when next esmvalcore release is made - directory = str(data_files[0].path.parent) - return directory, forcing_files - - def to_absolute_path( input_path: Union[str, Path], parent: Optional[Path] = None, diff --git a/tests/plugins/hype/test_forcing.py b/tests/plugins/hype/test_forcing.py index 28c50520..36292773 100644 --- a/tests/plugins/hype/test_forcing.py +++ b/tests/plugins/hype/test_forcing.py @@ -5,26 +5,16 @@ import pytest import xarray as xr from esmvalcore.experimental import Recipe -from esmvalcore.experimental.recipe_output import OutputFile +from esmvalcore.experimental.recipe_info import RecipeInfo +from esmvalcore.experimental.recipe_output import RecipeOutput from ewatercycle.base.forcing import FORCING_YAML -from ewatercycle.forcing import sources +from ewatercycle.plugins.hype.forcing import HypeForcing, build_hype_recipe +from ewatercycle.testing.helpers import reyamlify -HypeForcing = sources["HypeForcing"] - -def test_plot(): - f = HypeForcing( - directory=".", - start_time="1989-01-02T00:00:00Z", - end_time="1999-01-02T00:00:00Z", - ) - with pytest.raises(NotImplementedError): - f.plot() - - -def create_txt(dir: Path, var_name: str) -> OutputFile: - fn = dir / f"{var_name}.txt" +def create_txt(path: Path, var_name: str) -> Path: + fn = path / f"{var_name}.txt" # Some dummy data shaped as the model expects it lines = [ "DATE 300730 300822", @@ -32,29 +22,31 @@ def create_txt(dir: Path, var_name: str) -> OutputFile: "1990-01-02 -0.308 -0.868", ] 
fn.write_text("\n".join(lines)) - return OutputFile(fn) + return fn @pytest.fixture def mock_recipe_run(monkeypatch, tmp_path): - """Overload the `run` method on esmvalcore Recipe's.""" + """Mock the `run` method on esmvalcore Recipe's.""" recorder = {} - class MockTaskOutput: - fake_forcing_path = str(tmp_path / "marrmot.mat") - files = ( - create_txt(tmp_path, "Tobs"), - create_txt(tmp_path, "TMINobs"), - create_txt(tmp_path, "TMAXobs"), - create_txt(tmp_path, "Pobs"), - ) + dummy_recipe_output = RecipeOutput( + { + "diagnostic/script": { + create_txt(tmp_path, "Tobs"): {}, + create_txt(tmp_path, "TMINobs"): {}, + create_txt(tmp_path, "TMAXobs"): {}, + create_txt(tmp_path, "Pobs"): {}, + } + }, + info=RecipeInfo({"diagnostics": {"diagnostic": {}}}, "script"), + ) def mock_run(self, session=None): - """Store recipe for inspection and return dummy output.""" + """Record run arguments for inspection and return dummy output.""" nonlocal recorder - recorder["data_during_run"] = self.data recorder["session"] = session - return {"diagnostic_daily/script": MockTaskOutput()} + return dummy_recipe_output monkeypatch.setattr(Recipe, "run", mock_run) return recorder @@ -72,100 +64,6 @@ def forcing(self, mock_recipe_run, sample_shape): shape=sample_shape, ) - @pytest.fixture - def expected_recipe(self): - return { - "datasets": [ - { - "dataset": "ERA5", - "project": "OBS6", - "tier": 3, - "type": "reanaly", - "version": 1, - } - ], - "diagnostics": { - "hype": { - "description": "HYPE input preprocessor for daily " "data", - "scripts": {"script": {"script": "hydrology/hype.py"}}, - "variables": { - "pr": { - "end_year": 1999, - "mip": "day", - "preprocessor": "water", - "start_year": 1989, - }, - "tas": { - "end_year": 1999, - "mip": "day", - "preprocessor": "temperature", - "start_year": 1989, - }, - "tasmax": { - "end_year": 1999, - "mip": "day", - "preprocessor": "temperature", - "start_year": 1989, - }, - "tasmin": { - "end_year": 1999, - "mip": "day", - "preprocessor": 
"temperature", - "start_year": 1989, - }, - }, - } - }, - "documentation": { - "authors": ["pelupessy_inti", "kalverla_peter"], - "maintainer": ["unmaintained"], - "projects": ["ewatercycle"], - "references": ["acknow_project"], - "title": "Generate forcing for the Hype hydrological model", - }, - "preprocessors": { - "preprocessor": { - "area_statistics": {"operator": "mean"}, - "extract_shape": { - "decomposed": True, - "method": "contains", - }, - }, - "temperature": { - "area_statistics": {"operator": "mean"}, - "convert_units": {"units": "degC"}, - "extract_shape": { - "decomposed": True, - "method": "contains", - }, - }, - "water": { - "area_statistics": {"operator": "mean"}, - "convert_units": {"units": "kg m-2 d-1"}, - "extract_shape": { - "decomposed": True, - "method": "contains", - }, - }, - }, - } - - def test_recipe_configured( - self, forcing, mock_recipe_run, expected_recipe, sample_shape - ): - actual = mock_recipe_run["data_during_run"] - # Remove absolute path so assert is easier - ps = actual["preprocessors"] - actual_shapefile = ps["preprocessor"]["extract_shape"]["shapefile"] - del ps["preprocessor"]["extract_shape"]["shapefile"] - # Remove long description and absolute path so assert is easier - actual_desc = actual["documentation"]["description"] - del actual["documentation"]["description"] - - assert actual == expected_recipe - assert str(actual_shapefile) == sample_shape - assert "Hype" in actual_desc - def test_result(self, forcing, tmp_path, sample_shape): expected = HypeForcing( directory=str(tmp_path), @@ -184,7 +82,6 @@ def test_saved_yaml_content(self, forcing, tmp_path): # shape should is not included in the yaml file expected = dedent( """\ - model: hype start_time: '1989-01-02T00:00:00Z' end_time: '1999-01-02T00:00:00Z' Pobs: Pobs.txt @@ -257,25 +154,102 @@ def test_with_directory(mock_recipe_run, sample_shape, tmp_path): assert mock_recipe_run["session"].session_dir == forcing_dir -def test_load_legacy_forcing(tmp_path): - 
(tmp_path / FORCING_YAML).write_text( - """\ - !HypeForcing - start_time: '1989-01-02T00:00:00Z' - end_time: '1999-01-02T00:00:00Z' - Pobs: Pobs.txt - TMAXobs: TMAXobs.txt - TMINobs: TMINobs.txt - Tobs: Tobs.txt - """ +def test_build_hype_recipe(sample_shape: str): + recipe = build_hype_recipe( + dataset="ERA5", + start_year=1990, + end_year=2001, + shape=Path(sample_shape), ) - - expected = HypeForcing( - start_time="1989-01-02T00:00:00Z", - end_time="1999-01-02T00:00:00Z", - directory=tmp_path, + recipe_as_string = recipe.to_yaml() + + # Should look similar to + # https://github.com/ESMValGroup/ESMValTool/blob/main/esmvaltool/recipes/hydrology/recipe_hype.yml + expected = dedent( + f"""\ +documentation: + title: Hype forcing data + description: '' + authors: + - unmaintained + projects: + - ewatercycle +datasets: +- dataset: ERA5 + project: OBS6 + tier: 3 + type: reanaly + version: 1 +preprocessors: + spatial: + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: true + area_statistics: + operator: mean + tas: + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: true + area_statistics: + operator: mean + convert_units: + units: degC + tasmin: + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: true + area_statistics: + operator: mean + convert_units: + units: degC + tasmax: + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: true + area_statistics: + operator: mean + convert_units: + units: degC + pr: + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: true + area_statistics: + operator: mean + convert_units: + units: kg m-2 d-1 +diagnostics: + diagnostic: + scripts: + script: + script: hydrology/hype.py + variables: + tas: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tas + tasmin: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tasmin + tasmax: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tasmax + pr: + start_year: 
1990 + end_year: 2001 + mip: day + preprocessor: pr + """ ) - - result = HypeForcing.load(tmp_path) - - assert result == expected + assert recipe_as_string == reyamlify(expected) diff --git a/tests/plugins/lisflood/test_forcing.py b/tests/plugins/lisflood/test_forcing.py index d52094b4..c9534809 100644 --- a/tests/plugins/lisflood/test_forcing.py +++ b/tests/plugins/lisflood/test_forcing.py @@ -1,25 +1,16 @@ +from pathlib import Path from textwrap import dedent from unittest.mock import patch import pytest import xarray as xr from esmvalcore.experimental import Recipe -from esmvalcore.experimental.recipe_output import DataFile +from esmvalcore.experimental.recipe_info import RecipeInfo +from esmvalcore.experimental.recipe_output import DataFile, RecipeOutput from ewatercycle.base.forcing import FORCING_YAML -from ewatercycle.forcing import sources - -LisfloodForcing = sources["LisfloodForcing"] - - -def test_plot(): - f = LisfloodForcing( - directory=".", - start_time="1989-01-02T00:00:00Z", - end_time="1999-01-02T00:00:00Z", - ) - with pytest.raises(NotImplementedError): - f.plot() +from ewatercycle.plugins.lisflood.forcing import LisfloodForcing, build_lisflood_recipe +from ewatercycle.testing.helpers import reyamlify def create_netcdf(var_name, filename): @@ -34,7 +25,7 @@ def create_netcdf(var_name, filename): name=var_name, ) ds.to_netcdf(filename) - return DataFile(filename) + return filename @pytest.fixture @@ -42,24 +33,27 @@ def mock_recipe_run(monkeypatch, tmp_path): """Overload the `run` method on esmvalcore Recipe's.""" data = {} - # TODO add lisvap input files once implemented, see issue #96 - class MockTaskOutput: - data_files = ( - create_netcdf("pr", tmp_path / "lisflood_pr.nc"), - create_netcdf("tas", tmp_path / "lisflood_tas.nc"), - create_netcdf("tasmax", tmp_path / "lisflood_tasmax.nc"), - create_netcdf("tasmin", tmp_path / "lisflood_tasmin.nc"), - create_netcdf("sfcWind", tmp_path / "lisflood_sfcWind.nc"), - create_netcdf("rsds", tmp_path / 
"lisflood_rsds.nc"), - create_netcdf("e", tmp_path / "lisflood_e.nc"), - ) + dummy_recipe_output = RecipeOutput( + { + "diagnostic/script": { + # TODO add lisvap input files once implemented, see issue #96 + create_netcdf("pr", tmp_path / "lisflood_pr.nc"): {}, + create_netcdf("tas", tmp_path / "lisflood_tas.nc"): {}, + create_netcdf("tasmax", tmp_path / "lisflood_tasmax.nc"): {}, + create_netcdf("tasmin", tmp_path / "lisflood_tasmin.nc"): {}, + create_netcdf("sfcWind", tmp_path / "lisflood_sfcWind.nc"): {}, + create_netcdf("rsds", tmp_path / "lisflood_rsds.nc"): {}, + create_netcdf("e", tmp_path / "lisflood_e.nc"): {}, + } + }, + info=RecipeInfo({"diagnostics": {"diagnostic": {}}}, "script"), + ) def mock_run(self, session=None): """Store recipe for inspection and return dummy output.""" nonlocal data - data["data_during_run"] = self.data data["session"] = session - return {"diagnostic_daily/script": MockTaskOutput()} + return dummy_recipe_output monkeypatch.setattr(Recipe, "run", mock_run) return data @@ -89,168 +83,6 @@ def forcing(self, mock_recipe_run, sample_shape, sample_target_grid): target_grid=sample_target_grid, ) - @pytest.fixture - def expected_recipe(self): - return { - "datasets": [ - { - "dataset": "ERA5", - "project": "OBS6", - "tier": 3, - "type": "reanaly", - "version": 1, - } - ], - "diagnostics": { - "diagnostic_daily": { - "description": "LISFLOOD input " - "preprocessor for " - "ERA-Interim and ERA5 " - "data", - "scripts": { - "script": { - "catchment": "Rhine", - "script": "hydrology/lisflood.py", - } - }, - "variables": { - "pr": { - "end_year": 1999, - "mip": "day", - "preprocessor": "daily_water", - "start_year": 1989, - }, - "rsds": { - "end_year": 1999, - "mip": "day", - "preprocessor": "daily_radiation", - "start_year": 1989, - }, - "tas": { - "end_year": 1999, - "mip": "day", - "preprocessor": "daily_temperature", - "start_year": 1989, - }, - "tasmax": { - "end_year": 1999, - "mip": "day", - "preprocessor": "daily_temperature", - 
"start_year": 1989, - }, - "tasmin": { - "end_year": 1999, - "mip": "day", - "preprocessor": "daily_temperature", - "start_year": 1989, - }, - "tdps": { - "end_year": 1999, - "mip": "Eday", - "preprocessor": "daily_temperature", - "start_year": 1989, - }, - "uas": { - "end_year": 1999, - "mip": "day", - "preprocessor": "daily_windspeed", - "start_year": 1989, - }, - "vas": { - "end_year": 1999, - "mip": "day", - "preprocessor": "daily_windspeed", - "start_year": 1989, - }, - }, - } - }, - "documentation": { - "authors": ["verhoeven_stefan", "kalverla_peter", "andela_bouwe"], - "maintainer": ["unmaintained"], - "projects": ["ewatercycle"], - "references": ["acknow_project"], - "description": "Recipe pre-process files for use in the " - "LISFLOOD hydrological model.\n", - "title": "Generate forcing for the Lisflood hydrological " "model", - }, - "preprocessors": { - "daily_radiation": { - "convert_units": {"units": "J m-2 " "day-1"}, - "extract_shape": {"crop": True, "method": "contains"}, - "regrid": { - "scheme": "linear", - "target_grid": { - "end_latitude": 52.25, - "end_longitude": 11.95, - "start_latitude": 46.25, - "start_longitude": 4.05, - "step_latitude": 0.1, - "step_longitude": 0.1, - }, - }, - }, - "daily_temperature": { - "convert_units": {"units": "degC"}, - "extract_shape": {"crop": True, "method": "contains"}, - "regrid": { - "scheme": "linear", - "target_grid": { - "end_latitude": 52.25, - "end_longitude": 11.95, - "start_latitude": 46.25, - "start_longitude": 4.05, - "step_latitude": 0.1, - "step_longitude": 0.1, - }, - }, - }, - "daily_water": { - "convert_units": {"units": "kg m-2 d-1"}, - "extract_shape": {"crop": True, "method": "contains"}, - "regrid": { - "scheme": "linear", - "target_grid": { - "end_latitude": 52.25, - "end_longitude": 11.95, - "start_latitude": 46.25, - "start_longitude": 4.05, - "step_latitude": 0.1, - "step_longitude": 0.1, - }, - }, - }, - "daily_windspeed": { - "extract_shape": {"crop": True, "method": "contains"}, - 
"regrid": { - "scheme": "linear", - "target_grid": { - "end_latitude": 52.25, - "end_longitude": 11.95, - "start_latitude": 46.25, - "start_longitude": 4.05, - "step_latitude": 0.1, - "step_longitude": 0.1, - }, - }, - }, - "general": { - "extract_shape": {"crop": True, "method": "contains"}, - "regrid": { - "scheme": "linear", - "target_grid": { - "end_latitude": 52.25, - "end_longitude": 11.95, - "start_latitude": 46.25, - "start_longitude": 4.05, - "step_latitude": 0.1, - "step_longitude": 0.1, - }, - }, - }, - }, - } - def test_result(self, forcing, tmp_path, sample_shape): expected = LisfloodForcing( directory=str(tmp_path), @@ -262,27 +94,11 @@ def test_result(self, forcing, tmp_path, sample_shape): ) assert forcing == expected - def test_recipe_configured( - self, forcing, mock_recipe_run, expected_recipe, sample_shape - ): - actual = mock_recipe_run["data_during_run"] - # Remove absolute path so assert is easier - actual_shapefile = actual["preprocessors"]["general"]["extract_shape"][ - "shapefile" - ] - # Will also del other occurrences of shapefile due to extract shape object - # being shared between preprocessors - del actual["preprocessors"]["general"]["extract_shape"]["shapefile"] - - assert actual == expected_recipe - assert actual_shapefile == sample_shape - def test_saved_yaml_content(self, forcing, tmp_path): saved_forcing = (tmp_path / FORCING_YAML).read_text() # shape should is not included in the yaml file expected = dedent( """\ - model: lisflood start_time: '1989-01-02T00:00:00Z' end_time: '1999-01-02T00:00:00Z' PrefixPrecipitation: lisflood_pr.nc @@ -383,48 +199,6 @@ def write_mocked_lisvap_output(*_args, **_kwargs): assert forcing == expected -class TestGenerateForcingWithoutTargetGrid: - def test_recipe_configured(self, mock_recipe_run, sample_shape): - LisfloodForcing.generate( - dataset="ERA5", - start_time="1989-01-02T00:00:00Z", - end_time="1999-01-02T00:00:00Z", - shape=sample_shape, - ) - - actual = mock_recipe_run["data_during_run"] 
- - # Extent of sample_shape fitted to 0.1x0.1 grid with 0.05 offset - expected_target_grid = { - "end_latitude": 52.25, - "end_longitude": 11.95, - "start_latitude": 46.25, - "start_longitude": 4.05, - "step_latitude": 0.1, - "step_longitude": 0.1, - } - assert ( - actual["preprocessors"]["general"]["regrid"]["target_grid"] - == expected_target_grid - ) - assert ( - actual["preprocessors"]["daily_water"]["regrid"]["target_grid"] - == expected_target_grid - ) - assert ( - actual["preprocessors"]["daily_temperature"]["regrid"]["target_grid"] - == expected_target_grid - ) - assert ( - actual["preprocessors"]["daily_radiation"]["regrid"]["target_grid"] - == expected_target_grid - ) - assert ( - actual["preprocessors"]["daily_windspeed"]["regrid"]["target_grid"] - == expected_target_grid - ) - - def test_generate_with_directory( mock_recipe_run, sample_shape, tmp_path, sample_target_grid ): @@ -464,3 +238,445 @@ def test_load_legacy_forcing(tmp_path): result = LisfloodForcing.load(tmp_path) assert result == expected + + +def test_build_lisflood_recipe_with_targetgrid(sample_shape: str): + recipe = build_lisflood_recipe( + dataset="ERA5", + start_year=1990, + end_year=2001, + shape=Path(sample_shape), + target_grid={ + "start_longitude": 3, + "start_latitude": 46, + "end_longitude": 12, + "end_latitude": 55, + "step_longitude": 0.1, + "step_latitude": 0.1, + }, + ) + recipe_as_string = recipe.to_yaml() + + # Should look similar to + # https://github.com/ESMValGroup/ESMValTool/blob/main/esmvaltool/recipes/hydrology/recipe_lisflood.yml + expected = dedent( + f"""\ +documentation: + title: Lisflood forcing recipe + description: Lisflood forcing recipe + authors: + - unmaintained + projects: + - ewatercycle +datasets: +- dataset: ERA5 + project: OBS6 + tier: 3 + type: reanaly + version: 1 +preprocessors: + spatial: + regrid: + scheme: linear + target_grid: + start_longitude: 3 + start_latitude: 46 + end_longitude: 12 + end_latitude: 55 + step_longitude: 0.1 + step_latitude: 
0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + pr: + regrid: + scheme: linear + target_grid: + start_longitude: 3 + start_latitude: 46 + end_longitude: 12 + end_latitude: 55 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + convert_units: + units: kg m-2 d-1 + tas: + regrid: + scheme: linear + target_grid: + start_longitude: 3 + start_latitude: 46 + end_longitude: 12 + end_latitude: 55 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + convert_units: + units: degC + tasmin: + regrid: + scheme: linear + target_grid: + start_longitude: 3 + start_latitude: 46 + end_longitude: 12 + end_latitude: 55 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + convert_units: + units: degC + tasmax: + regrid: + scheme: linear + target_grid: + start_longitude: 3 + start_latitude: 46 + end_longitude: 12 + end_latitude: 55 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + convert_units: + units: degC + tdps: + regrid: + scheme: linear + target_grid: + start_longitude: 3 + start_latitude: 46 + end_longitude: 12 + end_latitude: 55 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + convert_units: + units: degC + uas: + regrid: + scheme: linear + target_grid: + start_longitude: 3 + start_latitude: 46 + end_longitude: 12 + end_latitude: 55 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + vas: + regrid: + scheme: linear + target_grid: + start_longitude: 3 + start_latitude: 46 + end_longitude: 12 + end_latitude: 55 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: 
false + rsds: + regrid: + scheme: linear + target_grid: + start_longitude: 3 + start_latitude: 46 + end_longitude: 12 + end_latitude: 55 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + convert_units: + units: J m-2 day-1 +diagnostics: + diagnostic: + scripts: + script: + script: hydrology/lisflood.py + catchment: Rhine + variables: + pr: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: pr + tas: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tas + tasmin: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tasmin + tasmax: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tasmax + tdps: + start_year: 1990 + end_year: 2001 + mip: Eday + preprocessor: tdps + uas: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: uas + vas: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: vas + rsds: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: rsds + """ + ) + assert recipe_as_string == reyamlify(expected) + + +def test_build_lisflood_recipe_without_targetgrid(sample_shape: str): + recipe = build_lisflood_recipe( + dataset="ERA5", + start_year=1990, + end_year=2001, + shape=Path(sample_shape), + ) + recipe_as_string = recipe.to_yaml() + + # Should look similar to + # https://github.com/ESMValGroup/ESMValTool/blob/main/esmvaltool/recipes/hydrology/recipe_lisflood.yml + expected = dedent( + f"""\ +documentation: + title: Lisflood forcing recipe + description: Lisflood forcing recipe + authors: + - unmaintained + projects: + - ewatercycle +datasets: +- dataset: ERA5 + project: OBS6 + tier: 3 + type: reanaly + version: 1 +preprocessors: + spatial: + regrid: + scheme: linear + target_grid: + start_longitude: 4.05 + start_latitude: 46.25 + end_longitude: 11.95 + end_latitude: 52.25 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + pr: + regrid: + 
scheme: linear + target_grid: + start_longitude: 4.05 + start_latitude: 46.25 + end_longitude: 11.95 + end_latitude: 52.25 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + convert_units: + units: kg m-2 d-1 + tas: + regrid: + scheme: linear + target_grid: + start_longitude: 4.05 + start_latitude: 46.25 + end_longitude: 11.95 + end_latitude: 52.25 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + convert_units: + units: degC + tasmin: + regrid: + scheme: linear + target_grid: + start_longitude: 4.05 + start_latitude: 46.25 + end_longitude: 11.95 + end_latitude: 52.25 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + convert_units: + units: degC + tasmax: + regrid: + scheme: linear + target_grid: + start_longitude: 4.05 + start_latitude: 46.25 + end_longitude: 11.95 + end_latitude: 52.25 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + convert_units: + units: degC + tdps: + regrid: + scheme: linear + target_grid: + start_longitude: 4.05 + start_latitude: 46.25 + end_longitude: 11.95 + end_latitude: 52.25 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + convert_units: + units: degC + uas: + regrid: + scheme: linear + target_grid: + start_longitude: 4.05 + start_latitude: 46.25 + end_longitude: 11.95 + end_latitude: 52.25 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + vas: + regrid: + scheme: linear + target_grid: + start_longitude: 4.05 + start_latitude: 46.25 + end_longitude: 11.95 + end_latitude: 52.25 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + rsds: + 
regrid: + scheme: linear + target_grid: + start_longitude: 4.05 + start_latitude: 46.25 + end_longitude: 11.95 + end_latitude: 52.25 + step_longitude: 0.1 + step_latitude: 0.1 + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + convert_units: + units: J m-2 day-1 +diagnostics: + diagnostic: + scripts: + script: + script: hydrology/lisflood.py + catchment: Rhine + variables: + pr: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: pr + tas: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tas + tasmin: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tasmin + tasmax: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tasmax + tdps: + start_year: 1990 + end_year: 2001 + mip: Eday + preprocessor: tdps + uas: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: uas + vas: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: vas + rsds: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: rsds + """ + ) + assert recipe_as_string == reyamlify(expected) diff --git a/tests/plugins/lisflood/test_model.py b/tests/plugins/lisflood/test_model.py index 45ba3c86..bcf271b7 100644 --- a/tests/plugins/lisflood/test_model.py +++ b/tests/plugins/lisflood/test_model.py @@ -15,11 +15,10 @@ from ewatercycle.forcing import sources from ewatercycle.parameter_sets import example_parameter_sets from ewatercycle.plugins.lisflood.config import XmlConfig +from ewatercycle.plugins.lisflood.forcing import LisfloodForcing from ewatercycle.plugins.lisflood.model import Lisflood from ewatercycle.testing.fake_models import FailingModel -LisfloodForcing = sources["LisfloodForcing"] - @pytest.fixture(scope="session") def mocked_config(tmp_path_factory: TempPathFactory): @@ -40,6 +39,12 @@ def find_values_in_xml(tree, name): return set(values) +# TODO the download can take a long time (> 4 minutes) +# as it downloads over 500Mb +# we could make it quicker by creating +# a fake parameter set and 
forcing, +# but then how do we make sure the fakes are correct? +@pytest.mark.skip("Too slow") class TestLFlatlonUseCase: @pytest.fixture(scope="session") def parameterset(self, mocked_config): @@ -53,7 +58,7 @@ def generate_forcing(self, tmp_path, parameterset: ParameterSet): forcing_dir = tmp_path / "forcing" forcing_dir.mkdir() meteo_dir = Path(parameterset.directory) / "meteo" - # Create the case where forcing data arenot part of parameter_set + # Create the case where forcing data are not part of parameter_set for file in meteo_dir.glob("*.nc"): shutil.copy(file, forcing_dir / f"my{file.stem}.nc") diff --git a/tests/plugins/marrmot/test_forcing.py b/tests/plugins/marrmot/test_forcing.py index 08070e23..72dc97b3 100644 --- a/tests/plugins/marrmot/test_forcing.py +++ b/tests/plugins/marrmot/test_forcing.py @@ -1,25 +1,15 @@ from pathlib import Path from textwrap import dedent +import numpy as np import pytest from esmvalcore.experimental import Recipe -from esmvalcore.experimental.recipe_output import OutputFile +from esmvalcore.experimental.recipe_info import RecipeInfo +from esmvalcore.experimental.recipe_output import RecipeOutput from ewatercycle.base.forcing import FORCING_YAML -from ewatercycle.forcing import sources - -MarrmotForcing = sources["MarrmotForcing"] - - -def test_plot(): - forcing = MarrmotForcing( - directory=".", - start_time="1989-01-02T00:00:00Z", - end_time="1999-01-02T00:00:00Z", - forcing_file="marrmot.mat", - ) - with pytest.raises(NotImplementedError): - forcing.plot() +from ewatercycle.plugins.marrmot.forcing import MarrmotForcing, build_marrmot_recipe +from ewatercycle.testing.helpers import reyamlify @pytest.fixture @@ -27,16 +17,20 @@ def mock_recipe_run(monkeypatch, tmp_path): """Overload the `run` method on esmvalcore Recipe's.""" recorder = {} - class MockTaskOutput: - fake_forcing_path = str(tmp_path / "marrmot.mat") - files = (OutputFile(fake_forcing_path),) + dummy_recipe_output = RecipeOutput( + { + "diagnostic/script": { + 
str(tmp_path / "marrmot.mat"): {}, + } + }, + info=RecipeInfo({"diagnostics": {"diagnostic": {}}}, "script"), + ) def mock_run(self, session=None): """Store recipe for inspection and return dummy output.""" nonlocal recorder - recorder["data_during_run"] = self.data recorder["session"] = session - return {"diagnostic_daily/script": MockTaskOutput()} + return dummy_recipe_output monkeypatch.setattr(Recipe, "run", mock_run) return recorder # noqa: R504 @@ -131,28 +125,11 @@ def test_result(self, forcing, tmp_path, sample_shape): ) assert forcing == expected - def test_recipe_configured( - self, forcing, mock_recipe_run, reference_recipe, sample_shape - ): - actual = mock_recipe_run["data_during_run"] - # Remove long description and absolute path so assert is easier - actual_desc = actual["documentation"]["description"] - del actual["documentation"]["description"] - actual_shapefile = actual["preprocessors"]["daily"]["extract_shape"][ - "shapefile" - ] - del actual["preprocessors"]["daily"]["extract_shape"]["shapefile"] - - assert actual == reference_recipe - assert actual_shapefile == sample_shape - assert "MARRMoT" in actual_desc - def test_saved_yaml_content(self, forcing, tmp_path): saved_forcing = (tmp_path / FORCING_YAML).read_text() # shape should is not included in the yaml file expected = dedent( """\ - model: marrmot start_time: '1989-01-02T00:00:00Z' end_time: '1999-01-02T00:00:00Z' forcing_file: marrmot.mat @@ -223,40 +200,17 @@ def test_generate_with_directory(mock_recipe_run, sample_shape, tmp_path): def test_generate_no_output_raises(monkeypatch, sample_shape): """Should raise when there is no .mat file in output.""" - class MockTaskOutput: - files = () - - def failing_recipe_run(self, session): - return {"diagnostic_daily/script": MockTaskOutput} - - monkeypatch.setattr(Recipe, "run", failing_recipe_run) - - with pytest.raises(FileNotFoundError): - MarrmotForcing.generate( - dataset="ERA5", - start_time="1989-01-02T00:00:00Z", - 
end_time="1999-01-02T00:00:00Z", - shape=sample_shape, - ) - - -def test_generate_wrong_output_raises(monkeypatch, sample_shape, tmp_path): - """Should raise when there are more than one .mat files in output.""" - - class MockTaskOutput: - fake_forcing_path1 = str(tmp_path / "marrmot.mat") - fake_forcing_path2 = str(tmp_path / "marrmot.mat") - files = ( - OutputFile(fake_forcing_path1), - OutputFile(fake_forcing_path2), - ) + dummy_recipe_output = RecipeOutput( + {"diagnostic/script": {}}, + info=RecipeInfo({"diagnostics": {"diagnostic": {}}}, "script"), + ) def failing_recipe_run(self, session): - return {"diagnostic_daily/script": MockTaskOutput} + return dummy_recipe_output monkeypatch.setattr(Recipe, "run", failing_recipe_run) - with pytest.raises(FileNotFoundError): + with pytest.raises(ValueError): MarrmotForcing.generate( dataset="ERA5", start_time="1989-01-02T00:00:00Z", @@ -284,3 +238,118 @@ def test_load_legacy_forcing(tmp_path): result = MarrmotForcing.load(tmp_path) assert result == expected + + +def test_build_marrmot_recipe(sample_shape: str): + recipe = build_marrmot_recipe( + dataset="ERA5", + start_year=1990, + end_year=2001, + shape=Path(sample_shape), + ) + recipe_as_string = recipe.to_yaml() + + expected = dedent( + f"""\ +documentation: + title: Generate forcing for the MARRMoT hydrological model + description: Generate forcing for the MARRMoT hydrological model + authors: + - unmaintained + projects: + - ewatercycle +datasets: +- dataset: ERA5 + project: OBS6 + tier: 3 + type: reanaly + version: 1 +preprocessors: + spatial: + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + tas: + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + pr: + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + psl: + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + rsds: + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + rsdt: + 
extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false +diagnostics: + diagnostic: + scripts: + script: + script: hydrology/marrmot.py + basin: Rhine + variables: + tas: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tas + pr: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: pr + psl: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: psl + rsds: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: rsds + rsdt: + start_year: 1990 + end_year: 2001 + mip: CFday + preprocessor: rsdt + """ + ) + + assert recipe_as_string == reyamlify(expected) + + +def test_to_xarray(sample_marrmot_forcing_file: str): + directory = Path(sample_marrmot_forcing_file).parent + forcing_file = Path(sample_marrmot_forcing_file).name + forcing = MarrmotForcing( + start_time="1989-01-01T00:00:00Z", + end_time="1992-12-31T00:00:00Z", + directory=str(directory), + forcing_file=forcing_file, + ) + + ds = forcing.to_xarray() + + assert ds.attrs["title"] == "MARRMoT forcing data" + assert ds.precipitation.shape == (1, 1, 1461) + assert ds.temperature.shape == (1, 1, 1461) + assert ds.evspsblpot.shape == (1, 1, 1461) + assert ds.time.values[0] == np.datetime64("1989-01-01T00:00:00.000000000") + assert ds.time.values[-1] == np.datetime64("1992-12-31T00:00:00.000000000") + assert ds.lon.values == [87.49] + assert ds.lat.values == [35.29] diff --git a/tests/plugins/marrmot/test_model_m01.py b/tests/plugins/marrmot/test_model_m01.py index 46e84945..d0ca9fbf 100644 --- a/tests/plugins/marrmot/test_model_m01.py +++ b/tests/plugins/marrmot/test_model_m01.py @@ -55,7 +55,7 @@ def test_str(self, model, forcing_file): actual = str(model) expected_forcing = "".join( [ - "MarrmotForcing(model='marrmot', start_time='1989-01-01T00:00:00Z', ", + "MarrmotForcing(start_time='1989-01-01T00:00:00Z', ", f"end_time='1992-12-31T00:00:00Z', directory={repr(Path(forcing_file).parent)}, ", "shape=None, 
forcing_file='BMI_testcase_m01_BuffaloRiver_TN_USA.mat')", ] @@ -198,7 +198,7 @@ def generate_forcing(self, forcing_file): return forcing @pytest.fixture - def model(self, generate_forcing, mocked_config): + def model(self, generate_forcing: MarrmotForcing, mocked_config): m = MarrmotM01(forcing=generate_forcing) yield m # if m._bmi: diff --git a/tests/plugins/pcrglobwb/test_forcing.py b/tests/plugins/pcrglobwb/test_forcing.py index 737c510e..3a17dfca 100644 --- a/tests/plugins/pcrglobwb/test_forcing.py +++ b/tests/plugins/pcrglobwb/test_forcing.py @@ -1,33 +1,18 @@ from pathlib import Path from textwrap import dedent -import numpy as np -import pandas as pd import pytest -import xarray as xr from esmvalcore.experimental import Recipe -from esmvalcore.experimental.recipe_output import DataFile +from esmvalcore.experimental.recipe_info import RecipeInfo +from esmvalcore.experimental.recipe_output import RecipeOutput from ewatercycle.base.forcing import FORCING_YAML -from ewatercycle.forcing import sources - -PCRGlobWBForcing = sources["PCRGlobWBForcing"] - - -def create_netcdf(var_name, filename): - var = 15 + 8 * np.random.randn(2, 2, 3) - lon = [[-99.83, -99.32], [-99.79, -99.23]] - lat = [[42.25, 42.21], [42.63, 42.59]] - ds = xr.Dataset( - {var_name: (["longitude", "latitude", "time"], var)}, - coords={ - "lon": (["longitude", "latitude"], lon), - "lat": (["longitude", "latitude"], lat), - "time": pd.date_range("2014-09-06", periods=3), - }, - ) - ds.to_netcdf(filename) - return DataFile(filename) +from ewatercycle.plugins.pcrglobwb.forcing import ( + PCRGlobWBForcing, + build_pcrglobwb_recipe, +) +from ewatercycle.testing.helpers import create_netcdf, reyamlify +from ewatercycle.util import get_extents @pytest.fixture @@ -35,18 +20,22 @@ def mock_recipe_run(monkeypatch, tmp_path): """Overload the `run` method on esmvalcore Recipe's.""" data = {} - class MockTaskOutput: - data_files = ( - create_netcdf("pr", tmp_path / "pcrglobwb_pr.nc"), - create_netcdf("tas", 
tmp_path / "pcrglobwb_tas.nc"), - ) + dummy_recipe_output = RecipeOutput( + { + "diagnostic/script": { + create_netcdf("pr", tmp_path / "pcrglobwb_pr.nc"): {}, + create_netcdf("tas", tmp_path / "pcrglobwb_tas.nc"): {}, + } + }, + info=RecipeInfo({"diagnostics": {"diagnostic": {}}}, "script"), + ) def mock_run(self, session=None): """Store recipe for inspection and return dummy output.""" nonlocal data data["data_during_run"] = self.data data["session"] = session - return {"diagnostic_daily/script": MockTaskOutput()} + return dummy_recipe_output monkeypatch.setattr(Recipe, "run", mock_run) return data @@ -85,7 +74,7 @@ def test_str(self, forcing, tmp_path, sample_shape): result = str(forcing) expected = "".join( [ - "model='pcrglobwb' start_time='1989-01-02T00:00:00Z' end_time='1999-01-02T00:00:00Z' ", + "start_time='1989-01-02T00:00:00Z' end_time='1999-01-02T00:00:00Z' ", f"directory={repr(tmp_path)} shape={repr(Path(sample_shape))} ", "precipitationNC='pcrglobwb_pr.nc' temperatureNC='pcrglobwb_tas.nc'", ] @@ -99,7 +88,6 @@ def test_saved_yaml_content(self, forcing, tmp_path): # shape should is not included in the yaml file expected = dedent( """\ - model: pcrglobwb start_time: '1989-01-02T00:00:00Z' end_time: '1999-01-02T00:00:00Z' precipitationNC: pcrglobwb_pr.nc @@ -160,3 +148,214 @@ def test_load_legacy_forcing(tmp_path): result = PCRGlobWBForcing.load(tmp_path) assert result == expected + + +def test_build_pcrglobwb_recipe(sample_shape: str): + recipe = build_pcrglobwb_recipe( + start_year=1990, + end_year=2001, + shape=Path(sample_shape), + start_year_climatology=1980, + end_year_climatology=1990, + dataset="ERA5", + ) + recipe_as_string = recipe.to_yaml() + + # Should look similar to + # https://github.com/ESMValGroup/ESMValTool/blob/main/esmvaltool/recipes/hydrology/recipe_pcrglobwb.yml + expected = dedent( + """\ +documentation: + title: PCR-GLOBWB forcing recipe + description: PCR-GLOBWB forcing recipe + authors: + - unmaintained + projects: + - 
ewatercycle +datasets: +- dataset: ERA5 + project: OBS6 + tier: 3 + type: reanaly + version: 1 +preprocessors: + spatial: + extract_region: + start_longitude: 4.1 + end_longitude: 11.9 + start_latitude: 46.3 + end_latitude: 52.2 + pr: + extract_region: + start_longitude: 4.1 + end_longitude: 11.9 + start_latitude: 46.3 + end_latitude: 52.2 + convert_units: + units: kg m-2 d-1 + tas: + extract_region: + start_longitude: 4.1 + end_longitude: 11.9 + start_latitude: 46.3 + end_latitude: 52.2 + pr_climatology: + extract_region: + start_longitude: 4.1 + end_longitude: 11.9 + start_latitude: 46.3 + end_latitude: 52.2 + convert_units: + units: kg m-2 d-1 + climate_statistics: + operator: mean + period: day + tas_climatology: + extract_region: + start_longitude: 4.1 + end_longitude: 11.9 + start_latitude: 46.3 + end_latitude: 52.2 + climate_statistics: + operator: mean + period: day +diagnostics: + diagnostic: + scripts: + script: + script: hydrology/pcrglobwb.py + basin: Rhine + variables: + pr: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: pr + tas: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tas + pr_climatology: + start_year: 1980 + end_year: 1990 + mip: day + preprocessor: pr_climatology + short_name: pr + tas_climatology: + start_year: 1980 + end_year: 1990 + mip: day + preprocessor: tas_climatology + short_name: tas + """ + ) + assert recipe_as_string == reyamlify(expected) + + +def test_build_pcrglobwb_recipe_with_region(sample_shape: str): + extents = get_extents(sample_shape, 2) + recipe = build_pcrglobwb_recipe( + start_year=1990, + end_year=2001, + shape=Path(sample_shape), + start_year_climatology=1980, + end_year_climatology=1990, + dataset="ERA5", + extract_region={ + "start_longitude": extents["start_longitude"], + "end_longitude": extents["end_longitude"], + "start_latitude": extents["start_latitude"], + "end_latitude": extents["end_latitude"], + }, + ) + recipe_as_string = recipe.to_yaml() + + expected = dedent( + """\ 
+documentation: + title: PCR-GLOBWB forcing recipe + description: PCR-GLOBWB forcing recipe + authors: + - unmaintained + projects: + - ewatercycle +datasets: +- dataset: ERA5 + project: OBS6 + tier: 3 + type: reanaly + version: 1 +preprocessors: + spatial: + extract_region: + start_longitude: 2.1 + end_longitude: 13.9 + start_latitude: 44.3 + end_latitude: 54.2 + pr: + extract_region: + start_longitude: 2.1 + end_longitude: 13.9 + start_latitude: 44.3 + end_latitude: 54.2 + convert_units: + units: kg m-2 d-1 + tas: + extract_region: + start_longitude: 2.1 + end_longitude: 13.9 + start_latitude: 44.3 + end_latitude: 54.2 + pr_climatology: + extract_region: + start_longitude: 2.1 + end_longitude: 13.9 + start_latitude: 44.3 + end_latitude: 54.2 + convert_units: + units: kg m-2 d-1 + climate_statistics: + operator: mean + period: day + tas_climatology: + extract_region: + start_longitude: 2.1 + end_longitude: 13.9 + start_latitude: 44.3 + end_latitude: 54.2 + climate_statistics: + operator: mean + period: day +diagnostics: + diagnostic: + scripts: + script: + script: hydrology/pcrglobwb.py + basin: Rhine + variables: + pr: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: pr + tas: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tas + pr_climatology: + start_year: 1980 + end_year: 1990 + mip: day + preprocessor: pr_climatology + short_name: pr + tas_climatology: + start_year: 1980 + end_year: 1990 + mip: day + preprocessor: tas_climatology + short_name: tas + """ + ) + assert recipe_as_string == reyamlify(expected) diff --git a/tests/plugins/wflow/test_forcing.py b/tests/plugins/wflow/test_forcing.py index 41218daf..197657ae 100644 --- a/tests/plugins/wflow/test_forcing.py +++ b/tests/plugins/wflow/test_forcing.py @@ -5,12 +5,12 @@ import pytest from esmvalcore.experimental.recipe import Recipe -from esmvalcore.experimental.recipe_output import DataFile +from esmvalcore.experimental.recipe_info import RecipeInfo +from 
esmvalcore.experimental.recipe_output import RecipeOutput from ewatercycle.base.forcing import FORCING_YAML -from ewatercycle.forcing import sources - -WflowForcing = sources["WflowForcing"] +from ewatercycle.plugins.wflow.forcing import WflowForcing, build_wflow_recipe +from ewatercycle.testing.helpers import create_netcdf, reyamlify @pytest.fixture @@ -18,16 +18,23 @@ def mock_recipe_run(monkeypatch, tmp_path): """Overload the `run` method on esmvalcore Recipe's.""" data = {} - class MockTaskOutput: - fake_forcing_path = str(tmp_path / "wflow_forcing.nc") - data_files = (DataFile(fake_forcing_path),) + dummy_recipe_output = RecipeOutput( + { + "diagnostic/script": { + # create_netcdf() writes single variable while + # actual implementation writes multiple variables + create_netcdf("pr", tmp_path / "wflow_forcing.nc"): {}, + } + }, + info=RecipeInfo({"diagnostics": {"diagnostic": {}}}, "script"), + ) def mock_run(self, session=None): """Store recipe for inspection and return dummy output.""" nonlocal data data["data_during_run"] = self.data data["session"] = session - return {"wflow_daily/script": MockTaskOutput()} + return dummy_recipe_output monkeypatch.setattr(Recipe, "run", mock_run) return data @@ -144,15 +151,11 @@ def test_result(self, forcing, tmp_path, sample_shape): ) assert forcing == expected - def test_recipe_configured(self, forcing, mock_recipe_run, reference_recipe): - assert mock_recipe_run["data_during_run"] == reference_recipe - def test_saved_yaml_content(self, forcing, tmp_path): saved_forcing = (tmp_path / FORCING_YAML).read_text() # shape should is not included in the yaml file expected = dedent( """\ - model: wflow start_time: '1989-01-02T00:00:00Z' end_time: '1999-01-02T00:00:00Z' netcdfinput: wflow_forcing.nc @@ -175,7 +178,7 @@ def test_str(self, forcing, tmp_path, sample_shape): result = str(forcing) expected = "".join( [ - "model='wflow' start_time='1989-01-02T00:00:00Z' end_time='1999-01-02T00:00:00Z' ", + 
"start_time='1989-01-02T00:00:00Z' end_time='1999-01-02T00:00:00Z' ", f"directory={repr(tmp_path)} shape={repr(Path(sample_shape))} ", "netcdfinput='wflow_forcing.nc' Precipitation='/pr' ", "EvapoTranspiration='/pet' Temperature='/tas' Inflow=None", @@ -226,3 +229,115 @@ def test_load_legacy_forcing(tmp_path): result = WflowForcing.load(tmp_path) assert result == expected + + +def test_build_wflow_recipe(sample_shape: str): + recipe = build_wflow_recipe( + dataset="ERA5", + start_year=1990, + end_year=2001, + shape=Path(sample_shape), + dem_file="wflow_parameterset/meuse/staticmaps/wflow_dem.map", + ) + recipe_as_string = recipe.to_yaml() + + # Should look similar to + # https://github.com/ESMValGroup/ESMValTool/blob/main/esmvaltool/recipes/hydrology/recipe_wflow.yml + expected = dedent( + f"""\ +documentation: + title: Generate forcing for the WFlow hydrological model + description: '' + authors: + - unmaintained + projects: + - ewatercycle +datasets: +- dataset: ERA5 + project: OBS6 + tier: 3 + type: reanaly + version: 1 +preprocessors: + spatial: + extract_region: + start_longitude: 1.1 + end_longitude: 14.9 + start_latitude: 43.3 + end_latitude: 55.2 + tas: + extract_region: + start_longitude: 1.1 + end_longitude: 14.9 + start_latitude: 43.3 + end_latitude: 55.2 + pr: + extract_region: + start_longitude: 1.1 + end_longitude: 14.9 + start_latitude: 43.3 + end_latitude: 55.2 + psl: + extract_region: + start_longitude: 1.1 + end_longitude: 14.9 + start_latitude: 43.3 + end_latitude: 55.2 + rsds: + extract_region: + start_longitude: 1.1 + end_longitude: 14.9 + start_latitude: 43.3 + end_latitude: 55.2 + orog: + extract_region: + start_longitude: 1.1 + end_longitude: 14.9 + start_latitude: 43.3 + end_latitude: 55.2 + rsdt: + extract_region: + start_longitude: 1.1 + end_longitude: 14.9 + start_latitude: 43.3 + end_latitude: 55.2 +diagnostics: + diagnostic: + scripts: + script: + script: hydrology/wflow.py + basin: Rhine + dem_file: 
wflow_parameterset/meuse/staticmaps/wflow_dem.map + regrid: area_weighted + variables: + tas: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tas + pr: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: pr + psl: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: psl + rsds: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: rsds + orog: + mip: fx + preprocessor: orog + rsdt: + start_year: 1990 + end_year: 2001 + mip: CFday + preprocessor: rsdt + """ + ) + assert recipe_as_string == reyamlify(expected) diff --git a/tests/src/base/test_forcing.py b/tests/src/base/test_forcing.py new file mode 100644 index 00000000..851c793e --- /dev/null +++ b/tests/src/base/test_forcing.py @@ -0,0 +1,151 @@ +from pathlib import Path +from shutil import copytree + +from ewatercycle.base.forcing import FORCING_YAML, GenericDistributedForcing + +# Use GenericDistributedForcing to test abstract DefaultForcing class + + +class TestGenericDistributedForcingWithExternalShape: + def test_save(self, tmp_path: Path, sample_shape: str): + forcing = GenericDistributedForcing( + directory=tmp_path, + shape=sample_shape, + start_time="2000-01-01T00:00:00Z", + end_time="2001-01-01T00:00:00Z", + pr="OBS6_ERA5_reanaly_*_day_pr_2000-2001.nc", + tas="OBS6_ERA5_reanaly_*_day_tas_2000-2001.nc", + tasmin="OBS6_ERA5_reanaly_*_day_tasmin_2000-2001.nc", + tasmax="OBS6_ERA5_reanaly_*_day_tasmax_2000-2001.nc", + ) + forcing.save() + + fn = tmp_path / FORCING_YAML + content = fn.read_text() + + expected = """\ +start_time: '2000-01-01T00:00:00Z' +end_time: '2001-01-01T00:00:00Z' +pr: OBS6_ERA5_reanaly_*_day_pr_2000-2001.nc +tas: OBS6_ERA5_reanaly_*_day_tas_2000-2001.nc +tasmin: OBS6_ERA5_reanaly_*_day_tasmin_2000-2001.nc +tasmax: OBS6_ERA5_reanaly_*_day_tasmax_2000-2001.nc +""" + + assert content == expected + + +class TestGenericDistributedForcingWithInternalShape: + def test_save(self, tmp_path: Path, sample_shape: str): + # Copy shape to tmp_path + 
shape_dir = Path(sample_shape).parent + copytree(shape_dir, tmp_path / shape_dir.name) + shape = tmp_path / shape_dir.name / Path(sample_shape).name + + forcing = GenericDistributedForcing( + directory=tmp_path, + shape=shape, + start_time="2000-01-01T00:00:00Z", + end_time="2001-01-01T00:00:00Z", + pr="OBS6_ERA5_reanaly_*_day_pr_2000-2001.nc", + tas="OBS6_ERA5_reanaly_*_day_tas_2000-2001.nc", + tasmin="OBS6_ERA5_reanaly_*_day_tasmin_2000-2001.nc", + tasmax="OBS6_ERA5_reanaly_*_day_tasmax_2000-2001.nc", + ) + forcing.save() + + fn = tmp_path / FORCING_YAML + content = fn.read_text() + + expected = """\ +start_time: '2000-01-01T00:00:00Z' +end_time: '2001-01-01T00:00:00Z' +shape: Rhine/Rhine.shp +pr: OBS6_ERA5_reanaly_*_day_pr_2000-2001.nc +tas: OBS6_ERA5_reanaly_*_day_tas_2000-2001.nc +tasmin: OBS6_ERA5_reanaly_*_day_tasmin_2000-2001.nc +tasmax: OBS6_ERA5_reanaly_*_day_tasmax_2000-2001.nc +""" + + assert content == expected + + +class TestGenericDistributedForcingWithoutShape: + def test_save(self, tmp_path: Path): + forcing = GenericDistributedForcing( + directory=tmp_path, + start_time="2000-01-01T00:00:00Z", + end_time="2001-01-01T00:00:00Z", + pr="OBS6_ERA5_reanaly_*_day_pr_2000-2001.nc", + tas="OBS6_ERA5_reanaly_*_day_tas_2000-2001.nc", + tasmin="OBS6_ERA5_reanaly_*_day_tasmin_2000-2001.nc", + tasmax="OBS6_ERA5_reanaly_*_day_tasmax_2000-2001.nc", + ) + forcing.save() + + fn = tmp_path / FORCING_YAML + content = fn.read_text() + + expected = """\ +start_time: '2000-01-01T00:00:00Z' +end_time: '2001-01-01T00:00:00Z' +pr: OBS6_ERA5_reanaly_*_day_pr_2000-2001.nc +tas: OBS6_ERA5_reanaly_*_day_tas_2000-2001.nc +tasmin: OBS6_ERA5_reanaly_*_day_tasmin_2000-2001.nc +tasmax: OBS6_ERA5_reanaly_*_day_tasmax_2000-2001.nc +""" + + assert content == expected + + def test_load(self, tmp_path: Path): + fn = tmp_path / FORCING_YAML + fn.write_text( + """\ +start_time: '2000-01-01T00:00:00Z' +end_time: '2001-01-01T00:00:00Z' +pr: OBS6_ERA5_reanaly_*_day_pr_2000-2001.nc +tas: 
OBS6_ERA5_reanaly_*_day_tas_2000-2001.nc +tasmin: OBS6_ERA5_reanaly_*_day_tasmin_2000-2001.nc +tasmax: OBS6_ERA5_reanaly_*_day_tasmax_2000-2001.nc + """ + ) + + forcing = GenericDistributedForcing.load(tmp_path) + + expected = GenericDistributedForcing( + directory=tmp_path, + start_time="2000-01-01T00:00:00Z", + end_time="2001-01-01T00:00:00Z", + pr="OBS6_ERA5_reanaly_*_day_pr_2000-2001.nc", + tas="OBS6_ERA5_reanaly_*_day_tas_2000-2001.nc", + tasmin="OBS6_ERA5_reanaly_*_day_tasmin_2000-2001.nc", + tasmax="OBS6_ERA5_reanaly_*_day_tasmax_2000-2001.nc", + ) + assert forcing == expected + + def test_load_legacy(self, tmp_path: Path): + fn = tmp_path / FORCING_YAML + fn.write_text( + """\ +!GenericDistributedForcing +start_time: '2000-01-01T00:00:00Z' +end_time: '2001-01-01T00:00:00Z' +pr: OBS6_ERA5_reanaly_*_day_pr_2000-2001.nc +tas: OBS6_ERA5_reanaly_*_day_tas_2000-2001.nc +tasmin: OBS6_ERA5_reanaly_*_day_tasmin_2000-2001.nc +tasmax: OBS6_ERA5_reanaly_*_day_tasmax_2000-2001.nc + """ + ) + + forcing = GenericDistributedForcing.load(tmp_path) + + expected = GenericDistributedForcing( + directory=tmp_path, + start_time="2000-01-01T00:00:00Z", + end_time="2001-01-01T00:00:00Z", + pr="OBS6_ERA5_reanaly_*_day_pr_2000-2001.nc", + tas="OBS6_ERA5_reanaly_*_day_tas_2000-2001.nc", + tasmin="OBS6_ERA5_reanaly_*_day_tasmin_2000-2001.nc", + tasmax="OBS6_ERA5_reanaly_*_day_tasmax_2000-2001.nc", + ) + assert forcing == expected diff --git a/tests/src/esmvaltool/__init__.py b/tests/src/esmvaltool/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/src/esmvaltool/test_builder.py b/tests/src/esmvaltool/test_builder.py new file mode 100644 index 00000000..d5ad4fd5 --- /dev/null +++ b/tests/src/esmvaltool/test_builder.py @@ -0,0 +1,251 @@ +from pathlib import Path +from textwrap import dedent + +from ewatercycle.esmvaltool.builder import ( + DEFAULT_DIAGNOSTIC_SCRIPT, + RecipeBuilder, + build_generic_distributed_forcing_recipe, + 
build_generic_lumped_forcing_recipe, +) +from ewatercycle.esmvaltool.schema import Dataset +from ewatercycle.testing.helpers import reyamlify + + +def test_build_esmvaltool_recipe(): + era5 = Dataset(dataset="ERA5", project="OBS6", tier=3, type="reanaly", version=1) + recipe = ( + RecipeBuilder() + .description( + "Recipe to generate forcing for a generic distributed hydrogeological model" + ) + .title("Generic distributed hydrogeological model forcing") + .dataset(era5) + .start(2020) + .end(2021) + .region( + start_longitude=40, + end_longitude=65, + start_latitude=25, + end_latitude=40, + ) + .add_variable("tas", units="degC") + .build() + ) + recipe_as_string = recipe.to_yaml() + + expected = dedent( + f"""\ +documentation: + title: Generic distributed hydrogeological model forcing + description: Recipe to generate forcing for a generic distributed hydrogeological + model + authors: + - unmaintained + projects: + - ewatercycle +datasets: +- dataset: ERA5 + project: OBS6 + tier: 3 + type: reanaly + version: 1 +preprocessors: + spatial: + extract_region: + start_longitude: 40 + end_longitude: 65 + start_latitude: 25 + end_latitude: 40 + tas: + extract_region: + start_longitude: 40 + end_longitude: 65 + start_latitude: 25 + end_latitude: 40 + convert_units: + units: degC +diagnostics: + diagnostic: + scripts: + script: + script: + {DEFAULT_DIAGNOSTIC_SCRIPT} + variables: + tas: + start_year: 2020 + end_year: 2021 + mip: day + preprocessor: tas + """ + ) + assert recipe_as_string == reyamlify(expected) + + +def test_build_generic_distributed_forcing_recipe(): + recipe = build_generic_distributed_forcing_recipe( + start_year=1990, + end_year=2001, + shape=Path("myshape.shp"), + ) + recipe_as_string = recipe.to_yaml() + + expected = dedent( + f"""\ +documentation: + title: Generic distributed forcing recipe + description: Generic distributed forcing recipe + authors: + - unmaintained + projects: + - ewatercycle +datasets: +- dataset: ERA5 + project: OBS6 + tier: 3 + 
type: reanaly + version: 1 +preprocessors: + spatial: + extract_shape: + shapefile: myshape.shp + crop: true + decomposed: false + pr: + extract_shape: + shapefile: myshape.shp + crop: true + decomposed: false + tas: + extract_shape: + shapefile: myshape.shp + crop: true + decomposed: false + tasmin: + extract_shape: + shapefile: myshape.shp + crop: true + decomposed: false + tasmax: + extract_shape: + shapefile: myshape.shp + crop: true + decomposed: false +diagnostics: + diagnostic: + scripts: + script: + script: {DEFAULT_DIAGNOSTIC_SCRIPT} + variables: + pr: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: pr + tas: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tas + tasmin: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tasmin + tasmax: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tasmax + """ + ) + assert recipe_as_string == reyamlify(expected) + + +def test_build_generic_lumped_forcing_recipe(sample_shape: str): + recipe = build_generic_lumped_forcing_recipe( + start_year=1990, + end_year=2001, + shape=Path(sample_shape), + ) + recipe_as_string = recipe.to_yaml() + + expected = dedent( + f"""\ +documentation: + title: Generic lumped forcing recipe + description: Generic lumped forcing recipe + authors: + - unmaintained + projects: + - ewatercycle +datasets: +- dataset: ERA5 + project: OBS6 + tier: 3 + type: reanaly + version: 1 +preprocessors: + spatial: + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + area_statistics: + operator: mean + pr: + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + area_statistics: + operator: mean + tas: + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + area_statistics: + operator: mean + tasmin: + extract_shape: + shapefile: {sample_shape} + crop: true + decomposed: false + area_statistics: + operator: mean + tasmax: + extract_shape: + shapefile: {sample_shape} + crop: true + 
decomposed: false + area_statistics: + operator: mean +diagnostics: + diagnostic: + scripts: + script: + script: {DEFAULT_DIAGNOSTIC_SCRIPT} + variables: + pr: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: pr + tas: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tas + tasmin: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tasmin + tasmax: + start_year: 1990 + end_year: 2001 + mip: day + preprocessor: tasmax + """ + ) + assert recipe_as_string == reyamlify(expected) diff --git a/tests/src/esmvaltool/test_run.py b/tests/src/esmvaltool/test_run.py new file mode 100644 index 00000000..f84be981 --- /dev/null +++ b/tests/src/esmvaltool/test_run.py @@ -0,0 +1,53 @@ +from pathlib import Path + +import pytest +import xarray as xr +from esmvalcore.experimental.recipe_output import RecipeInfo, RecipeOutput + +from ewatercycle.esmvaltool.run import _parse_recipe_output + + +def test_parse_recipe_output_with_nc_files(tmp_path: Path): + pr_ds = xr.Dataset({"pr": (["x"], [42])}, coords={"x": [1]}) + pr_fn = tmp_path / "pr.nc" + pr_ds.to_netcdf(pr_fn) + recipe_output = RecipeOutput( + {"diagnostic/script": {pr_fn: {}}}, + info=RecipeInfo({"diagnostics": {"diagnostic": {}}}, "script"), + ) + expected = { + "directory": str(tmp_path), + "pr": "pr.nc", + } + + forcing_files = _parse_recipe_output(recipe_output) + assert forcing_files == expected + + +def test_parse_recipe_output_with_txt_files(tmp_path: Path): + pr_fn = tmp_path / "pr.txt" + recipe_output = RecipeOutput( + {"diagnostic/script": {str(pr_fn): {}}}, + info=RecipeInfo({"diagnostics": {"diagnostic": {}}}, "script"), + ) + expected = { + "directory": str(tmp_path), + "pr": "pr.txt", + } + forcing_files = _parse_recipe_output(recipe_output) + assert forcing_files == expected + + +def test_parse_recipe_output_with_no_files(): + recipe_output = RecipeOutput( + {"diagnostic/script": {}}, + info=RecipeInfo({"diagnostics": {"diagnostic": {}}}, "script"), + ) + with 
pytest.raises(ValueError): + _parse_recipe_output(recipe_output) + + +def test_parse_recipe_output_with_no_diagnostic(): + recipe_output = RecipeOutput({}, info=RecipeInfo({"diagnostics": {}}, "script")) + with pytest.raises(IndexError): + _parse_recipe_output(recipe_output) diff --git a/tests/src/esmvaltool/test_schema.py b/tests/src/esmvaltool/test_schema.py new file mode 100644 index 00000000..da58f1f3 --- /dev/null +++ b/tests/src/esmvaltool/test_schema.py @@ -0,0 +1,65 @@ +from pathlib import Path + +from ewatercycle.esmvaltool.schema import Recipe +from ewatercycle.testing.helpers import reyamlify + + +def test_recipe_load(tmp_path: Path): + """Test Recipe.load.""" + recipe_path = tmp_path / "recipe.yml" + recipe_path.write_text( + """\ +documentation: + title: Test recipe + description: Test recipe + authors: + - Test author + projects: + - Test project + references: + - Test reference +""" + ) + + recipe = Recipe.load(recipe_path) + + expected = Recipe( + documentation={ + "title": "Test recipe", + "description": "Test recipe", + "authors": ["Test author"], + "projects": ["Test project"], + "references": ["Test reference"], + }, + ) + assert recipe == expected + + +def test_recipe_save(tmp_path: Path): + """Test Recipe.save.""" + recipe = Recipe( + documentation={ + "title": "Test recipe", + "description": "Test recipe", + "authors": ["Test author"], + "projects": ["Test project"], + "references": ["Test reference"], + }, + ) + recipe_path = tmp_path / "recipe.yml" + recipe.save(recipe_path) + + expected = """\ +documentation: + title: Test recipe + description: Test recipe + authors: + - Test author + projects: + - Test project + references: + - Test reference +""" + content = recipe_path.read_text() + + assert content == reyamlify(expected) diff --git a/tests/src/forcing/test_default.py b/tests/src/forcing/test_default.py deleted file mode 100644 index 67bdddff..00000000 --- a/tests/src/forcing/test_default.py +++ /dev/null @@ -1,174 +0,0 @@ -import 
logging - -import pytest - -from ewatercycle.base.forcing import FORCING_YAML, DefaultForcing -from ewatercycle.forcing import sources - - -def test_generate_unknown_model(sample_shape): - with pytest.raises(NotImplementedError): - sources.DefaultForcing.generate( - target_model="unknown", - dataset="ERA5", - start_time="1989-01-02T00:00:00Z", - end_time="1999-01-02T00:00:00Z", - shape=sample_shape, - ) - - -@pytest.fixture -def sample_forcing_yaml_legacy_content(): - return "".join( - [ - "!DefaultForcing\n", - "start_time: '1989-01-02T00:00:00Z'\n", - "end_time: '1999-01-02T00:00:00Z'\n", - "shape:\n", - ] - ) - - -@pytest.fixture -def sample_forcing_yaml_content(): - return "".join( - [ - "model: default\n", - "start_time: '1989-01-02T00:00:00Z'\n", - "end_time: '1999-01-02T00:00:00Z'\n", - ] - ) - - -@pytest.fixture -def sample_forcing_yaml_legacy_content_with_shape(): - return "".join( - [ - "!DefaultForcing\n", - "start_time: '1989-01-02T00:00:00Z'\n", - "end_time: '1999-01-02T00:00:00Z'\n", - "shape: myshape.shp\n", - ] - ) - - -@pytest.fixture -def sample_forcing_yaml_content_with_shape(): - return "".join( - [ - "model: default\n", - "start_time: '1989-01-02T00:00:00Z'\n", - "end_time: '1999-01-02T00:00:00Z'\n", - "shape: myshape.shp\n", - ] - ) - - -def test_save_with_shapefile_outside_forcing_dir( - sample_shape, tmp_path, sample_forcing_yaml_content, caplog -): - forcing = DefaultForcing( - directory=str(tmp_path), - start_time="1989-01-02T00:00:00Z", - end_time="1999-01-02T00:00:00Z", - shape=sample_shape, - ) - with caplog.at_level(logging.INFO): - forcing.save() - - file = tmp_path / FORCING_YAML - written = file.read_text() - expected = sample_forcing_yaml_content - assert written == expected - assert "is not in forcing directory" in caplog.text - - -def test_save_with_shapefile_inside_forcing_dir( - tmp_path, sample_forcing_yaml_content_with_shape, caplog -): - forcing = DefaultForcing( - directory=str(tmp_path), - 
start_time="1989-01-02T00:00:00Z", - end_time="1999-01-02T00:00:00Z", - shape=str(tmp_path / "myshape.shp"), - ) - with caplog.at_level(logging.INFO): - forcing.save() - - file = tmp_path / FORCING_YAML - written = file.read_text() - expected = sample_forcing_yaml_content_with_shape - assert written == expected - assert "is not in forcing directory" not in caplog.text - - -def test_save_without_shapefile(tmp_path, sample_forcing_yaml_content): - forcing = DefaultForcing( - directory=str(tmp_path), - start_time="1989-01-02T00:00:00Z", - end_time="1999-01-02T00:00:00Z", - ) - forcing.save() - - file = tmp_path / FORCING_YAML - written = file.read_text() - expected = sample_forcing_yaml_content - assert written == expected - - -def test_load_given(tmp_path, sample_forcing_yaml_content): - file = tmp_path / FORCING_YAML - file.write_text(sample_forcing_yaml_content) - result = DefaultForcing.load(tmp_path) - expected = DefaultForcing( - directory=str(tmp_path), - start_time="1989-01-02T00:00:00Z", - end_time="1999-01-02T00:00:00Z", - ) - assert result == expected - - -def test_load_given_legacy_content(tmp_path, sample_forcing_yaml_legacy_content): - file = tmp_path / FORCING_YAML - file.write_text(sample_forcing_yaml_legacy_content) - result = DefaultForcing.load(tmp_path) - expected = DefaultForcing( - directory=str(tmp_path), - start_time="1989-01-02T00:00:00Z", - end_time="1999-01-02T00:00:00Z", - ) - assert result == expected - - -def test_load_with_shape(tmp_path, sample_forcing_yaml_content_with_shape): - file = tmp_path / FORCING_YAML - file.write_text(sample_forcing_yaml_content_with_shape) - result = DefaultForcing.load(tmp_path) - expected = DefaultForcing( - directory=str(tmp_path), - start_time="1989-01-02T00:00:00Z", - end_time="1999-01-02T00:00:00Z", - shape=tmp_path / "myshape.shp", - ) - assert result == expected - - -def test_load_with_shape_given_legacy_content( - tmp_path, sample_forcing_yaml_legacy_content_with_shape -): - file = tmp_path / 
FORCING_YAML - file.write_text(sample_forcing_yaml_legacy_content_with_shape) - result = DefaultForcing.load(tmp_path) - expected = DefaultForcing( - directory=str(tmp_path), - start_time="1989-01-02T00:00:00Z", - end_time="1999-01-02T00:00:00Z", - shape=tmp_path / "myshape.shp", - ) - assert result == expected - - -def test_load_without_yaml(tmp_path): - with pytest.raises(FileNotFoundError) as excinfo: - DefaultForcing.load(tmp_path) - assert str(tmp_path / FORCING_YAML) in str(excinfo.value) diff --git a/tests/src/models/test_abstract.py b/tests/src/models/test_abstract.py index d5ef4f1f..a06d82f3 100644 --- a/tests/src/models/test_abstract.py +++ b/tests/src/models/test_abstract.py @@ -9,7 +9,10 @@ import pytest import xarray as xr from bmipy import Bmi +from grpc4bmi.bmi_optionaldest import OptionalDestBmi from numpy.testing import assert_array_equal +from pydantic.config import ConfigDict +from typing_extensions import Unpack from ewatercycle import CFG from ewatercycle.base.model import eWaterCycleModel @@ -30,14 +33,14 @@ def setup_config(tmp_path: Path): class MockedModel(eWaterCycleModel): - available_versions = ("0.4.2",) + bmi: OptionalDestBmi - def __init__( - self, - version: str = "0.4.2", - parameter_set: Optional[ParameterSet] = None, - ): - super().__init__(version, parameter_set) + def _make_bmi_instance(self) -> OptionalDestBmi: + return self.bmi + + @property + def version(self) -> str: + return "0.4.2" def setup(self, *args, **kwargs) -> Tuple[str, str]: if "bmi" in kwargs: @@ -65,8 +68,8 @@ def _coords_to_indices( return np.array([0]) @property - def parameters(self) -> Iterable[Tuple[str, Any]]: - return [("area", 42)] + def parameters(self) -> dict[str, Any]: + return {"area": 42} @pytest.fixture @@ -85,28 +88,17 @@ def bmi(MockedBmi): @pytest.fixture def model(bmi: Bmi): - mocked_model = MockedModel() - mocked_model.setup(bmi=bmi) + mocked_model = MockedModel(bmi=bmi) + mocked_model.setup() return mocked_model def test_construct(): with 
pytest.raises(TypeError) as excinfo: - AbstractModel(version="0.4.2") + eWaterCycleModel() msg = str(excinfo.value) assert "Can't instantiate abstract class" in msg - assert "setup" in msg - assert "parameters" in msg - - -def test_construct_with_unsupported_version(): - with pytest.raises(ValueError) as excinfo: - MockedModel(version="1.2.3") - - assert ( - "Supplied version 1.2.3 is not supported by this model. " - "Available versions are ('0.4.2',)." in str(excinfo.value) - ) + assert "_make_bmi_instance" in msg def test_setup(model):