Implement config, recipe loader & recipe runner. #18

Merged
merged 16 commits into from
Aug 8, 2023
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -17,7 +17,7 @@ jobs:
      fail-fast: false
      matrix:
        os: ['ubuntu-latest', 'macos-latest', 'windows-latest']
-        python-version: ['3.8', '3.9', '3.10']
+        python-version: ['3.9', '3.10', '3.11']
    env:
      MPLBACKEND: Agg # https://github.com/orgs/community/discussions/26434
    steps:
10 changes: 10 additions & 0 deletions docs/index.md
@@ -0,0 +1,10 @@
# Zampy

A tool for downloading Land Surface Model input data.

### Name origin

Named after *Zam*; [the Avestan language term for the Zoroastrian concept of "earth"](https://en.wikipedia.org/wiki/Zam).

## How to use Zampy
See the section ["using Zampy"](using_zampy.md).
49 changes: 49 additions & 0 deletions docs/using_zampy.md
@@ -0,0 +1,49 @@
# Using Zampy

## Installing Zampy
Zampy can be installed with pip, directly from the repository:
```bash
pip install git+https://github.com/EcoExtreML/zampy
```

## Configuration
Zampy needs to be configured with a simple configuration file.

You need to create this file in your user home's `.config` directory, at `~/.config/zampy/zampy_config.yml`. It should contain the following:

```yaml
working_directory: /path_to_a_working_directory/ #for example: /home/bart/Zampy
```
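Conceptually, Zampy parses this file into a dictionary and reads the `working_directory` key from it. A self-contained sketch of that flow (note: this is a simplified stand-in for the real loader, which uses PyYAML on `~/.config/zampy/zampy_config.yml`; here the single key is parsed by hand in a temporary directory):

```python
from pathlib import Path
import tempfile

# Hypothetical config contents, matching the example above.
config_text = "working_directory: /path_to_a_working_directory/\n"

with tempfile.TemporaryDirectory() as tmp:
    # Write the config to a throwaway location instead of ~/.config/zampy.
    config_path = Path(tmp) / "zampy_config.yml"
    config_path.write_text(config_text)

    # Parse the single "key: value" line by hand (the real loader uses yaml.safe_load).
    key, _, value = config_path.read_text().strip().partition(": ")
    config = {key: value}

print(config["working_directory"])  # /path_to_a_working_directory/
```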

## Formulating a recipe
A "recipe" is a file with a `.yml` extension and the following structure:

```yaml
name: "test_recipe"

download:
  years: [2020, 2020]
  bbox: [54, 6, 50, 3] # NESW

  datasets:
    era5:
      variables:
        - 10m_v_component_of_wind
        - surface_pressure

convert:
  convention: ALMA
  frequency: 1H # outputs at 1 hour frequency. Pandas-like freq-keyword.
  resolution: 0.5 # output resolution in degrees.
```

You can specify multiple datasets and multiple variables per dataset.
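For example, a single recipe can combine the two datasets registered in this PR (`era5` and `eth_canopy_height`). Note that the canopy-height variable name below is a placeholder; check the dataset documentation for the exact name:

```yaml
download:
  years: [2020, 2020]
  bbox: [54, 6, 50, 3] # NESW

  datasets:
    era5:
      variables:
        - 10m_v_component_of_wind
        - surface_pressure
    eth_canopy_height:
      variables:
        - height_of_vegetation # placeholder variable name
```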

## Running a recipe
Save this recipe to disk and run the following code in your shell:

```bash
zampy /home/username/path_to_file/simple_recipe.yml
```

This will execute the recipe (i.e. download, ingest, convert, resample and save the data).
57 changes: 57 additions & 0 deletions mkdocs.yml
@@ -0,0 +1,57 @@
site_name: Zampy Documentation

theme:
  name: material
  highlightjs: true
  hljs_languages:
    - yaml
    - python
    - bash
  features:
    - navigation.instant
    - navigation.tabs
    - navigation.tabs.sticky
    - content.code.copy

  palette:
    # Palette toggle for light mode
    - scheme: default
      toggle:
        icon: material/weather-sunny
        name: Switch to dark mode
      primary: light green
      accent: green

    # Palette toggle for dark mode
    - scheme: slate
      toggle:
        icon: material/weather-night
        name: Switch to light mode
      primary: blue grey
      accent: teal

plugins:
  - mkdocs-jupyter:
      include_source: True
  - search
  - mkdocstrings:
      handlers:
        python:
          options:
            docstring_style: google
            docstring_options:
              ignore_init_summary: no
            merge_init_into_class: yes
            show_submodules: no

markdown_extensions:
  - pymdownx.highlight:
      anchor_linenums: true
      line_spans: __span
      pygments_lang_class: true
  - pymdownx.inlinehilite
  - pymdownx.snippets
  - pymdownx.superfences

extra:
  generator: false
24 changes: 22 additions & 2 deletions pyproject.toml
@@ -22,7 +22,7 @@ name = "zampy"
description = "python package for getting Land Surface Model input data."
readme = "README.md"
license = "Apache-2.0"
-requires-python = ">=3.8, <3.11"
+requires-python = ">=3.9, <3.12"
authors = [
    {email = "[email protected]"},
    {name = "Bart Schilperoort, Yang Liu, Fakhereh Alidoost"}
@@ -43,17 +43,18 @@ classifiers = [
    "Operating System :: OS Independent",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
]
dependencies = [
    "requests",
    "pyyaml",
    "netcdf4",
    "numpy",
    "pandas",
    "matplotlib",
    "xarray",
    "scipy", # required for xarray.interpolate
    "rioxarray", # required for TIFF files
    "tqdm",
    "dask[diagnostics]",
@@ -66,6 +67,9 @@ dependencies = [
]
dynamic = ["version"]

[project.scripts]
zampy="zampy.cli:run_recipe"

[project.optional-dependencies]
dev = [
"bump2version",
@@ -75,10 +79,19 @@ dev = [
    "mypy",
    "types-requests", # type stubs for request lib
    "types-urllib3", # type stubs for url lib
    "types-PyYAML",
    "pytest",
    "pytest-cov",
    "pytest-mock",
    "pre-commit",
]
docs = [
    "mkdocs",
    "mkdocs-material",
    "mkdocs-jupyter",
    "mkdocstrings[python]",
    "mkdocs-gen-files",
]

[tool.hatch.envs.default]
features = ["dev"]
@@ -99,6 +112,13 @@ coverage = [
    "pytest --cov --cov-report term --cov-report xml --junitxml=xunit-result.xml tests/",
]

[tool.hatch.envs.docs]
features = ["docs"]

[tool.hatch.envs.docs.scripts]
build = ["mkdocs build"]
serve = ["mkdocs serve"]

# [tool.hatch.envs.conda]
# type = "conda"
# python = "3.10"
2 changes: 1 addition & 1 deletion sonar-project.properties
@@ -10,4 +10,4 @@ sonar.links.ci=https://github.com/EcoExtreML/zampy/actions
sonar.python.coverage.reportPaths=coverage.xml
sonar.python.xunit.reportPath=xunit-result.xml
sonar.python.pylint.reportPaths=pylint-report.txt
-sonar.python.version=3.8, 3.9, 3.10
+sonar.python.version=3.9, 3.10, 3.11
17 changes: 17 additions & 0 deletions src/zampy/cli.py
@@ -0,0 +1,17 @@
"""Implements CLI interface for Zampy."""
from pathlib import Path
import click
from zampy.recipe import RecipeManager


@click.command()
@click.argument("recipe", type=click.Path(exists=True, path_type=Path))
def run_recipe(recipe: Path) -> None:
    """Run the recipe using the CLI."""
    click.echo(f"Executing recipe: {recipe}")
    rm = RecipeManager(recipe)
    rm.run()


if __name__ == "__main__":
    run_recipe()
8 changes: 8 additions & 0 deletions src/zampy/datasets/__init__.py
@@ -6,3 +6,11 @@


__all__ = ["dataset_protocol", "validation", "EthCanopyHeight", "ERA5"]


# This object tracks which datasets are available.
DATASETS: dict[str, type[dataset_protocol.Dataset]] = {
    # All lowercase key.
    "era5": ERA5,
    "eth_canopy_height": EthCanopyHeight,
}
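The `DATASETS` registry above maps all-lowercase names to dataset classes, so a recipe can spell a dataset name in any capitalization and the runner can normalize it with `.lower()`. A self-contained sketch of the pattern (the classes and the `get_dataset` helper below are stand-ins for illustration, not the real zampy datasets):

```python
# Minimal stand-ins for zampy's dataset classes; the real ones implement
# download/ingest/load per the dataset protocol.
class ERA5:
    name = "era5"


class EthCanopyHeight:
    name = "eth_canopy_height"


# Registry with all-lowercase keys, mirroring zampy.datasets.DATASETS.
DATASETS = {
    "era5": ERA5,
    "eth_canopy_height": EthCanopyHeight,
}


def get_dataset(name: str):
    """Look up a dataset class, accepting any capitalization of its name."""
    try:
        return DATASETS[name.lower()]
    except KeyError:
        raise ValueError(f"Unknown dataset: '{name}'") from None


dataset = get_dataset("ERA5")()
print(dataset.name)  # era5
```

Keeping the keys lowercase in one place means user input never needs exact-case matching, and an unknown name fails with a clear error instead of a raw `KeyError`.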
133 changes: 133 additions & 0 deletions src/zampy/recipe.py
@@ -0,0 +1,133 @@
"""All functionality to read and execute Zampy recipes."""
from pathlib import Path
from typing import Any
import numpy as np
import yaml
from zampy.datasets import DATASETS
from zampy.datasets import converter
from zampy.datasets.dataset_protocol import Dataset
from zampy.datasets.dataset_protocol import SpatialBounds
from zampy.datasets.dataset_protocol import TimeBounds


def recipe_loader(recipe_path: Path) -> dict:
    """Load the yaml recipe into a dictionary, and do some validation."""
    with recipe_path.open() as f:
        recipe: dict = yaml.safe_load(f)

    if not all(key in recipe.keys() for key in ["name", "download", "convert"]):
        msg = (
            "One of the following items is missing from the recipe:\n"
            "name, download, convert."
        )
        raise ValueError(msg)

    if "datasets" not in recipe["download"].keys():
        msg = "No 'datasets' entry found in the recipe."
        raise ValueError(msg)

    if not all(
        key in recipe["convert"].keys()
        for key in ["convention", "frequency", "resolution"]
    ):
        msg = (
            "One of the following items is missing from the recipe:\n"
            "convention, frequency, resolution."
        )
        raise ValueError(msg)

    return recipe


def config_loader() -> dict:
    """Load the zampy config and validate the contents."""
    config_path = Path.home() / ".config" / "zampy" / "zampy_config.yml"

    if not config_path.exists():
        msg = f"No config file was found at '{config_path}'"
        raise FileNotFoundError(msg)

    with config_path.open() as f:
        config: dict = yaml.safe_load(f)

    if not isinstance(config, dict) or "working_directory" not in config.keys():
        msg = "No `working_directory` key found in the config file."
        raise ValueError(msg)

    return config


class RecipeManager:
    """The recipe manager is used to get the required info, and then run the recipe."""

    def __init__(self, recipe_path: Path) -> None:
        """Instantiate the recipe manager, using a prepared recipe."""
        # Load & parse recipe
        recipe = recipe_loader(recipe_path)

        self.start_year, self.end_year = recipe["download"]["years"]
        self.timebounds = TimeBounds(
            np.datetime64(f"{self.start_year}-01-01T00:00"),
            np.datetime64(f"{self.end_year}-12-31T23:59"),
        )
        self.spatialbounds = SpatialBounds(*recipe["download"]["bbox"])

        self.datasets: dict[str, Any] = recipe["download"]["datasets"]

        self.convention = recipe["convert"]["convention"]
        self.frequency = recipe["convert"]["frequency"]
        self.resolution = recipe["convert"]["resolution"]

        # Load & parse config
        config = config_loader()
        self.download_dir = Path(config["working_directory"]) / "download"
        self.ingest_dir = Path(config["working_directory"]) / "ingest"
        self.data_dir = (
            Path(config["working_directory"]) / "output" / str(recipe["name"])
        )

        # Create required directories if they do not exist yet:
        for dir in [self.data_dir, self.download_dir, self.ingest_dir]:
            dir.mkdir(parents=True, exist_ok=True)

    def run(self) -> None:
        """Run the full recipe."""
        for dataset_name in self.datasets:
            _dataset = DATASETS[dataset_name.lower()]
            dataset: Dataset = _dataset()
            variables: list[str] = self.datasets[dataset_name]["variables"]

            # Download the dataset
            dataset.download(
                download_dir=self.download_dir,
                time_bounds=self.timebounds,
                spatial_bounds=self.spatialbounds,
                variable_names=variables,
            )

            dataset.ingest(self.download_dir, self.ingest_dir)

            ds = dataset.load(
                ingest_dir=self.ingest_dir,
                time_bounds=self.timebounds,
                spatial_bounds=self.spatialbounds,
                variable_names=variables,
                resolution=self.resolution,
                regrid_method="flox",
            )

            ds = converter.convert(ds, dataset, convention=self.convention)

            ds = ds.resample(time=self.frequency).mean()

            comp = dict(zlib=True, complevel=5)
            encoding = {var: comp for var in ds.data_vars}
            fname = (  # e.g. "era5_2010-2020.nc"
                f"{dataset_name.lower()}_{self.start_year}-{self.end_year}.nc"
            )
            ds.to_netcdf(path=self.data_dir / fname, encoding=encoding)

        print(
            "Finished running the recipe. Output data can be found at:\n"
            f"    {self.data_dir}"
        )
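The top-level key checks in `recipe_loader` can be exercised without touching the filesystem by applying them to a plain dictionary. A simplified sketch that skips the YAML-parsing step (`validate_recipe` is a hypothetical helper, not part of zampy):

```python
def validate_recipe(recipe: dict) -> None:
    """Apply the same top-level key checks as recipe_loader, on a parsed dict."""
    if not all(key in recipe for key in ["name", "download", "convert"]):
        raise ValueError("Missing one of: name, download, convert.")
    if "datasets" not in recipe["download"]:
        raise ValueError("No 'datasets' entry found in the recipe.")
    if not all(
        key in recipe["convert"] for key in ["convention", "frequency", "resolution"]
    ):
        raise ValueError("Missing one of: convention, frequency, resolution.")


# A minimal recipe matching the structure documented in using_zampy.md:
valid = {
    "name": "test_recipe",
    "download": {
        "years": [2020, 2020],
        "bbox": [54, 6, 50, 3],
        "datasets": {"era5": {"variables": ["surface_pressure"]}},
    },
    "convert": {"convention": "ALMA", "frequency": "1H", "resolution": 0.5},
}
validate_recipe(valid)  # passes silently

# A recipe missing the download/convert sections is rejected early:
try:
    validate_recipe({"name": "broken"})
except ValueError as err:
    print(err)
```

Failing fast on missing keys means a malformed recipe is rejected before any download starts, rather than partway through a long run.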
2 changes: 1 addition & 1 deletion src/zampy/utils/regrid.py
@@ -105,7 +105,7 @@ def _groupby_regrid(
    ds_out = ds_out.swap_dims(
        {"latitude_bins": "latitude", "longitude_bins": "longitude"}
    )
-    ds_out = ds_out.drop(["latitude_bins", "longitude_bins"])
+    ds_out = ds_out.drop_vars(["latitude_bins", "longitude_bins"])
    return ds_out.transpose("time", "latitude", "longitude", ...)

