Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add soil temperature and soil moisture to ERA5 data, STEMMUS_SCOPE recipe #53

Merged
merged 22 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
dd8cf80
Add soil temperature and soil moisture to ERA5 data
BSchilperoort Feb 8, 2024
711f3eb
Fix incorrect name in SPLIT_VARIABLES constant
BSchilperoort Feb 9, 2024
6686d58
Use Dask distributed by default: avoids memory issues
BSchilperoort Jun 14, 2024
5a9bbcd
Validate recipe with datasets before downloading
BSchilperoort Jun 14, 2024
d396e48
Delete in-memory dataset after saving to reduce mem usage
BSchilperoort Jun 14, 2024
33c5cd9
Add appropriate time bounds to canopy height data set
BSchilperoort Jul 10, 2024
0e654c9
New formatting
BSchilperoort Jul 10, 2024
adf73de
Make tests use dask distributed scheduler
BSchilperoort Jul 10, 2024
014d632
Ignore typing in test folder
BSchilperoort Jul 17, 2024
f26a700
Add interpolation for datasets with lower frequency than recipe target
BSchilperoort Jul 17, 2024
aa650d3
Update formatting and linting (new rules)
BSchilperoort Jul 17, 2024
1fe26d0
Try dask submit/result for Windows
BSchilperoort Jul 26, 2024
b0908b2
Try to fix windows again
BSchilperoort Jul 26, 2024
f09e2d7
Use context manager to ensure file closure
BSchilperoort Jul 26, 2024
8d38759
Try unlinking files manually
BSchilperoort Jul 26, 2024
ecbdb40
Change how tempdir works on windows
BSchilperoort Jul 26, 2024
f8e0e63
Allow skipping downloads
BSchilperoort Jul 26, 2024
e206125
Actually run fast tests first
BSchilperoort Jul 26, 2024
6577542
Finalize windows fix
BSchilperoort Jul 26, 2024
2a447c3
Add stemmus scope input data recipe
BSchilperoort Jul 26, 2024
4b44e75
Fix failing tests: segfault occurred due to data not being available
BSchilperoort Jul 29, 2024
690694b
Apparently Windows dislikes single quotes '
BSchilperoort Jul 29, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
run: |
python3 -m pip install --upgrade pip hatch
- name: Run fast tests first
run: hatch run test
run: hatch run fast-test
- name: Run full test suite & coverage
run: hatch run test
- name: Verify that we can build the package
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ format = [
"ruff check src/ tests/ --fix --exit-non-zero-on-fix",
"lint",
]
fast-test = ["pytest -m 'not slow'"]
fast-test = ["pytest -m \"not slow\""]
test = [
"pytest ./src/zampy/ ./tests/ --doctest-modules --doctest-ignore-import-errors",
]
Expand All @@ -131,10 +131,11 @@ markers = [
ignore_missing_imports = true
disallow_untyped_defs = true
python_version = "3.10"
exclude = "tests"

[tool.ruff]
line-length = 88
exclude = ["docs", "build"]
exclude = ["docs", "build", "tests"]
target-version = "py310"

[tool.ruff.lint]
Expand Down
41 changes: 41 additions & 0 deletions recipes/STEMMUS_SCOPE_input.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Configuration (folders, login info, etc.) goes in a ~/.zampy/config file.
name: "STEMMUS_SCOPE_input"

download:
time: ["2020-01-01", "2020-06-30"]
bbox: [60, 10, 50, 0] # NESW
datasets:
era5_land:
variables:
- air_temperature
- dewpoint_temperature
- soil_temperature
- soil_moisture
era5:
variables:
- total_precipitation
- surface_thermal_radiation_downwards
- surface_solar_radiation_downwards
- surface_pressure
- eastward_component_of_wind
- northward_component_of_wind
eth_canopy_height:
variables:
- height_of_vegetation
fapar_lai:
variables:
- leaf_area_index
land_cover:
variables:
- land_cover
prism_dem_90:
variables:
- elevation
cams:
variables:
- co2_concentration

convert:
convention: ALMA
frequency: 1H # outputs at 1 hour frequency. Pandas-like freq-keyword.
resolution: 0.25 # output resolution in degrees.
1 change: 1 addition & 0 deletions src/zampy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""zampy."""

from zampy import datasets


Expand Down
8 changes: 6 additions & 2 deletions src/zampy/cli.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
"""Implements CLI interface for Zampy."""

from pathlib import Path
import click
import dask.distributed
from zampy.recipe import RecipeManager


@click.command()
@click.argument("recipe", type=click.Path(exists=True, path_type=Path))
def run_recipe(recipe: Path) -> None:
@click.option("--skip-download", is_flag=True)
def run_recipe(recipe: Path, skip_download: bool) -> None:
"""Run the recipe using the CLI."""
click.echo(f"Executing recipe: {recipe}")
rm = RecipeManager(recipe)
rm = RecipeManager(recipe, skip_download)
rm.run()


if __name__ == "__main__":
dask.distributed.Client()
run_recipe()
8 changes: 8 additions & 0 deletions src/zampy/conventions/ALMA.json
Original file line number Diff line number Diff line change
Expand Up @@ -84,5 +84,13 @@
"land_cover": {
"variable": "land_cover",
"units": ""
},
"soil_temperature": {
"variable": "SoilTemp",
"units": "kelvin"
},
"soil_moisture": {
"variable": "SoilMoist",
"units": "kilogram/meter**3"
}
}
1 change: 1 addition & 0 deletions src/zampy/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Datasets implementations."""

from zampy.datasets import dataset_protocol
from zampy.datasets import validation
from zampy.datasets.catalog import DATASETS
Expand Down
1 change: 1 addition & 0 deletions src/zampy/datasets/catalog.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Catalog of datasets."""

from zampy.datasets import dataset_protocol
from zampy.datasets.cams import CAMS
from zampy.datasets.era5 import ERA5
Expand Down
74 changes: 73 additions & 1 deletion src/zampy/datasets/cds_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""CDS utilities used by ECMWF datasets."""

from copy import copy
from pathlib import Path
import cdsapi
import numpy as np
Expand Down Expand Up @@ -44,6 +45,21 @@
"21:00", "22:00", "23:00",
] # fmt: skip

# Combined zampy variable names mapped to the tuple of per-layer variable
# names they are expanded into (layers 1 through 4) before the download
# requests are generated.
SPLIT_VARIABLES = {
    combined_name: tuple(f"{layer_prefix}_{layer}" for layer in range(1, 5))
    for combined_name, layer_prefix in (
        ("soil_temperature", "soil_temperature_level"),
        ("soil_moisture", "volumetric_soil_water_layer"),
    )
}


def cds_request(
dataset: str,
Expand Down Expand Up @@ -226,6 +242,12 @@ def retrieve_era5(
# create list of year/month pairs
year_month_pairs = time_bounds_to_year_month(time_bounds)

variables = copy(variables) # Prevent original input from being modified in-place
for split_var in SPLIT_VARIABLES:
if split_var in variables:
variables.remove(split_var)
variables.extend(SPLIT_VARIABLES[split_var])

for (year, month), variable in product(
year_month_pairs, variables, position=0, leave=True
):
Expand Down Expand Up @@ -354,7 +376,8 @@ def convert_to_zampy(
print(f"File '{ncfile.name}' already exists, skipping...")
else:
ds = parse_nc_file(file)

# Rename the volumetric soil water layer (vswl) files to "soil_moisture":
ncfile = Path(str(ncfile).replace("volumetric_soil_water", "soil_moisture"))
ds.to_netcdf(path=ncfile)


Expand All @@ -373,6 +396,28 @@ def convert_to_zampy(
"co2": "co2_concentration",
}

# Short names of the per-layer variables (layers 1 through 4), each mapped to
# the single zampy variable that the layer belongs to.
VAR_REFERENCE_MULTI_LAYER = {
    f"{short_name}{layer}": zampy_name
    for short_name, zampy_name in (
        ("stl", "soil_temperature"),
        ("swvl", "soil_moisture"),
    )
    for layer in range(1, 5)
}

# Bounds of every soil layer, stored as a 1x2 nested list so it can be assigned
# directly to a ("depth", "nv") variable. The mean of the two bounds is used as
# the layer's "depth" coordinate.
# NOTE(review): values are presumably centimetres below the surface -- confirm
# against the ERA5(-Land) documentation.
LAYER_BOUNDS = {
    f"{short_name}{layer}": [[top, bottom]]  # a fresh list object for every key
    for short_name in ("stl", "swvl")
    for layer, (top, bottom) in enumerate(
        ((0.0, 7.0), (7.0, 28.0), (28.0, 100.0), (100.0, 289.0)),
        start=1,
    )
}

# Multiplier that turns the volumetric soil water values into "kg m**-3".
WATER_DENSITY = 997.0  # kg/m3


Expand Down Expand Up @@ -416,6 +461,33 @@ def parse_nc_file(file: Path) -> xr.Dataset:
variable_name
].desc

if variable in VAR_REFERENCE_MULTI_LAYER:
if ( # Soil temperature/moisture routine
str(variable).startswith("stl") or str(variable).startswith("swvl")
):
if str(variable).startswith("swvl"):
varname = "soil_moisture"
standard_name = "moisture_content_of_soil_layer"
ds[variable] *= WATER_DENSITY
ds[variable].attrs.update({"units": "kg m**-3"})
else:
varname = "soil_temperature"
standard_name = "temperature_in_ground"

da = ds[variable]
name = str(da.name)
da = da.expand_dims({"depth": [np.mean(LAYER_BOUNDS[name])]})
da = da.rename(varname)
da.attrs.update(
{
"long_name": varname.replace("_", " "),
"standard_name": standard_name,
}
)

ds = da.to_dataset()
ds["depth_bounds"] = (("depth", "nv"), LAYER_BOUNDS[name])

# TODO: add dataset attributes.

return ds
1 change: 1 addition & 0 deletions src/zampy/datasets/converter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Dataset formatter for different conventions."""

import json
import warnings
from pathlib import Path
Expand Down
1 change: 1 addition & 0 deletions src/zampy/datasets/dataset_protocol.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Outline of the dataset protocol."""

import json
import shutil
from dataclasses import dataclass
Expand Down
17 changes: 16 additions & 1 deletion src/zampy/datasets/era5.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,30 @@ class ERA5Land(ECMWFDataset): # noqa: D101
raw_variables = [
Variable(name="t2m", unit=unit_registry.kelvin),
Variable(name="d2m", unit=unit_registry.kelvin),
Variable(name="st", unit=unit_registry.kelvin),
Variable(name="swvl", unit=unit_registry.fraction),
]

# variable names used in cdsapi downloading request
cds_var_names = {
"air_temperature": "2m_temperature",
"dewpoint_temperature": "2m_dewpoint_temperature",
"soil_temperature_level_1": "soil_temperature_level_1", # Note: split variables
"soil_temperature_level_2": "soil_temperature_level_2",
"soil_temperature_level_3": "soil_temperature_level_3",
"soil_temperature_level_4": "soil_temperature_level_4",
"volumetric_soil_water_layer_1": "volumetric_soil_water_layer_1",
"volumetric_soil_water_layer_2": "volumetric_soil_water_layer_2",
"volumetric_soil_water_layer_3": "volumetric_soil_water_layer_3",
"volumetric_soil_water_layer_4": "volumetric_soil_water_layer_4",
}

variable_names = list(cds_var_names.keys())
variable_names = [
"air_temperature",
"dewpoint_temperature",
"soil_temperature",
"soil_moisture",
]

variables = [VARIABLE_REFERENCE_LOOKUP[var] for var in variable_names]

Expand Down
14 changes: 11 additions & 3 deletions src/zampy/datasets/eth_canopy_height.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""ETH canopy height dataset."""

import gzip
from pathlib import Path
import numpy as np
Expand Down Expand Up @@ -269,10 +270,17 @@ def parse_tiff_file(file: Path, sd_file: bool = False) -> xr.Dataset:
da = da.isel(band=0) # get rid of band dim
da = da.drop_vars(["band", "spatial_ref"]) # drop unnecessary coords
ds = da.to_dataset()
ds = ds.assign_coords( # halfway in the year
{"time": np.datetime64("2020-07-01").astype("datetime64[ns]")}
ds = xr.concat( # Cover entirety of 2020
(
ds.assign_coords(
{"time": np.datetime64("2020-01-01").astype("datetime64[ns]")}
),
ds.assign_coords(
{"time": np.datetime64("2021-01-01").astype("datetime64[ns]")}
),
),
dim="time",
)
ds = ds.expand_dims("time")
ds = ds.rename(
{
"band_data": "height_of_vegetation_standard_deviation"
Expand Down
8 changes: 7 additions & 1 deletion src/zampy/datasets/fapar_lai.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Implementation of the FAPAR LAI dataset."""

import os
import shutil
import tempfile
import zipfile
Expand Down Expand Up @@ -119,7 +120,11 @@ def ingest(

# netCDF files follow CF-1.6, only unpacking the archives is required.
for file in zip_files:
with tempfile.TemporaryDirectory(dir=tmp_path) as _tmpdir:
with tempfile.TemporaryDirectory(
dir=tmp_path,
# cleanup fails on windows. No clear idea on how to fix this.
ignore_cleanup_errors=True if os.name == "nt" else False,
) as _tmpdir:
tmpdir = Path(_tmpdir)

extract_fapar_zip(
Expand Down Expand Up @@ -257,6 +262,7 @@ def ingest_ncfile(ncfile: Path, ingest_folder: Path) -> None:
path=ingest_folder / ncfile.name,
encoding={"leaf_area_index": {"zlib": True, "complevel": 3}},
)
ds.close() # explicitly close to release file to system (for Windows)


def extract_fapar_zip(
Expand Down
1 change: 1 addition & 0 deletions src/zampy/datasets/prism_dem.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Prism DEM dataset."""

import gzip
import tarfile
from pathlib import Path
Expand Down
1 change: 1 addition & 0 deletions src/zampy/datasets/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Shared utilities from datasets."""

import urllib.request
from pathlib import Path
import requests
Expand Down
1 change: 1 addition & 0 deletions src/zampy/datasets/validation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Checks for user input validation."""

from pathlib import Path
from zampy.datasets.dataset_protocol import Dataset
from zampy.datasets.dataset_protocol import SpatialBounds
Expand Down
Loading
Loading