diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0807701..97efe94 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,7 @@ jobs: fail-fast: false matrix: os: ['ubuntu-latest', 'macos-latest', 'windows-latest'] - python-version: ['3.9', '3.10', '3.11'] + python-version: ['3.10', '3.11'] env: MPLBACKEND: Agg # https://github.com/orgs/community/discussions/26434 steps: diff --git a/README.md b/README.md index 7f9c930..f714470 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ To download the following datasets, users need access to CDS via cdsapi: - ERA5 - ERA5 land - LAI +- land cover First, you need to be a registered user on *CDS* via the [registration page](https://cds.climate.copernicus.eu/user/register?destination=%2F%23!%2Fhome). diff --git a/demo/land_cover_dataset_demo.ipynb b/demo/land_cover_dataset_demo.ipynb new file mode 100644 index 0000000..fc6533f --- /dev/null +++ b/demo/land_cover_dataset_demo.ipynb @@ -0,0 +1,1124 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Handle land cover dataset with Zampy\n", + "Demo notebook for developers." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/yangliu/mambaforge/envs/ecoextreml/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from zampy.datasets.catalog import LandCover\n", + "from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "work_dir = Path(\"/home/yangliu/EcoExtreML/temp\")\n", + "download_dir = work_dir / \"download\"\n", + "ingest_dir = work_dir / \"ingest\"\n", + "times = TimeBounds(np.datetime64(\"2010-01-01T00:00:00\"), np.datetime64(\"2011-01-31T23:00:00\"))\n", + "bbox_demo = SpatialBounds(54, 56, 1, 3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Download dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 2/2 [00:00<00:00, 3.90it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File 'land-cover_LCCS_MAP_300m_2011.zip' already exists, skipping...\n", + "File 'land-cover_LCCS_MAP_300m_2010.zip' already exists, skipping...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "land_cover_dataset = LandCover()\n", + "land_cover_dataset.download(\n", + " download_dir=download_dir,\n", + " time_bounds=times,\n", + " spatial_bounds=bbox_demo,\n", + " variable_names=[\"land_cover\"],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data ingestion to the unified format in `zampy`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File 'land-cover_LCCS_MAP_300m_2011.nc' already exists, skipping...\n", + "File 'land-cover_LCCS_MAP_300m_2010.nc' already exists, skipping...\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# this step could take some time\n", + "land_cover_dataset.ingest(download_dir, ingest_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ds = land_cover_dataset.load(\n", + " ingest_dir=ingest_dir,\n", + " time_bounds=times,\n", + " spatial_bounds=bbox_demo,\n", + " variable_names=[\"land_cover\"],\n", + " resolution=1.0,\n", + " regrid_method=\"most_common\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:     (time: 2, latitude: 54, longitude: 54)\n",
+       "Coordinates:\n",
+       "  * time        (time) datetime64[ns] 2010-01-01 2011-01-01\n",
+       "  * latitude    (latitude) float64 1.0 2.0 3.0 4.0 5.0 ... 51.0 52.0 53.0 54.0\n",
+       "  * longitude   (longitude) float64 3.0 4.0 5.0 6.0 7.0 ... 53.0 54.0 55.0 56.0\n",
+       "Data variables:\n",
+       "    land_cover  (time, latitude, longitude) float32 210.0 210.0 ... 10.0 10.0\n",
+       "Attributes: (12/38)\n",
+       "    id:                         ESACCI-LC-L4-LCCS-Map-300m-P1Y-2010-v2.0.7cds\n",
+       "    title:                      Land Cover Map of ESA CCI brokered by CDS\n",
+       "    summary:                    This dataset characterizes the land cover of ...\n",
+       "    type:                       ESACCI-LC-L4-LCCS-Map-300m-P1Y\n",
+       "    project:                    Climate Change Initiative - European Space Ag...\n",
+       "    references:                 http://www.esa-landcover-cci.org/\n",
+       "    ...                         ...\n",
+       "    geospatial_lon_max:         180\n",
+       "    spatial_resolution:         300m\n",
+       "    geospatial_lat_units:       degrees_north\n",
+       "    geospatial_lat_resolution:  0.002778\n",
+       "    geospatial_lon_units:       degrees_east\n",
+       "    geospatial_lon_resolution:  0.002778
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 2, latitude: 54, longitude: 54)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2010-01-01 2011-01-01\n", + " * latitude (latitude) float64 1.0 2.0 3.0 4.0 5.0 ... 51.0 52.0 53.0 54.0\n", + " * longitude (longitude) float64 3.0 4.0 5.0 6.0 7.0 ... 53.0 54.0 55.0 56.0\n", + "Data variables:\n", + " land_cover (time, latitude, longitude) float32 210.0 210.0 ... 10.0 10.0\n", + "Attributes: (12/38)\n", + " id: ESACCI-LC-L4-LCCS-Map-300m-P1Y-2010-v2.0.7cds\n", + " title: Land Cover Map of ESA CCI brokered by CDS\n", + " summary: This dataset characterizes the land cover of ...\n", + " type: ESACCI-LC-L4-LCCS-Map-300m-P1Y\n", + " project: Climate Change Initiative - European Space Ag...\n", + " references: http://www.esa-landcover-cci.org/\n", + " ... ...\n", + " geospatial_lon_max: 180\n", + " spatial_resolution: 300m\n", + " geospatial_lat_units: degrees_north\n", + " geospatial_lat_resolution: 0.002778\n", + " geospatial_lon_units: degrees_east\n", + " geospatial_lon_resolution: 0.002778" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "land_cover renamed to land_cover.\n", + "Conversion of dataset 'land-cover' following ALMA convention is complete!\n" + ] + } + ], + "source": [ + "from zampy.datasets import converter\n", + "\n", + "ds_convert = converter.convert(ds, land_cover_dataset, \"ALMA\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:     (time: 2, latitude: 54, longitude: 54)\n",
+       "Coordinates:\n",
+       "  * time        (time) datetime64[ns] 2010-01-01 2011-01-01\n",
+       "  * latitude    (latitude) float64 1.0 2.0 3.0 4.0 5.0 ... 51.0 52.0 53.0 54.0\n",
+       "  * longitude   (longitude) float64 3.0 4.0 5.0 6.0 7.0 ... 53.0 54.0 55.0 56.0\n",
+       "Data variables:\n",
+       "    land_cover  (time, latitude, longitude) float32 210.0 210.0 ... 10.0 10.0\n",
+       "Attributes: (12/38)\n",
+       "    id:                         ESACCI-LC-L4-LCCS-Map-300m-P1Y-2010-v2.0.7cds\n",
+       "    title:                      Land Cover Map of ESA CCI brokered by CDS\n",
+       "    summary:                    This dataset characterizes the land cover of ...\n",
+       "    type:                       ESACCI-LC-L4-LCCS-Map-300m-P1Y\n",
+       "    project:                    Climate Change Initiative - European Space Ag...\n",
+       "    references:                 http://www.esa-landcover-cci.org/\n",
+       "    ...                         ...\n",
+       "    geospatial_lon_max:         180\n",
+       "    spatial_resolution:         300m\n",
+       "    geospatial_lat_units:       degrees_north\n",
+       "    geospatial_lat_resolution:  0.002778\n",
+       "    geospatial_lon_units:       degrees_east\n",
+       "    geospatial_lon_resolution:  0.002778
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 2, latitude: 54, longitude: 54)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2010-01-01 2011-01-01\n", + " * latitude (latitude) float64 1.0 2.0 3.0 4.0 5.0 ... 51.0 52.0 53.0 54.0\n", + " * longitude (longitude) float64 3.0 4.0 5.0 6.0 7.0 ... 53.0 54.0 55.0 56.0\n", + "Data variables:\n", + " land_cover (time, latitude, longitude) float32 210.0 210.0 ... 10.0 10.0\n", + "Attributes: (12/38)\n", + " id: ESACCI-LC-L4-LCCS-Map-300m-P1Y-2010-v2.0.7cds\n", + " title: Land Cover Map of ESA CCI brokered by CDS\n", + " summary: This dataset characterizes the land cover of ...\n", + " type: ESACCI-LC-L4-LCCS-Map-300m-P1Y\n", + " project: Climate Change Initiative - European Space Ag...\n", + " references: http://www.esa-landcover-cci.org/\n", + " ... ...\n", + " geospatial_lon_max: 180\n", + " spatial_resolution: 300m\n", + " geospatial_lat_units: degrees_north\n", + " geospatial_lat_resolution: 0.002778\n", + " geospatial_lon_units: degrees_east\n", + " geospatial_lon_resolution: 0.002778" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_convert" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ecoextreml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/available_datasets.md b/docs/available_datasets.md index 5b40124..bf47da7 100644 --- a/docs/available_datasets.md +++ b/docs/available_datasets.md @@ -39,3 +39,8 @@ You can add these yourself by creating a pull request, or open an 
issue to reque Note: model level is set to "60" and all steps are included for downloading. For more information, see [their webpage](https://ads.atmosphere.copernicus.eu/cdsapp#!/dataset/cams-global-ghg-reanalysis-egg4). + +=== "Land cover classification gridded maps" + - `land_cover` + + For more information, see [their webpage](https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover). diff --git a/pyproject.toml b/pyproject.toml index e787454..08bd5e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ name = "zampy" description = "python package for getting Land Surface Model input data." readme = "README.md" license = "Apache-2.0" -requires-python = ">=3.9, <3.12" +requires-python = ">=3.10, <3.12" authors = [ {email = "b.schilperoort@esciencecenter.nl"}, {name = "Bart Schilperoort, Yang Liu, Fakhereh Alidoost"} @@ -43,7 +43,6 @@ classifiers = [ "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] @@ -65,6 +64,7 @@ dependencies = [ "pint-xarray", "flox", # For better groupby methods. "cdsapi", + "xarray-regrid", # for land cover data regridding ] dynamic = ["version"] @@ -136,11 +136,11 @@ testpaths = ["tests"] [tool.mypy] ignore_missing_imports = true disallow_untyped_defs = true -python_version = "3.9" +python_version = "3.10" [tool.black] line-length = 88 -target-version = ['py39', 'py310', 'py311'] +target-version = ['py310', 'py311'] include = '\.pyi?$' [tool.ruff] @@ -171,7 +171,7 @@ line-length = 88 exclude = ["docs", "build"] # Allow unused variables when underscore-prefixed. 
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" -target-version = "py39" +target-version = "py310" [tool.ruff.per-file-ignores] "tests/**" = ["D"] diff --git a/src/zampy/conventions/ALMA.json b/src/zampy/conventions/ALMA.json index 2971ee0..f5c04ea 100644 --- a/src/zampy/conventions/ALMA.json +++ b/src/zampy/conventions/ALMA.json @@ -76,5 +76,9 @@ "co2_concentration": { "variable": "CO2air", "units": "kilogram/kilogram" + }, + "land_cover": { + "variable": "land_cover", + "units": "" } } \ No newline at end of file diff --git a/src/zampy/datasets/catalog.py b/src/zampy/datasets/catalog.py index 579835d..cf869c4 100644 --- a/src/zampy/datasets/catalog.py +++ b/src/zampy/datasets/catalog.py @@ -4,6 +4,7 @@ from zampy.datasets.era5 import ERA5 from zampy.datasets.era5 import ERA5Land from zampy.datasets.eth_canopy_height import EthCanopyHeight +from zampy.datasets.land_cover import LandCover from zampy.datasets.prism_dem import PrismDEM30 from zampy.datasets.prism_dem import PrismDEM90 @@ -17,4 +18,5 @@ "eth_canopy_height": EthCanopyHeight, "prism_dem_30": PrismDEM30, "prism_dem_90": PrismDEM90, + "land_cover": LandCover, } diff --git a/src/zampy/datasets/cds_utils.py b/src/zampy/datasets/cds_utils.py index d6aec09..ff03511 100644 --- a/src/zampy/datasets/cds_utils.py +++ b/src/zampy/datasets/cds_utils.py @@ -19,11 +19,13 @@ "reanalysis-era5-single-levels": "era5", "reanalysis-era5-land": "era5-land", "cams-global-ghg-reanalysis-egg4": "cams", + "satellite-land-cover": "land-cover", } SERVER_API = { "era5": "cdsapi", "era5-land": "cdsapi", "cams": "adsapi", + "land-cover": "cdsapi", } CONFIG_PATH = Path.home() / ".config" / "zampy" / "zampy_config.yml" @@ -87,6 +89,56 @@ def cds_request( ) +def cds_request_land_cover( + dataset: str, + time_bounds: TimeBounds, + path: Path, + overwrite: bool, +) -> None: + """Download land cover data via CDS API. 
+ + To raise a request via CDS API using `zampy`, user needs to set up the + zampy configuration file `zampy_config.yml` following the instructions on + https://github.com/EcoExtreML/zampy/blob/main/README.md#instructions-for-cds-datasets-eg-era5. + + Args: + dataset: Dataset name for retrieval via `cdsapi`. + time_bounds: Zampy time bounds object. + path: File path to which the data should be saved. + overwrite: If an existing file (of the same size!) should be overwritten. + """ + fname = PRODUCT_FNAME[dataset] + + url, api_key = cds_api_key(fname) + + c = cdsapi.Client( + url=url, + key=api_key, + verify=True, + quiet=True, + ) + + years_months = time_bounds_to_year_month(time_bounds) + years = {year for (year, _) in years_months} + + for year in tqdm(years): + if int(year) < 2016: + version = "v2.0.7cds" + else: + version = "v2.1.1" + r = c.retrieve( + dataset, + { + "variable": "all", + "format": "zip", + "year": year, + "version": version, + }, + ) + fpath = path / f"{fname}_LCCS_MAP_300m_{year}.zip" + _check_and_download(r, fpath, overwrite) + + def cds_api_key(product_name: str) -> tuple[str, str]: """Load url and CDS/ADS API key. 
diff --git a/src/zampy/datasets/converter.py b/src/zampy/datasets/converter.py index f641950..6692be8 100644 --- a/src/zampy/datasets/converter.py +++ b/src/zampy/datasets/converter.py @@ -2,7 +2,6 @@ import json import warnings from pathlib import Path -from typing import Union import cf_xarray.units # noqa: F401 import pint_xarray # noqa: F401 import xarray as xr @@ -14,7 +13,7 @@ conventions_path = Path(__file__).resolve().parents[1] / "conventions" -def check_convention(convention: Union[str, Path]) -> None: +def check_convention(convention: str | Path) -> None: """Check if the given convention is supported.""" if isinstance(convention, str): if convention.upper() not in CONVENTIONS: @@ -33,9 +32,7 @@ def check_convention(convention: Union[str, Path]) -> None: print(f"Starting data conversion to the convention defined in '{convention}'") -def convert( - data: xr.Dataset, dataset: Dataset, convention: Union[str, Path] -) -> xr.Dataset: +def convert(data: xr.Dataset, dataset: Dataset, convention: str | Path) -> xr.Dataset: """Convert a loaded dataset to the specified convention. Args: diff --git a/src/zampy/datasets/dataset_protocol.py b/src/zampy/datasets/dataset_protocol.py index 1b1de29..097abdc 100644 --- a/src/zampy/datasets/dataset_protocol.py +++ b/src/zampy/datasets/dataset_protocol.py @@ -4,7 +4,6 @@ from dataclasses import dataclass from pathlib import Path from typing import Any -from typing import Optional from typing import Protocol import numpy as np import xarray as xr @@ -19,7 +18,7 @@ class Variable: name: str unit: Any # pint unit. 
typing has issues with pint 0.21 - desc: Optional[str] = "" + desc: str | None = "" @dataclass diff --git a/src/zampy/datasets/ecmwf_dataset.py b/src/zampy/datasets/ecmwf_dataset.py index 4ceb1e7..2254ae5 100644 --- a/src/zampy/datasets/ecmwf_dataset.py +++ b/src/zampy/datasets/ecmwf_dataset.py @@ -1,7 +1,6 @@ """Base module for datasets available on CDS.""" from pathlib import Path -from typing import Union import xarray as xr from zampy.datasets import cds_utils from zampy.datasets import converter @@ -129,7 +128,7 @@ def load( def convert( self, ingest_dir: Path, - convention: Union[str, Path], + convention: str | Path, ) -> bool: converter.check_convention(convention) ingest_folder = ingest_dir / self.name diff --git a/src/zampy/datasets/eth_canopy_height.py b/src/zampy/datasets/eth_canopy_height.py index 7845651..c82e686 100644 --- a/src/zampy/datasets/eth_canopy_height.py +++ b/src/zampy/datasets/eth_canopy_height.py @@ -1,7 +1,6 @@ """ETH canopy height dataset.""" import gzip from pathlib import Path -from typing import Union import numpy as np import xarray as xr from zampy.datasets import converter @@ -109,7 +108,7 @@ def ingest( sd_files = list(download_folder.glob(sd_file_pattern)) is_sd_file = len(data_files) * [False] + len(sd_files) * [True] - for file, sd_file in zip(data_files + sd_files, is_sd_file): + for file, sd_file in zip(data_files + sd_files, is_sd_file, strict=True): convert_tiff_to_netcdf( ingest_folder, file=file, @@ -144,7 +143,7 @@ def load( def convert( self, ingest_dir: Path, - convention: Union[str, Path], + convention: str | Path, ) -> bool: converter.check_convention(convention) ingest_folder = ingest_dir / self.name @@ -188,7 +187,7 @@ def get_filenames(bounds: SpatialBounds, sd_file: bool = False) -> list[str]: fnames = [""] * len(lats) - for i, (lat, lon) in enumerate(zip(lats, lons)): + for i, (lat, lon) in enumerate(zip(lats, lons, strict=True)): lat_ = int(lat // step * step) lon_ = int(lon // step * step) diff --git 
a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py new file mode 100644 index 0000000..66e8f13 --- /dev/null +++ b/src/zampy/datasets/land_cover.py @@ -0,0 +1,248 @@ +"""Land cover classification dataset.""" + +from pathlib import Path +from tempfile import TemporaryDirectory +from zipfile import ZipFile +import numpy as np +import xarray as xr +import xarray_regrid +from zampy.datasets import cds_utils +from zampy.datasets import converter +from zampy.datasets import validation +from zampy.datasets.dataset_protocol import SpatialBounds +from zampy.datasets.dataset_protocol import TimeBounds +from zampy.datasets.dataset_protocol import Variable +from zampy.datasets.dataset_protocol import copy_properties_file +from zampy.datasets.dataset_protocol import write_properties_file +from zampy.reference.variables import VARIABLE_REFERENCE_LOOKUP +from zampy.reference.variables import unit_registry + + +## Ignore missing class/method docstrings: they are implemented in the Dataset class. 
+# ruff: noqa: D102 + + +class LandCover: + """Land cover classification gridded maps.""" + + name = "land-cover" + time_bounds = TimeBounds(np.datetime64("1992-01-01"), np.datetime64("2020-12-31")) + spatial_bounds = SpatialBounds(90, 180, -90, -180) + crs = "EPSG:4326" + + raw_variables = [ + Variable(name="lccs_class", unit=unit_registry.dimensionless), + ] + variable_names = ["land_cover"] + variables = [VARIABLE_REFERENCE_LOOKUP[var] for var in variable_names] + + license = "ESA CCI licence; licence-to-use-copernicus-products; VITO licence" + + bib = """ + @article{buchhorn2020copernicus, + title={Copernicus global land cover layers—collection 2}, + author={Buchhorn, Marcel et al.}, + journal={Remote Sensing}, + volume={12}, + number={6}, + pages={1044}, + year={2020}, + publisher={MDPI} + } + """ + + data_url = "https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover?tab=overview" + + cds_dataset = "satellite-land-cover" + + def __init__(self) -> None: + """Init.""" + pass + + def download( + self, + download_dir: Path, + time_bounds: TimeBounds, + spatial_bounds: SpatialBounds, + variable_names: list[str], + overwrite: bool = False, + ) -> bool: + validation.validate_download_request( + self, + download_dir, + time_bounds, + spatial_bounds, + variable_names, + ) + + download_folder = download_dir / self.name + download_folder.mkdir(parents=True, exist_ok=True) + + cds_utils.cds_request_land_cover( + dataset=self.cds_dataset, + time_bounds=time_bounds, + path=download_folder, + overwrite=overwrite, + ) + + write_properties_file( + download_folder, spatial_bounds, time_bounds, variable_names + ) + + return True + + def ingest( + self, + download_dir: Path, + ingest_dir: Path, + overwrite: bool = False, + ) -> bool: + download_folder = download_dir / self.name + ingest_folder = ingest_dir / self.name + ingest_folder.mkdir(parents=True, exist_ok=True) + + archive_file_pattern = f"{self.name}_*.zip" + archive_files = 
list(download_folder.glob(archive_file_pattern)) + + for file in archive_files: + unzip_raw_to_netcdf( + ingest_folder, + file=file, + overwrite=overwrite, + ) + + copy_properties_file(download_folder, ingest_folder) + + return True + + def load( + self, + ingest_dir: Path, + time_bounds: TimeBounds, + spatial_bounds: SpatialBounds, + resolution: float, + regrid_method: str, # Unused in land-cover dataset + variable_names: list[str], + ) -> xr.Dataset: + files: list[Path] = [] + for var in variable_names: + if var not in self.variable_names: + msg = ( + "One or more variables are not in this dataset.\n" + f"Please check input. Dataset: '{self.name}'\n" + f"Variables: '{variable_names}'" + ) + raise ValueError(msg) + files = list((ingest_dir / self.name).glob(f"{self.name}_*.nc")) + + ds = xr.open_mfdataset(files, chunks={"latitude": 200, "longitude": 200}) + ds = ds.sel(time=slice(time_bounds.start, time_bounds.end)) + new_grid = xarray_regrid.Grid( + north=spatial_bounds.north, + east=spatial_bounds.east, + south=spatial_bounds.south, + west=spatial_bounds.west, + resolution_lat=resolution, + resolution_lon=resolution, + ) + target_dataset = xarray_regrid.create_regridding_dataset(new_grid) + + ds_regrid = ds.regrid.most_common(target_dataset, time_dim="time", max_mem=1e9) + + return ds_regrid + + def convert( + self, + ingest_dir: Path, + convention: str | Path, + ) -> bool: + converter.check_convention(convention) + ingest_folder = ingest_dir / self.name + + data_file_pattern = "land-cover_LCCS_MAP_*.nc" + + data_files = list(ingest_folder.glob(data_file_pattern)) + + for file in data_files: + print(f"Start processing file `{file.name}`.") + ds = xr.open_dataset(file) + ds = converter.convert(ds, dataset=self, convention=convention) + + return True + + +def unzip_raw_to_netcdf( + ingest_folder: Path, + file: Path, + overwrite: bool = False, +) -> None: + """Convert a downloaded zip netcdf file to a standard CF/Zampy netCDF file. 
+ + Args: + ingest_folder: Folder where the files have to be written to. + file: Path to the land cover .zip archive. + overwrite: Overwrite all existing files. If False, file that already exist will + be skipped. + """ + ncfile = ingest_folder / file.with_suffix(".nc").name + if ncfile.exists() and not overwrite: + print(f"File '{ncfile.name}' already exists, skipping...") + else: + ds = extract_netcdf_to_zampy(file) + ds.to_netcdf(path=ncfile) + + +def extract_netcdf_to_zampy(file: Path) -> xr.Dataset: + """Extract zipped data and convert to zampy format. + + Since the native resolution of land cover field is too high + in general, in this function the loaded land cover data + are regridded. They are regrid to a resoltuion of 0.05 degree. + + Args: + file: Path to the land cover .zip archive. + + Returns: + Coarse land cover data in zampy format. + """ + with TemporaryDirectory() as temp_dir: + unzip_folder = Path(temp_dir) + with ZipFile(file, "r") as zip_object: + zipped_file_name = zip_object.namelist()[0] + zip_object.extract(zipped_file_name, path=unzip_folder) + + # only keep land cover class variable + with xr.open_dataset(unzip_folder / zipped_file_name) as ds: + var_list = [var for var in ds.data_vars] + raw_variable = "lccs_class" + var_list.remove(raw_variable) + ds = ds.drop_vars(var_list) # noqa: PLW2901 + + ds = ds.sortby(["lat", "lon"]) # noqa: PLW2901 + ds = ds.rename({"lat": "latitude", "lon": "longitude"}) # noqa: PLW2901 + new_grid = xarray_regrid.Grid( + north=90, + east=180, + south=-90, + west=-180, + resolution_lat=0.05, + resolution_lon=0.05, + ) + + target_dataset = xarray_regrid.create_regridding_dataset(new_grid) + + ds_regrid = ds.regrid.most_common( + target_dataset, time_dim="time", max_mem=1e9 + ) + + # rename variable to follow the zampy convention + variable_name = "land_cover" + ds_regrid = ds_regrid.rename({raw_variable: variable_name}) + ds_regrid[variable_name].attrs["units"] = str( + 
VARIABLE_REFERENCE_LOOKUP[variable_name].unit + ) + ds_regrid[variable_name].attrs["description"] = VARIABLE_REFERENCE_LOOKUP[ + variable_name + ].desc + + return ds_regrid diff --git a/src/zampy/datasets/prism_dem.py b/src/zampy/datasets/prism_dem.py index 17f47b2..3d37e7d 100644 --- a/src/zampy/datasets/prism_dem.py +++ b/src/zampy/datasets/prism_dem.py @@ -3,7 +3,6 @@ import tarfile from pathlib import Path from typing import Literal -from typing import Union import numpy as np import xarray as xr from rasterio.io import MemoryFile @@ -153,7 +152,7 @@ def preproc(ds: xr.Dataset) -> xr.Dataset: def convert( self, ingest_dir: Path, - convention: Union[str, Path], + convention: str | Path, ) -> bool: converter.check_convention(convention) ingest_folder = ingest_dir / self.name @@ -301,7 +300,7 @@ def get_archive_filenames( else: raise ValueError("Unknown glo_number.") - for i, (lat, lon) in enumerate(zip(lats, lons)): + for i, (lat, lon) in enumerate(zip(lats, lons, strict=True)): lat_ = int(lat // step * step) lon_ = int(lon // step * step) diff --git a/src/zampy/datasets/utils.py b/src/zampy/datasets/utils.py index 1b27975..ee15680 100644 --- a/src/zampy/datasets/utils.py +++ b/src/zampy/datasets/utils.py @@ -1,8 +1,6 @@ """Shared utilities from datasets.""" import urllib.request from pathlib import Path -from typing import Optional -from typing import Union import requests from tqdm import tqdm @@ -11,8 +9,8 @@ class TqdmUpdate(tqdm): """Wrap a tqdm progress bar to be updateable by urllib.request.urlretrieve.""" def update_to( - self, b: int = 1, bsize: int = 1, tsize: Optional[int] = None - ) -> Union[bool, None]: + self, b: int = 1, bsize: int = 1, tsize: int | None = None + ) -> bool | None: """Update the progress bar. 
Args: diff --git a/src/zampy/reference/variables.py b/src/zampy/reference/variables.py index e84bb78..fd1fca6 100644 --- a/src/zampy/reference/variables.py +++ b/src/zampy/reference/variables.py @@ -17,6 +17,7 @@ def unit_registration() -> UnitRegistry: "kilogram_per_square_meter_second = kilogram/(meter**2*second)" ) unit_registry.define("milimeter_per_second = watt/meter**2") + unit_registry.define("dimensionless = []") return unit_registry @@ -53,6 +54,7 @@ def unit_registration() -> UnitRegistry: Variable("latitude", unit=unit_registry.degree_north), Variable("longitude", unit=unit_registry.degree_east), Variable("elevation", unit=unit_registry.meter), + Variable("land_cover", unit=unit_registry.dimensionless), ) VARIABLE_REFERENCE_LOOKUP = {var.name: var for var in VARIABLE_REFERENCE} diff --git a/tests/test_cds_utils.py b/tests/test_cds_utils.py index 2309071..c70f805 100644 --- a/tests/test_cds_utils.py +++ b/tests/test_cds_utils.py @@ -123,6 +123,37 @@ def test_cds_request_cams_co2(mock_retrieve, valid_path_config): ) +@patch("cdsapi.Client.retrieve") +def test_cds_request_land_cover(mock_retrieve, valid_path_config): + """ "Test cds request for downloading data from CDS server.""" + dataset = "satellite-land-cover" + time_bounds = TimeBounds( + np.datetime64("1996-01-01T00:00:00"), np.datetime64("1996-12-31T00:00:00") + ) + path = Path(__file__).resolve().parent + overwrite = True + + # create a dummy .cdsapirc + patching = patch("zampy.datasets.cds_utils.CONFIG_PATH", valid_path_config) + with patching: + cds_utils.cds_request_land_cover( + dataset, + time_bounds, + path, + overwrite, + ) + + mock_retrieve.assert_called_with( + dataset, + { + "variable": "all", + "format": "zip", + "year": "1996", + "version": "v2.0.7cds", + }, + ) + + def test_cds_api_key_config_exist(valid_path_config): """Test zampy config exists.""" patching = patch("zampy.datasets.cds_utils.CONFIG_PATH", valid_path_config) diff --git 
a/tests/test_data/land-cover/land-cover_LCCS_MAP_300m_1996.zip b/tests/test_data/land-cover/land-cover_LCCS_MAP_300m_1996.zip new file mode 100644 index 0000000..2438560 Binary files /dev/null and b/tests/test_data/land-cover/land-cover_LCCS_MAP_300m_1996.zip differ diff --git a/tests/test_data/land-cover/properties.json b/tests/test_data/land-cover/properties.json new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_datasets/test_land_cover.py b/tests/test_datasets/test_land_cover.py new file mode 100644 index 0000000..8b4e8c8 --- /dev/null +++ b/tests/test_datasets/test_land_cover.py @@ -0,0 +1,139 @@ +"""Unit test for land cover dataset.""" + +import json +from pathlib import Path +from unittest.mock import patch +import numpy as np +import pytest +import xarray as xr +import zampy.datasets.land_cover +from zampy.datasets.catalog import LandCover +from zampy.datasets.dataset_protocol import SpatialBounds +from zampy.datasets.dataset_protocol import TimeBounds +from . import data_folder + + +@pytest.fixture(scope="function") +def valid_path_config(tmp_path_factory): + """Create a dummy .zampy_config file.""" + fn = tmp_path_factory.mktemp("usrhome") / "zampy_config.yml" + with open(fn, mode="w", encoding="utf-8") as f: + f.write("cdsapi:\n url: a\n key: 123:abc-def\n") + f.write("adsapi:\n url: a\n key: 123:abc-def") + return fn + + +@pytest.fixture(scope="function") +def dummy_dir(tmp_path_factory): + """Create a dummpy directory for testing.""" + return tmp_path_factory.mktemp("data") + + +class TestLandCover: + """Test the LandCover class.""" + + @patch("cdsapi.Client.retrieve") + def test_download(self, mock_retrieve, valid_path_config, dummy_dir): + """Test download functionality. + Here we mock the downloading and save property file to a fake path. 
+ """ + times = TimeBounds(np.datetime64("1996-01-01"), np.datetime64("1996-12-31")) + bbox = SpatialBounds(54, 56, 1, 3) + variable = ["land_cover"] + download_dir = Path(dummy_dir, "download") + + land_cover_dataset = LandCover() + # create a dummy .cdsapirc + patching = patch("zampy.datasets.cds_utils.CONFIG_PATH", valid_path_config) + with patching: + land_cover_dataset.download( + download_dir=download_dir, + time_bounds=times, + spatial_bounds=bbox, + variable_names=variable, + overwrite=True, + ) + + # make sure that the download is called + mock_retrieve.assert_called_once_with( + "satellite-land-cover", + { + "variable": "all", + "format": "zip", + "year": "1996", + "version": "v2.0.7cds", + }, + ) + + # check property file + with (download_dir / "land-cover" / "properties.json").open( + mode="r", encoding="utf-8" + ) as file: + json_dict = json.load(file) + # check property + assert json_dict["variable_names"] == variable + + def ingest_dummy_data(self, temp_dir): + """Ingest dummy zip data to nc for other tests.""" + land_cover_dataset = LandCover() + land_cover_dataset.ingest(download_dir=data_folder, ingest_dir=Path(temp_dir)) + ds = xr.load_dataset( + Path( + temp_dir, + "land-cover", + "land-cover_LCCS_MAP_300m_1996.nc", + ) + ) + + return ds, land_cover_dataset + + def test_ingest(self, dummy_dir): + """Test ingest function.""" + ds, _ = self.ingest_dummy_data(dummy_dir) + assert isinstance(ds, xr.Dataset) + + def test_load(self, dummy_dir): + """Test load function.""" + times = TimeBounds(np.datetime64("1996-01-01"), np.datetime64("1996-12-31")) + bbox = SpatialBounds(39, -107, 37, -109) + variable = ["land_cover"] + + _, land_cover_dataset = self.ingest_dummy_data(dummy_dir) + + ds = land_cover_dataset.load( + ingest_dir=Path(dummy_dir), + time_bounds=times, + spatial_bounds=bbox, + variable_names=variable, + resolution=1.0, + regrid_method="most_common", + ) + + # we assert the regridded coordinates + expected_lat = [37.0, 38.0, 39.0] + 
expected_lon = [-109.0, -108.0, -107.0] + + np.testing.assert_allclose(ds.latitude.values, expected_lat) + np.testing.assert_allclose(ds.longitude.values, expected_lon) + + def test_convert(self, dummy_dir): + """Test convert function.""" + _, land_cover_dataset = self.ingest_dummy_data(dummy_dir) + land_cover_dataset.convert(ingest_dir=Path(dummy_dir), convention="ALMA") + # TODO: finish this test when the function is complete. + + +def test_extract_netcdf_to_zampy(): + ds = zampy.datasets.land_cover.extract_netcdf_to_zampy( + data_folder / "land-cover/land-cover_LCCS_MAP_300m_1996.zip" + ) + assert isinstance(ds, xr.Dataset) + + +def test_unzip_raw_to_netcdf(dummy_dir): + zampy.datasets.land_cover.unzip_raw_to_netcdf( + Path(dummy_dir), + data_folder / "land-cover/land-cover_LCCS_MAP_300m_1996.zip", + ) + dataset_path = Path(dummy_dir) / "land-cover_LCCS_MAP_300m_1996.nc" + assert dataset_path.exists()