Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fapar-lai dataset #40

Merged
merged 6 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/available_datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,8 @@ You can add these yourself by creating a pull request, or open an issue to reque
- `land_cover`

For more information, see [their webpage](https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover).

### FAPAR Leaf Area Index
- `leaf_area_index`

For more information, see [their webpage](https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-lai-fapar).
4 changes: 4 additions & 0 deletions src/zampy/conventions/ALMA.json
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@
"variable": "CO2air",
"units": "kilogram/kilogram"
},
"leaf_area_index": {
"variable": "LAI",
"units": "fraction"
},
"land_cover": {
"variable": "land_cover",
"units": ""
Expand Down
2 changes: 2 additions & 0 deletions src/zampy/datasets/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from zampy.datasets.era5 import ERA5
from zampy.datasets.era5 import ERA5Land
from zampy.datasets.eth_canopy_height import EthCanopyHeight
from zampy.datasets.fapar_lai import FaparLAI
from zampy.datasets.land_cover import LandCover
from zampy.datasets.prism_dem import PrismDEM30
from zampy.datasets.prism_dem import PrismDEM90
Expand All @@ -18,5 +19,6 @@
"eth_canopy_height": EthCanopyHeight,
"prism_dem_30": PrismDEM30,
"prism_dem_90": PrismDEM90,
"fapar_lai": FaparLAI,
"land_cover": LandCover,
}
1 change: 1 addition & 0 deletions src/zampy/datasets/cds_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
SERVER_API = {
"era5": "cdsapi",
"era5-land": "cdsapi",
"fapar-lai": "cdsapi",
"cams": "adsapi",
"land-cover": "cdsapi",
}
Expand Down
292 changes: 292 additions & 0 deletions src/zampy/datasets/fapar_lai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
"""Implementation of the FAPAR LAI dataset."""

import calendar
import shutil
import tempfile
import zipfile
from pathlib import Path
from typing import Any

import cdsapi
import numpy as np
import pandas as pd
import xarray as xr
import xarray_regrid
from tqdm import tqdm

from zampy.datasets import cds_utils
from zampy.datasets import converter
from zampy.datasets import validation
from zampy.datasets.dataset_protocol import SpatialBounds
from zampy.datasets.dataset_protocol import TimeBounds
from zampy.datasets.dataset_protocol import Variable
from zampy.datasets.dataset_protocol import copy_properties_file
from zampy.datasets.dataset_protocol import write_properties_file
from zampy.reference.variables import VARIABLE_REFERENCE_LOOKUP
from zampy.reference.variables import unit_registry


## Ignore missing class/method docstrings: they are implemented in the Dataset class.
# ruff: noqa: D102


class FaparLAI:  # noqa: D101
    """FAPAR Leaf Area Index dataset, retrieved from the Copernicus CDS.

    10-daily, 1 km gridded leaf area index derived from SPOT/PROBA-V
    satellite observations (CDS dataset ``satellite-lai-fapar``).
    """

    # Identifier used for the catalog entry and the download/ingest subfolders.
    name = "fapar-lai"
    # Temporal coverage of the 10-daily product available on the CDS.
    time_bounds = TimeBounds(np.datetime64("1998-04-10"), np.datetime64("2020-06-30"))
    # Global coverage: (north, east, south, west).
    spatial_bounds = SpatialBounds(90, 180, -90, -180)
    crs = "EPSG:4326"

    # Variable name as it appears in the raw netCDF files.
    raw_variables = [Variable(name="lai", unit=unit_registry.fraction)]
    # Variable name following the zampy naming convention.
    variable_names = ["leaf_area_index"]
    variables = [VARIABLE_REFERENCE_LOOKUP[var] for var in variable_names]

    license = "non-standard"
    license_url = (
        "https://cds.climate.copernicus.eu/api/v2/terms/static/vito-proba-v.pdf"
    )

    bib = """
    @misc{https://doi.org/10.24381/cds.7e59b01a,
        doi = {10.24381/CDS.7E59B01A},
        url = {https://cds.climate.copernicus.eu/doi/10.24381/cds.7e59b01a},
        author = {{Copernicus Climate Change Service}},
        title = {Leaf area index and fraction absorbed of photosynthetically active radiation 10-daily gridded data from 1981 to present},
        publisher = {ECMWF},
        year = {2018}
    }
    """  # noqa: E501

    def __init__(self) -> None:
        """Init."""
        pass

    def download(
        self,
        download_dir: Path,
        time_bounds: TimeBounds,
        spatial_bounds: SpatialBounds,
        variable_names: list[str],
        overwrite: bool = False,
    ) -> bool:
        """Download the dataset from the CDS as one zip archive per month.

        Args:
            download_dir: Root download directory; files go to a `fapar-lai`
                subfolder.
            time_bounds: Requested start/end times.
            spatial_bounds: Requested bounding box.
            variable_names: Requested variables (only `leaf_area_index`).
            overwrite: Re-download files that already exist.

        Returns:
            True when the download completed.
        """
        validation.validate_download_request(
            self,
            download_dir,
            time_bounds,
            spatial_bounds,
            variable_names,
        )

        download_folder = download_dir / self.name
        download_folder.mkdir(parents=True, exist_ok=True)

        url, api_key = cds_utils.cds_api_key("fapar-lai")

        client = cdsapi.Client(
            url=url,
            key=api_key,
            verify=True,
            quiet=True,
        )

        # One CDS request per (year, month) pair; each yields a zip archive.
        years_months = get_year_month_pairs(time_bounds)
        for year, month in tqdm(years_months):
            print(f"Downloading FAPAR LAI data for {year}-{month}...")
            download_fapar_lai(
                client, year, month, download_folder, spatial_bounds, overwrite
            )

        write_properties_file(
            download_folder, spatial_bounds, time_bounds, variable_names
        )

        return True

    def ingest(
        self,
        download_dir: Path,
        ingest_dir: Path,
        overwrite: bool = False,
    ) -> bool:
        """Unpack the downloaded zip archives and convert to zampy format.

        Args:
            download_dir: Root directory the zip archives were downloaded to.
            ingest_dir: Root directory the ingested netCDF files go to.
            overwrite: Re-ingest files that already exist.

        Returns:
            True when ingestion completed.
        """
        download_folder = download_dir / self.name
        ingest_folder = ingest_dir / self.name
        ingest_folder.mkdir(parents=True, exist_ok=True)

        zip_file_pattern = "*.zip"
        zip_files = sorted(download_folder.glob(zip_file_pattern))

        # Create tmp dir. See https://github.com/EcoExtreML/zampy/issues/36
        # TODO: Fix in a nice way.
        tmp_path = download_dir.parent / "tmp"
        tmp_path.mkdir(parents=True, exist_ok=True)

        # netCDF files follow CF-1.6, only unpacking the archives is required.
        for file in zip_files:
            # Each archive is unpacked to a throw-away dir, so partially
            # ingested archives never pollute the ingest folder.
            with tempfile.TemporaryDirectory(dir=tmp_path) as _tmpdir:
                tmpdir = Path(_tmpdir)

                extract_fapar_zip(
                    zip_file=file,
                    ingest_folder=ingest_folder,
                    extract_dir=tmpdir,
                    overwrite=overwrite,
                )

                for ncfile in tmpdir.glob("*.nc"):
                    ingest_ncfile(ncfile, ingest_folder)

        copy_properties_file(download_folder, ingest_folder)

        return True

    def load(
        self,
        ingest_dir: Path,
        time_bounds: TimeBounds,
        spatial_bounds: SpatialBounds,
        resolution: float,
        regrid_method: str,  # should be deprecated.
        variable_names: list[str],
    ) -> xr.Dataset:
        """Load the ingested data, sliced in time and regridded in space.

        Note: `regrid_method` and `variable_names` are not used here;
        regridding is always linear via xarray-regrid.
        """
        files = list((ingest_dir / self.name).glob("*.nc"))

        ds = xr.open_mfdataset(files, parallel=True)
        ds = ds.sel(time=slice(time_bounds.start, time_bounds.end))

        target_dataset = create_regridding_ds(spatial_bounds, resolution)
        ds = ds.regrid.linear(target_dataset)

        return ds

    def convert(  # Will be removed, see issue #43.
        self,
        ingest_dir: Path,
        convention: str | Path,
    ) -> bool:
        """Check that the ingested files conform to the given convention.

        NOTE(review): the converted dataset is never written back to disk
        here — presumably converter.convert only validates/renames in this
        codebase, or persistence is handled elsewhere. Confirm before use;
        the whole method is slated for removal (issue #43).
        """
        converter.check_convention(convention)
        ingest_folder = ingest_dir / self.name

        data_file_pattern = f"{self.name}_*.nc"

        data_files = list(ingest_folder.glob(data_file_pattern))

        for file in data_files:
            # start conversion process
            print(f"Start processing file `{file.name}`.")
            ds = xr.open_dataset(file, chunks={"latitude": 2000, "longitude": 2000})
            ds = converter.convert(ds, dataset=self, convention=convention)

        return True


def create_regridding_ds(
    spatial_bounds: SpatialBounds, resolution: float
) -> xr.Dataset:
    """Build the target dataset for an xarray-regrid regridding operation.

    Args:
        spatial_bounds: Spatial bounds of the new dataset.
        resolution: Latitude and longitude resolution of the new dataset.

    Returns:
        The dataset ready to be used in regridding.
    """
    # The grid edges come straight from the requested bounds; the same
    # resolution is applied along both the latitude and longitude axes.
    target_grid = xarray_regrid.Grid(
        north=spatial_bounds.north,
        east=spatial_bounds.east,
        south=spatial_bounds.south,
        west=spatial_bounds.west,
        resolution_lat=resolution,
        resolution_lon=resolution,
    )
    return xarray_regrid.create_regridding_dataset(target_grid)


def get_year_month_pairs(time_bounds: TimeBounds) -> list[tuple[int, int]]:
    """Get the year and month pairs covering the input time bounds."""
    # Let pandas enumerate the monthly periods spanning the bounds
    # (inclusive on both ends), then unpack them into (year, month) tuples.
    monthly_periods = pd.period_range(
        start=pd.to_datetime(time_bounds.start),
        end=pd.to_datetime(time_bounds.end),
        freq="M",
    )
    return [(period.year, period.month) for period in monthly_periods]


def get_lai_days(year: int, month: int) -> list[str]:
    """Get the days that the FAPAR LAI is available for a certain year and month.

    The product has a 10-daily frequency: the 10th, the 20th, and the last
    day of each month.

    Args:
        year: Year of interest.
        month: Month of interest (1-12).

    Returns:
        The available nominal days as zero-padded strings, e.g.
        ["10", "20", "31"].
    """
    # calendar.monthrange handles month lengths and leap years for any year,
    # replacing a hard-coded leap-year list that only covered 2000-2020.
    last_day = calendar.monthrange(year, month)[1]
    return ["10", "20", str(last_day)]


def download_fapar_lai(
    client: cdsapi.Client,
    year: int,
    month: int,
    download_dir: Path,
    spatial_bounds: SpatialBounds | None = None,
    overwrite: bool = False,
) -> None:
    """Download the FAPAR LAI data from CDS.

    Args:
        client: Authenticated CDS API client.
        year: Year to download.
        month: Month to download.
        download_dir: Folder the zip archive is written to.
        spatial_bounds: Optional bounding box; the full globe when None.
        overwrite: Re-download even if the target file already exists.
    """
    # The product switches source satellite in 2014 (SPOT before, PROBA after).
    satellite = "spot" if year < 2014 else "proba"

    request: dict[str, Any] = {
        "format": "zip",
        "variable": "lai",
        "horizontal_resolution": "1km",
        "product_version": "V3",
        "satellite": satellite,
        "sensor": "vgt",
        "month": f"{month:0>2}",
        "nominal_day": get_lai_days(year, month),
        "year": f"{year}",
    }

    if spatial_bounds is not None:
        # The CDS API expects the area as [north, west, south, east].
        request["area"] = [
            spatial_bounds.north,
            spatial_bounds.west,
            spatial_bounds.south,
            spatial_bounds.east,
        ]

    target_file = download_dir / f"satellite-lai-fapar_{year}-{month}.zip"
    retrieval = client.retrieve("satellite-lai-fapar", request)
    cds_utils._check_and_download(retrieval, target_file, overwrite)


def ingest_ncfile(ncfile: Path, ingest_folder: Path) -> None:
    """Ingest the 'raw' netCDF file to the Zampy standard format."""
    print(f"Converting file {ncfile.name}...")

    # Chunked (lazy) open keeps memory use bounded for the 1 km global grids.
    raw = xr.open_dataset(ncfile, decode_times=False, chunks={"lat": 5000, "lon": 5000})

    # Rename variables and coordinates to the zampy naming convention.
    name_map = {
        "LAI": "leaf_area_index",
        "lat": "latitude",
        "lon": "longitude",
    }
    renamed = raw.rename(name_map)
    renamed["leaf_area_index"].attrs["units"] = "fraction"

    # Write only the LAI variable, with zlib compression.
    renamed[["leaf_area_index"]].to_netcdf(
        path=ingest_folder / ncfile.name,
        encoding={"leaf_area_index": {"zlib": True, "complevel": 3}},
    )


def extract_fapar_zip(
    zip_file: Path, ingest_folder: Path, extract_dir: Path, overwrite: bool
) -> None:
    """Extract the FAPAR LAI zip file, if not all files already exist."""
    with zipfile.ZipFile(zip_file, "r") as archive:
        file_list = archive.namelist()

    # Skip extraction entirely when every member is already ingested.
    already_ingested = all((ingest_folder / fname).exists() for fname in file_list)
    if already_ingested and not overwrite:
        print(f"Files '{file_list}' already exist, skipping...")
        return

    shutil.unpack_archive(zip_file, extract_dir=extract_dir, format="zip")
1 change: 1 addition & 0 deletions src/zampy/reference/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def unit_registration() -> UnitRegistry:
Variable("latitude", unit=unit_registry.degree_north),
Variable("longitude", unit=unit_registry.degree_east),
Variable("elevation", unit=unit_registry.meter),
Variable("leaf_area_index", unit=unit_registry.fraction),
Variable("land_cover", unit=unit_registry.dimensionless),
)

Expand Down
Empty file.
Binary file not shown.
Loading