Added support to select location and time range (#50)
* get forcing file from location

* check location format for bbox

* get forcing file with given site name

* add start time and end time

* update forcing_filename in all relevant functions

* slice forcing file based on the given time range

* replace NumberOfTimeSteps with start/end time

* fix save module

* update run model notebook

* fix save module

* fix typo

* fix old tests

* fix linter

* add tests for locations and time check

* add test for get forcing file

* test to check minutes

* add doc string

* fix linter

* Update PyStemmusScope/forcing_io.py

Co-authored-by: Bart Schilperoort <[email protected]>

* revise regex and use parametrize for test

* silence linter

* Refactored loc parsing with re.findall. Support +/- numbers

* Added support for "NA" start/end time input.

* Pin prospector version

* Fix linter/sonarcloud/pyroma issues

* Apply Sarah's suggestions

Co-authored-by: SarahAlidoost <[email protected]>

* Added "validate config" function to config_io

* Refactoring to remove "forcing_filename" kwarg

* Last changes based on Sarah's review, Updated notebook

* Apply Sarah's suggestions from code review

Co-authored-by: SarahAlidoost <[email protected]>

Co-authored-by: Bart Schilperoort <[email protected]>
Co-authored-by: Bart Schilperoort <[email protected]>
Co-authored-by: SarahAlidoost <[email protected]>
4 people authored Dec 21, 2022
1 parent 7e008ea commit 031b99c
Showing 18 changed files with 459 additions and 175 deletions.
36 changes: 29 additions & 7 deletions PyStemmusScope/config_io.py
@@ -7,6 +7,8 @@
import os
import shutil
import time
from pathlib import Path
from typing import Union
from . import utils


@@ -29,9 +31,24 @@ def read_config(path_to_config_file):
(key, val) = line.split("=")
config[key] = val.rstrip('\n')

validate_config(config)

return config

def create_io_dir(forcing_filename, config):

def validate_config(config: Union[Path, dict]):
"""Validate a config: check that the location and time entries are well-formed.

Args:
config: Path to a config file, or an already-parsed config dictionary.
"""
if isinstance(config, Path):
config = read_config(config)
elif not isinstance(config, dict):
raise ValueError("The input to validate_config should be either a Path or dict"
f" object, but a {type(config)} object was passed.")

# TODO: check that the input data directories/files exist, and return a clear error to the user.
_ = utils.check_location_fmt(config["Location"])
utils.check_time_fmt(config["StartTime"], config["EndTime"])


def create_io_dir(config):
"""Create input directory and copy required files.
Workflow executor to create the work directory and all sub-directories.
@@ -41,7 +58,12 @@ def create_io_dir(forcing_filename, config):
"""
# get start time with the format Y-M-D-HM
timestamp = time.strftime('%Y-%m-%d-%H%M')
station_name = forcing_filename.split('_')[0]

loc, fmt = utils.check_location_fmt(config["Location"])
if fmt == "site":
station_name = loc
else:
raise NotImplementedError()

# create input directory
work_dir = utils.to_absolute_path(config['WorkDir'])
@@ -60,11 +82,12 @@
logger.info("%s", message)

# update config file for InputPath and OutputPath
config_file_path = _update_config_file(forcing_filename, input_dir, output_dir,
config_file_path = _update_config_file(input_dir, output_dir,
config, station_name, timestamp)

return str(input_dir), str(output_dir), config_file_path


def _copy_data(input_dir, config):
"""Copy required data to the work directory.
@@ -83,7 +106,8 @@ def _copy_data(input_dir, config):
# copy input_data.xlsx
shutil.copy(str(config["input_data"]), str(input_dir))

def _update_config_file(nc_file, input_dir, output_dir, config, station_name, timestamp): #pylint: disable=too-many-arguments

def _update_config_file(input_dir, output_dir, config, station_name, timestamp):
"""Update config file for each station.
Create config file for each forcing/station under the work directory.
@@ -102,9 +126,7 @@ def _update_config_file(nc_file, input_dir, output_dir, config, station_name, timestamp):
config_file_path = input_dir / f"{station_name}_{timestamp}_config.txt"
with open(config_file_path, 'w', encoding="utf8") as f:
for key, value in config.items():
if key == "ForcingFileName":
update_entry = f"{key}={nc_file}\n"
elif key == "InputPath":
if key == "InputPath":
update_entry = f"{key}={str(input_dir)}/\n"
elif key == "OutputPath":
update_entry = f"{key}={str(output_dir)}/\n"
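For context on how the new validation is meant to be driven: `read_config` now calls `validate_config` on every parsed config, and the function also accepts a path or a dict directly. A minimal sketch, assuming PyStemmusScope is installed; the site name, times, and file path below are hypothetical:

```python
from pathlib import Path
from PyStemmusScope import config_io

# Only the keys validate_config inspects are shown; values are hypothetical.
config = {
    "Location": "DE-Kli",             # a site name (formats are checked by utils.check_location_fmt)
    "StartTime": "2007-01-01T00:00",  # or "NA" to start at the forcing's first step
    "EndTime": "2007-01-07T23:30",    # or "NA" to run to the forcing's last step
}
config_io.validate_config(config)  # raises on a malformed location or time

# A Path works too: the file is parsed with read_config first.
config_io.validate_config(Path("config_template.txt"))
```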
64 changes: 53 additions & 11 deletions PyStemmusScope/forcing_io.py
@@ -22,12 +22,14 @@ def _write_matlab_ascii(fname, data, ncols):
np.savetxt(fname, data, multi_fmt)


def read_forcing_data(forcing_file):
def read_forcing_data(forcing_file, start_time, end_time):
"""Reads the forcing data from the provided netCDF file, and applies the required
unit conversions before returning the read data.
Args:
forcing_file (Path): Path to the netCDF file containing the forcing data
start_time (str): Start of time range in ISO format string e.g. 'YYYY-MM-DDTHH:MM:SS'.
end_time (str): End of time range in ISO format string e.g. 'YYYY-MM-DDTHH:MM:SS'.
Returns:
dict: Dictionary containing the different variables required by STEMMUS_SCOPE
@@ -38,6 +40,14 @@ def read_forcing_data(forcing_file):
# remove the x and y coordinates from the data variables to make the numpy arrays 1D
ds_forcing = ds_forcing.squeeze(['x', 'y'])

# check if time range is covered by forcing
# if so, return a subset of forcing matching the given time range
ds_forcing = _slice_forcing_file(
ds_forcing,
start_time,
end_time,
)

data = {}

# Expected time format is days (as floating point) since Jan 1st 00:00.
@@ -138,7 +148,7 @@ def write_meteo_file(data, fname):
_write_matlab_ascii(fname, meteo_file_data, ncols=len(meteo_data_vars))


def prepare_global_variables(data, input_path, config):
def prepare_global_variables(data, input_path):
"""Function to read and calculate global variables for STEMMUS_SCOPE from the
forcing data. Data will be written to a Matlab binary file (v7.3), under the name
'forcing_globals.mat' in the specified input directory.
@@ -149,10 +159,7 @@ def prepare_global_variables(data, input_path, config):
input_path (Path): Path to which the file should be written.
"""
if config['NumberOfTimeSteps'] != 'NA':
total_duration = min(int(config['NumberOfTimeSteps']), data['total_timesteps'])
else:
total_duration = data['total_timesteps']
total_duration = data['total_timesteps']

matfile_vars = ['latitude', 'longitude', 'elevation', 'IGBP_veg_long',
'reference_height', 'canopy_height', 'DELT', 'sitename']
@@ -165,8 +172,11 @@


def prepare_forcing(config):
"""Function to prepare the forcing files required by STEMMUS_SCOPE. The input
directory should be taken from the model configuration file.
"""Function to prepare the forcing files required by STEMMUS_SCOPE.
The input directory should be taken from the model configuration file.
A subset of the forcing file is used when the configured time range is
covered by the forcing file's own time range.
Args:
config (dict): The PyStemmusScope configuration dictionary.
@@ -175,8 +185,8 @@
input_path = Path(config["InputPath"])

# Read the required data from the forcing file into a dictionary
forcing_file = Path(config["ForcingPath"]) / config["ForcingFileName"]
data = read_forcing_data(forcing_file)
forcing_file = utils.get_forcing_file(config)
data = read_forcing_data(forcing_file, config["StartTime"], config["EndTime"])

# Write the single-column ascii '.dat' files to the input directory
write_dat_files(data, input_path)
@@ -189,4 +199,36 @@

# Write the remaining variables (without time dependency) to the matlab v7.3
# file 'forcing_globals.mat'
prepare_global_variables(data, input_path, config)
prepare_global_variables(data, input_path)


def _slice_forcing_file(ds_forcing, start_time, end_time):
"""Get the subset of forcing file based on time range in config
Also check if the desired time range is covered by forcing file.
Args:
ds_forcing (xr.Dataset): Dataset of forcing file.
start_time (str): Start of time range in ISO format string e.g. 'YYYY-MM-DDTHH:MM:SS'.
If "NA", start time will be the first timestamp of the forcing input data.
end_time (str): End of time range in ISO format string e.g. 'YYYY-MM-DDTHH:MM:SS'.
If "NA", end time will be the last timestamp of the forcing input data.
Returns:
The forcing dataset, sliced to the given start and end time.
"""
start_time = None if start_time == "NA" else np.datetime64(start_time)
end_time = None if end_time == "NA" else np.datetime64(end_time)

start_time_forcing = ds_forcing.coords["time"].values[0]
end_time_forcing = ds_forcing.coords["time"].values[-1]

# Compare with None explicitly: np.datetime64(0) (the 1970-01-01 epoch) is falsy.
start_time_valid = start_time >= start_time_forcing if start_time is not None else True
end_time_valid = end_time <= end_time_forcing if end_time is not None else True
if not (start_time_valid and end_time_valid):
raise ValueError(
f"Given time range (from {start_time} to {end_time}) cannot be covered by "
f"the time range of the forcing file (from {start_time_forcing} to "
f"{end_time_forcing}).")

return ds_forcing.sel(time=slice(start_time, end_time))
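The slicing delegates to xarray's label-based `sel`; an "NA" bound becomes `None`, which leaves that end of the `slice` open. A self-contained sketch of the behaviour with synthetic data (the variable name and dates are made up for illustration):

```python
import numpy as np
import pandas as pd
import xarray as xr

# Build a synthetic half-hourly forcing dataset (hypothetical variable name).
time = pd.date_range("2007-01-01", periods=96, freq="30min")
ds = xr.Dataset({"Tair": ("time", np.random.rand(time.size))},
                coords={"time": time})

# Slicing with None on either side keeps that end open, which is how an
# "NA" start/end time falls through to the full forcing range.
subset = ds.sel(time=slice(np.datetime64("2007-01-01T06:00"), None))
print(subset.time.values[0])   # 2007-01-01T06:00
print(subset.time.size)        # 84 remaining steps
```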
39 changes: 10 additions & 29 deletions PyStemmusScope/save.py
@@ -15,17 +15,16 @@
`STEMMUS_SCOPE_Processing repository <https://github.com/EcoExtreML/STEMMUS_SCOPE_Processing>`_
"""

import logging
from pathlib import Path
from typing import Dict
from typing import List
from typing import Union
import numpy as np
import pandas as pd
import xarray as xr
from PyStemmusScope import config_io
from PyStemmusScope import forcing_io
from PyStemmusScope import utils
from . import variable_conversion as vc


@@ -64,24 +63,6 @@ def _select_forcing_variables(forcing_dict: Dict, forcing_var: str, alma_var: str
return data_array


def _shorten_data_array(data: Union[xr.DataArray, xr.Dataset], time_steps: str) -> Union[xr.DataArray, xr.Dataset]:
"""Shorten data based on time_steps.
Args:
data (xr.DataArray or xr.Dataset): data to be shortened.
time_steps (str): number of time steps to keep.
Returns:
xr.DataArray or xr.Dataset: subset of data with the length of time equal to time_steps.
"""

if time_steps != "NA":
time_length = int(time_steps)
data = data.isel(time=np.arange(0, time_length))

return data


def _prepare_soil_data(file_name: str, var_name: str, time: List) -> xr.DataArray:
"""Return simulated soil temperature and soil moisture as `xr.DataArray`.
@@ -250,7 +231,7 @@ def _update_dataset_attrs_dims(dataset: xr.Dataset, forcing_dict: Dict) -> xr.Dataset:

return dataset


#pylint: disable=too-many-locals
def to_netcdf(config_file: str, cf_filename: str) -> str:
"""Save csv files generated by Stemmus_Scope model to a netcdf file using
information provided by ALMA conventions.
@@ -262,9 +243,8 @@
Returns:
str: path to the netCDF file under the output directory.
"""

# read config file
config = config_io.read_config(config_file)
forcing_filename = utils.get_forcing_file(config)

# list of required forcing variables, Alma_short_name: forcing_io_name, # model_name
var_names = {
@@ -280,11 +260,13 @@ def to_netcdf(config_file: str, cf_filename: str) -> str:

# read forcing file into a dict
forcing_dict = forcing_io.read_forcing_data(
Path(config["ForcingPath"]) / config["ForcingFileName"]
forcing_filename,
config["StartTime"],
config["EndTime"],
)

# get time info
time = _shorten_data_array(forcing_dict["time"], config["NumberOfTimeSteps"])
time = forcing_dict["time"].values

# read convention file
conventions = pd.read_csv(cf_filename)
@@ -298,14 +280,13 @@ def to_netcdf(config_file: str, cf_filename: str) -> str:
if alma_name in var_names:
# select data
data_array = _select_forcing_variables(forcing_dict, var_names[alma_name], alma_name)
data_array = _shorten_data_array(data_array, config["NumberOfTimeSteps"])

# create data array
elif alma_name in {"SoilTemp", "SoilMoist"}:
data_array = _prepare_soil_data(file_name, alma_name, time.values)
data_array = _prepare_soil_data(file_name, alma_name, time)
else:
data_array = _prepare_simulated_data(
file_name, df["short_name_STEMMUS-SCOPE"], alma_name, time.values
file_name, df["short_name_STEMMUS-SCOPE"], alma_name, time
)

# update attributes of array
Expand All @@ -328,6 +309,6 @@ def to_netcdf(config_file: str, cf_filename: str) -> str:

# save to nc file
nc_filename = Path(config["OutputPath"]) / f"{Path(config['OutputPath']).stem}_STEMMUS_SCOPE.nc"

dataset.to_netcdf(path=nc_filename)

return str(nc_filename)
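A hedged sketch of calling the updated `to_netcdf` after a run; both paths below are hypothetical, and the conventions csv shipped with the repository may be named differently:

```python
from PyStemmusScope import save

# Hypothetical paths: the config file is the one written by setup(), and the
# csv maps STEMMUS_SCOPE output names to ALMA/CF metadata.
nc_file = save.to_netcdf(
    config_file="input/DE-Kli_2022-12-21-1200/DE-Kli_2022-12-21-1200_config.txt",
    cf_filename="Variables_will_be_in_NetCDF_file.csv",
)
print(nc_file)  # <OutputPath>/<OutputPath stem>_STEMMUS_SCOPE.nc
```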
4 changes: 2 additions & 2 deletions PyStemmusScope/soil_io.py
@@ -214,10 +214,10 @@ def prepare_soil_data(config):
config (dict): The PyStemmusScope configuration dictionary.
"""

forcing_file = Path(config["ForcingPath"]) / config["ForcingFileName"]
forcing_file = utils.get_forcing_file(config)

# Data missing at ID-Pag site. See github.com/EcoExtreML/STEMMUS_SCOPE/issues/77
if config["ForcingFileName"].startswith("ID"):
if config["Location"].startswith("ID"):
lat, lon = -1., 112.
else:
lat, lon = _retrieve_latlon(forcing_file)
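The site workaround above now keys off `config["Location"]` rather than the forcing file name. As a rough illustration of the location parsing the commit message describes ("Refactored loc parsing with re.findall. Support +/- numbers") — the actual rules live in `utils.check_location_fmt` and may well differ — a sketch:

```python
import re

def parse_location(location: str):
    """Hypothetical re-creation: classify a site code vs. a lat/lon pair."""
    if re.fullmatch(r"[A-Z]{2}-[A-Za-z0-9]{3}", location):
        return location, "site"
    # findall with a non-capturing group returns whole matches,
    # including signed and decimal numbers such as "-2.8".
    numbers = re.findall(r"[-+]?\d+(?:\.\d+)?", location)
    if len(numbers) == 2:
        lat, lon = (float(n) for n in numbers)
        return (lat, lon), "latlon"
    raise ValueError(f"Unrecognized location format: {location}")

print(parse_location("DE-Kli"))        # ('DE-Kli', 'site')
print(parse_location("(56.4, -2.8)"))  # ((56.4, -2.8), 'latlon')
```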
38 changes: 20 additions & 18 deletions PyStemmusScope/stemmus_scope.py
@@ -6,6 +6,7 @@
import subprocess
from pathlib import Path
from typing import Dict
from typing import Tuple
from . import config_io
from . import forcing_io
from . import soil_io
@@ -111,9 +112,10 @@ def __init__(self, config_file: str, model_src_path: str, interpreter: str = None):
def setup(
self,
WorkDir: str = None,
ForcingFileName: str = None,
NumberOfTimeSteps: str = None,
) -> str:
Location: str = None,
StartTime: str = None,
EndTime: str = None,
) -> str:
"""Configure model run.
1. Creates config file and input/output directories based on the config template.
Expand All @@ -122,34 +124,35 @@ def setup(
Args:
WorkDir: path to a directory where input/output directories should be created.
ForcingFileName: forcing file name. Forcing file should be in netcdf format.
NumberOfTimeSteps: total number of time steps in which model runs. It can be
`NA` or a number. Example `10` runs the model for 10 time steps.
Location: Location of the model run. Currently only site names are supported.
StartTime: Start time of the model run. It must be in
ISO format (e.g. 2007-01-01T00:00).
EndTime: End time of the model run. It must be in ISO format (e.g. 2007-01-01T00:00).
Returns:
Paths to config file and input/output directories
Path to the config file
"""
# update config template if needed
if WorkDir:
self._config["WorkDir"] = WorkDir

if ForcingFileName:
self._config["ForcingFileName"] = ForcingFileName
if Location:
self._config["Location"] = Location

if NumberOfTimeSteps:
self._config["NumberOfTimeSteps"] = NumberOfTimeSteps
if StartTime:
self._config["StartTime"] = StartTime

if EndTime:
self._config["EndTime"] = EndTime

# validate config *before* directory creation
config_io.validate_config(self._config)

# create customized config file and input/output directories for model run
_, _, self.cfg_file = config_io.create_io_dir(
self._config["ForcingFileName"], self._config
)
_, _, self.cfg_file = config_io.create_io_dir(self._config)

# read the run config file
self._config = config_io.read_config(self.cfg_file)

# prepare forcing data
forcing_io.prepare_forcing(self._config)

# prepare soil data
soil_io.prepare_soil_data(self._config)

return str(self.cfg_file)
@@ -192,7 +195,6 @@ def run(self) -> str:
result = _run_sub_process(args, self.model_src)
return result


@property
def config(self) -> Dict:
"""Return the configurations for this model."""
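Taken together, site and time selection now flow entirely through `setup`, replacing the old `ForcingFileName`/`NumberOfTimeSteps` kwargs. A minimal end-to-end sketch, assuming the package and the STEMMUS_SCOPE source are available locally; every path and the site name below are hypothetical:

```python
from PyStemmusScope import StemmusScope

# All paths and the site name are hypothetical placeholders.
model = StemmusScope(
    config_file="config_template.txt",
    model_src_path="./STEMMUS_SCOPE",   # model source (or executable) location
)

# Location replaces the old ForcingFileName kwarg; StartTime/EndTime
# replace NumberOfTimeSteps ("NA" keeps the forcing file's own bounds).
cfg_file = model.setup(
    Location="DE-Kli",
    StartTime="2007-01-01T00:00",
    EndTime="2007-01-07T23:30",
)
model.run()
```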