Commit 74a0a8b

Passing all tests except lint and examples
bdestombe committed Oct 16, 2023
1 parent 1825050 commit 74a0a8b
Showing 5 changed files with 47 additions and 302 deletions.
6 changes: 0 additions & 6 deletions src/dtscalibration/__init__.py
@@ -1,11 +1,8 @@
-from dtscalibration.datastore import DataStore
 from dtscalibration.datastore_utils import check_dims
 from dtscalibration.datastore_utils import get_netcdf_encoding
 from dtscalibration.datastore_utils import merge_double_ended
 from dtscalibration.datastore_utils import shift_double_ended
 from dtscalibration.datastore_utils import suggest_cable_shift_double_ended
-from dtscalibration.io import open_datastore
-from dtscalibration.io import open_mf_datastore
 from dtscalibration.io import read_apsensing_files
 from dtscalibration.io import read_sensornet_files
 from dtscalibration.io import read_sensortran_files
@@ -18,9 +15,6 @@

 __version__ = "2.0.0"
 __all__ = [
-    "DataStore",
-    "open_datastore",
-    "open_mf_datastore",
     "read_apsensing_files",
     "read_sensornet_files",
     "read_sensortran_files",
8 changes: 7 additions & 1 deletion src/dtscalibration/datastore_accessor.py
@@ -46,7 +46,7 @@ def __repr__(self):
         # 'xarray' is prepended. so we remove it and add 'dtscalibration'
         s = xr.core.formatting.dataset_repr(self._obj)
         name_module = type(self._obj).__name__
-        preamble_new = "<dtscalibration.%s>" % name_module
+        preamble_new = f"<dtscalibration.{name_module}>"

         # Add sections to new preamble
         preamble_new += "\nSections:"
@@ -268,6 +268,12 @@ def get_default_encoding(self, time_chunks_from_key=None):

         return encoding

+    def get_timeseries_keys(self):
+        """
+        Returns a list of the keys of the time series variables.
+        """
+        return [k for k, v in self._obj.data_vars.items() if v.dims == ("time",)]
+
     def ufunc_per_section(
         self,
         sections=None,
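For reference, a minimal sketch of how the new get_timeseries_keys() accessor method might be used. The accessor name ("dts") and the example dataset are assumptions for illustration only; the list comprehension at the end mirrors the added code.

import numpy as np
import xarray as xr

# Hypothetical example dataset: one (x, time) variable and one time-only
# variable, loosely mimicking a DTS measurement.
ds = xr.Dataset(
    data_vars={
        "st": (("x", "time"), np.random.rand(100, 5)),
        "userAcquisitionTimeFW": (("time",), np.full(5, 30.0)),
    },
    coords={"x": np.linspace(0.0, 100.0, 100), "time": np.arange(5)},
)

# With the dtscalibration accessor registered (the name "dts" is an assumption),
# the new method returns only the variables defined along ("time",):
#   ds.dts.get_timeseries_keys()  ->  ["userAcquisitionTimeFW"]

# The same selection without the accessor, identical to the added code:
timeseries_keys = [k for k, v in ds.data_vars.items() if v.dims == ("time",)]
print(timeseries_keys)  # ['userAcquisitionTimeFW']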
64 changes: 12 additions & 52 deletions src/dtscalibration/datastore_utils.py
@@ -459,45 +459,8 @@ def check_deprecated_kwargs(kwargs):
     pass


-# def check_timestep_allclose(ds: "DataStore", eps: float = 0.05) -> None:
-#     """
-#     Check if all timesteps are of equal size. For now it is not possible to calibrate
-#     over timesteps if the acquisition time of timesteps varies, as the Stokes variance
-#     would change over time.
-
-#     The acquisition time is stored for single ended measurements in userAcquisitionTime,
-#     for double ended measurements in userAcquisitionTimeFW and userAcquisitionTimeBW.
-
-#     Parameters
-#     ----------
-#     ds : DataStore
-#     eps : float
-#         Default accepts 1% of relative variation between min and max acquisition time.
-
-#     Returns
-#     -------
-#     """
-#     dt = ds["userAcquisitionTimeFW"].data
-#     dtmin = dt.min()
-#     dtmax = dt.max()
-#     dtavg = (dtmin + dtmax) / 2
-#     assert (dtmax - dtmin) / dtavg < eps, (
-#         "Acquisition time is Forward channel not equal for all time steps"
-#     )
-
-#     if "userAcquisitionTimeBW" in ds:
-#         dt = ds["userAcquisitionTimeBW"].data
-#         dtmin = dt.min()
-#         dtmax = dt.max()
-#         dtavg = (dtmin + dtmax) / 2
-#         assert (dtmax - dtmin) / dtavg < eps, (
-#             "Acquisition time Backward channel is not equal for all time steps"
-#         )
-#     pass
-
-
 def get_netcdf_encoding(
-    ds: "DataStore", zlib: bool = True, complevel: int = 5, **kwargs
+    ds: xr.Dataset, zlib: bool = True, complevel: int = 5, **kwargs
 ) -> dict:
     """Get default netcdf compression parameters. The same for each data variable.
@@ -788,12 +751,12 @@ def get_params_from_pval_single_ended(


 def merge_double_ended(
-    ds_fw: "DataStore",
-    ds_bw: "DataStore",
+    ds_fw: xr.Dataset,
+    ds_bw: xr.Dataset,
     cable_length: float,
     plot_result: bool = True,
     verbose: bool = True,
-) -> "DataStore":
+) -> xr.Dataset:
     """
     Some measurements are not set up on the DTS-device as double-ended
     meausurements. This means that the two channels have to be merged manually.
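For context, a sketch of how merge_double_ended might be called on two single-ended datasets after this change. Only the signature above is taken from the diff; the input files and the cable length value are hypothetical.

import xarray as xr
from dtscalibration.datastore_utils import merge_double_ended

# ds_fw and ds_bw are assumed to be xr.Dataset objects holding the forward
# and backward channel measurements (hypothetical file names).
ds_fw = xr.open_dataset("channel_1.nc")
ds_bw = xr.open_dataset("channel_2.nc")

# cable_length is the length of the fiber; see the full docstring for its
# exact meaning.
ds = merge_double_ended(ds_fw, ds_bw, cable_length=100.0, plot_result=False)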
@@ -858,11 +821,11 @@ def merge_double_ended(


 def merge_double_ended_times(
-    ds_fw: "DataStore",
-    ds_bw: "DataStore",
+    ds_fw: xr.Dataset,
+    ds_bw: xr.Dataset,
     verify_timedeltas: bool = True,
     verbose: bool = True,
-) -> tuple["DataStore", "DataStore"]:
+) -> tuple[xr.Dataset, xr.Dataset]:
     """Helper for `merge_double_ended()` to deal with missing measurements. The
     number of measurements of the forward and backward channels might get out
     of sync if the device shuts down before the measurement of the last channel
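A brief sketch of this helper with its updated signature, reusing ds_fw and ds_bw from the previous sketch. Whether end users call it directly rather than via merge_double_ended is not stated in this diff.

from dtscalibration.datastore_utils import merge_double_ended_times

# Align the time axes of the two channels (presumably by dropping
# measurements that have no counterpart in the other channel).
ds_fw_aligned, ds_bw_aligned = merge_double_ended_times(
    ds_fw, ds_bw, verify_timedeltas=True, verbose=True
)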
@@ -997,8 +960,8 @@ def merge_double_ended_times(


 def shift_double_ended(
-    ds: "DataStore", i_shift: int, verbose: bool = True
-) -> "DataStore":
+    ds: xr.Dataset, i_shift: int, verbose: bool = True
+) -> xr.Dataset:
     """
     The cable length was initially configured during the DTS measurement. For double ended
     measurements it is important to enter the correct length so that the forward channel and the
@@ -1031,8 +994,6 @@ def shift_double_ended(
     ds2 : DataStore object
         With a shifted x-axis
     """
-    from dtscalibration import DataStore
-
     assert isinstance(i_shift, (int, np.integer))

     nx = ds.x.size
@@ -1074,11 +1035,11 @@ def shift_double_ended(
     if not_included and verbose:
         print("I dont know what to do with the following data", not_included)

-    return DataStore(data_vars=d2_data, coords=d2_coords, attrs=ds.attrs)
+    return xr.Dataset(data_vars=d2_data, coords=d2_coords, attrs=ds.attrs)


 def suggest_cable_shift_double_ended(
-    ds: "DataStore",
+    ds: xr.Dataset,
     irange: npt.NDArray[np.int_],
     plot_result: bool = True,
     **fig_kwargs,
@@ -1107,8 +1068,7 @@ def suggest_cable_shift_double_ended(
     Parameters
     ----------
-    ds : DataSore object
-        DataStore object that needs to be shifted
+    ds : Xarray Dataset
     irange : array-like
         a numpy array with data of type int. Containing all the shift index
         that are tested.
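A short sketch combining suggest_cable_shift_double_ended and shift_double_ended under the updated signatures. The input file, the shift range, the chosen shift of 3 samples, and the structure of the return value are all assumptions, since none of them is shown in this diff.

import numpy as np
import xarray as xr
from dtscalibration.datastore_utils import (
    shift_double_ended,
    suggest_cable_shift_double_ended,
)

# Hypothetical double-ended measurement stored as netCDF.
ds = xr.open_dataset("double_ended_measurement.nc")

# Evaluate candidate integer shifts between -10 and +10 samples. The return
# value is only printed here because its exact form is not shown above.
suggestion = suggest_cable_shift_double_ended(
    ds, irange=np.arange(-10, 11), plot_result=False
)
print(suggestion)

# Apply a shift picked by hand (3 samples) to line up the two channels.
ds_shifted = shift_double_ended(ds, i_shift=3, verbose=True)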
182 changes: 0 additions & 182 deletions src/dtscalibration/io.py
@@ -20,188 +20,6 @@
from dtscalibration.io_utils import ziphandle_to_filepathlist


def open_datastore(
filename_or_obj,
group=None,
decode_cf=True,
mask_and_scale=None,
decode_times=True,
concat_characters=True,
decode_coords=True,
engine=None,
chunks=None,
lock=None,
cache=None,
drop_variables=None,
backend_kwargs=None,
load_in_memory=False,
**kwargs,
):
"""Load and decode a datastore from a file or file-like object. Most
arguments are passed to xarray.open_dataset().
Parameters
----------
filename_or_obj : str, Path, file or xarray.backends.*Dataset
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). File-like objects are opened
with scipy.io.netcdf (only netCDF3 supported).
group : str, optional
Path to the netCDF4 group in the given file to open (only works for
netCDF4 files).
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. mask_and_scale defaults to True except for the
pseudonetcdf backend.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
decode_coords : bool, optional
If True, decode the 'coordinates' attribute to identify coordinates in
the resulting dataset.
engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio',
'pseudonetcdf'}, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
'netcdf4'.
chunks : int or dict, optional
If chunks is provided, it used to load the new dataset into dask
arrays. ``chunks={}`` loads the dataset with dask using a single
chunk for all arrays.
lock : False, True or threading.Lock, optional
If chunks is provided, this argument is passed on to
:py:func:`dask.array.from_array`. By default, a global lock is
used when reading data from netCDF files with the netcdf4 and h5netcdf
engines to avoid issues with concurrent access when using dask's
multithreaded backend.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
drop_variables: string or iterable, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
backend_kwargs: dictionary, optional
A dictionary of keyword arguments to pass on to the backend. This
may be useful when backend options would improve performance or
allow user control of dataset processing.
Returns
-------
dataset : Dataset
The newly created dataset.
See Also
--------
xarray.open_dataset
xarray.load_dataset
"""

xr_kws = inspect.signature(xr.open_dataset).parameters.keys()

ds_kwargs = {k: v for k, v in kwargs.items() if k not in xr_kws}

if chunks is None:
chunks = {}

with xr.open_dataset(
filename_or_obj,
group=group,
decode_cf=decode_cf,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
concat_characters=concat_characters,
decode_coords=decode_coords,
engine=engine,
chunks=chunks,
lock=lock,
cache=cache,
drop_variables=drop_variables,
backend_kwargs=backend_kwargs,
) as ds_xr:
ds = Dataset(
data_vars=ds_xr.data_vars,
coords=ds_xr.coords,
attrs=ds_xr.attrs,
**ds_kwargs,
)

# to support deprecated st_labels
ds = ds.rename_labels(assertion=False)

if load_in_memory:
if "cache" in kwargs:
raise TypeError("cache has no effect in this context")
return ds.load()

else:
return ds


def open_mf_datastore(
path=None, paths=None, combine="by_coords", load_in_memory=False, **kwargs
):
"""
Open a datastore from multiple netCDF files. This script assumes the
datastore was split along the time dimension. But only variables with a
time dimension should be concatenated in the time dimension. Other
options from xarray do not support this.
Parameters
----------
combine : {'by_coords', 'nested'}, optional
Leave it at by_coords
path : str
A file path to the stored netcdf files with an asterisk in the
filename to list all. Ensure you have leading zeros in the file
numbering.
paths : list
Define you own list of file paths.
Returns
-------
dataset : Dataset
The newly created dataset.
"""
from xarray.backends.api import open_mfdataset

if paths is None:
paths = sorted(glob.glob(path))
assert paths, "No files match found with: " + path

with open_mfdataset(paths=paths, combine=combine, **kwargs) as xds:
ds = Dataset(data_vars=xds.data_vars, coords=xds.coords, attrs=xds.attrs)

# to support deprecated st_labels
ds = ds.rename_labels(assertion=False)

if load_in_memory:
if "cache" in kwargs:
raise TypeError("cache has no effect in this context")
return ds.load()

else:
return ds


def read_silixa_files(
filepathlist=None,
directory=None,
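Since open_datastore and open_mf_datastore are removed here in favour of plain xarray datasets, the closest xarray-native equivalents would presumably be the following (a sketch; the file paths are hypothetical and the exact migration path is not spelled out in this commit).

import xarray as xr

# Single netCDF file, roughly replacing open_datastore.
ds = xr.open_dataset("measurement.nc")

# Multiple files split along the time dimension, roughly replacing
# open_mf_datastore; combine="by_coords" matches the removed default.
ds_multi = xr.open_mfdataset("measurements_*.nc", combine="by_coords")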