From 62cf7819bf5bde3952fe1786b2c491abeaca6b64 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Fri, 21 Jul 2023 11:32:43 +0200 Subject: [PATCH 01/16] Also verify timedeltas between forward and backward when merging single ended to double ended --- src/dtscalibration/datastore_utils.py | 36 ++++++++++++++++++++++++--- tests/test_datastore.py | 2 +- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/dtscalibration/datastore_utils.py b/src/dtscalibration/datastore_utils.py index 1e1d3a79..692a322e 100644 --- a/src/dtscalibration/datastore_utils.py +++ b/src/dtscalibration/datastore_utils.py @@ -156,7 +156,7 @@ def merge_double_ended(ds_fw, ds_bw, cable_length, plot_result=True, verbose=Tru return ds -def merge_double_ended_times(ds_fw, ds_bw, verbose=True): +def merge_double_ended_times(ds_fw, ds_bw, verify_timedeltas=True, verbose=True): """Helper for `merge_double_ended()` to deal with missing measurements. The number of measurements of the forward and backward channels might get out of sync if the device shuts down before the measurement of the last channel @@ -189,6 +189,10 @@ def merge_double_ended_times(ds_fw, ds_bw, verbose=True): DataStore object representing the forward measurement channel ds_bw : DataSore object DataStore object representing the backward measurement channel + verify_timedeltas : bool + Check whether times between forward and backward measurements are similar to those of neighboring measurements + verbose : bool + Print additional information to screen Returns ------- @@ -205,8 +209,16 @@ def merge_double_ended_times(ds_fw, ds_bw, verbose=True): assert ds_fw.attrs['forwardMeasurementChannel'] < ds_bw.attrs['forwardMeasurementChannel'], \ "ds_fw and ds_bw are swapped" + # Are all dt's within 1.5 seconds from one another? 
if (ds_bw.time.size == ds_fw.time.size) and np.all(ds_bw.time.values > ds_fw.time.values): - return ds_fw, ds_bw + if verify_timedeltas: + dt_ori = (ds_bw.time.values - ds_fw.time.values) / np.array(1000000000, dtype='timedelta64[ns]') + dt_all_close = np.allclose(dt_ori, dt_ori[0], atol=1.5, rtol=0.) + else: + dt_all_close = True + + if dt_all_close: + return ds_fw, ds_bw iuse_chfw = list() iuse_chbw = list() @@ -232,7 +244,25 @@ def merge_double_ended_times(ds_fw, ds_bw, verbose=True): if verbose: print(f"Missing forward measurement beween {ds_bw.time.values[ind]} and {ds_bw.time.values[ind_next]}") - return ds_fw.isel(time=iuse_chfw), ds_bw.isel(time=iuse_chbw) + # throw out is dt differs from its neighbors + if verify_timedeltas: + dt = (ds_bw.isel(time=iuse_chbw).time.values - ds_fw.isel(time=iuse_chfw).time.values) /\ + np.array(1000000000, dtype='timedelta64[ns]') + leaveout = np.zeros_like(dt, dtype=bool) + leaveout[1:-1] = np.isclose(dt[:-2], dt[2:], atol=1.5, rtol=0.) * ~np.isclose(dt[:-2], dt[1:-1], atol=1.5, rtol=0.) + iuse_chfw2 = np.array(iuse_chfw)[~leaveout] + iuse_chbw2 = np.array(iuse_chbw)[~leaveout] + + if verbose: + for itfw, itbw in zip(np.array(iuse_chfw)[leaveout], np.array(iuse_chbw)[leaveout]): + print(f"FW: {ds_fw.isel(time=itfw).time.values} and BW: {ds_bw.isel(time=itbw).time.values} do not " + f"belong together as their timedelta is larger than their neighboring timedeltas. 
Thrown out.") + + else: + iuse_chfw2 = iuse_chfw + iuse_chbw2 = iuse_chbw + + return ds_fw.isel(time=iuse_chfw2), ds_bw.isel(time=iuse_chbw2) # pylint: disable=too-many-locals diff --git a/tests/test_datastore.py b/tests/test_datastore.py index 8f1e3534..11aaf66a 100644 --- a/tests/test_datastore.py +++ b/tests/test_datastore.py @@ -649,7 +649,7 @@ def test_merge_double_ended(): ([1, 2], [], [1, 2]), ([], [1, 2], [1, 2]), ([1], [2], [1, 2]), - pytest.param([2], [1], [1, 2], marks=pytest.mark.xfail) + ([2], [1], [1, 2]) ]) def test_merge_double_ended_times(inotinfw, inotinbw, inotinout): """ From 17d796335ca93eec21fbffe75f3cdb8a8a1c9c5e Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Mon, 24 Jul 2023 19:41:53 +0200 Subject: [PATCH 02/16] Minor update to verification_timedeltas in merge_double_ended_times timedelta64[ns] to timedelta64[s] Co-authored-by: Bart Schilperoort --- src/dtscalibration/datastore_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dtscalibration/datastore_utils.py b/src/dtscalibration/datastore_utils.py index 692a322e..20e87227 100644 --- a/src/dtscalibration/datastore_utils.py +++ b/src/dtscalibration/datastore_utils.py @@ -247,7 +247,7 @@ def merge_double_ended_times(ds_fw, ds_bw, verify_timedeltas=True, verbose=True) # throw out is dt differs from its neighbors if verify_timedeltas: dt = (ds_bw.isel(time=iuse_chbw).time.values - ds_fw.isel(time=iuse_chfw).time.values) /\ - np.array(1000000000, dtype='timedelta64[ns]') + np.timedelta64(1, "s") leaveout = np.zeros_like(dt, dtype=bool) leaveout[1:-1] = np.isclose(dt[:-2], dt[2:], atol=1.5, rtol=0.) * ~np.isclose(dt[:-2], dt[1:-1], atol=1.5, rtol=0.) iuse_chfw2 = np.array(iuse_chfw)[~leaveout] From 3606e85eed6de8b0d4275a8fb5831ab33a87ad7a Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Mon, 24 Jul 2023 19:48:52 +0200 Subject: [PATCH 03/16] Minor update to verification_timedeltas in merge_double_ended_times Rearrange print statements. 
Suggested by @BSchilperoort Co-authored-by: Bart Schilperoort --- src/dtscalibration/datastore_utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/dtscalibration/datastore_utils.py b/src/dtscalibration/datastore_utils.py index 20e87227..7be2a056 100644 --- a/src/dtscalibration/datastore_utils.py +++ b/src/dtscalibration/datastore_utils.py @@ -255,8 +255,11 @@ def merge_double_ended_times(ds_fw, ds_bw, verify_timedeltas=True, verbose=True) if verbose: for itfw, itbw in zip(np.array(iuse_chfw)[leaveout], np.array(iuse_chbw)[leaveout]): - print(f"FW: {ds_fw.isel(time=itfw).time.values} and BW: {ds_bw.isel(time=itbw).time.values} do not " - f"belong together as their timedelta is larger than their neighboring timedeltas. Thrown out.") + print( + "The following measurements do not belong together, as the time difference\n" + "between the\forward and backward measurements is more than 1.5 seconds\n" + "larger than the neighboring measurements.\n" + f"FW: {ds_fw.isel(time=itfw).time.values} and BW: {ds_bw.isel(time=itbw).time.values}" else: iuse_chfw2 = iuse_chfw From 17ee04d74ef999aef4427c47ad81b8a9e4e765c4 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Mon, 24 Jul 2023 19:52:47 +0200 Subject: [PATCH 04/16] Use brackets instead of linebreaks --- src/dtscalibration/datastore_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dtscalibration/datastore_utils.py b/src/dtscalibration/datastore_utils.py index 7be2a056..282cf435 100644 --- a/src/dtscalibration/datastore_utils.py +++ b/src/dtscalibration/datastore_utils.py @@ -246,8 +246,9 @@ def merge_double_ended_times(ds_fw, ds_bw, verify_timedeltas=True, verbose=True) # throw out is dt differs from its neighbors if verify_timedeltas: - dt = (ds_bw.isel(time=iuse_chbw).time.values - ds_fw.isel(time=iuse_chfw).time.values) /\ - np.timedelta64(1, "s") + dt = ( + (ds_bw.isel(time=iuse_chbw).time.values - ds_fw.isel(time=iuse_chfw).time.values) / + 
np.timedelta64(1, "s")) leaveout = np.zeros_like(dt, dtype=bool) leaveout[1:-1] = np.isclose(dt[:-2], dt[2:], atol=1.5, rtol=0.) * ~np.isclose(dt[:-2], dt[1:-1], atol=1.5, rtol=0.) iuse_chfw2 = np.array(iuse_chfw)[~leaveout] From a6b374d8c34add0cf0d79451e27e59f13f477fdf Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Mon, 24 Jul 2023 19:54:52 +0200 Subject: [PATCH 05/16] Forgotten closing bracket in print statement --- src/dtscalibration/datastore_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dtscalibration/datastore_utils.py b/src/dtscalibration/datastore_utils.py index 282cf435..f5cb92c7 100644 --- a/src/dtscalibration/datastore_utils.py +++ b/src/dtscalibration/datastore_utils.py @@ -260,7 +260,7 @@ def merge_double_ended_times(ds_fw, ds_bw, verify_timedeltas=True, verbose=True) "The following measurements do not belong together, as the time difference\n" "between the\forward and backward measurements is more than 1.5 seconds\n" "larger than the neighboring measurements.\n" - f"FW: {ds_fw.isel(time=itfw).time.values} and BW: {ds_bw.isel(time=itbw).time.values}" + f"FW: {ds_fw.isel(time=itfw).time.values} and BW: {ds_bw.isel(time=itbw).time.values}") else: iuse_chfw2 = iuse_chfw From e53bde69be513834a4cfbae6ddb18728b60509b3 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sat, 22 Jul 2023 18:23:55 +0200 Subject: [PATCH 06/16] Remove restrictions for xarray and pandas versions. Removed resample function. The resample function from dtscalibration was blocking the ability to upgrade to recent versions of xarray and pandas. Resampling using the xarray functions is now proposed, see example notebook 2. 
See #167 --- ...unctions_slice_mean_max_std_resample.ipynb | 499 +++++++++++++++++- pyproject.toml | 4 +- src/dtscalibration/datastore.py | 93 ---- 3 files changed, 473 insertions(+), 123 deletions(-) diff --git a/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb b/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb index 1e61a3a9..628f7ab6 100644 --- a/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb +++ b/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:57.302425Z", @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:58.948063Z", @@ -50,7 +50,19 @@ "shell.execute_reply": "2022-04-06T08:08:59.144710Z" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3 files were found, each representing a single timestep\n", + "4 recorded vars were found: LAF, ST, AST, TMP\n", + "Recorded at 1461 points along the cable\n", + "The measurement is single ended\n", + "Reading the data from disk\n" + ] + } + ], "source": [ "filepath = os.path.join(\"..\", \"..\", \"tests\", \"data\", \"single_ended\")\n", "\n", @@ -67,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.171097Z", @@ -76,14 +88,428 @@ "shell.execute_reply": "2022-04-06T08:08:59.200765Z" } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'st' (x: 1461, time: 3)>\n",
+       "array([[-8.05791e-01,  4.28741e-01, -5.13021e-01],\n",
+       "       [-4.58870e-01, -1.24484e-01,  9.68469e-03],\n",
+       "       [ 4.89174e-01, -9.57734e-02,  5.62837e-02],\n",
+       "       ...,\n",
+       "       [ 4.68457e+01,  4.72201e+01,  4.79139e+01],\n",
+       "       [ 3.76634e+01,  3.74649e+01,  3.83160e+01],\n",
+       "       [ 2.79879e+01,  2.78331e+01,  2.88055e+01]])\n",
+       "Coordinates:\n",
+       "  * x                  (x) float64 -80.74 -80.62 -80.49 ... 104.6 104.7 104.8\n",
+       "    filename           (time) <U31 'channel 2_20180504132202074.xml' ... 'cha...\n",
+       "    filename_tstamp    (time) int64 20180504132202074 ... 20180504132303723\n",
+       "    timestart          (time) datetime64[ns] 2018-05-04T12:22:02.710000 ... 2...\n",
+       "    timeend            (time) datetime64[ns] 2018-05-04T12:22:32.710000 ... 2...\n",
+       "  * time               (time) datetime64[ns] 2018-05-04T12:22:17.710000 ... 2...\n",
+       "    acquisitiontimeFW  (time) timedelta64[ns] 00:00:30 00:00:30 00:00:30\n",
+       "Attributes:\n",
+       "    name:         st\n",
+       "    description:  Stokes intensity\n",
+       "    units:        -
" + ], + "text/plain": [ + "\n", + "array([[-8.05791e-01, 4.28741e-01, -5.13021e-01],\n", + " [-4.58870e-01, -1.24484e-01, 9.68469e-03],\n", + " [ 4.89174e-01, -9.57734e-02, 5.62837e-02],\n", + " ...,\n", + " [ 4.68457e+01, 4.72201e+01, 4.79139e+01],\n", + " [ 3.76634e+01, 3.74649e+01, 3.83160e+01],\n", + " [ 2.79879e+01, 2.78331e+01, 2.88055e+01]])\n", + "Coordinates:\n", + " * x (x) float64 -80.74 -80.62 -80.49 ... 104.6 104.7 104.8\n", + " filename (time) " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "ds[\"tmp\"].plot(figsize=(12, 8));" ] @@ -111,7 +548,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.267698Z", @@ -129,7 +566,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.275670Z", @@ -147,7 +584,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.281530Z", @@ -173,7 +610,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.290091Z", @@ -190,7 +627,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.296109Z", @@ -213,7 +650,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.302128Z", @@ -237,7 +674,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.308603Z", @@ -254,7 +691,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "execution": { "iopub.execute_input": 
"2022-04-06T08:08:59.314626Z", @@ -273,23 +710,29 @@ "metadata": {}, "source": [ "## 4 Downsample (time dimension)\n", - "We currently have measurements at 3 time steps, with 30.001 seconds inbetween. For our next exercise we would like to down sample the measurements to 2 time steps with 47 seconds inbetween. The calculated variances are not valid anymore. We use the function `resample_datastore`." + "We currently have measurements at 3 time steps, with 30.001 seconds inbetween. For our next exercise we would like to down sample the measurements to 2 time steps with 47 seconds inbetween. The calculated variances are not valid anymore. We use the function `resample` from xarray." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:59.320236Z", - "iopub.status.busy": "2022-04-06T08:08:59.320029Z", - "iopub.status.idle": "2022-04-06T08:08:59.341833Z", - "shell.execute_reply": "2022-04-06T08:08:59.341341Z" - } - }, + "execution_count": 28, + "metadata": {}, "outputs": [], "source": [ - "ds_resampled = ds.resample_datastore(how=\"mean\", time=\"47S\")" + "# We use the logic from xarray to resample. However, it returns an xarray dataset type\n", + "import xarray as xr\n", + "ds_xarray = xr.Dataset(ds).resample(time=\"47S\").mean()\n", + "\n", + "# Therefore we convert it back to the dtscalibration Datastore type.\n", + "from dtscalibration import DataStore\n", + "ds_resampled2 = DataStore(ds_xarray)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that the resample function from dtscalibration has been removed in v2.0.1. The above example works versions from before 2.0.1 as well. Starting with version 2.0.1 the `xr.Dataset(ds).resample()` may become `ds.resample()`." 
] }, { @@ -355,7 +798,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -369,7 +812,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.11" + "version": "3.10.10" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 4aa15779..28eff9f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ classifiers = [ ] dependencies = [ "numpy", - "xarray<=2022.03.0", + "xarray", "pyyaml", "xmltodict", "scipy", @@ -63,7 +63,7 @@ dependencies = [ "toolz", "matplotlib", "netCDF4<=1.5.8", - "pandas<2", + "pandas", ] dynamic = ["version"] diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index 4e92a3af..b2f854d4 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -373,99 +373,6 @@ def timeseries_keys(self): time_dim = self.get_time_dim() return [k for k, v in self.data_vars.items() if v.dims == (time_dim,)] - def resample_datastore( - self, - how, - freq=None, - dim=None, - skipna=None, - closed=None, - label=None, - origin='start_day', - offset=None, - keep_attrs=True, - **indexer): - """Returns a resampled DataStore. Always define the how. - Handles both downsampling and upsampling. If any intervals contain no - values from the original object, they will be given the value ``NaN``. - Parameters - ---------- - freq - dim - how : str - Any function that is available via groupby. E.g., 'mean' - http://pandas.pydata.org/pandas-docs/stable/groupby.html#groupby - -dispatch - skipna : bool, optional - Whether to skip missing values when aggregating in downsampling. - closed : 'left' or 'right', optional - Side of each interval to treat as closed. - label : 'left or 'right', optional - Side of each interval to use for labeling. - base : int, optional - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. 
For example, for '24H' frequency, base could - range from 0 through 23. - keep_attrs : bool, optional - If True, the object's attributes (`attrs`) will be copied from - the original object to the new one. If False (default), the new - object will be returned without attributes. - **indexer : {dim: freq} - Dictionary with a key indicating the dimension name to resample - over and a value corresponding to the resampling frequency. - Returns - ------- - resampled : same type as caller - This object resampled. - """ - import pandas as pd - from xarray.core.dataarray import DataArray - - RESAMPLE_DIM = '__resample_dim__' - - if (freq and indexer) or (dim and indexer): - raise TypeError( - "If passing an 'indexer' then 'dim' " - "and 'freq' should not be used") - - if indexer: - dim, freq = indexer.popitem() - - if isinstance(dim, str): - dim = self[dim] - else: - raise TypeError( - "Dimension name should be a string; " - "was passed %r" % dim) - - if how is None: - how = 'mean' - - group = DataArray(dim.data, [(dim.dims, dim.data)], name=RESAMPLE_DIM) - grouper = pd.Grouper( - freq=freq, - how=how, - closed=closed, - label=label, - origin=origin, - offset=offset) - gb = self._groupby_cls(self, group, grouper=grouper) - if isinstance(how, str): - f = getattr(gb, how) - if how in ['first', 'last']: - result = f(skipna=skipna, keep_attrs=False) - elif how == 'count': - result = f(dim=dim.name, keep_attrs=False) - else: - result = f(dim=dim.name, skipna=skipna, keep_attrs=False) - else: - result = gb.reduce(how, dim=dim.name, keep_attrs=False) - result = result.rename({RESAMPLE_DIM: dim.name}) - - attrs = self.attrs if keep_attrs else None - return DataStore( - data_vars=result.data_vars, coords=result.coords, attrs=attrs) - def to_netcdf( self, path=None, From cb076450f74e3943d0be74133846c1ea465746fd Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sat, 22 Jul 2023 18:25:48 +0200 Subject: [PATCH 07/16] Removed output example notebook 2 --- 
...unctions_slice_mean_max_std_resample.ipynb | 469 +----------------- 1 file changed, 16 insertions(+), 453 deletions(-) diff --git a/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb b/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb index 628f7ab6..1d3d883a 100644 --- a/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb +++ b/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:57.302425Z", @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:58.948063Z", @@ -50,19 +50,7 @@ "shell.execute_reply": "2022-04-06T08:08:59.144710Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3 files were found, each representing a single timestep\n", - "4 recorded vars were found: LAF, ST, AST, TMP\n", - "Recorded at 1461 points along the cable\n", - "The measurement is single ended\n", - "Reading the data from disk\n" - ] - } - ], + "outputs": [], "source": [ "filepath = os.path.join(\"..\", \"..\", \"tests\", \"data\", \"single_ended\")\n", "\n", @@ -79,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.171097Z", @@ -88,428 +76,14 @@ "shell.execute_reply": "2022-04-06T08:08:59.200765Z" } }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.DataArray 'st' (x: 1461, time: 3)>\n",
-       "array([[-8.05791e-01,  4.28741e-01, -5.13021e-01],\n",
-       "       [-4.58870e-01, -1.24484e-01,  9.68469e-03],\n",
-       "       [ 4.89174e-01, -9.57734e-02,  5.62837e-02],\n",
-       "       ...,\n",
-       "       [ 4.68457e+01,  4.72201e+01,  4.79139e+01],\n",
-       "       [ 3.76634e+01,  3.74649e+01,  3.83160e+01],\n",
-       "       [ 2.79879e+01,  2.78331e+01,  2.88055e+01]])\n",
-       "Coordinates:\n",
-       "  * x                  (x) float64 -80.74 -80.62 -80.49 ... 104.6 104.7 104.8\n",
-       "    filename           (time) <U31 'channel 2_20180504132202074.xml' ... 'cha...\n",
-       "    filename_tstamp    (time) int64 20180504132202074 ... 20180504132303723\n",
-       "    timestart          (time) datetime64[ns] 2018-05-04T12:22:02.710000 ... 2...\n",
-       "    timeend            (time) datetime64[ns] 2018-05-04T12:22:32.710000 ... 2...\n",
-       "  * time               (time) datetime64[ns] 2018-05-04T12:22:17.710000 ... 2...\n",
-       "    acquisitiontimeFW  (time) timedelta64[ns] 00:00:30 00:00:30 00:00:30\n",
-       "Attributes:\n",
-       "    name:         st\n",
-       "    description:  Stokes intensity\n",
-       "    units:        -
" - ], - "text/plain": [ - "\n", - "array([[-8.05791e-01, 4.28741e-01, -5.13021e-01],\n", - " [-4.58870e-01, -1.24484e-01, 9.68469e-03],\n", - " [ 4.89174e-01, -9.57734e-02, 5.62837e-02],\n", - " ...,\n", - " [ 4.68457e+01, 4.72201e+01, 4.79139e+01],\n", - " [ 3.76634e+01, 3.74649e+01, 3.83160e+01],\n", - " [ 2.79879e+01, 2.78331e+01, 2.88055e+01]])\n", - "Coordinates:\n", - " * x (x) float64 -80.74 -80.62 -80.49 ... 104.6 104.7 104.8\n", - " filename (time) " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "ds[\"tmp\"].plot(figsize=(12, 8));" ] @@ -548,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.267698Z", @@ -566,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.275670Z", @@ -584,7 +147,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.281530Z", @@ -610,7 +173,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.290091Z", @@ -627,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.296109Z", @@ -650,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.302128Z", @@ -674,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.308603Z", @@ -691,7 +254,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "execution": { 
"iopub.execute_input": "2022-04-06T08:08:59.314626Z", @@ -715,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ From a502d77827fcb37ef543b3ea678c0c96456a5d08 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sat, 22 Jul 2023 18:31:53 +0200 Subject: [PATCH 08/16] Updated resample test --- tests/test_datastore.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_datastore.py b/tests/test_datastore.py index 11aaf66a..b5851803 100644 --- a/tests/test_datastore.py +++ b/tests/test_datastore.py @@ -556,12 +556,14 @@ def read_data_from_fp_numpy(fp): def test_resample_datastore(): + import xarray as xr + filepath = data_dir_single_ended ds = read_silixa_files( directory=filepath, timezone_netcdf='UTC', file_ext='*.xml') assert ds.time.size == 3 - ds_resampled = ds.resample_datastore(how='mean', time="47S") + ds_resampled = DataStore(xr.Dataset(ds).resample(time="47S").mean()) assert ds_resampled.time.size == 2 assert ds_resampled.st.dims == ('x', 'time'), 'The dimension have to ' \ From 7fa3b9c65348a213dbf9dc9d92e8d4c65417dc86 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sat, 22 Jul 2023 18:43:49 +0200 Subject: [PATCH 09/16] Removed deprecated pandas read_csv argument from import ts example New versions of pandas don't support the squeeze argument --- docs/notebooks/09Import_timeseries.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/notebooks/09Import_timeseries.ipynb b/docs/notebooks/09Import_timeseries.ipynb index 9c7229ed..41c81427 100644 --- a/docs/notebooks/09Import_timeseries.ipynb +++ b/docs/notebooks/09Import_timeseries.ipynb @@ -86,9 +86,9 @@ "outputs": [], "source": [ "ts = pd.read_csv(\n", - " filepath, sep=\",\", index_col=0, parse_dates=True, squeeze=True, engine=\"python\"\n", - ") # the latter 2 kwargs are to ensure a pd.Series is returned\n", - "ts = ts.tz_localize(\"Europe/Amsterdam\") # set the 
timezone" + " filepath, sep=\",\", index_col=0, parse_dates=True\n", + ")[\"Pt100 2\"] # See pandas' read_csv documentation for more options\n", + "ts = ts.tz_localize(\"Europe/Amsterdam\") # Set the timezone" ] }, { From 5304e24c92b0ae7d0900774a0c55144887ba7882 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Mon, 24 Jul 2023 20:06:18 +0200 Subject: [PATCH 10/16] Update pyproject.toml to deprecate 3.8 and include 3.11 --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 28eff9f9..291af88b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ name = "dtscalibration" description = "A Python package to load raw DTS files, perform a calibration, and plot the result." readme = "README.rst" license = "BSD-3-Clause" -requires-python = ">=3.8, <3.11" +requires-python = ">=3.9, <=3.11" authors = [ {email = "bdestombe@gmail.com"}, {name = "Bas des Tombe, Bart Schilperoort"} @@ -46,9 +46,9 @@ classifiers = [ "Operating System :: POSIX", "Operating System :: Microsoft :: Windows", "Programming Language :: Python", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Utilities", ] dependencies = [ @@ -118,7 +118,7 @@ build = [ features = ["dev"] [[tool.hatch.envs.matrix_test.matrix]] -python = ["3.8", "3.9", "3.10"] +python = ["3.9", "3.10", "3.11"] [tool.hatch.envs.matrix_test.scripts] test = ["pytest ./src/ ./tests/",] # --doctest-modules From 083c8bbc7112a69f6f497fca5ed6e8ec44c6db41 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Mon, 24 Jul 2023 20:13:44 +0200 Subject: [PATCH 11/16] Update actions to deprecate 3.8 and include 3.11 --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1a3844fa..3faac415 100644 --- a/.github/workflows/build.yml +++ 
b/.github/workflows/build.yml @@ -39,7 +39,7 @@ jobs: fail-fast: false matrix: os: ['ubuntu-latest', 'macos-latest', 'windows-latest'] - python-version: ['3.8', '3.9', '3.10'] + python-version: ['3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} From ee60c94973e47a28c67fb09330b467e1bcf6717c Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Mon, 24 Jul 2023 21:00:25 +0200 Subject: [PATCH 12/16] Removed py3.11 because netcdf4 does not support it --- .github/workflows/build.yml | 2 +- pyproject.toml | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3faac415..ba481194 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,7 +39,7 @@ jobs: fail-fast: false matrix: os: ['ubuntu-latest', 'macos-latest', 'windows-latest'] - python-version: ['3.9', '3.10', '3.11'] + python-version: ['3.9', '3.10'] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} diff --git a/pyproject.toml b/pyproject.toml index 291af88b..1e896d88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ name = "dtscalibration" description = "A Python package to load raw DTS files, perform a calibration, and plot the result." 
readme = "README.rst" license = "BSD-3-Clause" -requires-python = ">=3.9, <=3.11" +requires-python = ">=3.9, <3.11" authors = [ {email = "bdestombe@gmail.com"}, {name = "Bas des Tombe, Bart Schilperoort"} @@ -48,7 +48,6 @@ classifiers = [ "Programming Language :: Python", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", "Topic :: Utilities", ] dependencies = [ @@ -118,7 +117,7 @@ build = [ features = ["dev"] [[tool.hatch.envs.matrix_test.matrix]] -python = ["3.9", "3.10", "3.11"] +python = ["3.9", "3.10"] [tool.hatch.envs.matrix_test.scripts] test = ["pytest ./src/ ./tests/",] # --doctest-modules From dfe71bd2539111d42c8c86f93ea643d5ee4f0720 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Wed, 26 Jul 2023 09:23:00 +0200 Subject: [PATCH 13/16] Update docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb Co-authored-by: Bart Schilperoort --- ...Common_DataStore_functions_slice_mean_max_std_resample.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb b/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb index 1d3d883a..4b7bb081 100644 --- a/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb +++ b/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb @@ -295,7 +295,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note that the resample function from dtscalibration has been removed in v2.0.1. The above example works versions from before 2.0.1 as well. Starting with version 2.0.1 the `xr.Dataset(ds).resample()` may become `ds.resample()`." + "Note that the resample function from dtscalibration has been removed in v2.1.0. The above example works versions from before 2.0.1 as well. Starting with version 2.1.0 the `xr.Dataset(ds).resample()` may become `ds.resample()`." 
] }, { From 5167fad5057e57da3c14e92f1596c330e9756179 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Wed, 26 Jul 2023 09:44:56 +0200 Subject: [PATCH 14/16] Streamline resample implementation. Raise error for resample_datastore() --- CHANGELOG.rst | 4 ++++ ...re_functions_slice_mean_max_std_resample.ipynb | 15 ++------------- src/dtscalibration/datastore.py | 5 +++++ 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 93592290..3a9bbb23 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,6 +12,10 @@ Bug fixes * Single-ended measurements with `fix_alpha` failed due to a bug introduced in v2.0.0 ([#173](https://github.com/dtscalibration/python-dts-calibration/pull/173)). +Removed + +* Removed ds.resample_datastore() in favor of using xarray's resample function. See example notebook 2. + 2.0.0 (2023-05-24) ------------------ diff --git a/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb b/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb index 4b7bb081..e230e9fa 100644 --- a/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb +++ b/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb @@ -282,20 +282,9 @@ "metadata": {}, "outputs": [], "source": [ - "# We use the logic from xarray to resample. However, it returns an xarray dataset type\n", - "import xarray as xr\n", - "ds_xarray = xr.Dataset(ds).resample(time=\"47S\").mean()\n", - "\n", - "# Therefore we convert it back to the dtscalibration Datastore type.\n", + "# We use the logic from xarray to resample. However, it returns an xarray dataset type. Therefore we convert it back to the dtscalibration Datastore type.\n", "from dtscalibration import DataStore\n", - "ds_resampled2 = DataStore(ds_xarray)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that the resample function from dtscalibration has been removed in v2.1.0. 
The above example works versions from before 2.0.1 as well. Starting with version 2.1.0 the `xr.Dataset(ds).resample()` may become `ds.resample()`." ] }, { diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index b2f854d4..fe403bd0 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -5503,6 +5503,11 @@ def func(a): return out + def resample_datastore(*args, **kwargs): + raise NotImplementedError("ds.resample_datastore() is deprecated. Use " \ + "from dtscalibration import DataStore; DataStore(ds.resample()) " \ + "instead. See example notebook 2.") + class ParameterIndexDoubleEnded: """ From f78777507bc9e28c9026a100d351a9d1036afcfa Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Thu, 27 Jul 2023 20:33:43 +0200 Subject: [PATCH 15/16] get_time_dim conflicts with the resample function of newer xarray versions --- src/dtscalibration/datastore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index fe403bd0..cec07a83 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -103,7 +103,7 @@ def __init__(self, *args, autofill_dim_attrs=True, **kwargs): ideal_dim.append('x') all_dim.pop(all_dim.index('x')) - time_dim = self.get_time_dim() + time_dim = "time" if time_dim: if time_dim in all_dim: ideal_dim.append(time_dim) From ee6a919a273d6bdd77ff91d26bb06becab99cc19 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Thu, 27 Jul 2023 22:31:44 +0200 Subject: [PATCH 16/16] Small change to resample test --- tests/test_datastore.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_datastore.py b/tests/test_datastore.py index b5851803..8df6a0a6 100644 --- a/tests/test_datastore.py +++ b/tests/test_datastore.py @@ -556,14 +556,12 @@ def read_data_from_fp_numpy(fp): def test_resample_datastore(): - import xarray as xr - filepath =
data_dir_single_ended ds = read_silixa_files( directory=filepath, timezone_netcdf='UTC', file_ext='*.xml') assert ds.time.size == 3 - ds_resampled = DataStore(xr.Dataset(ds).resample(time="47S").mean()) + ds_resampled = DataStore(ds.resample(time="47S").mean()) assert ds_resampled.time.size == 2 assert ds_resampled.st.dims == ('x', 'time'), 'The dimension have to ' \