From ba4018496ab60fc6c745bb6013c906dc094bd808 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Fri, 21 Jul 2023 11:32:43 +0200 Subject: [PATCH 01/14] Also verify timedeltas between forward and backward when merging single ended to double ended --- src/dtscalibration/datastore_utils.py | 36 ++++++++++++++++++++++++--- tests/test_datastore.py | 2 +- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/dtscalibration/datastore_utils.py b/src/dtscalibration/datastore_utils.py index 1e1d3a79..692a322e 100644 --- a/src/dtscalibration/datastore_utils.py +++ b/src/dtscalibration/datastore_utils.py @@ -156,7 +156,7 @@ def merge_double_ended(ds_fw, ds_bw, cable_length, plot_result=True, verbose=Tru return ds -def merge_double_ended_times(ds_fw, ds_bw, verbose=True): +def merge_double_ended_times(ds_fw, ds_bw, verify_timedeltas=True, verbose=True): """Helper for `merge_double_ended()` to deal with missing measurements. The number of measurements of the forward and backward channels might get out of sync if the device shuts down before the measurement of the last channel @@ -189,6 +189,10 @@ def merge_double_ended_times(ds_fw, ds_bw, verbose=True): DataStore object representing the forward measurement channel ds_bw : DataSore object DataStore object representing the backward measurement channel + verify_timedeltas : bool + Check whether times between forward and backward measurements are similar to those of neighboring measurements + verbose : bool + Print additional information to screen Returns ------- @@ -205,8 +209,16 @@ def merge_double_ended_times(ds_fw, ds_bw, verbose=True): assert ds_fw.attrs['forwardMeasurementChannel'] < ds_bw.attrs['forwardMeasurementChannel'], \ "ds_fw and ds_bw are swapped" + # Are all dt's within 1.5 seconds from one another? if (ds_bw.time.size == ds_fw.time.size) and np.all(ds_bw.time.values > ds_fw.time.values): - return ds_fw, ds_bw + if verify_timedeltas: + dt_ori = (ds_bw.time.values - ds_fw.time.values) / np.array(1000000000, dtype='timedelta64[ns]') + dt_all_close = np.allclose(dt_ori, dt_ori[0], atol=1.5, rtol=0.) + else: + dt_all_close = True + + if dt_all_close: + return ds_fw, ds_bw iuse_chfw = list() iuse_chbw = list() @@ -232,7 +244,25 @@ def merge_double_ended_times(ds_fw, ds_bw, verbose=True): if verbose: print(f"Missing forward measurement beween {ds_bw.time.values[ind]} and {ds_bw.time.values[ind_next]}") - return ds_fw.isel(time=iuse_chfw), ds_bw.isel(time=iuse_chbw) + # throw out is dt differs from its neighbors + if verify_timedeltas: + dt = (ds_bw.isel(time=iuse_chbw).time.values - ds_fw.isel(time=iuse_chfw).time.values) /\ + np.array(1000000000, dtype='timedelta64[ns]') + leaveout = np.zeros_like(dt, dtype=bool) + leaveout[1:-1] = np.isclose(dt[:-2], dt[2:], atol=1.5, rtol=0.) * ~np.isclose(dt[:-2], dt[1:-1], atol=1.5, rtol=0.) + iuse_chfw2 = np.array(iuse_chfw)[~leaveout] + iuse_chbw2 = np.array(iuse_chbw)[~leaveout] + + if verbose: + for itfw, itbw in zip(np.array(iuse_chfw)[leaveout], np.array(iuse_chbw)[leaveout]): + print(f"FW: {ds_fw.isel(time=itfw).time.values} and BW: {ds_bw.isel(time=itbw).time.values} do not " + f"belong together as their timedelta is larger than their neighboring timedeltas. 
Thrown out.") + + else: + iuse_chfw2 = iuse_chfw + iuse_chbw2 = iuse_chbw + + return ds_fw.isel(time=iuse_chfw2), ds_bw.isel(time=iuse_chbw2) # pylint: disable=too-many-locals diff --git a/tests/test_datastore.py b/tests/test_datastore.py index 8f1e3534..11aaf66a 100644 --- a/tests/test_datastore.py +++ b/tests/test_datastore.py @@ -649,7 +649,7 @@ def test_merge_double_ended(): ([1, 2], [], [1, 2]), ([], [1, 2], [1, 2]), ([1], [2], [1, 2]), - pytest.param([2], [1], [1, 2], marks=pytest.mark.xfail) + ([2], [1], [1, 2]) ]) def test_merge_double_ended_times(inotinfw, inotinbw, inotinout): """ From c9705086c483ddceb071161f608a6d3e077bac53 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sat, 22 Jul 2023 18:23:55 +0200 Subject: [PATCH 02/14] Remove restrictions for xarray and pandas versions. Removed resample function. The resample function from dtscalibration was blocking the ability to upgrade to recent versions of xarray and pandas. Resampling using the xarray functions is now proposed, see example notebook 2. See #167 --- ...unctions_slice_mean_max_std_resample.ipynb | 499 +++++++++++++++++- pyproject.toml | 4 +- src/dtscalibration/datastore.py | 93 ---- 3 files changed, 473 insertions(+), 123 deletions(-) diff --git a/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb b/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb index 1e61a3a9..628f7ab6 100644 --- a/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb +++ b/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:57.302425Z", @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:58.948063Z", @@ -50,7 +50,19 @@ "shell.execute_reply": "2022-04-06T08:08:59.144710Z" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3 files were found, each representing a single timestep\n", + "4 recorded vars were found: LAF, ST, AST, TMP\n", + "Recorded at 1461 points along the cable\n", + "The measurement is single ended\n", + "Reading the data from disk\n" + ] + } + ], "source": [ "filepath = os.path.join(\"..\", \"..\", \"tests\", \"data\", \"single_ended\")\n", "\n", @@ -67,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.171097Z", @@ -76,14 +88,428 @@ "shell.execute_reply": "2022-04-06T08:08:59.200765Z" } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'st' (x: 1461, time: 3)>\n",
+       "array([[-8.05791e-01,  4.28741e-01, -5.13021e-01],\n",
+       "       [-4.58870e-01, -1.24484e-01,  9.68469e-03],\n",
+       "       [ 4.89174e-01, -9.57734e-02,  5.62837e-02],\n",
+       "       ...,\n",
+       "       [ 4.68457e+01,  4.72201e+01,  4.79139e+01],\n",
+       "       [ 3.76634e+01,  3.74649e+01,  3.83160e+01],\n",
+       "       [ 2.79879e+01,  2.78331e+01,  2.88055e+01]])\n",
+       "Coordinates:\n",
+       "  * x                  (x) float64 -80.74 -80.62 -80.49 ... 104.6 104.7 104.8\n",
+       "    filename           (time) <U31 'channel 2_20180504132202074.xml' ... 'cha...\n",
+       "    filename_tstamp    (time) int64 20180504132202074 ... 20180504132303723\n",
+       "    timestart          (time) datetime64[ns] 2018-05-04T12:22:02.710000 ... 2...\n",
+       "    timeend            (time) datetime64[ns] 2018-05-04T12:22:32.710000 ... 2...\n",
+       "  * time               (time) datetime64[ns] 2018-05-04T12:22:17.710000 ... 2...\n",
+       "    acquisitiontimeFW  (time) timedelta64[ns] 00:00:30 00:00:30 00:00:30\n",
+       "Attributes:\n",
+       "    name:         st\n",
+       "    description:  Stokes intensity\n",
+       "    units:        -
" + ], + "text/plain": [ + "\n", + "array([[-8.05791e-01, 4.28741e-01, -5.13021e-01],\n", + " [-4.58870e-01, -1.24484e-01, 9.68469e-03],\n", + " [ 4.89174e-01, -9.57734e-02, 5.62837e-02],\n", + " ...,\n", + " [ 4.68457e+01, 4.72201e+01, 4.79139e+01],\n", + " [ 3.76634e+01, 3.74649e+01, 3.83160e+01],\n", + " [ 2.79879e+01, 2.78331e+01, 2.88055e+01]])\n", + "Coordinates:\n", + " * x (x) float64 -80.74 -80.62 -80.49 ... 104.6 104.7 104.8\n", + " filename (time) " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "ds[\"tmp\"].plot(figsize=(12, 8));" ] @@ -111,7 +548,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.267698Z", @@ -129,7 +566,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.275670Z", @@ -147,7 +584,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.281530Z", @@ -173,7 +610,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.290091Z", @@ -190,7 +627,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.296109Z", @@ -213,7 +650,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.302128Z", @@ -237,7 +674,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.308603Z", @@ -254,7 +691,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.314626Z", @@ -273,23 +710,29 @@ "metadata": {}, "source": [ "## 4 Downsample (time dimension)\n", - "We currently have measurements at 3 time steps, with 30.001 seconds inbetween. For our next exercise we would like to down sample the measurements to 2 time steps with 47 seconds inbetween. The calculated variances are not valid anymore. We use the function `resample_datastore`." + "We currently have measurements at 3 time steps, with 30.001 seconds inbetween. For our next exercise we would like to down sample the measurements to 2 time steps with 47 seconds inbetween. The calculated variances are not valid anymore. We use the function `resample` from xarray." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:59.320236Z", - "iopub.status.busy": "2022-04-06T08:08:59.320029Z", - "iopub.status.idle": "2022-04-06T08:08:59.341833Z", - "shell.execute_reply": "2022-04-06T08:08:59.341341Z" - } - }, + "execution_count": 28, + "metadata": {}, "outputs": [], "source": [ - "ds_resampled = ds.resample_datastore(how=\"mean\", time=\"47S\")" + "# We use the logic from xarray to resample. 
However, it returns an xarray dataset type\n", + "import xarray as xr\n", + "ds_xarray = xr.Dataset(ds).resample(time=\"47S\").mean()\n", + "\n", + "# Therefore we convert it back to the dtscalibration Datastore type.\n", + "from dtscalibration import DataStore\n", + "ds_resampled2 = DataStore(ds_xarray)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that the resample function from dtscalibration has been removed in v2.0.1. The above example works versions from before 2.0.1 as well. Starting with version 2.0.1 the `xr.Dataset(ds).resample()` may become `ds.resample()`." ] }, { @@ -355,7 +798,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -369,7 +812,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.11" + "version": "3.10.10" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 4aa15779..28eff9f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ classifiers = [ ] dependencies = [ "numpy", - "xarray<=2022.03.0", + "xarray", "pyyaml", "xmltodict", "scipy", @@ -63,7 +63,7 @@ dependencies = [ "toolz", "matplotlib", "netCDF4<=1.5.8", - "pandas<2", + "pandas", ] dynamic = ["version"] diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index 4e92a3af..b2f854d4 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -373,99 +373,6 @@ def timeseries_keys(self): time_dim = self.get_time_dim() return [k for k, v in self.data_vars.items() if v.dims == (time_dim,)] - def resample_datastore( - self, - how, - freq=None, - dim=None, - skipna=None, - closed=None, - label=None, - origin='start_day', - offset=None, - keep_attrs=True, - **indexer): - """Returns a resampled DataStore. Always define the how. - Handles both downsampling and upsampling. If any intervals contain no - values from the original object, they will be given the value ``NaN``. - Parameters - ---------- - freq - dim - how : str - Any function that is available via groupby. E.g., 'mean' - http://pandas.pydata.org/pandas-docs/stable/groupby.html#groupby - -dispatch - skipna : bool, optional - Whether to skip missing values when aggregating in downsampling. - closed : 'left' or 'right', optional - Side of each interval to treat as closed. - label : 'left or 'right', optional - Side of each interval to use for labeling. - base : int, optional - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '24H' frequency, base could - range from 0 through 23. - keep_attrs : bool, optional - If True, the object's attributes (`attrs`) will be copied from - the original object to the new one. If False (default), the new - object will be returned without attributes. - **indexer : {dim: freq} - Dictionary with a key indicating the dimension name to resample - over and a value corresponding to the resampling frequency. - Returns - ------- - resampled : same type as caller - This object resampled. 
- """ - import pandas as pd - from xarray.core.dataarray import DataArray - - RESAMPLE_DIM = '__resample_dim__' - - if (freq and indexer) or (dim and indexer): - raise TypeError( - "If passing an 'indexer' then 'dim' " - "and 'freq' should not be used") - - if indexer: - dim, freq = indexer.popitem() - - if isinstance(dim, str): - dim = self[dim] - else: - raise TypeError( - "Dimension name should be a string; " - "was passed %r" % dim) - - if how is None: - how = 'mean' - - group = DataArray(dim.data, [(dim.dims, dim.data)], name=RESAMPLE_DIM) - grouper = pd.Grouper( - freq=freq, - how=how, - closed=closed, - label=label, - origin=origin, - offset=offset) - gb = self._groupby_cls(self, group, grouper=grouper) - if isinstance(how, str): - f = getattr(gb, how) - if how in ['first', 'last']: - result = f(skipna=skipna, keep_attrs=False) - elif how == 'count': - result = f(dim=dim.name, keep_attrs=False) - else: - result = f(dim=dim.name, skipna=skipna, keep_attrs=False) - else: - result = gb.reduce(how, dim=dim.name, keep_attrs=False) - result = result.rename({RESAMPLE_DIM: dim.name}) - - attrs = self.attrs if keep_attrs else None - return DataStore( - data_vars=result.data_vars, coords=result.coords, attrs=attrs) - def to_netcdf( self, path=None, From 6f5ff25f1f7731e7fd4547576ce43c960dac3ac3 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sat, 22 Jul 2023 18:25:48 +0200 Subject: [PATCH 03/14] Removed output example notebook 2 --- ...unctions_slice_mean_max_std_resample.ipynb | 469 +----------------- 1 file changed, 16 insertions(+), 453 deletions(-) diff --git a/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb b/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb index 628f7ab6..1d3d883a 100644 --- a/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb +++ b/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:57.302425Z", @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:58.948063Z", @@ -50,19 +50,7 @@ "shell.execute_reply": "2022-04-06T08:08:59.144710Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3 files were found, each representing a single timestep\n", - "4 recorded vars were found: LAF, ST, AST, TMP\n", - "Recorded at 1461 points along the cable\n", - "The measurement is single ended\n", - "Reading the data from disk\n" - ] - } - ], + "outputs": [], "source": [ "filepath = os.path.join(\"..\", \"..\", \"tests\", \"data\", \"single_ended\")\n", "\n", @@ -79,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.171097Z", @@ -88,428 +76,14 @@ "shell.execute_reply": "2022-04-06T08:08:59.200765Z" } }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.DataArray 'st' (x: 1461, time: 3)>\n",
-       "array([[-8.05791e-01,  4.28741e-01, -5.13021e-01],\n",
-       "       [-4.58870e-01, -1.24484e-01,  9.68469e-03],\n",
-       "       [ 4.89174e-01, -9.57734e-02,  5.62837e-02],\n",
-       "       ...,\n",
-       "       [ 4.68457e+01,  4.72201e+01,  4.79139e+01],\n",
-       "       [ 3.76634e+01,  3.74649e+01,  3.83160e+01],\n",
-       "       [ 2.79879e+01,  2.78331e+01,  2.88055e+01]])\n",
-       "Coordinates:\n",
-       "  * x                  (x) float64 -80.74 -80.62 -80.49 ... 104.6 104.7 104.8\n",
-       "    filename           (time) <U31 'channel 2_20180504132202074.xml' ... 'cha...\n",
-       "    filename_tstamp    (time) int64 20180504132202074 ... 20180504132303723\n",
-       "    timestart          (time) datetime64[ns] 2018-05-04T12:22:02.710000 ... 2...\n",
-       "    timeend            (time) datetime64[ns] 2018-05-04T12:22:32.710000 ... 2...\n",
-       "  * time               (time) datetime64[ns] 2018-05-04T12:22:17.710000 ... 2...\n",
-       "    acquisitiontimeFW  (time) timedelta64[ns] 00:00:30 00:00:30 00:00:30\n",
-       "Attributes:\n",
-       "    name:         st\n",
-       "    description:  Stokes intensity\n",
-       "    units:        -
" - ], - "text/plain": [ - "\n", - "array([[-8.05791e-01, 4.28741e-01, -5.13021e-01],\n", - " [-4.58870e-01, -1.24484e-01, 9.68469e-03],\n", - " [ 4.89174e-01, -9.57734e-02, 5.62837e-02],\n", - " ...,\n", - " [ 4.68457e+01, 4.72201e+01, 4.79139e+01],\n", - " [ 3.76634e+01, 3.74649e+01, 3.83160e+01],\n", - " [ 2.79879e+01, 2.78331e+01, 2.88055e+01]])\n", - "Coordinates:\n", - " * x (x) float64 -80.74 -80.62 -80.49 ... 104.6 104.7 104.8\n", - " filename (time) " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "ds[\"tmp\"].plot(figsize=(12, 8));" ] @@ -548,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.267698Z", @@ -566,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.275670Z", @@ -584,7 +147,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.281530Z", @@ -610,7 +173,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.290091Z", @@ -627,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.296109Z", @@ -650,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.302128Z", @@ -674,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.308603Z", @@ -691,7 +254,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2022-04-06T08:08:59.314626Z", @@ -715,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ From b466d085ef19e38701cdc3ba81273f818cb875f8 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sat, 22 Jul 2023 18:31:53 +0200 Subject: [PATCH 04/14] Updated resample test --- tests/test_datastore.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_datastore.py b/tests/test_datastore.py index 11aaf66a..b5851803 100644 --- a/tests/test_datastore.py +++ b/tests/test_datastore.py @@ -556,12 +556,14 @@ def read_data_from_fp_numpy(fp): def test_resample_datastore(): + import xarray as xr + filepath = data_dir_single_ended ds = read_silixa_files( directory=filepath, timezone_netcdf='UTC', file_ext='*.xml') assert ds.time.size == 3 - ds_resampled = ds.resample_datastore(how='mean', time="47S") + ds_resampled = DataStore(xr.Dataset(ds).resample(time="47S").mean()) assert ds_resampled.time.size == 2 assert ds_resampled.st.dims == ('x', 'time'), 'The dimension have to ' \ From 55bf8b0031261d4f501fa79fb8ef53760c91a0ce Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sat, 22 Jul 2023 18:43:49 +0200 Subject: [PATCH 05/14] Removed deprecated pandas read_csv argument from import ts example New versions of pandas don't support the squeeze argument --- docs/notebooks/09Import_timeseries.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/notebooks/09Import_timeseries.ipynb 
b/docs/notebooks/09Import_timeseries.ipynb index 9c7229ed..41c81427 100644 --- a/docs/notebooks/09Import_timeseries.ipynb +++ b/docs/notebooks/09Import_timeseries.ipynb @@ -86,9 +86,9 @@ "outputs": [], "source": [ "ts = pd.read_csv(\n", - " filepath, sep=\",\", index_col=0, parse_dates=True, squeeze=True, engine=\"python\"\n", - ") # the latter 2 kwargs are to ensure a pd.Series is returned\n", - "ts = ts.tz_localize(\"Europe/Amsterdam\") # set the timezone" + " filepath, sep=\",\", index_col=0, parse_dates=True\n", + ")[\"Pt100 2\"] # See pandas' read_csv documentation for more options\n", + "ts = ts.tz_localize(\"Europe/Amsterdam\") # Set the timezone" ] }, { From 0f80ebbd5ed3c9e8eda116871a86a164ef77c439 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sun, 23 Jul 2023 12:35:04 +0200 Subject: [PATCH 06/14] Standardize the parameter names --- src/dtscalibration/datastore.py | 836 ++++++++++++-------------------- tests/test_dtscalibration.py | 78 +-- 2 files changed, 327 insertions(+), 587 deletions(-) diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index b2f854d4..fe648b20 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -1745,15 +1745,6 @@ def calibration_single_ended( sections=None, st_var=None, ast_var=None, - store_c="c", - store_gamma="gamma", - store_dalpha="dalpha", - store_alpha="alpha", - store_ta="talpha", - store_tmpf="tmpf", - store_p_cov="p_cov", - store_p_val="p_val", - variance_suffix="_var", method="wls", solver="sparse", p_val=None, @@ -1811,10 +1802,6 @@ def calibration_single_ended( Parameters ---------- - store_p_cov : str - Key to store the covariance matrix of the calibrated parameters - store_p_val : str - Key to store the values of the calibrated parameters p_val : array-like, optional Define `p_val`, `p_var`, `p_cov` if you used an external function for calibration. Has size 2 + `nt`. First value is :math:`\gamma`, @@ -1849,24 +1836,6 @@ def calibration_single_ended( Or manually define a variance with a DataArray of the shape `ds.st.shape`, where the variance can be a function of time and/or x. Required if method is wls. - store_c : str - Label of where to store C - store_gamma : str - Label of where to store gamma - store_dalpha : str - Label of where to store dalpha; the spatial derivative of alpha. - store_alpha : str - Label of where to store alpha; The integrated differential - attenuation. - alpha(x=0) = 0 - store_ta : str - Label of where to store transient alpha's - store_tmpf : str - Label of where to store the calibrated temperature of the forward - direction - variance_suffix : str - String appended for storing the variance. Only used when method - is wls. method : {'wls',} Use `'wls'` for weighted least squares. 
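[Editorial note, not part of the patch] A minimal usage sketch of the standardized interface this commit introduces; the reference-section bounds and the scalar noise variances below are assumptions, not values taken from this repository:

from dtscalibration import read_silixa_files

# Hedged sketch: single-ended calibration after the parameter-name cleanup.
# The data directory matches the test data used elsewhere in this patch series.
ds = read_silixa_files(directory="tests/data/single_ended", file_ext="*.xml")
sections = {"probe1Temperature": [slice(7.5, 17.0)]}  # hypothetical reference bath
ds.calibration_single_ended(sections=sections, st_var=1.5, ast_var=1.5, method="wls")
print(ds["tmpf"])      # calibrated forward temperature, now under a fixed key
print(ds["tmpf_var"])  # its variance; the "_var" suffix is hard-coded now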
@@ -2109,45 +2078,45 @@ def calibration_single_ended( assert p_cov.shape == (ip.npar, ip.npar) # store calibration parameters in DataStore - self[store_gamma] = (tuple(), p_val[ip.gamma].item()) - self[store_gamma + variance_suffix] = (tuple(), p_var[ip.gamma].item()) + self["gamma"] = (tuple(), p_val[ip.gamma].item()) + self["gamma_var"] = (tuple(), p_var[ip.gamma].item()) if nta > 0: - self[store_ta + "_fw"] = ( + self["talpha_fw"] = ( ("trans_att", time_dim), ip.get_taf_pars(p_val), ) - self[store_ta + "_fw" + variance_suffix] = ( + self["talpha_fw_var"] = ( ("trans_att", time_dim), ip.get_taf_pars(p_var), ) else: - self[store_ta + "_fw"] = (("trans_att", time_dim), np.zeros((0, nt))) - self[store_ta + "_fw" + variance_suffix] = (("trans_att", time_dim), np.zeros((0, nt))) + self["talpha_fw"] = (("trans_att", time_dim), np.zeros((0, nt))) + self["talpha_fw_var"] = (("trans_att", time_dim), np.zeros((0, nt))) - self[store_c] = ((time_dim,), p_val[ip.c]) - self[store_c + variance_suffix] = ((time_dim,), p_var[ip.c]) + self["c"] = ((time_dim,), p_val[ip.c]) + self["c_var"] = ((time_dim,), p_var[ip.c]) if fix_alpha: - self[store_alpha] = (("x",), fix_alpha[0]) - self[store_alpha + variance_suffix] = (("x",), fix_alpha[1]) + self["alpha"] = (("x",), fix_alpha[0]) + self["alpha_var"] = (("x",), fix_alpha[1]) else: - self[store_dalpha] = (tuple(), p_val[ip.dalpha].item()) - self[store_dalpha + variance_suffix] = (tuple(), p_var[ip.dalpha].item()) + self["dalpha"] = (tuple(), p_val[ip.dalpha].item()) + self["dalpha_var"] = (tuple(), p_var[ip.dalpha].item()) - self[store_alpha] = self[store_dalpha] * self.x - self[store_alpha + variance_suffix] = ( - self[store_dalpha + variance_suffix] * self.x ** 2 + self["alpha"] = self.dalpha * self.x + self["alpha_var"] = ( + self["dalpha_var"] * self.x ** 2 ) - self[store_ta + "_fw_full"] = ( + self["talpha_fw_full"] = ( ("x", time_dim), ip.get_taf_values( pval=p_val, x=self.x.values, trans_att=self.trans_att.values, axis="" ), ) - self[store_ta + "_fw_full" + variance_suffix] = ( + self["talpha_fw_full_var"] = ( ("x", time_dim), ip.get_taf_values( pval=p_var, x=self.x.values, trans_att=self.trans_att.values, axis="" @@ -2155,20 +2124,20 @@ def calibration_single_ended( ) tmpf = self.gamma / ( - (np.log(self.st / self.ast) + (self[store_c] + self[store_ta + "_fw_full"])) - + self[store_alpha] + (np.log(self.st / self.ast) + (self.c + self["talpha_fw_full"])) + + self.alpha ) - self[store_tmpf] = tmpf - 273.15 + self["tmpf"] = tmpf - 273.15 # tmpf_var deriv_dict = dict( - T_gamma_fw=tmpf / self[store_gamma], - T_st_fw=-(tmpf ** 2) / (self[store_gamma] * self.st), - T_ast_fw=tmpf ** 2 / (self[store_gamma] * self.ast), - T_c_fw=-(tmpf ** 2) / self[store_gamma], - T_dalpha_fw=-self.x * (tmpf ** 2) / self[store_gamma], - T_alpha_fw=-(tmpf ** 2) / self[store_gamma], - T_ta_fw=-(tmpf ** 2) / self[store_gamma], + T_gamma_fw=tmpf / self.gamma, + T_st_fw=-(tmpf ** 2) / (self.gamma * self.st), + T_ast_fw=tmpf ** 2 / (self.gamma * self.ast), + T_c_fw=-(tmpf ** 2) / self.gamma, + T_dalpha_fw=-self.x * (tmpf ** 2) / self.gamma, + T_alpha_fw=-(tmpf ** 2) / self.gamma, + T_ta_fw=-(tmpf ** 2) / self.gamma, ) deriv_ds = xr.Dataset(deriv_dict) # self["deriv"] = deriv_ds.to_array(dim="com2") @@ -2189,10 +2158,10 @@ def calibration_single_ended( var_fw_dict = dict( dT_dst=deriv_ds.T_st_fw ** 2 * parse_st_var(self, st_var, st_label="st"), dT_dast=deriv_ds.T_ast_fw ** 2 * parse_st_var(self, ast_var, st_label="ast"), - dT_gamma=deriv_ds.T_gamma_fw ** 2 * self[store_gamma + 
variance_suffix], - dT_dc=deriv_ds.T_c_fw ** 2 * self[store_c + variance_suffix], - dT_ddalpha=deriv_ds.T_alpha_fw ** 2 * self[store_alpha + variance_suffix], # same as dT_dalpha - dT_dta=deriv_ds.T_ta_fw ** 2 * self[store_ta + "_fw_full" + variance_suffix], + dT_gamma=deriv_ds.T_gamma_fw ** 2 * self["gamma_var"], + dT_dc=deriv_ds.T_c_fw ** 2 * self["c_var"], + dT_ddalpha=deriv_ds.T_alpha_fw ** 2 * self["alpha_var"], # same as dT_dalpha + dT_dta=deriv_ds.T_ta_fw ** 2 * self["talpha_fw_full_var"], dgamma_dc=(2 * deriv_ds.T_gamma_fw * deriv_ds.T_c_fw * sigma2_gamma_c), dta_dgamma=(2 * deriv_ds.T_ta_fw * deriv_ds.T_gamma_fw * sigma2_tafw_gamma), dta_dc=(2 * deriv_ds.T_ta_fw * deriv_ds.T_c_fw * sigma2_tafw_c), @@ -2223,10 +2192,10 @@ def calibration_single_ended( ) self["var_fw_da"] = xr.Dataset(var_fw_dict).to_array(dim="comp_fw") - self[store_tmpf + variance_suffix] = self["var_fw_da"].sum(dim="comp_fw") + self["tmpf_var"] = self["var_fw_da"].sum(dim="comp_fw") - self[store_tmpf].attrs.update(_dim_attrs[("tmpf",)]) - self[store_tmpf + variance_suffix].attrs.update(_dim_attrs[("tmpf_var",)]) + self["tmpf"].attrs.update(_dim_attrs[("tmpf",)]) + self["tmpf_var"].attrs.update(_dim_attrs[("tmpf_var",)]) drop_vars = [ k for k, v in self.items() if {"params1", "params2"}.intersection(v.dims) @@ -2235,8 +2204,8 @@ def calibration_single_ended( for k in drop_vars: del self[k] - self[store_p_val] = (("params1",), p_val) - self[store_p_cov] = (("params1", "params2"), p_cov) + self["p_val"] = (("params1",), p_val) + self["p_cov"] = (("params1", "params2"), p_cov) pass @@ -2247,22 +2216,6 @@ def calibration_double_ended( ast_var=None, rst_var=None, rast_var=None, - store_df="df", - store_db="db", - store_gamma="gamma", - store_alpha="alpha", - store_ta="talpha", - store_tmpf="tmpf", - store_tmpb="tmpb", - store_tmpw="tmpw", - mc_sample_size=None, - mc_conf_ints=None, - mc_da_random_state=None, - mc_remove_set_flag=True, - mc_reduce_memory_usage=False, - store_p_cov="p_cov", - store_p_val="p_val", - variance_suffix="_var", method="wls", solver="sparse", p_val=None, @@ -2371,10 +2324,6 @@ def calibration_double_ended( Section 7.2 [1]_ . Parameters ---------- - store_p_cov : str - Key to store the covariance matrix of the calibrated parameters - store_p_val : str - Key to store the values of the calibrated parameters p_val : array-like, optional Define `p_val`, `p_var`, `p_cov` if you used an external function for calibration. Has size `1 + 2 * nt + nx + 2 * nt * nta`. @@ -2411,24 +2360,6 @@ def calibration_double_ended( Or manually define a variance with a DataArray of the shape `ds.st.shape`, where the variance can be a function of time and/or x. Required if method is wls. - store_df, store_db : str - Label of where to store D. D is different for the forward channel - and the backward channel - store_gamma : str - Label of where to store gamma - store_alpha : str - Label of where to store alpha - store_ta : str - Label of where to store transient alpha's - store_tmpf : str - Label of where to store the calibrated temperature of the forward - direction - store_tmpb : str - Label of where to store the calibrated temperature of the - backward direction - store_tmpw : str - Label of where to store the inverse-variance weighted average - temperature of the forward and backward channel measurements. mc_sample_size : int, optional If set, the variance is also computed using Monte Carlo sampling. 
The number of Monte Carlo samples drawn used to estimate the @@ -3192,80 +3123,80 @@ def calibration_double_ended( assert p_cov.shape == (ip.npar, ip.npar) # save estimates and variances to datastore, skip covariances - self[store_gamma] = (tuple(), p_val[ip.gamma].item()) - self[store_alpha] = (("x",), p_val[ip.alpha]) - self[store_df] = ((time_dim,), p_val[ip.df]) - self[store_db] = ((time_dim,), p_val[ip.db]) + self["gamma"] = (tuple(), p_val[ip.gamma].item()) + self["alpha"] = (("x",), p_val[ip.alpha]) + self["df"] = ((time_dim,), p_val[ip.df]) + self["db"] = ((time_dim,), p_val[ip.db]) if nta: - self[store_ta + "_fw"] = ( + self["talpha_fw"] = ( (time_dim, "trans_att"), p_val[ip.taf].reshape((nt, nta), order="C"), ) - self[store_ta + "_bw"] = ( + self["talpha_bw"] = ( (time_dim, "trans_att"), p_val[ip.tab].reshape((nt, nta), order="C"), ) - self[store_ta + "_fw" + variance_suffix] = ( + self["talpha_fw_var"] = ( (time_dim, "trans_att"), p_var[ip.taf].reshape((nt, nta), order="C"), ) - self[store_ta + "_bw" + variance_suffix] = ( + self["talpha_bw_var"] = ( (time_dim, "trans_att"), p_var[ip.tab].reshape((nt, nta), order="C"), ) else: - self[store_ta + "_fw"] = ((time_dim, "trans_att"), np.zeros((nt, 0))) - self[store_ta + "_bw"] = ((time_dim, "trans_att"), np.zeros((nt, 0))) - self[store_ta + "_fw" + variance_suffix] = ((time_dim, "trans_att"), np.zeros((nt, 0))) - self[store_ta + "_bw" + variance_suffix] = ((time_dim, "trans_att"), np.zeros((nt, 0))) + self["talpha_fw"] = ((time_dim, "trans_att"), np.zeros((nt, 0))) + self["talpha_bw"] = ((time_dim, "trans_att"), np.zeros((nt, 0))) + self["talpha_fw_var"] = ((time_dim, "trans_att"), np.zeros((nt, 0))) + self["talpha_bw_var"] = ((time_dim, "trans_att"), np.zeros((nt, 0))) - self[store_ta + "_fw_full"] = ( + self["talpha_fw_full"] = ( ("x", time_dim), ip.get_taf_values( pval=p_val, x=self.x.values, trans_att=self.trans_att.values, axis="" ), ) - self[store_ta + "_bw_full"] = ( + self["talpha_bw_full"] = ( ("x", time_dim), ip.get_tab_values( pval=p_val, x=self.x.values, trans_att=self.trans_att.values, axis="" ), ) - self[store_tmpf] = ( - self[store_gamma] + self["tmpf"] = ( + self.gamma / ( np.log(self.st / self.ast) - + self[store_df] - + self[store_alpha] - + self[store_ta + "_fw_full"] + + self["df"] + + self.alpha + + self["talpha_fw_full"] ) - 273.15 ) - self[store_tmpb] = ( - self[store_gamma] + self["tmpb"] = ( + self.gamma / ( np.log(self.rst / self.rast) - + self[store_db] - - self[store_alpha] - + self[store_ta + "_bw_full"] + + self["db"] + - self.alpha + + self["talpha_bw_full"] ) - 273.15 ) - self[store_gamma + variance_suffix] = (tuple(), p_var[ip.gamma].item()) - self[store_alpha + variance_suffix] = (("x",), p_var[ip.alpha]) - self[store_df + variance_suffix] = ((time_dim,), p_var[ip.df]) - self[store_db + variance_suffix] = ((time_dim,), p_var[ip.db]) - self[store_ta + "_fw_full" + variance_suffix] = ( + self["gamma_var"] = (tuple(), p_var[ip.gamma].item()) + self["alpha_var"] = (("x",), p_var[ip.alpha]) + self["df_var"] = ((time_dim,), p_var[ip.df]) + self["db_var"] = ((time_dim,), p_var[ip.db]) + self["talpha_fw_full_var"] = ( ("x", time_dim), ip.get_taf_values( pval=p_var, x=self.x.values, trans_att=self.trans_att.values, axis="" ), ) - self[store_ta + "_bw_full" + variance_suffix] = ( + self["talpha_bw_full_var"] = ( ("x", time_dim), ip.get_tab_values( pval=p_var, x=self.x.values, trans_att=self.trans_att.values, axis="" @@ -3329,22 +3260,22 @@ def calibration_double_ended( ) # sigma2_tafw_tabw - tmpf = 
self[store_tmpf] + 273.15 - tmpb = self[store_tmpb] + 273.15 + tmpf = self["tmpf"] + 273.15 + tmpb = self["tmpb"] + 273.15 deriv_dict = dict( - T_gamma_fw=tmpf / self[store_gamma], - T_st_fw=-(tmpf ** 2) / (self[store_gamma] * self.st), - T_ast_fw=tmpf ** 2 / (self[store_gamma] * self.ast), - T_df_fw=-(tmpf ** 2) / self[store_gamma], - T_alpha_fw=-(tmpf ** 2) / self[store_gamma], - T_ta_fw=-(tmpf ** 2) / self[store_gamma], - T_gamma_bw=tmpb / self[store_gamma], - T_rst_bw=-(tmpb ** 2) / (self[store_gamma] * self.rst), - T_rast_bw=tmpb ** 2 / (self[store_gamma] * self.rast), - T_db_bw=-(tmpb ** 2) / self[store_gamma], - T_alpha_bw=tmpb ** 2 / self[store_gamma], - T_ta_bw=-(tmpb ** 2) / self[store_gamma], + T_gamma_fw=tmpf / self.gamma, + T_st_fw=-(tmpf ** 2) / (self.gamma * self.st), + T_ast_fw=tmpf ** 2 / (self.gamma * self.ast), + T_df_fw=-(tmpf ** 2) / self.gamma, + T_alpha_fw=-(tmpf ** 2) / self.gamma, + T_ta_fw=-(tmpf ** 2) / self.gamma, + T_gamma_bw=tmpb / self.gamma, + T_rst_bw=-(tmpb ** 2) / (self.gamma * self.rst), + T_rast_bw=tmpb ** 2 / (self.gamma * self.rast), + T_db_bw=-(tmpb ** 2) / self.gamma, + T_alpha_bw=tmpb ** 2 / self.gamma, + T_ta_bw=-(tmpb ** 2) / self.gamma, ) deriv_ds = xr.Dataset(deriv_dict) self["deriv"] = deriv_ds.to_array(dim="com2") @@ -3352,10 +3283,10 @@ def calibration_double_ended( var_fw_dict = dict( dT_dst=deriv_ds.T_st_fw ** 2 * parse_st_var(self, st_var, st_label="st"), dT_dast=deriv_ds.T_ast_fw ** 2 * parse_st_var(self, ast_var, st_label="ast"), - dT_gamma=deriv_ds.T_gamma_fw ** 2 * self[store_gamma + variance_suffix], - dT_ddf=deriv_ds.T_df_fw ** 2 * self[store_df + variance_suffix], - dT_dalpha=deriv_ds.T_alpha_fw ** 2 * self[store_alpha + variance_suffix], - dT_dta=deriv_ds.T_ta_fw ** 2 * self[store_ta + "_fw_full" + variance_suffix], + dT_gamma=deriv_ds.T_gamma_fw ** 2 * self["gamma_var"], + dT_ddf=deriv_ds.T_df_fw ** 2 * self["df_var"], + dT_dalpha=deriv_ds.T_alpha_fw ** 2 * self["alpha_var"], + dT_dta=deriv_ds.T_ta_fw ** 2 * self["talpha_fw_full_var"], dgamma_ddf=(2 * deriv_ds.T_gamma_fw * deriv_ds.T_df_fw * sigma2_gamma_df), dgamma_dalpha=( 2 * deriv_ds.T_gamma_fw * deriv_ds.T_alpha_fw * sigma2_gamma_alpha @@ -3370,10 +3301,10 @@ def calibration_double_ended( var_bw_dict = dict( dT_drst=deriv_ds.T_rst_bw ** 2 * parse_st_var(self, rst_var, st_label="rst"), dT_drast=deriv_ds.T_rast_bw ** 2 * parse_st_var(self, rast_var, st_label="rast"), - dT_gamma=deriv_ds.T_gamma_bw ** 2 * self[store_gamma + variance_suffix], - dT_ddb=deriv_ds.T_db_bw ** 2 * self[store_db + variance_suffix], - dT_dalpha=deriv_ds.T_alpha_bw ** 2 * self[store_alpha + variance_suffix], - dT_dta=deriv_ds.T_ta_bw ** 2 * self[store_ta + "_bw_full" + variance_suffix], + dT_gamma=deriv_ds.T_gamma_bw ** 2 * self["gamma_var"], + dT_ddb=deriv_ds.T_db_bw ** 2 * self["db_var"], + dT_dalpha=deriv_ds.T_alpha_bw ** 2 * self["alpha_var"], + dT_dta=deriv_ds.T_ta_bw ** 2 * self["talpha_bw_full_var"], dgamma_ddb=(2 * deriv_ds.T_gamma_bw * deriv_ds.T_db_bw * sigma2_gamma_db), dgamma_dalpha=( 2 * deriv_ds.T_gamma_bw * deriv_ds.T_alpha_bw * sigma2_gamma_alpha @@ -3387,19 +3318,19 @@ def calibration_double_ended( self["var_fw_da"] = xr.Dataset(var_fw_dict).to_array(dim="comp_fw") self["var_bw_da"] = xr.Dataset(var_bw_dict).to_array(dim="comp_bw") - self[store_tmpf + variance_suffix] = self["var_fw_da"].sum(dim="comp_fw") - self[store_tmpb + variance_suffix] = self["var_bw_da"].sum(dim="comp_bw") + self["tmpf_var"] = self["var_fw_da"].sum(dim="comp_fw") + self["tmpb_var"] = 
self["var_bw_da"].sum(dim="comp_bw") # First estimate of tmpw_var - self[store_tmpw + variance_suffix + "_approx"] = 1 / ( - 1 / self[store_tmpf + variance_suffix] + 1 / self[store_tmpb + variance_suffix] + self["tmpw_var" + "_approx"] = 1 / ( + 1 / self["tmpf_var"] + 1 / self["tmpb_var"] ) - self[store_tmpw] = ((tmpf / self[store_tmpf + variance_suffix] + - tmpb / self[store_tmpb + variance_suffix] - ) * self[store_tmpw + variance_suffix + "_approx"]) - 273.15 + self["tmpw"] = ((tmpf / self["tmpf_var"] + + tmpb / self["tmpb_var"] + ) * self["tmpw_var" + "_approx"]) - 273.15 - weightsf = self[store_tmpw + variance_suffix + "_approx"] / self[store_tmpf + variance_suffix] - weightsb = self[store_tmpw + variance_suffix + "_approx"] / self[store_tmpb + variance_suffix] + weightsf = self["tmpw_var" + "_approx"] / self["tmpf_var"] + weightsb = self["tmpw_var" + "_approx"] / self["tmpb_var"] deriv_dict2 = dict( T_gamma_w=weightsf * deriv_dict['T_gamma_fw'] + weightsb * deriv_dict['T_gamma_bw'], @@ -3421,12 +3352,12 @@ def calibration_double_ended( dT_dast=deriv_ds2.T_ast_w ** 2 * parse_st_var(self, ast_var, st_label="ast"), dT_drst=deriv_ds2.T_rst_w ** 2 * parse_st_var(self, rst_var, st_label="rst"), dT_drast=deriv_ds2.T_rast_w ** 2 * parse_st_var(self, rast_var, st_label="rast"), - dT_gamma=deriv_ds2.T_gamma_w ** 2 * self[store_gamma + variance_suffix], - dT_ddf=deriv_ds2.T_df_w ** 2 * self[store_df + variance_suffix], - dT_ddb=deriv_ds2.T_db_w ** 2 * self[store_db + variance_suffix], - dT_dalpha=deriv_ds2.T_alpha_w ** 2 * self[store_alpha + variance_suffix], - dT_dtaf=deriv_ds2.T_taf_w ** 2 * self[store_ta + "_fw_full" + variance_suffix], - dT_dtab=deriv_ds2.T_tab_w ** 2 * self[store_ta + "_bw_full" + variance_suffix], + dT_gamma=deriv_ds2.T_gamma_w ** 2 * self["gamma_var"], + dT_ddf=deriv_ds2.T_df_w ** 2 * self["df_var"], + dT_ddb=deriv_ds2.T_db_w ** 2 * self["db_var"], + dT_dalpha=deriv_ds2.T_alpha_w ** 2 * self["alpha_var"], + dT_dtaf=deriv_ds2.T_taf_w ** 2 * self["talpha_fw_full_var"], + dT_dtab=deriv_ds2.T_tab_w ** 2 * self["talpha_bw_full_var"], dgamma_ddf=2 * deriv_ds2.T_gamma_w * deriv_ds2.T_df_w * sigma2_gamma_df, dgamma_ddb=2 * deriv_ds2.T_gamma_w * deriv_ds2.T_db_w * sigma2_gamma_db, dgamma_dalpha=2 * deriv_ds2.T_gamma_w * deriv_ds2.T_alpha_w * sigma2_gamma_alpha, @@ -3442,7 +3373,7 @@ def calibration_double_ended( # dtaf_dtab=2 * deriv_ds2.T_tab_w * deriv_ds2.T_tab_w * sigma2_tafw_tabw, ) self["var_w_da"] = xr.Dataset(var_w_dict).to_array(dim="comp_w") - self[store_tmpw + variance_suffix] = self["var_w_da"].sum(dim="comp_w") + self["tmpw_var"] = self["var_w_da"].sum(dim="comp_w") tmpf_var_excl_par = ( self["var_fw_da"].sel(comp_fw=["dT_dst", "dT_dast"]).sum(dim="comp_fw") @@ -3450,22 +3381,22 @@ def calibration_double_ended( tmpb_var_excl_par = ( self["var_bw_da"].sel(comp_bw=["dT_drst", "dT_drast"]).sum(dim="comp_bw") ) - self[store_tmpw + variance_suffix + "_lower"] = 1 / ( + self["tmpw_var" + "_lower"] = 1 / ( 1 / tmpf_var_excl_par + 1 / tmpb_var_excl_par ) - self[store_tmpf].attrs.update(_dim_attrs[("tmpf",)]) - self[store_tmpb].attrs.update(_dim_attrs[("tmpb",)]) - self[store_tmpw].attrs.update(_dim_attrs[("tmpw",)]) - self[store_tmpf + variance_suffix].attrs.update(_dim_attrs[("tmpf_var",)]) - self[store_tmpb + variance_suffix].attrs.update(_dim_attrs[("tmpb_var",)]) - self[store_tmpw + variance_suffix].attrs.update( + self["tmpf"].attrs.update(_dim_attrs[("tmpf",)]) + self["tmpb"].attrs.update(_dim_attrs[("tmpb",)]) + self["tmpw"].attrs.update(_dim_attrs[("tmpw",)]) + 
self["tmpf_var"].attrs.update(_dim_attrs[("tmpf_var",)]) + self["tmpb_var"].attrs.update(_dim_attrs[("tmpb_var",)]) + self["tmpw_var"].attrs.update( _dim_attrs[("tmpw_var",)] ) - self[store_tmpw + variance_suffix + "_approx"].attrs.update( + self["tmpw_var" + "_approx"].attrs.update( _dim_attrs[("tmpw_var_approx",)] ) - self[store_tmpw + variance_suffix + "_lower"].attrs.update( + self["tmpw_var" + "_lower"].attrs.update( _dim_attrs[("tmpw_var_lower",)] ) @@ -3476,8 +3407,8 @@ def calibration_double_ended( for k in drop_vars: del self[k] - self[store_p_val] = (("params1",), p_val) - self[store_p_cov] = (("params1", "params2"), p_cov) + self["p_val"] = (("params1",), p_val) + self["p_cov"] = (("params1", "params2"), p_cov) pass @@ -3487,8 +3418,6 @@ def conf_int_single_ended( p_cov='p_cov', st_var=None, ast_var=None, - store_tmpf='tmpf', - store_tempvar='_var', conf_ints=None, mc_sample_size=100, da_random_state=None, @@ -3536,14 +3465,6 @@ def conf_int_single_ended( Or manually define a variance with a DataArray of the shape `ds.st.shape`, where the variance can be a function of time and/or x. Required if method is wls. - store_tmpf : str - Key of how to store the Forward calculated temperature. Is - calculated using the - forward Stokes and anti-Stokes observations. - store_tempvar : str - a string that is appended to the store_tmp_ keys. and the - variance is calculated - for those store_tmp_ keys conf_ints : iterable object of float A list with the confidence boundaries that are calculated. Valid values are between @@ -3693,13 +3614,13 @@ def conf_int_single_ended( self['ta_mc_arr'] = (('mc', 'x', time_dim), ta_arr) if fixed_alpha: - self[store_tmpf + '_mc_set'] = self['gamma_mc'] / ( + self['tmpf_mc_set'] = self['gamma_mc'] / ( ( np.log(self['r_st']) - np.log(self['r_ast']) + (self['c_mc'] + self['ta_mc_arr'])) + self['alpha_mc']) - 273.15 else: - self[store_tmpf + '_mc_set'] = self['gamma_mc'] / ( + self['tmpf_mc_set'] = self['gamma_mc'] / ( ( np.log(self['r_st']) - np.log(self['r_ast']) + (self['c_mc'] + self['ta_mc_arr'])) + @@ -3707,17 +3628,17 @@ def conf_int_single_ended( avg_dims = ['mc'] - avg_axis = self[store_tmpf + '_mc_set'].get_axis_num(avg_dims) + avg_axis = self['tmpf_mc_set'].get_axis_num(avg_dims) - self[store_tmpf + '_mc' + store_tempvar] = ( - self[store_tmpf + '_mc_set'] - self[store_tmpf]).var( + self['tmpf_mc_var'] = ( + self['tmpf_mc_set'] - self["tmpf"]).var( dim=avg_dims, ddof=1) if conf_ints: new_chunks = ( - (len(conf_ints),),) + self[store_tmpf + '_mc_set'].chunks[1:] + (len(conf_ints),),) + self['tmpf_mc_set'].chunks[1:] - qq = self[store_tmpf + '_mc_set'] + qq = self['tmpf_mc_set'] q = qq.data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), @@ -3725,12 +3646,12 @@ def conf_int_single_ended( drop_axis=avg_axis, # avg dimesnions are dropped from input arr new_axis=0) # The new CI dimension is added as first axis - self[store_tmpf + '_mc'] = (('CI', 'x', time_dim), q) + self['tmpf_mc'] = (('CI', 'x', time_dim), q) if mc_remove_set_flag: drop_var = [ 'gamma_mc', 'dalpha_mc', 'alpha_mc', 'c_mc', 'mc', 'r_st', - 'r_ast', store_tmpf + '_mc_set', 'ta_mc_arr'] + 'r_ast', 'tmpf_mc_set', 'ta_mc_arr'] for k in drop_var: if k in self: del self[k] @@ -3743,8 +3664,6 @@ def average_single_ended( p_cov='p_cov', st_var=None, ast_var=None, - store_tmpf='tmpf', - store_tempvar='_var', conf_ints=None, mc_sample_size=100, ci_avg_time_flag1=False, @@ -3787,20 +3706,6 @@ def average_single_ended( Or manually define a variance with a DataArray of the shape 
`ds.st.shape`, where the variance can be a function of time and/or x. Required if method is wls. - store_tmpf : str - Key of how to store the Forward calculated temperature. Is - calculated using the - forward Stokes and anti-Stokes observations. - store_tempvar : str - a string that is appended to the store_tmp_ keys. and the - variance is calculated - for those store_tmp_ keys - store_ta : str - Key of how transient attenuation parameters are stored. Default - is `talpha`. `_fw` and `_bw` is appended to for the forward and - backward parameters. The `transient_asym_att_x` is derived from - the `coords` of this DataArray. The `coords` of `ds[store_ta + - '_fw']` should be ('time', 'trans_att'). conf_ints : iterable object of float A list with the confidence boundaries that are calculated. Valid values are between @@ -3817,7 +3722,7 @@ def average_single_ended( measurement were to be taken, it would have this ci" (2) all measurements. So you can state "The temperature remained during the entire measurement period between these ci bounds". - Adds store_tmpw + '_avg1' and store_tmpw + '_mc_avg1_var' to the + Adds "tmpw" + '_avg1' and "tmpw" + '_mc_avg1_var' to the DataStore. If `conf_ints` are set, also the confidence intervals `_mc_avg1` are added to the DataStore. Works independently of the ci_avg_time_flag2 and ci_avg_x_flag. @@ -3831,7 +3736,7 @@ def average_single_ended( intervals. I hereby assume the temperature does not change over time and average all measurements to get a better estimate of the background temperature. - Adds store_tmpw + '_avg2' and store_tmpw + '_mc_avg2_var' to the + Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the DataStore. If `conf_ints` are set, also the confidence intervals `_mc_avg2` are added to the DataStore. Works independently of the ci_avg_time_flag1 and ci_avg_x_flag. @@ -3851,7 +3756,7 @@ def average_single_ended( it would have this ci" (2) all measurement locations. So you can state "The temperature along the fiber remained between these ci bounds". - Adds store_tmpw + '_avgx1' and store_tmpw + '_mc_avgx1_var' to the + Adds "tmpw" + '_avgx1' and "tmpw" + '_mc_avgx1_var' to the DataStore. If `conf_ints` are set, also the confidence intervals `_mc_avgx1` are added to the DataStore. Works independently of the ci_avg_time_flag1, ci_avg_time_flag2 and ci_avg_x2_flag. @@ -3866,7 +3771,7 @@ def average_single_ended( other parts of the fiber. And I would like to average the measurements from multiple locations to improve the estimated temperature. - Adds store_tmpw + '_avg2' and store_tmpw + '_mc_avg2_var' to the + Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the DataStore. If `conf_ints` are set, also the confidence intervals `_mc_avg2` are added to the DataStore. Works independently of the ci_avg_time_flag1 and ci_avg_x_flag. 
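[Editorial note, not part of the patch] A hedged usage sketch of the averaging flags documented above; the variances and Monte Carlo settings are assumptions:

# Time-averaged uncertainty of the forward temperature. Flag 1 answers "what
# interval would a single new measurement fall in"; flag 2 answers "how well
# is the time-averaged temperature itself known".
ds.average_single_ended(
    st_var=1.5,
    ast_var=1.5,               # assumed scalar noise variances
    conf_ints=[2.5, 97.5],     # passed as q to np.percentile internally
    mc_sample_size=500,
    ci_avg_time_flag1=True,    # writes tmpf_avg1 and tmpf_mc_avg1_var
    ci_avg_time_flag2=True,    # writes tmpf_avg2 and tmpf_mc_avg2_var
)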
@@ -3906,8 +3811,6 @@ def average_single_ended( p_cov=p_cov, st_var=st_var, ast_var=ast_var, - store_tmpf=store_tmpf, - store_tempvar=store_tempvar, conf_ints=None, mc_sample_size=mc_sample_size, da_random_state=da_random_state, @@ -3924,13 +3827,13 @@ def average_single_ended( (time_dim2,), self[time_dim].sel(**{ time_dim: ci_avg_time_sel}).data) - self[store_tmpf + '_avgsec'] = ( + self['tmpf_avgsec'] = ( ('x', time_dim2), - self[store_tmpf].sel(**{ + self["tmpf"].sel(**{ time_dim: ci_avg_time_sel}).data) - self[store_tmpf + '_mc_set'] = ( + self['tmpf_mc_set'] = ( ('mc', 'x', time_dim2), - self[store_tmpf + self["tmpf" + '_mc_set'].sel(**{ time_dim: ci_avg_time_sel}).data) @@ -3941,13 +3844,13 @@ def average_single_ended( (time_dim2,), self[time_dim].isel(**{ time_dim: ci_avg_time_isel}).data) - self[store_tmpf + '_avgsec'] = ( + self['tmpf_avgsec'] = ( ('x', time_dim2), - self[store_tmpf].isel(**{ + self["tmpf"].isel(**{ time_dim: ci_avg_time_isel}).data) - self[store_tmpf + '_mc_set'] = ( + self['tmpf_mc_set'] = ( ('mc', 'x', time_dim2), - self[store_tmpf + self["tmpf" + '_mc_set'].isel(**{ time_dim: ci_avg_time_isel}).data) @@ -3955,79 +3858,79 @@ def average_single_ended( time_dim2 = time_dim x_dim2 = 'x_avg' self.coords[x_dim2] = ((x_dim2,), self.x.sel(x=ci_avg_x_sel).data) - self[store_tmpf + '_avgsec'] = ( - (x_dim2, time_dim), self[store_tmpf].sel(x=ci_avg_x_sel).data) - self[store_tmpf + '_mc_set'] = ( + self['tmpf_avgsec'] = ( + (x_dim2, time_dim), self["tmpf"].sel(x=ci_avg_x_sel).data) + self['tmpf_mc_set'] = ( ('mc', x_dim2, time_dim), - self[store_tmpf + '_mc_set'].sel(x=ci_avg_x_sel).data) + self['tmpf_mc_set'].sel(x=ci_avg_x_sel).data) elif ci_avg_x_isel is not None: time_dim2 = time_dim x_dim2 = 'x_avg' self.coords[x_dim2] = ( (x_dim2,), self.x.isel(x=ci_avg_x_isel).data) - self[store_tmpf + '_avgsec'] = ( + self['tmpf_avgsec'] = ( (x_dim2, time_dim2), - self[store_tmpf].isel(x=ci_avg_x_isel).data) - self[store_tmpf + '_mc_set'] = ( + self["tmpf"].isel(x=ci_avg_x_isel).data) + self['tmpf_mc_set'] = ( ('mc', x_dim2, time_dim2), - self[store_tmpf + '_mc_set'].isel(x=ci_avg_x_isel).data) + self['tmpf_mc_set'].isel(x=ci_avg_x_isel).data) else: - self[store_tmpf + '_avgsec'] = self[store_tmpf] + self['tmpf_avgsec'] = self["tmpf"] x_dim2 = 'x' time_dim2 = time_dim # subtract the mean temperature - q = self[store_tmpf + '_mc_set'] - self[store_tmpf + '_avgsec'] - self[store_tmpf + '_mc' + '_avgsec' + store_tempvar] = ( + q = self['tmpf_mc_set'] - self['tmpf_avgsec'] + self['tmpf_mc' + '_avgsec_var'] = ( q.var(dim='mc', ddof=1)) if ci_avg_x_flag1: # unweighted mean - self[store_tmpf + '_avgx1'] = self[store_tmpf + self['tmpf_avgx1'] = self["tmpf" + '_avgsec'].mean(dim=x_dim2) - q = self[store_tmpf + '_mc_set'] - self[store_tmpf + '_avgsec'] + q = self['tmpf_mc_set'] - self['tmpf_avgsec'] qvar = q.var(dim=['mc', x_dim2], ddof=1) - self[store_tmpf + '_mc_avgx1' + store_tempvar] = qvar + self['tmpf_mc_avgx1_var'] = qvar if conf_ints: new_chunks = ( - len(conf_ints), self[store_tmpf + '_mc_set'].chunks[2]) - avg_axis = self[store_tmpf + '_mc_set'].get_axis_num( + len(conf_ints), self['tmpf_mc_set'].chunks[2]) + avg_axis = self['tmpf_mc_set'].get_axis_num( ['mc', x_dim2]) - q = self[store_tmpf + '_mc_set'].data.map_blocks( + q = self['tmpf_mc_set'].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks, # drop_axis=avg_axis, # avg dimensions are dropped from input arr new_axis=0) # The new CI dim is added as firsaxis - self[store_tmpf + 
'_mc_avgx1'] = (('CI', time_dim2), q) + self['tmpf_mc_avgx1'] = (('CI', time_dim2), q) if ci_avg_x_flag2: - q = self[store_tmpf + '_mc_set'] - self[store_tmpf + '_avgsec'] + q = self['tmpf_mc_set'] - self['tmpf_avgsec'] qvar = q.var(dim=['mc'], ddof=1) # Inverse-variance weighting avg_x_var = 1 / (1 / qvar).sum(dim=x_dim2) - self[store_tmpf + '_mc_avgx2' + store_tempvar] = avg_x_var + self['tmpf_mc_avgx2_var'] = avg_x_var - self[store_tmpf - + '_mc_avgx2_set'] = (self[store_tmpf + '_mc_set'] + self["tmpf" + + '_mc_avgx2_set'] = (self['tmpf_mc_set'] / qvar).sum(dim=x_dim2) * avg_x_var - self[store_tmpf - + '_avgx2'] = self[store_tmpf + self["tmpf" + + '_avgx2'] = self["tmpf" + '_mc_avgx2_set'].mean(dim='mc') if conf_ints: new_chunks = ( - len(conf_ints), self[store_tmpf + '_mc_set'].chunks[2]) - avg_axis_avgx = self[store_tmpf + '_mc_set'].get_axis_num('mc') + len(conf_ints), self['tmpf_mc_set'].chunks[2]) + avg_axis_avgx = self['tmpf_mc_set'].get_axis_num('mc') - qq = self[store_tmpf + '_mc_avgx2_set'].data.map_blocks( + qq = self['tmpf_mc_avgx2_set'].data.map_blocks( lambda x: np.percentile( x, q=conf_ints, axis=avg_axis_avgx), chunks=new_chunks, # @@ -4036,53 +3939,53 @@ def average_single_ended( new_axis=0, dtype=float) # The new CI dimension is added as # firsaxis - self[store_tmpf + '_mc_avgx2'] = (('CI', time_dim2), qq) + self['tmpf_mc_avgx2'] = (('CI', time_dim2), qq) if ci_avg_time_flag1 is not None: # unweighted mean - self[store_tmpf + '_avg1'] = self[store_tmpf + self['tmpf_avg1'] = self["tmpf" + '_avgsec'].mean(dim=time_dim2) - q = self[store_tmpf + '_mc_set'] - self[store_tmpf + '_avgsec'] + q = self['tmpf_mc_set'] - self['tmpf_avgsec'] qvar = q.var(dim=['mc', time_dim2], ddof=1) - self[store_tmpf + '_mc_avg1' + store_tempvar] = qvar + self['tmpf_mc_avg1_var'] = qvar if conf_ints: new_chunks = ( - len(conf_ints), self[store_tmpf + '_mc_set'].chunks[1]) - avg_axis = self[store_tmpf + '_mc_set'].get_axis_num( + len(conf_ints), self['tmpf_mc_set'].chunks[1]) + avg_axis = self['tmpf_mc_set'].get_axis_num( ['mc', time_dim2]) - q = self[store_tmpf + '_mc_set'].data.map_blocks( + q = self['tmpf_mc_set'].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks, # drop_axis=avg_axis, # avg dimensions are dropped from input arr new_axis=0) # The new CI dim is added as firsaxis - self[store_tmpf + '_mc_avg1'] = (('CI', x_dim2), q) + self['tmpf_mc_avg1'] = (('CI', x_dim2), q) if ci_avg_time_flag2: - q = self[store_tmpf + '_mc_set'] - self[store_tmpf + '_avgsec'] + q = self['tmpf_mc_set'] - self['tmpf_avgsec'] qvar = q.var(dim=['mc'], ddof=1) # Inverse-variance weighting avg_time_var = 1 / (1 / qvar).sum(dim=time_dim2) - self[store_tmpf + '_mc_avg2' + store_tempvar] = avg_time_var + self['tmpf_mc_avg2_var'] = avg_time_var - self[store_tmpf - + '_mc_avg2_set'] = (self[store_tmpf + '_mc_set'] / qvar).sum( + self["tmpf" + + '_mc_avg2_set'] = (self['tmpf_mc_set'] / qvar).sum( dim=time_dim2) * avg_time_var - self[store_tmpf + '_avg2'] = self[store_tmpf + self['tmpf_avg2'] = self["tmpf" + '_mc_avg2_set'].mean(dim='mc') if conf_ints: new_chunks = ( - len(conf_ints), self[store_tmpf + '_mc_set'].chunks[1]) - avg_axis_avg2 = self[store_tmpf + '_mc_set'].get_axis_num('mc') + len(conf_ints), self['tmpf_mc_set'].chunks[1]) + avg_axis_avg2 = self['tmpf_mc_set'].get_axis_num('mc') - qq = self[store_tmpf + '_mc_avg2_set'].data.map_blocks( + qq = self['tmpf_mc_avg2_set'].data.map_blocks( lambda x: np.percentile( x, q=conf_ints, axis=avg_axis_avg2), chunks=new_chunks, # @@ 
-4091,17 +3994,17 @@ def average_single_ended( new_axis=0, dtype=float) # The new CI dimension is added as # firsaxis - self[store_tmpf + '_mc_avg2'] = (('CI', x_dim2), qq) + self['tmpf_mc_avg2'] = (('CI', x_dim2), qq) # Clean up the garbage. All arrays with a Monte Carlo dimension. if mc_remove_set_flag: remove_mc_set = [ 'r_st', 'r_ast', 'gamma_mc', 'dalpha_mc', 'c_mc', 'x_avg', 'time_avg', 'mc', 'ta_mc_arr'] - remove_mc_set.append(store_tmpf + '_avgsec') - remove_mc_set.append(store_tmpf + '_mc_set') - remove_mc_set.append(store_tmpf + '_mc_avg2_set') - remove_mc_set.append(store_tmpf + '_mc_avgx2_set') - remove_mc_set.append(store_tmpf + '_mc_avgsec' + store_tempvar) + remove_mc_set.append('tmpf_avgsec') + remove_mc_set.append('tmpf_mc_set') + remove_mc_set.append('tmpf_mc_avg2_set') + remove_mc_set.append('tmpf_mc_avgx2_set') + remove_mc_set.append('tmpf_mc_avgsec_var') for k in remove_mc_set: if k in self: @@ -4112,15 +4015,10 @@ def conf_int_double_ended( self, p_val='p_val', p_cov='p_cov', - store_ta=None, st_var=None, ast_var=None, rst_var=None, rast_var=None, - store_tmpf='tmpf', - store_tmpb='tmpb', - store_tmpw='tmpw', - store_tempvar='_var', conf_ints=None, mc_sample_size=100, var_only_sections=False, @@ -4214,32 +4112,6 @@ def conf_int_double_ended( Or manually define a variance with a DataArray of the shape `ds.st.shape`, where the variance can be a function of time and/or x. Required if method is wls. - store_tmpf : str - Key of how to store the Forward calculated temperature. Is - calculated using the - forward Stokes and anti-Stokes observations. - store_tmpb : str - Key of how to store the Backward calculated temperature. Is - calculated using the - backward Stokes and anti-Stokes observations. - store_tmpw : str - Key of how to store the forward-backward-weighted temperature. - First, the variance of - tmpf and tmpb are calculated. The Monte Carlo set of tmpf and - tmpb are averaged, - weighted by their variance. The median of this set is thought to - be the a reasonable - estimate of the temperature - store_tempvar : str - a string that is appended to the store_tmp_ keys. and the - variance is calculated - for those store_tmp_ keys - store_ta : str - Key of how transient attenuation parameters are stored. Default - is `talpha`. `_fw` and `_bw` is appended to for the forward and - backward parameters. The `transient_asym_att_x` is derived from - the `coords` of this DataArray. The `coords` of `ds[store_ta + - '_fw']` should be ('time', 'trans_att'). conf_ints : iterable object of float A list with the confidence boundaries that are calculated. Valid values are between [0, 1]. 
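[Editorial note, not part of the patch] The Monte Carlo machinery described above, sketched in miniature; all numbers are hypothetical and the temperature equation is written in its simplest single-channel form:

import numpy as np

rng = np.random.default_rng(42)
p_val = np.array([482.6, 1.46])  # e.g. [gamma, c] for one time step
p_cov = np.array([[4e-2, 1e-3], [1e-3, 2e-4]])  # assumed parameter covariance
p_mc = rng.multivariate_normal(p_val, p_cov, size=500)  # (500, 2) parameter draws
log_ratio = np.log(1.2)  # hypothetical log(st / ast) at one location
t_mc = p_mc[:, 0] / (log_ratio + p_mc[:, 1]) - 273.15   # ensemble of temperatures
ci = np.percentile(t_mc, q=[2.5, 97.5])  # percentile bounds, like conf_ints above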
@@ -4313,34 +4185,14 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): time_dim = self.get_time_dim(data_var_key='st') - del_tmpf_after, del_tmpb_after = False, False - - if store_tmpw and not store_tmpf: - if store_tmpf in self: - del_tmpf_after = True - store_tmpf = 'tmpf' - if store_tmpw and not store_tmpb: - if store_tmpb in self: - del_tmpb_after = True - store_tmpb = 'tmpb' - if conf_ints: - assert store_tmpw, 'Current implementation requires you to ' \ - 'define store_tmpw when istimating confidence ' \ + assert "tmpw", 'Current implementation requires you to ' \ + 'define "tmpw" when estimating confidence ' \ 'intervals' no, nt = self.st.shape - npar = 1 + 2 * nt + no # number of parameters - - if store_ta: - ta_dim = [ - i for i in self[store_ta + '_fw'].dims if i != time_dim][0] - tax = self[ta_dim].values - nta = tax.size - npar += nt * 2 * nta - - else: - nta = 0 + nta = self.trans_att.size + npar = 1 + 2 * nt + no + nt * 2 * nta # number of parameters rsize = (mc_sample_size, no, nt) @@ -4364,7 +4216,7 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): assert isinstance(p_val, (str, np.ndarray, np.generic)) if isinstance(p_val, str): p_val = self[p_val].values - assert p_val.shape == (npar,), "Did you set `store_ta='talpha'` as " \ + assert p_val.shape == (npar,), "Did you set 'talpha' as " \ "keyword argument of the " \ "conf_int_double_ended() function?" @@ -4382,23 +4234,23 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): self['df_mc'] = ((time_dim,), d_fw) self['db_mc'] = ((time_dim,), d_bw) - if store_ta: + if nta: ta = p_val[2 * nt + 1 + no:].reshape((nt, 2, nta), order='F') ta_fw = ta[:, 0, :] ta_bw = ta[:, 1, :] ta_fw_arr = np.zeros((no, nt)) - for tai, taxi in zip(ta_fw.T, self.coords[ta_dim].values): + for tai, taxi in zip(ta_fw.T, self.coords["trans_att"].values): ta_fw_arr[self.x.values >= taxi] = \ ta_fw_arr[self.x.values >= taxi] + tai ta_bw_arr = np.zeros((no, nt)) - for tai, taxi in zip(ta_bw.T, self.coords[ta_dim].values): + for tai, taxi in zip(ta_bw.T, self.coords["trans_att"].values): ta_bw_arr[self.x.values < taxi] = \ ta_bw_arr[self.x.values < taxi] + tai - self[store_ta + '_fw_mc'] = (('x', time_dim), ta_fw_arr) - self[store_ta + '_bw_mc'] = (('x', time_dim), ta_bw_arr) + self['talpha_fw_mc'] = (('x', time_dim), ta_fw_arr) + self['talpha_bw_mc'] = (('x', time_dim), ta_bw_arr) elif isinstance(p_cov, bool) and p_cov: raise NotImplementedError( @@ -4452,7 +4304,7 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): self['alpha_mc'] = (('mc', 'x'), alpha) - if store_ta: + if nta: ta = po_mc[:, 2 * nt + 1 + nx_sec:].reshape( (mc_sample_size, nt, 2, nta), order='F') ta_fw = ta[:, :, 0, :] @@ -4461,7 +4313,7 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): ta_fw_arr = da.zeros( (mc_sample_size, no, nt), chunks=memchunk, dtype=float) for tai, taxi in zip(ta_fw.swapaxes(0, 2), - self.coords[ta_dim].values): + self.coords["trans_att"].values): # iterate over the splices i_splice = sum(self.x.values < taxi) mask = create_da_ta2( @@ -4472,15 +4324,15 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): ta_bw_arr = da.zeros( (mc_sample_size, no, nt), chunks=memchunk, dtype=float) for tai, taxi in zip(ta_bw.swapaxes(0, 2), - self.coords[ta_dim].values): + self.coords["trans_att"].values): i_splice = sum(self.x.values < taxi) mask = create_da_ta2( no, i_splice, direction='bw', chunks=memchunk) ta_bw_arr += mask * tai.T[:, None, :] - self[store_ta + '_fw_mc'] = (('mc', 'x', time_dim), 
ta_fw_arr) - self[store_ta + '_bw_mc'] = (('mc', 'x', time_dim), ta_bw_arr) + self['talpha_fw_mc'] = (('mc', 'x', time_dim), ta_fw_arr) + self['talpha_bw_mc'] = (('mc', 'x', time_dim), ta_bw_arr) # Draw from the normal distributions for the Stokes intensities for k, st_labeli, st_vari in zip(['r_st', 'r_ast', 'r_rst', 'r_rast'], @@ -4527,27 +4379,26 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): size=rsize, chunks=memchunk)) - for label, del_label in zip([store_tmpf, store_tmpb], - [del_tmpf_after, del_tmpb_after]): - if store_tmpw or label: - if label == store_tmpf: - if store_ta: - self[store_tmpf + '_mc_set'] = self['gamma_mc'] / ( + for label in ["tmpf", "tmpb"]: + if "tmpw" or label: + if label == "tmpf": + if nta: + self['tmpf_mc_set'] = self['gamma_mc'] / ( np.log(self['r_st'] / self['r_ast']) + self['df_mc'] + self['alpha_mc'] - + self[store_ta + '_fw_mc']) - 273.15 + + self['talpha_fw_mc']) - 273.15 else: - self[store_tmpf + '_mc_set'] = self['gamma_mc'] / ( + self['tmpf_mc_set'] = self['gamma_mc'] / ( np.log(self['r_st'] / self['r_ast']) + self['df_mc'] + self['alpha_mc']) - 273.15 else: - if store_ta: - self[store_tmpb + '_mc_set'] = self['gamma_mc'] / ( + if nta: + self['tmpb_mc_set'] = self['gamma_mc'] / ( np.log(self['r_rst'] / self['r_rast']) + self['db_mc'] - self['alpha_mc'] - + self[store_ta + '_bw_mc']) - 273.15 + + self['talpha_bw_mc']) - 273.15 else: - self[store_tmpb + '_mc_set'] = self['gamma_mc'] / ( + self['tmpb_mc_set'] = self['gamma_mc'] / ( np.log(self['r_rst'] / self['r_rast']) + self['db_mc'] - self['alpha_mc']) - 273.15 @@ -4563,9 +4414,9 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): # subtract the mean temperature q = self[label + '_mc_set'] - self[label] - self[label + '_mc' + store_tempvar] = (q.var(dim='mc', ddof=1)) + self[label + '_mc_var'] = (q.var(dim='mc', ddof=1)) - if conf_ints and not del_label: + if conf_ints: new_chunks = list(self[label + '_mc_set'].chunks) new_chunks[0] = (len(conf_ints),) avg_axis = self[label + '_mc_set'].get_axis_num('mc') @@ -4580,38 +4431,38 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): # Weighted mean of the forward and backward tmpw_var = 1 / ( - 1 / self[store_tmpf + '_mc' + store_tempvar] - + 1 / self[store_tmpb + '_mc' + store_tempvar]) + 1 / self['tmpf_mc_var'] + + 1 / self['tmpb_mc_var']) q = ( - self[store_tmpf + '_mc_set'] - / self[store_tmpf + '_mc' + store_tempvar] - + self[store_tmpb + '_mc_set'] - / self[store_tmpb + '_mc' + store_tempvar]) * tmpw_var - - self[store_tmpw + '_mc_set'] = q # - - self[store_tmpw] = \ - (self[store_tmpf] / - self[store_tmpf + '_mc' + store_tempvar] + - self[store_tmpb] / - self[store_tmpb + '_mc' + store_tempvar] + self['tmpf_mc_set'] + / self['tmpf_mc_var'] + + self['tmpb_mc_set'] + / self['tmpb_mc_var']) * tmpw_var + + self["tmpw" + '_mc_set'] = q # + + self["tmpw"] = \ + (self["tmpf"] / + self['tmpf_mc_var'] + + self["tmpb"] / + self['tmpb_mc_var'] ) * tmpw_var - q = self[store_tmpw + '_mc_set'] - self[store_tmpw] - self[store_tmpw + '_mc' + store_tempvar] = q.var(dim='mc', ddof=1) + q = self["tmpw" + '_mc_set'] - self["tmpw"] + self["tmpw" + '_mc_var'] = q.var(dim='mc', ddof=1) # Calculate the CI of the weighted MC_set if conf_ints: new_chunks_weighted = ((len(conf_ints),),) + memchunk[1:] - avg_axis = self[store_tmpw + '_mc_set'].get_axis_num('mc') - q2 = self[store_tmpw + '_mc_set'].data.map_blocks( + avg_axis = self["tmpw" + '_mc_set'].get_axis_num('mc') + q2 = self["tmpw" + '_mc_set'].data.map_blocks( lambda x: 
np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks_weighted, # Explicitly define output chunks drop_axis=avg_axis, # avg dimensions are dropped new_axis=0, dtype=float) # The new CI dimension is added as first axis - self[store_tmpw + '_mc'] = (('CI', 'x', time_dim), q2) + self["tmpw" + '_mc'] = (('CI', 'x', time_dim), q2) # Clean up the garbage. All arrays with a Monte Carlo dimension. if mc_remove_set_flag: @@ -4619,36 +4470,26 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): 'r_st', 'r_ast', 'r_rst', 'r_rast', 'gamma_mc', 'alpha_mc', 'df_mc', 'db_mc'] - for i in [store_tmpf, store_tmpb, store_tmpw]: + for i in ["tmpf", "tmpb", "tmpw"]: remove_mc_set.append(i + '_mc_set') - if store_ta: - remove_mc_set.append(store_ta + '_fw_mc') - remove_mc_set.append(store_ta + '_bw_mc') + if nta: + remove_mc_set.append('talpha"_fw_mc') + remove_mc_set.append('talpha"_bw_mc') for k in remove_mc_set: if k in self: del self[k] - - if del_tmpf_after: - del self['tmpf'] - if del_tmpb_after: - del self['tmpb'] pass def average_double_ended( self, p_val='p_val', p_cov='p_cov', - store_ta=None, st_var=None, ast_var=None, rst_var=None, rast_var=None, - store_tmpf='tmpf', - store_tmpb='tmpb', - store_tmpw='tmpw', - store_tempvar='_var', conf_ints=None, mc_sample_size=100, ci_avg_time_flag1=False, @@ -4689,32 +4530,6 @@ def average_double_ended( Or manually define a variance with a DataArray of the shape `ds.st.shape`, where the variance can be a function of time and/or x. Required if method is wls. - store_tmpf : str - Key of how to store the Forward calculated temperature. Is - calculated using the - forward Stokes and anti-Stokes observations. - store_tmpb : str - Key of how to store the Backward calculated temperature. Is - calculated using the - backward Stokes and anti-Stokes observations. - store_tmpw : str - Key of how to store the forward-backward-weighted temperature. - First, the variance of - tmpf and tmpb are calculated. The Monte Carlo set of tmpf and - tmpb are averaged, - weighted by their variance. The median of this set is thought to - be the a reasonable - estimate of the temperature - store_tempvar : str - a string that is appended to the store_tmp_ keys. and the - variance is calculated - for those store_tmp_ keys - store_ta : str - Key of how transient attenuation parameters are stored. Default - is `talpha`. `_fw` and `_bw` is appended to for the forward and - backward parameters. The `transient_asym_att_x` is derived from - the `coords` of this DataArray. The `coords` of `ds[store_ta + - '_fw']` should be ('time', 'trans_att'). conf_ints : iterable object of float A list with the confidence boundaries that are calculated. Valid values are between @@ -4731,7 +4546,7 @@ def average_double_ended( measurement were to be taken, it would have this ci" (2) all measurements. So you can state "The temperature remained during the entire measurement period between these ci bounds". - Adds store_tmpw + '_avg1' and store_tmpw + '_mc_avg1_var' to the + Adds "tmpw" + '_avg1' and "tmpw" + '_mc_avg1_var' to the DataStore. If `conf_ints` are set, also the confidence intervals `_mc_avg1` are added to the DataStore. Works independently of the ci_avg_time_flag2 and ci_avg_x_flag. @@ -4745,7 +4560,7 @@ def average_double_ended( intervals. I hereby assume the temperature does not change over time and average all measurements to get a better estimate of the background temperature. 
- Adds store_tmpw + '_avg2' and store_tmpw + '_mc_avg2_var' to the + Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the DataStore. If `conf_ints` are set, also the confidence intervals `_mc_avg2` are added to the DataStore. Works independently of the ci_avg_time_flag1 and ci_avg_x_flag. @@ -4765,7 +4580,7 @@ def average_double_ended( it would have this ci" (2) all measurement locations. So you can state "The temperature along the fiber remained between these ci bounds". - Adds store_tmpw + '_avgx1' and store_tmpw + '_mc_avgx1_var' to the + Adds "tmpw" + '_avgx1' and "tmpw" + '_mc_avgx1_var' to the DataStore. If `conf_ints` are set, also the confidence intervals `_mc_avgx1` are added to the DataStore. Works independently of the ci_avg_time_flag1, ci_avg_time_flag2 and ci_avg_x2_flag. @@ -4780,7 +4595,7 @@ def average_double_ended( other parts of the fiber. And I would like to average the measurements from multiple locations to improve the estimated temperature. - Adds store_tmpw + '_avg2' and store_tmpw + '_mc_avg2_var' to the + Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the DataStore. If `conf_ints` are set, also the confidence intervals `_mc_avg2` are added to the DataStore. Works independently of the ci_avg_time_flag1 and ci_avg_x_flag. @@ -4851,15 +4666,10 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): self.conf_int_double_ended( p_val=p_val, p_cov=p_cov, - store_ta=store_ta, st_var=st_var, ast_var=ast_var, rst_var=rst_var, rast_var=rast_var, - store_tmpf=store_tmpf, - store_tmpb=store_tmpb, - store_tmpw=store_tmpw, - store_tempvar=store_tempvar, conf_ints=None, mc_sample_size=mc_sample_size, da_random_state=da_random_state, @@ -4869,7 +4679,7 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): time_dim = self.get_time_dim(data_var_key='st') - for label in [store_tmpf, store_tmpb]: + for label in ["tmpf", "tmpb"]: if ci_avg_time_sel is not None: time_dim2 = time_dim + '_avg' x_dim2 = 'x' @@ -4934,7 +4744,7 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): # subtract the mean temperature q = self[label + '_mc_set'] - self[label + '_avgsec'] - self[label + '_mc' + '_avgsec' + store_tempvar] = ( + self[label + '_mc' + '_avgsec_var'] = ( q.var(dim='mc', ddof=1)) if ci_avg_x_flag1: @@ -4944,7 +4754,7 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): q = self[label + '_mc_set'] - self[label + '_avgsec'] qvar = q.var(dim=['mc', x_dim2], ddof=1) - self[label + '_mc_avgx1' + store_tempvar] = qvar + self[label + '_mc_avgx1_var'] = qvar if conf_ints: new_chunks = ( @@ -4968,7 +4778,7 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): # Inverse-variance weighting avg_x_var = 1 / (1 / qvar).sum(dim=x_dim2) - self[label + '_mc_avgx2' + store_tempvar] = avg_x_var + self[label + '_mc_avgx2_var'] = avg_x_var self[label + '_mc_avgx2_set'] = (self[label + '_mc_set'] @@ -4999,7 +4809,7 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): q = self[label + '_mc_set'] - self[label + '_avgsec'] qvar = q.var(dim=['mc', time_dim2], ddof=1) - self[label + '_mc_avg1' + store_tempvar] = qvar + self[label + '_mc_avg1_var'] = qvar if conf_ints: new_chunks = ( @@ -5023,7 +4833,7 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): # Inverse-variance weighting avg_time_var = 1 / (1 / qvar).sum(dim=time_dim2) - self[label + '_mc_avg2' + store_tempvar] = avg_time_var + self[label + '_mc_avg2_var'] = avg_time_var self[label + '_mc_avg2_set'] = (self[label + '_mc_set'] / qvar).sum( @@ -5049,41 +4859,41 @@ def 
create_da_ta2(no, i_splice, direction='fw', chunks=None): # Weighted mean of the forward and backward tmpw_var = 1 / ( - 1 / self[store_tmpf + '_mc' + '_avgsec' + store_tempvar] - + 1 / self[store_tmpb + '_mc' + '_avgsec' + store_tempvar]) + 1 / self['tmpf_mc' + '_avgsec_var'] + + 1 / self['tmpb_mc' + '_avgsec_var']) q = ( - self[store_tmpf + '_mc_set'] - / self[store_tmpf + '_mc' + '_avgsec' + store_tempvar] - + self[store_tmpb + '_mc_set'] - / self[store_tmpb + '_mc' + '_avgsec' + store_tempvar]) * tmpw_var - - self[store_tmpw + '_mc_set'] = q # - - # self[store_tmpw] = self[store_tmpw + '_mc_set'].mean(dim='mc') - self[store_tmpw + '_avgsec'] = \ - (self[store_tmpf + '_avgsec'] / - self[store_tmpf + '_mc' + '_avgsec' + store_tempvar] + - self[store_tmpb + '_avgsec'] / - self[store_tmpb + '_mc' + '_avgsec' + store_tempvar] + self['tmpf_mc_set'] + / self['tmpf_mc' + '_avgsec_var'] + + self['tmpb_mc_set'] + / self['tmpb_mc' + '_avgsec_var']) * tmpw_var + + self["tmpw" + '_mc_set'] = q # + + # self["tmpw"] = self["tmpw" + '_mc_set'].mean(dim='mc') + self["tmpw" + '_avgsec'] = \ + (self['tmpf_avgsec'] / + self['tmpf_mc' + '_avgsec_var'] + + self['tmpb_avgsec'] / + self['tmpb_mc' + '_avgsec_var'] ) * tmpw_var - q = self[store_tmpw + '_mc_set'] - self[store_tmpw + '_avgsec'] - self[store_tmpw + '_mc' + '_avgsec' + store_tempvar] = q.var( + q = self["tmpw" + '_mc_set'] - self["tmpw" + '_avgsec'] + self["tmpw" + '_mc' + '_avgsec_var'] = q.var( dim='mc', ddof=1) if ci_avg_time_flag1: - self[store_tmpw + '_avg1'] = \ - self[store_tmpw + '_avgsec'].mean(dim=time_dim2) + self["tmpw" + '_avg1'] = \ + self["tmpw" + '_avgsec'].mean(dim=time_dim2) - self[store_tmpw + '_mc_avg1' + store_tempvar] = \ - self[store_tmpw + '_mc_set'].var(dim=['mc', time_dim2]) + self["tmpw" + '_mc_avg1_var'] = \ + self["tmpw" + '_mc_set'].var(dim=['mc', time_dim2]) if conf_ints: new_chunks_weighted = ((len(conf_ints),),) + (memchunk[1],) - avg_axis = self[store_tmpw + '_mc_set'].get_axis_num( + avg_axis = self["tmpw" + '_mc_set'].get_axis_num( ['mc', time_dim2]) - q2 = self[store_tmpw + '_mc_set'].data.map_blocks( + q2 = self["tmpw" + '_mc_set'].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks_weighted, # Explicitly define output chunks @@ -5091,37 +4901,37 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): new_axis=0, dtype=float) # The new CI dimension is added as # first axis - self[store_tmpw + '_mc_avg1'] = (('CI', x_dim2), q2) + self["tmpw" + '_mc_avg1'] = (('CI', x_dim2), q2) if ci_avg_time_flag2: tmpw_var_avg2 = 1 / ( - 1 / self[store_tmpf + '_mc_avg2' + store_tempvar] - + 1 / self[store_tmpb + '_mc_avg2' + store_tempvar]) + 1 / self['tmpf_mc_avg2_var'] + + 1 / self['tmpb_mc_avg2_var']) - q = (self[store_tmpf + '_mc_avg2_set'] / - self[store_tmpf + '_mc_avg2' + store_tempvar] + - self[store_tmpb + '_mc_avg2_set'] / - self[store_tmpb + '_mc_avg2' + store_tempvar]) * \ + q = (self['tmpf_mc_avg2_set'] / + self['tmpf_mc_avg2_var'] + + self['tmpb_mc_avg2_set'] / + self['tmpb_mc_avg2_var']) * \ tmpw_var_avg2 - self[store_tmpw + '_mc_avg2_set'] = q # + self["tmpw" + '_mc_avg2_set'] = q # - self[store_tmpw + '_avg2'] = \ - (self[store_tmpf + '_avg2'] / - self[store_tmpf + '_mc_avg2' + store_tempvar] + - self[store_tmpb + '_avg2'] / - self[store_tmpb + '_mc_avg2' + store_tempvar] + self["tmpw" + '_avg2'] = \ + (self['tmpf_avg2'] / + self['tmpf_mc_avg2_var'] + + self['tmpb_avg2'] / + self['tmpb_mc_avg2_var'] ) * tmpw_var_avg2 - self[store_tmpw + '_mc_avg2' + 
store_tempvar] = \ + self["tmpw" + '_mc_avg2_var'] = \ tmpw_var_avg2 if conf_ints: # We first need to know the x-dim-chunk-size new_chunks_weighted = ((len(conf_ints),),) + (memchunk[1],) - avg_axis_avg2 = self[store_tmpw + avg_axis_avg2 = self["tmpw" + '_mc_avg2_set'].get_axis_num('mc') - q2 = self[store_tmpw + '_mc_avg2_set'].data.map_blocks( + q2 = self["tmpw" + '_mc_avg2_set'].data.map_blocks( lambda x: np.percentile( x, q=conf_ints, axis=avg_axis_avg2), chunks=new_chunks_weighted, @@ -5129,20 +4939,20 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): drop_axis=avg_axis_avg2, # avg dimensions are dropped new_axis=0, dtype=float) # The new CI dimension is added as firstax - self[store_tmpw + '_mc_avg2'] = (('CI', x_dim2), q2) + self["tmpw" + '_mc_avg2'] = (('CI', x_dim2), q2) if ci_avg_x_flag1: - self[store_tmpw + '_avgx1'] = \ - self[store_tmpw + '_avgsec'].mean(dim=x_dim2) + self["tmpw" + '_avgx1'] = \ + self["tmpw" + '_avgsec'].mean(dim=x_dim2) - self[store_tmpw + '_mc_avgx1' + store_tempvar] = \ - self[store_tmpw + '_mc_set'].var(dim=x_dim2) + self["tmpw" + '_mc_avgx1_var'] = \ + self["tmpw" + '_mc_set'].var(dim=x_dim2) if conf_ints: new_chunks_weighted = ((len(conf_ints),),) + (memchunk[2],) - avg_axis = self[store_tmpw + '_mc_set'].get_axis_num( + avg_axis = self["tmpw" + '_mc_set'].get_axis_num( ['mc', x_dim2]) - q2 = self[store_tmpw + '_mc_set'].data.map_blocks( + q2 = self["tmpw" + '_mc_set'].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks_weighted, # Explicitly define output chunks @@ -5150,37 +4960,37 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): new_axis=0, dtype=float) # The new CI dimension is added as # first axis - self[store_tmpw + '_mc_avgx1'] = (('CI', time_dim2), q2) + self["tmpw" + '_mc_avgx1'] = (('CI', time_dim2), q2) if ci_avg_x_flag2: tmpw_var_avgx2 = 1 / ( - 1 / self[store_tmpf + '_mc_avgx2' + store_tempvar] - + 1 / self[store_tmpb + '_mc_avgx2' + store_tempvar]) + 1 / self['tmpf_mc_avgx2_var'] + + 1 / self['tmpb_mc_avgx2_var']) - q = (self[store_tmpf + '_mc_avgx2_set'] / - self[store_tmpf + '_mc_avgx2' + store_tempvar] + - self[store_tmpb + '_mc_avgx2_set'] / - self[store_tmpb + '_mc_avgx2' + store_tempvar]) * \ + q = (self['tmpf_mc_avgx2_set'] / + self['tmpf_mc_avgx2_var'] + + self['tmpb_mc_avgx2_set'] / + self['tmpb_mc_avgx2_var']) * \ tmpw_var_avgx2 - self[store_tmpw + '_mc_avgx2_set'] = q # + self["tmpw" + '_mc_avgx2_set'] = q # - self[store_tmpw + '_avgx2'] = \ - (self[store_tmpf + '_avgx2'] / - self[store_tmpf + '_mc_avgx2' + store_tempvar] + - self[store_tmpb + '_avgx2'] / - self[store_tmpb + '_mc_avgx2' + store_tempvar] + self["tmpw" + '_avgx2'] = \ + (self['tmpf_avgx2'] / + self['tmpf_mc_avgx2_var'] + + self['tmpb_avgx2'] / + self['tmpb_mc_avgx2_var'] ) * tmpw_var_avgx2 - self[store_tmpw + '_mc_avgx2' + store_tempvar] = \ + self["tmpw" + '_mc_avgx2_var'] = \ tmpw_var_avgx2 if conf_ints: # We first need to know the x-dim-chunk-size new_chunks_weighted = ((len(conf_ints),),) + (memchunk[2],) - avg_axis_avgx2 = self[store_tmpw + avg_axis_avgx2 = self["tmpw" + '_mc_avgx2_set'].get_axis_num('mc') - q2 = self[store_tmpw + '_mc_avgx2_set'].data.map_blocks( + q2 = self["tmpw" + '_mc_avgx2_set'].data.map_blocks( lambda x: np.percentile( x, q=conf_ints, axis=avg_axis_avgx2), chunks=new_chunks_weighted, @@ -5188,7 +4998,7 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): drop_axis=avg_axis_avgx2, # avg dimensions are dropped new_axis=0, dtype=float) # The new CI dimension 
is added as firstax - self[store_tmpw + '_mc_avgx2'] = (('CI', time_dim2), q2) + self["tmpw" + '_mc_avgx2'] = (('CI', time_dim2), q2) # Clean up the garbage. All arrays with a Monte Carlo dimension. if mc_remove_set_flag: @@ -5196,16 +5006,16 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): 'r_st', 'r_ast', 'r_rst', 'r_rast', 'gamma_mc', 'alpha_mc', 'df_mc', 'db_mc', 'x_avg', 'time_avg', 'mc'] - for i in [store_tmpf, store_tmpb, store_tmpw]: + for i in ["tmpf", "tmpb", "tmpw"]: remove_mc_set.append(i + '_avgsec') remove_mc_set.append(i + '_mc_set') remove_mc_set.append(i + '_mc_avg2_set') remove_mc_set.append(i + '_mc_avgx2_set') - remove_mc_set.append(i + '_mc_avgsec' + store_tempvar) + remove_mc_set.append(i + '_mc_avgsec_var') - if store_ta: - remove_mc_set.append(store_ta + '_fw_mc') - remove_mc_set.append(store_ta + '_bw_mc') + if self.trans_att.size: + remove_mc_set.append('talpha"_fw_mc') + remove_mc_set.append('talpha"_bw_mc') for k in remove_mc_set: if k in self: @@ -5516,14 +5326,14 @@ class ParameterIndexDoubleEnded: d_bw = pval[1 + nt:1 + 2 * nt] alpha = pval[1 + 2 * nt:1 + 2 * nt + nx] # store calibration parameters in DataStore - self[store_gamma] = (tuple(), gamma) - self[store_alpha] = (('x',), alpha) - self[store_df] = ((time_dim,), d_fw) - self[store_db] = ((time_dim,), d_bw) + self["gamma"] = (tuple(), gamma) + self["alpha"] = (('x',), alpha) + self["df"] = ((time_dim,), d_fw) + self["db"] = ((time_dim,), d_bw) if nta > 0: ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') - self[store_ta + '_fw'] = ((time_dim, 'trans_att'), ta[:, 0, :]) - self[store_ta + '_bw'] = ((time_dim, 'trans_att'), ta[:, 1, :]) + self['talpha_fw'] = ((time_dim, 'trans_att'), ta[:, 0, :]) + self['talpha_bw'] = ((time_dim, 'trans_att'), ta[:, 1, :]) """ def __init__(self, nt, nx, nta, fix_gamma=False, fix_alpha=False): @@ -5604,7 +5414,7 @@ def taf(self): """ Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') - self[store_ta + '_fw'] = ((time_dim, 'trans_att'), ta[:, 0, :]) + self['talpha_fw'] = ((time_dim, 'trans_att'), ta[:, 0, :]) """ return self.ta[:, 0, :].flatten(order="C") @@ -5613,7 +5423,7 @@ def tab(self): """ Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. 
ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') - self[store_ta + '_bw'] = ((time_dim, 'trans_att'), ta[:, 1, :]) + self['talpha_bw'] = ((time_dim, 'trans_att'), ta[:, 1, :]) """ return self.ta[:, 1, :].flatten(order="C") @@ -5781,7 +5591,7 @@ def c(self): def taf(self): """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" # ta = p_val[-nt * nta:].reshape((nt, nta), order='F') - # self[store_ta] = ((time_dim, 'trans_att'), ta[:, :]) + # self["talpha"] = ((time_dim, 'trans_att'), ta[:, :]) if self.includes_alpha: return np.arange(1 + self.nx + self.nt, 1 + self.nx + self.nt + self.nt * self.nta ).reshape((self.nt, self.nta), order='F') diff --git a/tests/test_dtscalibration.py b/tests/test_dtscalibration.py index 5dbb7774..edfa6111 100644 --- a/tests/test_dtscalibration.py +++ b/tests/test_dtscalibration.py @@ -529,10 +529,6 @@ def test_double_ended_variance_estimate_synthetic(): ast_var=mast_var, rst_var=mrst_var, rast_var=mrast_var, - store_tmpf='tmpf', - store_tmpb='tmpb', - store_tmpw='tmpw', - store_tempvar='_var', conf_ints=[2.5, 50., 97.5], mc_sample_size=100, da_random_state=state) @@ -642,8 +638,6 @@ def test_single_ended_variance_estimate_synthetic(): p_cov='p_cov', st_var=mst_var, ast_var=mast_var, - store_tmpf='tmpf', - store_tempvar='_var', conf_ints=[2.5, 50., 97.5], mc_sample_size=50, da_random_state=state) @@ -914,8 +908,7 @@ def test_double_ended_wls_estimate_synthetic(): rst_var=1e-7, rast_var=1e-7, method='wls', - solver='sparse', - mc_sample_size=5) + solver='sparse') assert_almost_equal_verbose(ds.gamma.values, gamma, decimal=10) assert_almost_equal_verbose(ds.alpha.values, alpha, decimal=8) @@ -1011,9 +1004,7 @@ def test_double_ended_wls_estimate_synthetic_df_and_db_are_different(): rast_var=1., method='wls', solver='sparse', - mc_sample_size=1000, - fix_gamma=(gamma, 0.), - mc_remove_set_flag=True) + fix_gamma=(gamma, 0.)) assert_almost_equal_verbose(df, ds.df.values, decimal=14) assert_almost_equal_verbose(db, ds.db.values, decimal=13) @@ -1113,9 +1104,7 @@ def test_reneaming_old_default_labels_to_new_fixed_labels(): rast_var=1., method='wls', solver='sparse', - mc_sample_size=1000, - fix_gamma=(gamma, 0.), - mc_remove_set_flag=True) + fix_gamma=(gamma, 0.)) assert_almost_equal_verbose(df, ds.df.values, decimal=14) assert_almost_equal_verbose(db, ds.db.values, decimal=13) @@ -1292,8 +1281,6 @@ def test_double_ended_asymmetrical_attenuation(): rast_var=1., method='wls', solver='sparse', - mc_sample_size=1000, - mc_remove_set_flag=True, trans_att=[50.]) assert_almost_equal_verbose(temp_real_celsius, ds.tmpf.values, decimal=7) @@ -1322,8 +1309,6 @@ def test_double_ended_asymmetrical_attenuation(): rast_var=1., method='wls', solver='sparse', - mc_sample_size=1000, - mc_remove_set_flag=True, transient_asym_att_x=[50.]) assert_almost_equal_verbose(temp_real_celsius, ds.tmpf.values, decimal=7) @@ -1410,8 +1395,6 @@ def test_double_ended_one_matching_section_and_one_asym_att(): rast_var=1., method='wls', solver='sparse', - mc_sample_size=3, - mc_remove_set_flag=True, trans_att=[50.], matching_sections=[ ( @@ -1516,8 +1499,6 @@ def test_double_ended_two_matching_sections_and_two_asym_atts(): rast_var=0.1, method='wls', solver='sparse', - mc_sample_size=3, - mc_remove_set_flag=True, trans_att=[x[3 * nx_per_sec], x[6 * nx_per_sec]], matching_sections=ms) @@ -1599,7 +1580,6 @@ def test_double_ended_wls_fix_gamma_estimate_synthetic(): rast_var=1e-12, method='wls', solver='sparse', - mc_sample_size=5, fix_gamma=(gamma, 0.)) 
assert_almost_equal_verbose(ds.gamma.values, gamma, decimal=18) @@ -1678,7 +1658,6 @@ def test_double_ended_wls_fix_alpha_estimate_synthetic(): rast_var=1e-7, method='wls', solver='sparse', - mc_sample_size=5, fix_alpha=(alpha, np.zeros_like(alpha))) assert_almost_equal_verbose(ds.gamma.values, gamma, decimal=8) @@ -1757,7 +1736,6 @@ def test_double_ended_wls_fix_alpha_fix_gamma_estimate_synthetic(): rast_var=1e-7, method='wls', solver='sparse', - mc_sample_size=5, fix_gamma=(gamma, 0.), fix_alpha=(alpha, np.zeros_like(alpha))) @@ -1848,8 +1826,6 @@ def test_double_ended_fix_alpha_matching_sections_and_one_asym_att(): rast_var=1., method='wls', solver='sparse', - mc_sample_size=3, - mc_remove_set_flag=True, trans_att=[50.], matching_sections=[ ( @@ -1872,8 +1848,6 @@ def test_double_ended_fix_alpha_matching_sections_and_one_asym_att(): rast_var=1., method='wls', solver='sparse', - mc_sample_size=3, - mc_remove_set_flag=True, fix_alpha=(alpha_adj, alpha_var_adj), trans_att=[50.], matching_sections=[ @@ -1965,8 +1939,6 @@ def test_double_ended_fix_alpha_gamma_matching_sections_and_one_asym_att(): rast_var=1., method='wls', solver='sparse', - mc_sample_size=3, - mc_remove_set_flag=True, trans_att=[50.], matching_sections=[ ( @@ -1989,8 +1961,6 @@ def test_double_ended_fix_alpha_gamma_matching_sections_and_one_asym_att(): rast_var=1., method='wls', solver='sparse', - mc_sample_size=3, - mc_remove_set_flag=True, fix_alpha=(alpha_adj, alpha_var_adj), fix_gamma=(gamma, 0.), trans_att=[50.], @@ -2083,9 +2053,7 @@ def test_double_ended_fix_gamma_matching_sections_and_one_asym_att(): rast_var=1., method='wls', solver='sparse', - mc_sample_size=3, fix_gamma=(gamma, 0.), - mc_remove_set_flag=True, trans_att=[50.], matching_sections=[ ( @@ -2205,10 +2173,6 @@ def test_double_ended_exponential_variance_estimate_synthetic(): ast_var=mast_var, rst_var=mrst_var, rast_var=mrast_var, - store_tmpf='tmpf', - store_tmpb='tmpb', - store_tmpw='tmpw', - store_tempvar='_var', conf_ints=[2.5, 50., 97.5], mc_sample_size=100, da_random_state=state) @@ -2323,8 +2287,7 @@ def test_estimate_variance_of_temperature_estimate(): # fix_gamma=(gamma, 0.), # fix_alpha=(alpha, 0. 
* alpha), method='wls', - solver='stats', - mc_sample_size=nmc) + solver='stats') ds.conf_int_double_ended( p_val='p_val', @@ -2333,10 +2296,6 @@ def test_estimate_variance_of_temperature_estimate(): ast_var=mast_var, rst_var=mrst_var, rast_var=mrast_var, - store_tmpf='tmpf', - store_tmpb='tmpb', - store_tmpw='tmpw', - store_tempvar='_var', # conf_ints=[20., 80.], mc_sample_size=nmc, da_random_state=state, @@ -3002,8 +2961,6 @@ def test_single_ended_matching_sections_synthetic(): p_cov='p_cov', st_var=1.0, ast_var=1.0, - store_tmpf='tmpf', - store_tempvar='_var', conf_ints=[2.5, 50., 97.5], mc_sample_size=50) @@ -3106,8 +3063,6 @@ def test_single_ended_exponential_variance_estimate_synthetic(): p_cov='p_cov', st_var=mst_var, ast_var=mast_var, - store_tmpf='tmpf', - store_tempvar='_var', conf_ints=[2.5, 50., 97.5], mc_sample_size=50, da_random_state=state) @@ -3197,8 +3152,6 @@ def test_average_measurements_single_ended(): p_cov='p_cov', st_var=st_var, ast_var=ast_var, - store_tmpf='tmpf', - store_tempvar='_var', conf_ints=[2.5, 97.5], mc_sample_size=50, # <- choose a much larger sample size ci_avg_x_flag1=True, @@ -3209,8 +3162,6 @@ def test_average_measurements_single_ended(): p_cov='p_cov', st_var=st_var, ast_var=ast_var, - store_tmpf='tmpf', - store_tempvar='_var', conf_ints=[2.5, 97.5], mc_sample_size=50, # <- choose a much larger sample size ci_avg_x_flag2=True, @@ -3223,8 +3174,6 @@ def test_average_measurements_single_ended(): p_cov='p_cov', st_var=st_var, ast_var=ast_var, - store_tmpf='tmpf', - store_tempvar='_var', conf_ints=[2.5, 97.5], mc_sample_size=50, # <- choose a much larger sample size ci_avg_time_flag1=True, @@ -3235,8 +3184,6 @@ def test_average_measurements_single_ended(): p_cov='p_cov', st_var=st_var, ast_var=ast_var, - store_tmpf='tmpf', - store_tempvar='_var', conf_ints=[2.5, 97.5], mc_sample_size=50, # <- choose a much larger sample size ci_avg_time_flag1=False, @@ -3267,7 +3214,6 @@ def test_average_measurements_double_ended(): ast_var=ast_var, rst_var=rst_var, rast_var=rast_var, - store_tmpw='tmpw', method='wls', solver='sparse') ds.average_double_ended( @@ -3277,10 +3223,6 @@ def test_average_measurements_double_ended(): ast_var=ast_var, rst_var=rst_var, rast_var=rast_var, - store_tmpf='tmpf', - store_tmpb='tmpb', - store_tmpw='tmpw', - store_tempvar='_var', conf_ints=[2.5, 97.5], mc_sample_size=50, # <- choose a much larger sample size ci_avg_x_flag1=True, @@ -3293,10 +3235,6 @@ def test_average_measurements_double_ended(): ast_var=ast_var, rst_var=rst_var, rast_var=rast_var, - store_tmpf='tmpf', - store_tmpb='tmpb', - store_tmpw='tmpw', - store_tempvar='_var', conf_ints=[2.5, 97.5], mc_sample_size=50, # <- choose a much larger sample size ci_avg_x_flag2=True, @@ -3311,10 +3249,6 @@ def test_average_measurements_double_ended(): ast_var=ast_var, rst_var=rst_var, rast_var=rast_var, - store_tmpf='tmpf', - store_tmpb='tmpb', - store_tmpw='tmpw', - store_tempvar='_var', conf_ints=[2.5, 97.5], mc_sample_size=50, # <- choose a much larger sample size ci_avg_time_flag1=True, @@ -3327,10 +3261,6 @@ def test_average_measurements_double_ended(): ast_var=ast_var, rst_var=rst_var, rast_var=rast_var, - store_tmpf='tmpf', - store_tmpb='tmpb', - store_tmpw='tmpw', - store_tempvar='_var', conf_ints=[2.5, 97.5], mc_sample_size=50, # <- choose a much larger sample size ci_avg_time_flag1=False, From 520c075ce7d39fee75433b05f84929b36b58c82b Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Mon, 24 Jul 2023 13:39:33 +0200 Subject: [PATCH 07/14] Update 08Calibrate_double_ended.ipynb 
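
The optional Monte Carlo arguments (mc_sample_size and mc_conf_ints) are
removed from the calibration call in the notebook.
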
--- docs/notebooks/08Calibrate_double_ended.ipynb | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/notebooks/08Calibrate_double_ended.ipynb b/docs/notebooks/08Calibrate_double_ended.ipynb index 189c7cbe..c76fcdf4 100644 --- a/docs/notebooks/08Calibrate_double_ended.ipynb +++ b/docs/notebooks/08Calibrate_double_ended.ipynb @@ -241,8 +241,6 @@ " ast_var=ast_var,\n", " rst_var=rst_var,\n", " rast_var=rast_var,\n", - " mc_sample_size=10000, # Optional\n", - " mc_conf_ints=[2.5, 97.5], # Optional\n", ")" ] }, From e327b6863d94108d8054410f2e142c425ab48afb Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Mon, 24 Jul 2023 13:45:44 +0200 Subject: [PATCH 08/14] Update 14Lossy_splices.ipynb --- docs/notebooks/14Lossy_splices.ipynb | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/notebooks/14Lossy_splices.ipynb b/docs/notebooks/14Lossy_splices.ipynb index 29fd99c0..37c2f3fe 100644 --- a/docs/notebooks/14Lossy_splices.ipynb +++ b/docs/notebooks/14Lossy_splices.ipynb @@ -122,7 +122,7 @@ "ds.isel(time=0).ast.plot(label=\"ast\")\n", "ds.isel(time=0).rst.plot(label=\"rst\")\n", "ds.isel(time=0).rast.plot(label=\"rast\")\n", - "plt.legend()" + "plt.legend();" ] }, { @@ -157,12 +157,11 @@ " ast_var=ast_var,\n", " rst_var=rst_var,\n", " rast_var=rast_var,\n", - " store_tmpw=\"tmpw\",\n", " method=\"wls\",\n", " solver=\"sparse\",\n", ")\n", "\n", - "ds_a.isel(time=0).tmpw.plot(label=\"calibrated\")" + "ds_a.isel(time=0).tmpw.plot(label=\"calibrated\");" ] }, { @@ -198,14 +197,13 @@ " rst_var=rst_var,\n", " rast_var=rast_var,\n", " trans_att=[50.0],\n", - " store_tmpw=\"tmpw\",\n", " method=\"wls\",\n", " solver=\"sparse\",\n", ")\n", "\n", "ds_a.isel(time=0).tmpw.plot(label=\"no trans. att.\")\n", "ds.isel(time=0).tmpw.plot(label=\"with trans. att.\")\n", - "plt.legend()" + "plt.legend();" ] }, { @@ -218,7 +216,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, From a115b56bf341ad8ec316f8ce139845b1f91277d8 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Mon, 24 Jul 2023 14:02:28 +0200 Subject: [PATCH 09/14] Updated notebooks 14, 15, and 16 --- docs/notebooks/14Lossy_splices.ipynb | 6 +----- docs/notebooks/15Matching_sections.ipynb | 12 +++--------- docs/notebooks/16Averaging_temperatures.ipynb | 7 ++----- 3 files changed, 6 insertions(+), 19 deletions(-) diff --git a/docs/notebooks/14Lossy_splices.ipynb b/docs/notebooks/14Lossy_splices.ipynb index 37c2f3fe..b7959031 100644 --- a/docs/notebooks/14Lossy_splices.ipynb +++ b/docs/notebooks/14Lossy_splices.ipynb @@ -157,8 +157,6 @@ " ast_var=ast_var,\n", " rst_var=rst_var,\n", " rast_var=rast_var,\n", - " method=\"wls\",\n", - " solver=\"sparse\",\n", ")\n", "\n", "ds_a.isel(time=0).tmpw.plot(label=\"calibrated\");" @@ -197,8 +195,6 @@ " rst_var=rst_var,\n", " rast_var=rast_var,\n", " trans_att=[50.0],\n", - " method=\"wls\",\n", - " solver=\"sparse\",\n", ")\n", "\n", "ds_a.isel(time=0).tmpw.plot(label=\"no trans. 
att.\")\n", @@ -230,7 +226,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.11" + "version": "3.10.10" } }, "nbformat": 4, diff --git a/docs/notebooks/15Matching_sections.ipynb b/docs/notebooks/15Matching_sections.ipynb index 5cb355d0..c62ca7c3 100644 --- a/docs/notebooks/15Matching_sections.ipynb +++ b/docs/notebooks/15Matching_sections.ipynb @@ -132,10 +132,7 @@ " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", - " rast_var=rast_var,\n", - " store_tmpw=\"tmpw\",\n", - " method=\"wls\",\n", - " solver=\"sparse\",\n", + " rast_var=rast_var\n", ")\n", "\n", "ds_a.isel(time=0).tmpw.plot(label=\"calibrated\")" @@ -183,9 +180,6 @@ " rast_var=rast_var,\n", " trans_att=[50.0],\n", " matching_sections=matching_sections,\n", - " store_tmpw=\"tmpw\",\n", - " method=\"wls\",\n", - " solver=\"sparse\",\n", ")\n", "\n", "ds_a.isel(time=0).tmpw.plot(label=\"normal calibration\")\n", @@ -203,7 +197,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -217,7 +211,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.11" + "version": "3.10.10" } }, "nbformat": 4, diff --git a/docs/notebooks/16Averaging_temperatures.ipynb b/docs/notebooks/16Averaging_temperatures.ipynb index 33126b58..6079bd9f 100644 --- a/docs/notebooks/16Averaging_temperatures.ipynb +++ b/docs/notebooks/16Averaging_temperatures.ipynb @@ -104,9 +104,6 @@ " ast_var=ast_var,\n", " rst_var=rst_var,\n", " rast_var=rast_var,\n", - " store_tmpw=\"tmpw\",\n", - " method=\"wls\",\n", - " solver=\"sparse\",\n", ")" ] }, @@ -386,7 +383,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -400,7 +397,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.11" + "version": "3.10.10" } }, "nbformat": 4, From 544d61fc7147806175344d60cb1cd1ade680edc0 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Thu, 27 Jul 2023 20:20:24 +0200 Subject: [PATCH 10/14] Also standardize x and time dim names --- src/dtscalibration/calibrate_utils.py | 28 ++-- src/dtscalibration/datastore.py | 231 ++++++++++---------------- src/dtscalibration/plot.py | 48 +++--- 3 files changed, 122 insertions(+), 185 deletions(-) diff --git a/src/dtscalibration/calibrate_utils.py b/src/dtscalibration/calibrate_utils.py index cd79ed48..377935f9 100644 --- a/src/dtscalibration/calibrate_utils.py +++ b/src/dtscalibration/calibrate_utils.py @@ -633,11 +633,11 @@ def calibration_double_ended_solver( # noqa: MC0001 # put E outside of reference section in solution # concatenating makes a copy of the data instead of using a pointer ds_sub = ds[['st', 'ast', 'rst', 'rast', 'trans_att']] - time_dim = ds_sub.get_time_dim() - ds_sub['df'] = ((time_dim,), p_sol[1:1 + nt]) - ds_sub['df_var'] = ((time_dim,), p_var[1:1 + nt]) - ds_sub['db'] = ((time_dim,), p_sol[1 + nt:1 + 2 * nt]) - ds_sub['db_var'] = ((time_dim,), p_var[1 + nt:1 + 2 * nt]) + 'time' = ds_sub.get_time_dim() + ds_sub['df'] = (('time',), p_sol[1:1 + nt]) + ds_sub['df_var'] = (('time',), p_var[1:1 + nt]) + ds_sub['db'] = (('time',), p_sol[1 + nt:1 + 2 * nt]) + ds_sub['db_var'] = (('time',), p_var[1 + nt:1 + 2 * nt]) E_all_exact, E_all_var_exact = calc_alpha_double( 'exact', ds_sub, @@ -1238,7 +1238,7 @@ def calc_alpha_double( assert ix_alpha_is_zero >= 0, 'Define 
ix_alpha_is_zero' + \ str(ix_alpha_is_zero) - time_dim = ds.get_time_dim() + 'time' = ds.get_time_dim() if st_var is not None: if callable(st_var): @@ -1280,8 +1280,8 @@ def calc_alpha_double( # Can be improved by including covariances. That reduces the # uncert. - ta_arr_fw = np.zeros((ds.x.size, ds[time_dim].size)) - ta_arr_fw_var = np.zeros((ds.x.size, ds[time_dim].size)) + ta_arr_fw = np.zeros((ds.x.size, ds['time'].size)) + ta_arr_fw_var = np.zeros((ds.x.size, ds['time'].size)) for tai, taxi, tai_var in zip( talpha_fw.T, ds.trans_att.values, talpha_fw_var.T): ta_arr_fw[ds.x.values >= taxi] = \ @@ -1289,8 +1289,8 @@ def calc_alpha_double( ta_arr_fw_var[ds.x.values >= taxi] = \ ta_arr_fw_var[ds.x.values >= taxi] + tai_var - ta_arr_bw = np.zeros((ds.x.size, ds[time_dim].size)) - ta_arr_bw_var = np.zeros((ds.x.size, ds[time_dim].size)) + ta_arr_bw = np.zeros((ds.x.size, ds['time'].size)) + ta_arr_bw_var = np.zeros((ds.x.size, ds['time'].size)) for tai, taxi, tai_var in zip( talpha_bw.T, ds.trans_att.values, talpha_bw_var.T): ta_arr_bw[ds.x.values < taxi] = \ @@ -1308,8 +1308,8 @@ def calc_alpha_double( A_var = (i_var_fw + i_var_bw + D_B_var + D_F_var) / 2 A = (i_bw - i_fw) / 2 + (D_B - D_F) / 2 - E_var = 1 / (1 / A_var).sum(dim=time_dim) - E = (A / A_var).sum(dim=time_dim) * E_var + E_var = 1 / (1 / A_var).sum(dim='time') + E = (A / A_var).sum(dim='time') * E_var else: i_fw = np.log(ds.st / ds.ast) @@ -1322,8 +1322,8 @@ def calc_alpha_double( D_B = ds[D_B_label] A = (i_bw - i_fw) / 2 + (D_B - D_F) / 2 - E_var = A.var(dim=time_dim) - E = A.mean(dim=time_dim) + E_var = A.var(dim='time') + E = A.mean(dim='time') # E is defined zero at the first index of the reference sections if mode == 'guess': diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index fe648b20..2582170d 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -103,11 +103,10 @@ def __init__(self, *args, autofill_dim_attrs=True, **kwargs): ideal_dim.append('x') all_dim.pop(all_dim.index('x')) - time_dim = self.get_time_dim() - if time_dim: - if time_dim in all_dim: - ideal_dim.append(time_dim) - all_dim.pop(all_dim.index(time_dim)) + if 'time': + if 'time' in all_dim: + ideal_dim.append('time') + all_dim.pop(all_dim.index('time')) ideal_dim += all_dim @@ -370,8 +369,7 @@ def timeseries_keys(self): """ Returns the keys of all timeseires that can be used for calibration. """ - time_dim = self.get_time_dim() - return [k for k, v in self.data_vars.items() if v.dims == (time_dim,)] + return [k for k, v in self.data_vars.items() if v.dims == ('time',)] def to_netcdf( self, @@ -702,43 +700,6 @@ def get_default_encoding(self, time_chunks_from_key=None): return encoding - def get_time_dim(self, data_var_key=None): - """ - Find relevant time dimension. by educative guessing - - Parameters - ---------- - data_var_key : str - The data variable key that contains a relevant time dimension. If - None, 'st' is used. 
- - Returns - ------- - - """ - options = [ - 'date', 'time', 'day', 'days', 'hour', 'hours', 'minute', - 'minutes', 'second', 'seconds'] - if data_var_key is None: - if 'st' in self.data_vars: - data_var_key = 'st' - elif 'st' in self.data_vars: - data_var_key = 'st' - else: - return 'time' - - dims = self[data_var_key].dims - # find all dims in options - in_opt = [next(filter(lambda s: s == d, options), None) for d in dims] - - if in_opt and in_opt != [None]: - # exclude Nones from list - return next(filter(None, in_opt)) - - else: - # there is no time dimension - return None - def get_section_indices(self, sec): """Returns the x-indices of the section. `sec` is a slice.""" xis = self.x.astype(int) * 0 + np.arange(self.x.size, dtype=int) @@ -1719,8 +1680,6 @@ def check_reference_section_values(self): ------- """ - time_dim = self.get_time_dim() - for key in self.sections.keys(): if not np.issubdtype(self[key].dtype, np.floating): raise ValueError( @@ -1733,7 +1692,7 @@ def check_reference_section_values(self): 'NaN/inf value(s) found in reference temperature "' + key + '"') - if self[key].dims != (time_dim,): + if self[key].dims != ('time',): raise ValueError( 'Time dimension of the reference temperature timeseries ' + key + 'is not the same as the time dimension' @@ -1902,8 +1861,7 @@ def calibration_single_ended( self.check_reference_section_values() nx = self.x.size - time_dim = self.get_time_dim() - nt = self[time_dim].size + nt = self['time'].size nta = self.trans_att.size assert self.st.dims[0] == "x", "Stokes are transposed" @@ -2083,19 +2041,19 @@ def calibration_single_ended( if nta > 0: self["talpha_fw"] = ( - ("trans_att", time_dim), + ("trans_att", 'time'), ip.get_taf_pars(p_val), ) self["talpha_fw_var"] = ( - ("trans_att", time_dim), + ("trans_att", 'time'), ip.get_taf_pars(p_var), ) else: - self["talpha_fw"] = (("trans_att", time_dim), np.zeros((0, nt))) - self["talpha_fw_var"] = (("trans_att", time_dim), np.zeros((0, nt))) + self["talpha_fw"] = (("trans_att", 'time'), np.zeros((0, nt))) + self["talpha_fw_var"] = (("trans_att", 'time'), np.zeros((0, nt))) - self["c"] = ((time_dim,), p_val[ip.c]) - self["c_var"] = ((time_dim,), p_var[ip.c]) + self["c"] = (('time',), p_val[ip.c]) + self["c_var"] = (('time',), p_var[ip.c]) if fix_alpha: self["alpha"] = (("x",), fix_alpha[0]) @@ -2111,13 +2069,13 @@ def calibration_single_ended( ) self["talpha_fw_full"] = ( - ("x", time_dim), + ("x", 'time'), ip.get_taf_values( pval=p_val, x=self.x.values, trans_att=self.trans_att.values, axis="" ), ) self["talpha_fw_full_var"] = ( - ("x", time_dim), + ("x", 'time'), ip.get_taf_values( pval=p_var, x=self.x.values, trans_att=self.trans_att.values, axis="" ), @@ -2444,8 +2402,7 @@ def calibration_double_ended( self.check_reference_section_values() nx = self.x.size - time_dim = self.get_time_dim() - nt = self[time_dim].size + nt = self['time'].size nta = self.trans_att.size ix_sec = self.ufunc_per_section(x_indices=True, calc_per="all") nx_sec = ix_sec.size @@ -3125,40 +3082,40 @@ def calibration_double_ended( # save estimates and variances to datastore, skip covariances self["gamma"] = (tuple(), p_val[ip.gamma].item()) self["alpha"] = (("x",), p_val[ip.alpha]) - self["df"] = ((time_dim,), p_val[ip.df]) - self["db"] = ((time_dim,), p_val[ip.db]) + self["df"] = (('time',), p_val[ip.df]) + self["db"] = (('time',), p_val[ip.db]) if nta: self["talpha_fw"] = ( - (time_dim, "trans_att"), + ('time', "trans_att"), p_val[ip.taf].reshape((nt, nta), order="C"), ) self["talpha_bw"] = ( - (time_dim, 
"trans_att"), + ('time', "trans_att"), p_val[ip.tab].reshape((nt, nta), order="C"), ) self["talpha_fw_var"] = ( - (time_dim, "trans_att"), + ('time', "trans_att"), p_var[ip.taf].reshape((nt, nta), order="C"), ) self["talpha_bw_var"] = ( - (time_dim, "trans_att"), + ('time', "trans_att"), p_var[ip.tab].reshape((nt, nta), order="C"), ) else: - self["talpha_fw"] = ((time_dim, "trans_att"), np.zeros((nt, 0))) - self["talpha_bw"] = ((time_dim, "trans_att"), np.zeros((nt, 0))) - self["talpha_fw_var"] = ((time_dim, "trans_att"), np.zeros((nt, 0))) - self["talpha_bw_var"] = ((time_dim, "trans_att"), np.zeros((nt, 0))) + self["talpha_fw"] = (('time', "trans_att"), np.zeros((nt, 0))) + self["talpha_bw"] = (('time', "trans_att"), np.zeros((nt, 0))) + self["talpha_fw_var"] = (('time', "trans_att"), np.zeros((nt, 0))) + self["talpha_bw_var"] = (('time', "trans_att"), np.zeros((nt, 0))) self["talpha_fw_full"] = ( - ("x", time_dim), + ("x", 'time'), ip.get_taf_values( pval=p_val, x=self.x.values, trans_att=self.trans_att.values, axis="" ), ) self["talpha_bw_full"] = ( - ("x", time_dim), + ("x", 'time'), ip.get_tab_values( pval=p_val, x=self.x.values, trans_att=self.trans_att.values, axis="" ), @@ -3188,16 +3145,16 @@ def calibration_double_ended( self["gamma_var"] = (tuple(), p_var[ip.gamma].item()) self["alpha_var"] = (("x",), p_var[ip.alpha]) - self["df_var"] = ((time_dim,), p_var[ip.df]) - self["db_var"] = ((time_dim,), p_var[ip.db]) + self["df_var"] = (('time',), p_var[ip.df]) + self["db_var"] = (('time',), p_var[ip.db]) self["talpha_fw_full_var"] = ( - ("x", time_dim), + ("x", 'time'), ip.get_taf_values( pval=p_var, x=self.x.values, trans_att=self.trans_att.values, axis="" ), ) self["talpha_bw_full_var"] = ( - ("x", time_dim), + ("x", 'time'), ip.get_tab_values( pval=p_var, x=self.x.values, trans_att=self.trans_att.values, axis="" ), @@ -3497,8 +3454,6 @@ def conf_int_single_ended( else: state = da.random.RandomState() - time_dim = self.get_time_dim(data_var_key='st') - no, nt = self.st.data.shape if 'trans_att' in self.keys(): nta = self.trans_att.size @@ -3534,15 +3489,15 @@ def conf_int_single_ended( if fixed_alpha: self['alpha_mc'] = (('mc', 'x'), p_mc[:, 1:no + 1]) - self['c_mc'] = (('mc', time_dim), p_mc[:, 1 + no:1 + no + nt]) + self['c_mc'] = (('mc', 'time'), p_mc[:, 1 + no:1 + no + nt]) else: self['dalpha_mc'] = (('mc',), p_mc[:, 1]) - self['c_mc'] = (('mc', time_dim), p_mc[:, 2:nt + 2]) + self['c_mc'] = (('mc', 'time'), p_mc[:, 2:nt + 2]) self['gamma_mc'] = (('mc',), p_mc[:, 0]) if nta: self['ta_mc'] = ( - ('mc', 'trans_att', time_dim), + ('mc', 'trans_att', 'time'), np.reshape(p_mc[:, -nt * nta:], (mc_sample_size, nta, nt))) rsize = (self.mc.size, self.x.size, self.time.size) @@ -3597,7 +3552,7 @@ def conf_int_single_ended( st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) self[k] = ( - ('mc', 'x', time_dim), + ('mc', 'x', 'time'), state.normal( loc=loc, # has chunks=memchunk[1:] scale=st_vari_da**0.5, @@ -3611,7 +3566,7 @@ def conf_int_single_ended( for tai, taxi in zip(ta.values, self.trans_att.values): ta_arr[ii, self.x.values >= taxi] = \ ta_arr[ii, self.x.values >= taxi] + tai - self['ta_mc_arr'] = (('mc', 'x', time_dim), ta_arr) + self['ta_mc_arr'] = (('mc', 'x', 'time'), ta_arr) if fixed_alpha: self['tmpf_mc_set'] = self['gamma_mc'] / ( @@ -3646,7 +3601,7 @@ def conf_int_single_ended( drop_axis=avg_axis, # avg dimesnions are dropped from input arr new_axis=0) # The new CI dimension is added as first axis - self['tmpf_mc'] = (('CI', 'x', time_dim), q) + self['tmpf_mc'] = 
(('CI', 'x', 'time'), q) if mc_remove_set_flag: drop_var = [ @@ -3818,54 +3773,52 @@ def average_single_ended( reduce_memory_usage=reduce_memory_usage, **kwargs) - time_dim = self.get_time_dim(data_var_key='st') - if ci_avg_time_sel is not None: - time_dim2 = time_dim + '_avg' + time_dim2 = 'time' + '_avg' x_dim2 = 'x' self.coords[time_dim2] = ( (time_dim2,), - self[time_dim].sel(**{ - time_dim: ci_avg_time_sel}).data) + self['time'].sel(**{ + 'time': ci_avg_time_sel}).data) self['tmpf_avgsec'] = ( ('x', time_dim2), self["tmpf"].sel(**{ - time_dim: ci_avg_time_sel}).data) + 'time': ci_avg_time_sel}).data) self['tmpf_mc_set'] = ( ('mc', 'x', time_dim2), self["tmpf" + '_mc_set'].sel(**{ - time_dim: ci_avg_time_sel}).data) + 'time': ci_avg_time_sel}).data) elif ci_avg_time_isel is not None: - time_dim2 = time_dim + '_avg' + time_dim2 = 'time' + '_avg' x_dim2 = 'x' self.coords[time_dim2] = ( (time_dim2,), - self[time_dim].isel(**{ - time_dim: ci_avg_time_isel}).data) + self['time'].isel(**{ + 'time': ci_avg_time_isel}).data) self['tmpf_avgsec'] = ( ('x', time_dim2), self["tmpf"].isel(**{ - time_dim: ci_avg_time_isel}).data) + 'time': ci_avg_time_isel}).data) self['tmpf_mc_set'] = ( ('mc', 'x', time_dim2), self["tmpf" + '_mc_set'].isel(**{ - time_dim: ci_avg_time_isel}).data) + 'time': ci_avg_time_isel}).data) elif ci_avg_x_sel is not None: - time_dim2 = time_dim + time_dim2 = 'time' x_dim2 = 'x_avg' self.coords[x_dim2] = ((x_dim2,), self.x.sel(x=ci_avg_x_sel).data) self['tmpf_avgsec'] = ( - (x_dim2, time_dim), self["tmpf"].sel(x=ci_avg_x_sel).data) + (x_dim2, 'time'), self["tmpf"].sel(x=ci_avg_x_sel).data) self['tmpf_mc_set'] = ( - ('mc', x_dim2, time_dim), + ('mc', x_dim2, 'time'), self['tmpf_mc_set'].sel(x=ci_avg_x_sel).data) elif ci_avg_x_isel is not None: - time_dim2 = time_dim + time_dim2 = 'time' x_dim2 = 'x_avg' self.coords[x_dim2] = ( (x_dim2,), self.x.isel(x=ci_avg_x_isel).data) @@ -3878,7 +3831,7 @@ def average_single_ended( else: self['tmpf_avgsec'] = self["tmpf"] x_dim2 = 'x' - time_dim2 = time_dim + time_dim2 = 'time' # subtract the mean temperature q = self['tmpf_mc_set'] - self['tmpf_avgsec'] @@ -4183,8 +4136,6 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): else: state = da.random.RandomState() - time_dim = self.get_time_dim(data_var_key='st') - if conf_ints: assert "tmpw", 'Current implementation requires you to ' \ 'define "tmpw" when estimating confidence ' \ @@ -4231,8 +4182,8 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): self['gamma_mc'] = (tuple(), gamma) self['alpha_mc'] = (('x',), alpha) - self['df_mc'] = ((time_dim,), d_fw) - self['db_mc'] = ((time_dim,), d_bw) + self['df_mc'] = (('time',), d_fw) + self['db_mc'] = (('time',), d_bw) if nta: ta = p_val[2 * nt + 1 + no:].reshape((nt, 2, nta), order='F') @@ -4249,8 +4200,8 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): ta_bw_arr[self.x.values < taxi] = \ ta_bw_arr[self.x.values < taxi] + tai - self['talpha_fw_mc'] = (('x', time_dim), ta_fw_arr) - self['talpha_bw_mc'] = (('x', time_dim), ta_bw_arr) + self['talpha_fw_mc'] = (('x', 'time'), ta_fw_arr) + self['talpha_bw_mc'] = (('x', 'time'), ta_bw_arr) elif isinstance(p_cov, bool) and p_cov: raise NotImplementedError( @@ -4281,8 +4232,8 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): d_bw = po_mc[:, 1 + nt:2 * nt + 1] self['gamma_mc'] = (('mc',), gamma) - self['df_mc'] = (('mc', time_dim), d_fw) - self['db_mc'] = (('mc', time_dim), d_bw) + self['df_mc'] = (('mc', 'time'), d_fw) + self['db_mc'] = (('mc', 
'time'), d_bw) # calculate alpha seperately alpha = np.zeros((mc_sample_size, no), dtype=float) @@ -4331,8 +4282,8 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): ta_bw_arr += mask * tai.T[:, None, :] - self['talpha_fw_mc'] = (('mc', 'x', time_dim), ta_fw_arr) - self['talpha_bw_mc'] = (('mc', 'x', time_dim), ta_bw_arr) + self['talpha_fw_mc'] = (('mc', 'x', 'time'), ta_fw_arr) + self['talpha_bw_mc'] = (('mc', 'x', 'time'), ta_bw_arr) # Draw from the normal distributions for the Stokes intensities for k, st_labeli, st_vari in zip(['r_st', 'r_ast', 'r_rst', 'r_rast'], @@ -4372,7 +4323,7 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) self[k] = ( - ('mc', 'x', time_dim), + ('mc', 'x', 'time'), state.normal( loc=loc, # has chunks=memchunk[1:] scale=st_vari_da**0.5, @@ -4427,7 +4378,7 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): # avg dimensions are dropped from input arr new_axis=0) # The new CI dimension is added as firsaxis - self[label + '_mc'] = (('CI', 'x', time_dim), q) + self[label + '_mc'] = (('CI', 'x', 'time'), q) # Weighted mean of the forward and backward tmpw_var = 1 / ( @@ -4462,7 +4413,7 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): drop_axis=avg_axis, # avg dimensions are dropped new_axis=0, dtype=float) # The new CI dimension is added as first axis - self["tmpw" + '_mc'] = (('CI', 'x', time_dim), q2) + self["tmpw" + '_mc'] = (('CI', 'x', 'time'), q2) # Clean up the garbage. All arrays with a Monte Carlo dimension. if mc_remove_set_flag: @@ -4677,55 +4628,53 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): reduce_memory_usage=reduce_memory_usage, **kwargs) - time_dim = self.get_time_dim(data_var_key='st') - for label in ["tmpf", "tmpb"]: if ci_avg_time_sel is not None: - time_dim2 = time_dim + '_avg' + time_dim2 = 'time' + '_avg' x_dim2 = 'x' self.coords[time_dim2] = ( (time_dim2,), - self[time_dim].sel(**{ - time_dim: ci_avg_time_sel}).data) + self['time'].sel(**{ + 'time': ci_avg_time_sel}).data) self[label + '_avgsec'] = ( ('x', time_dim2), self[label].sel(**{ - time_dim: ci_avg_time_sel}).data) + 'time': ci_avg_time_sel}).data) self[label + '_mc_set'] = ( ('mc', 'x', time_dim2), self[label + '_mc_set'].sel(**{ - time_dim: ci_avg_time_sel}).data) + 'time': ci_avg_time_sel}).data) elif ci_avg_time_isel is not None: - time_dim2 = time_dim + '_avg' + time_dim2 = 'time' + '_avg' x_dim2 = 'x' self.coords[time_dim2] = ( (time_dim2,), - self[time_dim].isel(**{ - time_dim: ci_avg_time_isel}).data) + self['time'].isel(**{ + 'time': ci_avg_time_isel}).data) self[label + '_avgsec'] = ( ('x', time_dim2), self[label].isel(**{ - time_dim: ci_avg_time_isel}).data) + 'time': ci_avg_time_isel}).data) self[label + '_mc_set'] = ( ('mc', 'x', time_dim2), self[label + '_mc_set'].isel(**{ - time_dim: ci_avg_time_isel}).data) + 'time': ci_avg_time_isel}).data) elif ci_avg_x_sel is not None: - time_dim2 = time_dim + time_dim2 = 'time' x_dim2 = 'x_avg' self.coords[x_dim2] = ( (x_dim2,), self.x.sel(x=ci_avg_x_sel).data) self[label + '_avgsec'] = ( - (x_dim2, time_dim), self[label].sel(x=ci_avg_x_sel).data) + (x_dim2, 'time'), self[label].sel(x=ci_avg_x_sel).data) self[label + '_mc_set'] = ( - ('mc', x_dim2, time_dim), + ('mc', x_dim2, 'time'), self[label + '_mc_set'].sel(x=ci_avg_x_sel).data) elif ci_avg_x_isel is not None: - time_dim2 = time_dim + time_dim2 = 'time' x_dim2 = 'x_avg' self.coords[x_dim2] = ( (x_dim2,), self.x.isel(x=ci_avg_x_isel).data) @@ 
-4738,7 +4687,7 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): else: self[label + '_avgsec'] = self[label] x_dim2 = 'x' - time_dim2 = time_dim + time_dim2 = 'time' memchunk = self[label + '_mc_set'].chunks @@ -5047,8 +4996,6 @@ def temperature_residuals(self, label=None, sections=None): resid_da : xarray.DataArray The residuals as DataArray """ - time_dim = self.get_time_dim(data_var_key=label) - resid_temp = self.ufunc_per_section( sections=sections, label=label, temp_err=True, calc_per='all') resid_x = self.ufunc_per_section( @@ -5061,10 +5008,10 @@ def temperature_residuals(self, label=None, sections=None): resid_sorted[resid_ix, :] = resid_temp resid_da = xr.DataArray( data=resid_sorted, - dims=('x', time_dim), + dims=('x', 'time'), coords={ 'x': self.x, - time_dim: self.time}) + 'time': self.time}) return resid_da def ufunc_per_section( @@ -5256,7 +5203,7 @@ def func(a): if subtract_from_label: # calculate std wrt other series # check_dims(self, [subtract_from_label], - # correct_dims=('x', time_dim)) + # correct_dims=('x', 'time')) assert not temp_err @@ -5328,12 +5275,12 @@ class ParameterIndexDoubleEnded: # store calibration parameters in DataStore self["gamma"] = (tuple(), gamma) self["alpha"] = (('x',), alpha) - self["df"] = ((time_dim,), d_fw) - self["db"] = ((time_dim,), d_bw) + self["df"] = (('time',), d_fw) + self["db"] = (('time',), d_bw) if nta > 0: ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') - self['talpha_fw'] = ((time_dim, 'trans_att'), ta[:, 0, :]) - self['talpha_bw'] = ((time_dim, 'trans_att'), ta[:, 1, :]) + self['talpha_fw'] = (('time', 'trans_att'), ta[:, 0, :]) + self['talpha_bw'] = (('time', 'trans_att'), ta[:, 1, :]) """ def __init__(self, nt, nx, nta, fix_gamma=False, fix_alpha=False): @@ -5414,7 +5361,7 @@ def taf(self): """ Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') - self['talpha_fw'] = ((time_dim, 'trans_att'), ta[:, 0, :]) + self['talpha_fw'] = (('time', 'trans_att'), ta[:, 0, :]) """ return self.ta[:, 0, :].flatten(order="C") @@ -5423,7 +5370,7 @@ def tab(self): """ Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') - self['talpha_bw'] = ((time_dim, 'trans_att'), ta[:, 1, :]) + self['talpha_bw'] = (('time', 'trans_att'), ta[:, 1, :]) """ return self.ta[:, 1, :].flatten(order="C") @@ -5591,7 +5538,7 @@ def c(self): def taf(self): """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" # ta = p_val[-nt * nta:].reshape((nt, nta), order='F') - # self["talpha"] = ((time_dim, 'trans_att'), ta[:, :]) + # self["talpha"] = (('time', 'trans_att'), ta[:, :]) if self.includes_alpha: return np.arange(1 + self.nx + self.nt, 1 + self.nx + self.nt + self.nt * self.nta ).reshape((self.nt, self.nta), order='F') diff --git a/src/dtscalibration/plot.py b/src/dtscalibration/plot.py index 27aedca6..32f47a6b 100755 --- a/src/dtscalibration/plot.py +++ b/src/dtscalibration/plot.py @@ -15,8 +15,6 @@ def plot_residuals_reference_sections( units="", fig_kwargs=None, method="split", - time_dim="time", - x_dim="x", cmap="RdBu_r", ): """ @@ -44,10 +42,6 @@ def plot_residuals_reference_sections( whitespace. 'single' will use the previous method, where all sections are in one plot. 
diff --git a/src/dtscalibration/plot.py b/src/dtscalibration/plot.py
index 27aedca6..32f47a6b 100755
--- a/src/dtscalibration/plot.py
+++ b/src/dtscalibration/plot.py
@@ -15,8 +15,6 @@ def plot_residuals_reference_sections(
     units="",
     fig_kwargs=None,
     method="split",
-    time_dim="time",
-    x_dim="x",
     cmap="RdBu_r",
 ):
     """
@@ -44,10 +42,6 @@ def plot_residuals_reference_sections(
         whitespace. 'single' will use the previous
         method, where all sections are in one plot.
-    time_dim : str
-        Name of the time dimension to average/take the variance of
-    x_dim : str
-        Name of the spatial dimension
     cmap : str
         Matplotlib colormap to use for the residual plot. By default it will
         use a diverging colormap.
@@ -182,10 +176,10 @@ def plot_residuals_reference_sections(
             )
             section_axes[ii].set_ylabel("")

-            resid.sel(x=section_list[ii]).std(dim=time_dim).plot(
-                ax=section_ax_avg[ii], y=x_dim, c="blue")
-            resid.sel(x=section_list[ii]).mean(dim=time_dim).plot(
-                ax=section_ax_avg[ii], y=x_dim, c="orange")
+            resid.sel(x=section_list[ii]).std(dim='time').plot(
+                ax=section_ax_avg[ii], y="x", c="blue")
+            resid.sel(x=section_list[ii]).mean(dim='time').plot(
+                ax=section_ax_avg[ii], y="x", c="orange")
             section_ax_avg[ii].axvline(
                 0, linestyle="-", c="black", linewidth=0.8)
             section_ax_avg[ii].set_ylabel("")
@@ -210,8 +204,8 @@ def plot_residuals_reference_sections(
                            - 1].set_ylabel("x (m)")

     # plot the x ax avg
-    resid.std(dim=x_dim).plot(ax=x_ax_avg, c="blue")
-    resid.mean(dim=x_dim).plot(ax=x_ax_avg, c="orange")
+    resid.std(dim="x").plot(ax=x_ax_avg, c="blue")
+    resid.mean(dim="x").plot(ax=x_ax_avg, c="orange")
     x_ax_avg.axhline(0, linestyle="-", c="black", linewidth=0.8)
     x_ax_avg.set_xlabel("")
     x_ax_avg.set_ylabel(units)
@@ -251,8 +245,6 @@ def plot_residuals_reference_sections_single(
     robust=True,
     units="",
     fig_kwargs=None,
-    time_dim="time",
-    x_dim="x",
 ):
     """
     Analyze the residuals of the reference sections, between the Stokes
@@ -274,9 +266,5 @@ def plot_residuals_reference_sections_single(
         The sections obj is normally used to set DataStore.sections, now is
         used to obtain the section names to plot the names on top of the
         residuals.
-    time_dim : str
-        Name of the time dimension to average/take the variance of
-    x_dim : str
-        Name of the spatial dimension

     Returns
     -------
@@ -313,8 +305,8 @@ def plot_residuals_reference_sections_single(
     x_ax_avg = fig.add_subplot(grid[:2, 2:-1])  # , sharex=main_ax
     legend_ax = fig.add_subplot(grid[:2, :2], xticklabels=[], yticklabels=[])
     cbar_ax = fig.add_subplot(grid[2:, -1], xticklabels=[], yticklabels=[])
-    if np.issubdtype(resid[time_dim].dtype, float) or np.issubdtype(
-            resid[time_dim].dtype, int):
+    if np.issubdtype(resid['time'].dtype, float) or np.issubdtype(
+            resid['time'].dtype, int):
         resid.plot.imshow(
             ax=main_ax,
             cbar_ax=cbar_ax,
@@ -332,8 +324,8 @@ def plot_residuals_reference_sections_single(

     # x_ax_avg
     x_ax_avg2 = x_ax_avg.twinx()
-    resid.std(dim=x_dim).plot(ax=x_ax_avg2, c="blue")
-    resid.mean(dim=x_dim).plot(ax=x_ax_avg2, c="orange")
+    resid.std(dim="x").plot(ax=x_ax_avg2, c="blue")
+    resid.mean(dim="x").plot(ax=x_ax_avg2, c="orange")
     x_ax_avg2.axhline(0, linestyle="-", c="black", linewidth=0.8)
     if plot_avg_std is not None:
         x_ax_avg2.axhline(plot_avg_std, linestyle="--", c="blue")
@@ -344,11 +336,11 @@ def plot_residuals_reference_sections_single(
     x_ax_avg2.set_ylabel(units)

     # y_ax_avg
-    dp = resid.std(dim=time_dim)
+    dp = resid.std(dim='time')
     x = dp.values
     y = dp.x
     y_ax_avg.plot(x, y, c="blue")
-    dp = resid.mean(dim=time_dim)
+    dp = resid.mean(dim='time')
     x = dp.values
     y = dp.x
     y_ax_avg.plot(x, y, c="orange")
@@ -397,7 +389,6 @@ def plot_accuracy(
     title=None,
     plot_names=True,
     sections=None,
-    x_dim="x",
 ):
     """
     Analyze the residuals of the reference sections, between the Stokes
@@ -419,7 +410,5 @@ def plot_accuracy(
         The sections obj is normally used to set DataStore.sections, now is
         used to obtain the section names to plot the names on top of the
         residuals.
-    x_dim : str
-        Name of the spatial dimension

     Returns
     -------
@@ -552,7 +543,6 @@ def plot_sigma_report(
         temp_var_label
         itimes
     """
-    time_dim = ds.get_time_dim(data_var_key=temp_label)
     assert "CI" not in ds[temp_label].dims, "use other plot report function"

     fig, (ax1, ax2) = plt.subplots(nrows=2, sharex=True, figsize=(12, 8))
@@ -565,8 +555,8 @@ def plot_sigma_report(
         temp = ds[temp_label].isel(time=itimes)
         stds = np.sqrt(ds[temp_var_acc_label].isel(time=itimes)).compute()
     else:
-        temp = ds[temp_label].mean(dim=time_dim).compute()
-        stds = np.sqrt(ds[temp_var_acc_label]).mean(dim=time_dim).compute()
+        temp = ds[temp_label].mean(dim='time').compute()
+        stds = np.sqrt(ds[temp_var_acc_label]).mean(dim='time').compute()

     for lbl, clr in zip([2.0, 1.0], colors):
         y1 = temp - lbl * stds
@@ -620,7 +610,7 @@ def plot_sigma_report(
             v_sei = v_sei.compute()

         if itimes:
-            val = ds[k].mean(dim=time_dim)
+            val = ds[k].mean(dim='time')
         else:
             val = ds[k].isel(time=itimes)

@@ -673,7 +663,7 @@ def plot_sigma_report(
     if itimes:
         stds_prec = np.sqrt(ds[temp_var_prec_label].isel(time=itimes))
     else:
-        stds_prec = np.sqrt(ds[temp_var_prec_label]).mean(dim=time_dim)
+        stds_prec = np.sqrt(ds[temp_var_prec_label]).mean(dim='time')
     stds_prec.plot(
         ax=ax2, c="black", label="Projected precision", **line_kwargs)
From 97f92ef16e723f5368f02e034739ad9e8e7c7a22 Mon Sep 17 00:00:00 2001
From: Bas des Tombe
Date: Thu, 27 Jul 2023 20:24:30 +0200
Subject: [PATCH 11/14] removed final bits of get_time_dim

---
 src/dtscalibration/calibrate_utils.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/dtscalibration/calibrate_utils.py b/src/dtscalibration/calibrate_utils.py
index 377935f9..85b3ef14 100644
--- a/src/dtscalibration/calibrate_utils.py
+++ b/src/dtscalibration/calibrate_utils.py
@@ -633,7 +633,6 @@ def calibration_double_ended_solver(  # noqa: MC0001
     # put E outside of reference section in solution
     # concatenating makes a copy of the data instead of using a pointer
     ds_sub = ds[['st', 'ast', 'rst', 'rast', 'trans_att']]
-    'time' = ds_sub.get_time_dim()
     ds_sub['df'] = (('time',), p_sol[1:1 + nt])
     ds_sub['df_var'] = (('time',), p_var[1:1 + nt])
     ds_sub['db'] = (('time',), p_sol[1 + nt:1 + 2 * nt])
@@ -1238,8 +1237,6 @@ def calc_alpha_double(
     assert ix_alpha_is_zero >= 0, 'Define ix_alpha_is_zero' + \
         str(ix_alpha_is_zero)

-    'time' = ds.get_time_dim()
-
     if st_var is not None:
         if callable(st_var):
             st_var_val = st_var(ds.st)

From 63fe7678c33f73200cdbcae7cda2c57e5bfa5924 Mon Sep 17 00:00:00 2001
From: Bas des Tombe
Date: Thu, 27 Jul 2023 20:30:14 +0200
Subject: [PATCH 12/14] Update CHANGELOG.rst

---
 CHANGELOG.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 93592290..2b208300 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -12,6 +12,10 @@ Bug fixes
 * Single-ended measurements with `fix_alpha` failed due to a bug introduced in
   v2.0.0 ([#173](https://github.com/dtscalibration/python-dts-calibration/pull/173)).

+Introduced limitations
+
+* Standardized parameter names. Reduced the freedom in choosing parameter names and dimension names in favor of simplifying the code.
+
 2.0.0 (2023-05-24)
 ------------------
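The CHANGELOG entry above also covers the calibration output names: results are now always stored under fixed keys such as 'df', 'db', 'df_var' and 'db_var' along the 'time' dimension. A hedged sketch of that layout, with made-up values:

    import numpy as np
    import xarray as xr

    nt = 3  # number of time steps, illustrative only
    ds = xr.Dataset(coords={"time": np.arange(nt)})

    # Fixed parameter names: forward/backward offsets and their variances,
    # one value per time step.
    ds["df"] = (("time",), np.full(nt, 1.45))
    ds["db"] = (("time",), np.full(nt, 1.47))
    ds["df_var"] = (("time",), np.full(nt, 1e-4))
    ds["db_var"] = (("time",), np.full(nt, 1e-4))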
From 26223aabdc55e69b962b0d2b99c44a012c9037d7 Mon Sep 17 00:00:00 2001
From: Bas des Tombe
Date: Thu, 27 Jul 2023 23:02:00 +0200
Subject: [PATCH 13/14] Error in solving merge conflicts

---
 src/dtscalibration/datastore_utils.py | 2 --
 tests/test_datastore.py               | 2 --
 2 files changed, 4 deletions(-)

diff --git a/src/dtscalibration/datastore_utils.py b/src/dtscalibration/datastore_utils.py
index 6a4fc458..f5cb92c7 100644
--- a/src/dtscalibration/datastore_utils.py
+++ b/src/dtscalibration/datastore_utils.py
@@ -246,8 +246,6 @@ def merge_double_ended_times(ds_fw, ds_bw, verify_timedeltas=True, verbose=True)

     # throw out if dt differs from its neighbors
     if verify_timedeltas:
-        dt = (ds_bw.isel(time=iuse_chbw).time.values - ds_fw.isel(time=iuse_chfw).time.values) /\
-            np.array(1000000000, dtype='timedelta64[ns]')
         dt = (
             (ds_bw.isel(time=iuse_chbw).time.values - ds_fw.isel(time=iuse_chfw).time.values)
             / np.timedelta64(1, "s"))
diff --git a/tests/test_datastore.py b/tests/test_datastore.py
index 5b26c500..8df6a0a6 100644
--- a/tests/test_datastore.py
+++ b/tests/test_datastore.py
@@ -556,8 +556,6 @@ def read_data_from_fp_numpy(fp):

 def test_resample_datastore():
-    import xarray as xr
-
     filepath = data_dir_single_ended
     ds = read_silixa_files(
         directory=filepath, timezone_netcdf='UTC', file_ext='*.xml')
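The retained dt computation divides a timedelta64 array by np.timedelta64(1, "s") to obtain floating-point seconds, replacing the removed division by an np.array of nanoseconds. A standalone sketch with synthetic timestamps:

    import numpy as np

    t_fw = np.array(["2023-07-21T10:00:00", "2023-07-21T10:00:30"], dtype="datetime64[ns]")
    t_bw = np.array(["2023-07-21T10:00:15", "2023-07-21T10:00:45"], dtype="datetime64[ns]")

    # Dividing one timedelta64 by another yields plain floats, here in seconds.
    dt = (t_bw - t_fw) / np.timedelta64(1, "s")  # -> array([15., 15.])

    # merge_double_ended_times() then requires all dt to lie within 1.5 s
    # of one another.
    assert np.allclose(dt, dt[0], atol=1.5, rtol=0.0)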
From d7026b8991f4a34b0dba770322d43ce44e523bbe Mon Sep 17 00:00:00 2001
From: Bas des Tombe
Date: Fri, 28 Jul 2023 09:44:17 +0200
Subject: [PATCH 14/14] Cleaning up arguments for calc_alpha_double()

---
 src/dtscalibration/calibrate_utils.py | 44 +++++++++++----------------
 src/dtscalibration/datastore.py       | 16 ++++------
 2 files changed, 24 insertions(+), 36 deletions(-)

diff --git a/src/dtscalibration/calibrate_utils.py b/src/dtscalibration/calibrate_utils.py
index 85b3ef14..bf29d939 100644
--- a/src/dtscalibration/calibrate_utils.py
+++ b/src/dtscalibration/calibrate_utils.py
@@ -390,12 +390,12 @@ def calibration_double_ended_solver(  # noqa: MC0001
     # Calculate E as initial estimate for the E calibration.
     # Does not require ta to be passed on
     E_all_guess, E_all_var_guess = calc_alpha_double(
-        'guess',
-        ds,
-        st_var,
-        ast_var,
-        rst_var,
-        rast_var,
+        mode='guess',
+        ds=ds,
+        st_var=st_var,
+        ast_var=ast_var,
+        rst_var=rst_var,
+        rast_var=rast_var,
         ix_alpha_is_zero=ix_alpha_is_zero)

     df_est, db_est = calc_df_db_double_est(ds, ix_alpha_is_zero, 485.)
@@ -638,16 +638,12 @@ def calibration_double_ended_solver(  # noqa: MC0001
     ds_sub['db'] = (('time',), p_sol[1 + nt:1 + 2 * nt])
     ds_sub['db_var'] = (('time',), p_var[1 + nt:1 + 2 * nt])
     E_all_exact, E_all_var_exact = calc_alpha_double(
-        'exact',
-        ds_sub,
-        st_var,
-        ast_var,
-        rst_var,
-        rast_var,
-        'df',
-        'db',
-        'df_var',
-        'db_var',
+        mode='exact',
+        ds=ds_sub,
+        st_var=st_var,
+        ast_var=ast_var,
+        rst_var=rst_var,
+        rast_var=rast_var,
         ix_alpha_is_zero=ix_alpha_is_zero,
         talpha_fw=talpha_fw,
         talpha_bw=talpha_bw,
@@ -1222,10 +1218,6 @@ def calc_alpha_double(
         ast_var=None,
         rst_var=None,
         rast_var=None,
-        D_F_label=None,
-        D_B_label=None,
-        D_F_var_label=None,
-        D_B_var_label=None,
         ix_alpha_is_zero=-1,
         talpha_fw=None,
         talpha_bw=None,
@@ -1268,10 +1260,10 @@ def calc_alpha_double(
             A = (i_bw - i_fw) / 2

         elif mode == 'exact':
-            D_F = ds[D_F_label]
-            D_B = ds[D_B_label]
-            D_F_var = ds[D_F_var_label]
-            D_B_var = ds[D_B_var_label]
+            D_F = ds["df"]
+            D_B = ds["db"]
+            D_F_var = ds["df_var"]
+            D_B_var = ds["db_var"]

             if ds.trans_att.size > 0:
                 # Can be improved by including covariances. That reduces the
@@ -1315,8 +1307,8 @@ def calc_alpha_double(
         if mode == 'guess':
             A = (i_bw - i_fw) / 2
         elif mode == 'exact':
-            D_F = ds[D_F_label]
-            D_B = ds[D_B_label]
+            D_F = ds["df"]
+            D_B = ds["db"]
             A = (i_bw - i_fw) / 2 + (D_B - D_F) / 2

         E_var = A.var(dim='time')
diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py
index 7d751e45..6e1b0579 100644
--- a/src/dtscalibration/datastore.py
+++ b/src/dtscalibration/datastore.py
@@ -2799,16 +2799,12 @@ def calibration_double_ended(
             talpha_bw_var = None

         E_all_exact, E_all_var_exact = calc_alpha_double(
-            "exact",
-            ds_sub,
-            st_var,
-            ast_var,
-            rst_var,
-            rast_var,
-            "df",
-            "db",
-            "df_var",
-            "db_var",
+            mode="exact",
+            ds=ds_sub,
+            st_var=st_var,
+            ast_var=ast_var,
+            rst_var=rst_var,
+            rast_var=rast_var,
             ix_alpha_is_zero=ix_sec[0],
             talpha_fw=talpha_fw,
             talpha_bw=talpha_bw,
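The label arguments dropped above are now hardcoded inside calc_alpha_double(), and every remaining argument is passed by keyword. A generic sketch of why that style is safer for long signatures (the function below is a stand-in, not the library's):

    def solve(mode, ds=None, st_var=None, ast_var=None, ix_alpha_is_zero=-1):
        """Stand-in with a calc_alpha_double()-like signature."""
        return mode, ix_alpha_is_zero

    # Keyword calls stay correct when parameters are added, removed, or
    # reordered, whereas the old positional style silently shifted every
    # argument after the change.
    result = solve(mode="exact", ds=None, st_var=0.5, ast_var=0.5, ix_alpha_is_zero=0)
    assert result == ("exact", 0)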