diff --git a/.gitignore b/.gitignore index f06bd184..dd531a85 100644 --- a/.gitignore +++ b/.gitignore @@ -57,6 +57,7 @@ output/*/index.html # Sphinx docs/_build +**/generated/**/* .DS_Store *~ diff --git a/README.rst b/README.rst index 3c645cb9..03f01ee2 100644 --- a/README.rst +++ b/README.rst @@ -93,6 +93,7 @@ Devices currently supported Documentation ============= +* A full calibration procedure for single-ended setups is presented in notebook `07Calibrate_single_ended.ipynb `_ and for double-ended setups in `08Calibrate_double_ended.ipynb `_. * Documentation at https://python-dts-calibration.readthedocs.io/ . * Example notebooks that work within the browser can be viewed `here `_. diff --git a/docs/api/dtscalibration.DataStore.rst b/docs/api/dtscalibration.DataStore.rst deleted file mode 100644 index 8ed756d9..00000000 --- a/docs/api/dtscalibration.DataStore.rst +++ /dev/null @@ -1,7 +0,0 @@ -DataStore -========= - -.. currentmodule:: dtscalibration - -.. autoclass:: DataStore - :show-inheritance: diff --git a/docs/api/dtscalibration.check_dims.rst b/docs/api/dtscalibration.check_dims.rst deleted file mode 100644 index 1dc68e7e..00000000 --- a/docs/api/dtscalibration.check_dims.rst +++ /dev/null @@ -1,6 +0,0 @@ -check_dims -========== - -.. currentmodule:: dtscalibration - -.. autofunction:: check_dims diff --git a/docs/api/dtscalibration.check_timestep_allclose.rst b/docs/api/dtscalibration.check_timestep_allclose.rst deleted file mode 100644 index d655bfe4..00000000 --- a/docs/api/dtscalibration.check_timestep_allclose.rst +++ /dev/null @@ -1,6 +0,0 @@ -check_timestep_allclose -======================= - -.. currentmodule:: dtscalibration - -.. autofunction:: check_timestep_allclose diff --git a/docs/api/dtscalibration.get_netcdf_encoding.rst b/docs/api/dtscalibration.get_netcdf_encoding.rst deleted file mode 100644 index 820b0a0f..00000000 --- a/docs/api/dtscalibration.get_netcdf_encoding.rst +++ /dev/null @@ -1,6 +0,0 @@ -get_netcdf_encoding -=================== - -.. currentmodule:: dtscalibration - -.. autofunction:: get_netcdf_encoding diff --git a/docs/api/dtscalibration.merge_double_ended.rst b/docs/api/dtscalibration.merge_double_ended.rst deleted file mode 100644 index c8e512b5..00000000 --- a/docs/api/dtscalibration.merge_double_ended.rst +++ /dev/null @@ -1,6 +0,0 @@ -merge_double_ended -================== - -.. currentmodule:: dtscalibration - -.. autofunction:: merge_double_ended diff --git a/docs/api/dtscalibration.open_datastore.rst b/docs/api/dtscalibration.open_datastore.rst deleted file mode 100644 index 95a987bd..00000000 --- a/docs/api/dtscalibration.open_datastore.rst +++ /dev/null @@ -1,6 +0,0 @@ -open_datastore -============== - -.. currentmodule:: dtscalibration - -.. autofunction:: open_datastore diff --git a/docs/api/dtscalibration.open_mf_datastore.rst b/docs/api/dtscalibration.open_mf_datastore.rst deleted file mode 100644 index edab4ebe..00000000 --- a/docs/api/dtscalibration.open_mf_datastore.rst +++ /dev/null @@ -1,6 +0,0 @@ -open_mf_datastore -================= - -.. currentmodule:: dtscalibration - -.. autofunction:: open_mf_datastore diff --git a/docs/api/dtscalibration.plot_accuracy.rst b/docs/api/dtscalibration.plot_accuracy.rst deleted file mode 100644 index a2e41fc2..00000000 --- a/docs/api/dtscalibration.plot_accuracy.rst +++ /dev/null @@ -1,6 +0,0 @@ -plot_accuracy -============= - -.. currentmodule:: dtscalibration - -.. 
autofunction:: plot_accuracy diff --git a/docs/api/dtscalibration.plot_location_residuals_double_ended.rst b/docs/api/dtscalibration.plot_location_residuals_double_ended.rst deleted file mode 100644 index ad0d16db..00000000 --- a/docs/api/dtscalibration.plot_location_residuals_double_ended.rst +++ /dev/null @@ -1,6 +0,0 @@ -plot_location_residuals_double_ended -==================================== - -.. currentmodule:: dtscalibration - -.. autofunction:: plot_location_residuals_double_ended diff --git a/docs/api/dtscalibration.plot_residuals_reference_sections.rst b/docs/api/dtscalibration.plot_residuals_reference_sections.rst deleted file mode 100644 index 45f2529b..00000000 --- a/docs/api/dtscalibration.plot_residuals_reference_sections.rst +++ /dev/null @@ -1,6 +0,0 @@ -plot_residuals_reference_sections -================================= - -.. currentmodule:: dtscalibration - -.. autofunction:: plot_residuals_reference_sections diff --git a/docs/api/dtscalibration.plot_residuals_reference_sections_single.rst b/docs/api/dtscalibration.plot_residuals_reference_sections_single.rst deleted file mode 100644 index 725d77a0..00000000 --- a/docs/api/dtscalibration.plot_residuals_reference_sections_single.rst +++ /dev/null @@ -1,6 +0,0 @@ -plot_residuals_reference_sections_single -======================================== - -.. currentmodule:: dtscalibration - -.. autofunction:: plot_residuals_reference_sections_single diff --git a/docs/api/dtscalibration.plot_sigma_report.rst b/docs/api/dtscalibration.plot_sigma_report.rst deleted file mode 100644 index b047cdeb..00000000 --- a/docs/api/dtscalibration.plot_sigma_report.rst +++ /dev/null @@ -1,6 +0,0 @@ -plot_sigma_report -================= - -.. currentmodule:: dtscalibration - -.. autofunction:: plot_sigma_report diff --git a/docs/api/dtscalibration.read_apsensing_files.rst b/docs/api/dtscalibration.read_apsensing_files.rst deleted file mode 100644 index 04bd67d6..00000000 --- a/docs/api/dtscalibration.read_apsensing_files.rst +++ /dev/null @@ -1,6 +0,0 @@ -read_apsensing_files -==================== - -.. currentmodule:: dtscalibration - -.. autofunction:: read_apsensing_files diff --git a/docs/api/dtscalibration.read_sensornet_files.rst b/docs/api/dtscalibration.read_sensornet_files.rst deleted file mode 100644 index 541f00cb..00000000 --- a/docs/api/dtscalibration.read_sensornet_files.rst +++ /dev/null @@ -1,6 +0,0 @@ -read_sensornet_files -==================== - -.. currentmodule:: dtscalibration - -.. autofunction:: read_sensornet_files diff --git a/docs/api/dtscalibration.read_sensortran_files.rst b/docs/api/dtscalibration.read_sensortran_files.rst deleted file mode 100644 index 35d92c94..00000000 --- a/docs/api/dtscalibration.read_sensortran_files.rst +++ /dev/null @@ -1,6 +0,0 @@ -read_sensortran_files -===================== - -.. currentmodule:: dtscalibration - -.. autofunction:: read_sensortran_files diff --git a/docs/api/dtscalibration.read_silixa_files.rst b/docs/api/dtscalibration.read_silixa_files.rst deleted file mode 100644 index d5adee72..00000000 --- a/docs/api/dtscalibration.read_silixa_files.rst +++ /dev/null @@ -1,6 +0,0 @@ -read_silixa_files -================= - -.. currentmodule:: dtscalibration - -.. autofunction:: read_silixa_files diff --git a/docs/api/dtscalibration.shift_double_ended.rst b/docs/api/dtscalibration.shift_double_ended.rst deleted file mode 100644 index 0a879ea1..00000000 --- a/docs/api/dtscalibration.shift_double_ended.rst +++ /dev/null @@ -1,6 +0,0 @@ -shift_double_ended -================== - -.. 
currentmodule:: dtscalibration - -.. autofunction:: shift_double_ended diff --git a/docs/api/dtscalibration.suggest_cable_shift_double_ended.rst b/docs/api/dtscalibration.suggest_cable_shift_double_ended.rst deleted file mode 100644 index 14a135c0..00000000 --- a/docs/api/dtscalibration.suggest_cable_shift_double_ended.rst +++ /dev/null @@ -1,6 +0,0 @@ -suggest_cable_shift_double_ended -================================ - -.. currentmodule:: dtscalibration - -.. autofunction:: suggest_cable_shift_double_ended diff --git a/docs/conf.py b/docs/conf.py index 6e28eadd..d69bb728 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,7 +1,11 @@ -# -*- coding: utf-8 -*- -from datetime import date import os +from datetime import date + +from xarray import Dataset # noqa: E402 +import sphinx_autosummary_accessors +import dtscalibration # noqa: E402 +from dtscalibration.dts_accessor import DtsAccessor # noqa: E402 extensions = [ "sphinx_rtd_theme", @@ -15,6 +19,7 @@ "sphinx.ext.todo", "sphinx.ext.viewcode", "sphinx.ext.autosectionlabel", + "sphinx_autosummary_accessors", "nbsphinx", "sphinx.ext.mathjax", "sphinx.ext.intersphinx", @@ -41,11 +46,11 @@ project = "dtscalibration" year = str(date.today().year) author = "Bas des Tombe and Bart Schilperoort" -copyright = "{0}, {1}".format(year, author) +copyright = f"{year}, {author}" version = release = "2.0.0" pygments_style = "trac" -templates_path = ["."] +templates_path = [".", sphinx_autosummary_accessors.templates_path] extlinks = { "issue": ( "https://github.com/dtscalibration/python-dts-calibration/issues" "/%s", @@ -63,7 +68,7 @@ html_sidebars = { "**": ["searchbox.html", "globaltoc.html", "sourcelink.html"], } -html_short_title = "%s-%s" % (project, version) +html_short_title = f"{project}-{version}" napoleon_use_ivar = True napoleon_use_rtype = False diff --git a/docs/notebooks/01Load_xml_measurement_files.ipynb b/docs/notebooks/01Load_xml_measurement_files.ipynb index 04611154..506249f6 100644 --- a/docs/notebooks/01Load_xml_measurement_files.ipynb +++ b/docs/notebooks/01Load_xml_measurement_files.ipynb @@ -7,13 +7,13 @@ "source": [ "# 1. Load your first measurement files\n", "\n", - "The goal of this notebook is to show the different options of loading measurements from raw DTS files. These files are loaded into a `DataStore` object. This object has various methods for calibration, plotting. The current supported devices are:\n", + "The goal of this notebook is to show the different options of loading measurements from raw DTS files. These files are loaded into a `xarray.Dataset` object. This object has various methods for calibration, plotting. Both single-ended and double-ended measurements are supported. The current supported devices are:\n", "- Silixa\n", "- Sensornet\n", + "- AP Sensing\n", + "- Sensortran\n", "\n", - "This example loads Silixa files. Both single-ended and double-ended measurements are supported. The first step is to load the correct read routine from `dtscalibration`.\n", - "- Silixa -> `dtscalibration.read_silixa_files`\n", - "- Sensornet -> `dtscalibration.read_sensornet_files`" + "See notebooks A2, A3, and A4." 
] }, { @@ -125,20 +125,6 @@ "source": [ "print(ds)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -157,7 +143,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.10.10" } }, "nbformat": 4, diff --git a/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb b/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb deleted file mode 100644 index ad563848..00000000 --- a/docs/notebooks/02Common_DataStore_functions_slice_mean_max_std_resample.ipynb +++ /dev/null @@ -1,373 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 2. Common DataStore functions\n", - "Examples of how to do some of the more commonly used functions:\n", - "\n", - "1. mean, min, max, std\n", - "2. Selecting\n", - "3. Selecting by index\n", - "4. Downsample (time dimension)\n", - "5. Upsample / Interpolation (length and time dimension)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:57.302425Z", - "iopub.status.busy": "2022-04-06T08:08:57.301918Z", - "iopub.status.idle": "2022-04-06T08:08:58.945453Z", - "shell.execute_reply": "2022-04-06T08:08:58.944983Z" - } - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "from dtscalibration import read_silixa_files" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First we load the raw measurements into a `DataStore` object, as we learned from the previous notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:58.948063Z", - "iopub.status.busy": "2022-04-06T08:08:58.947893Z", - "iopub.status.idle": "2022-04-06T08:08:59.145387Z", - "shell.execute_reply": "2022-04-06T08:08:59.144710Z" - } - }, - "outputs": [], - "source": [ - "filepath = os.path.join(\"..\", \"..\", \"tests\", \"data\", \"single_ended\")\n", - "\n", - "ds = read_silixa_files(directory=filepath, timezone_netcdf=\"UTC\", file_ext=\"*.xml\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 0 Access the data\n", - "The implemented read routines try to read as much data from the raw DTS files as possible. Usually they would have coordinates (time and space) and Stokes and anti Stokes measurements. We can access the data by key. It is presented as a DataArray. 
More examples are found at http://xarray.pydata.org/en/stable/indexing.html" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:59.171097Z", - "iopub.status.busy": "2022-04-06T08:08:59.170926Z", - "iopub.status.idle": "2022-04-06T08:08:59.201341Z", - "shell.execute_reply": "2022-04-06T08:08:59.200765Z" - } - }, - "outputs": [], - "source": [ - "ds[\"st\"] # is the data stored, presented as a DataArray" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:59.203775Z", - "iopub.status.busy": "2022-04-06T08:08:59.203591Z", - "iopub.status.idle": "2022-04-06T08:08:59.265177Z", - "shell.execute_reply": "2022-04-06T08:08:59.264679Z" - } - }, - "outputs": [], - "source": [ - "ds[\"tmp\"].plot(figsize=(12, 8));" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1 mean, min, max\n", - "The first argument is the dimension. The function is taken along that dimension. `dim` can be any dimension (e.g., `time`, `x`). The returned `DataStore` does not contain that dimension anymore.\n", - "\n", - "Normally, you would like to keep the attributes (the informative texts from the loaded files), so set `keep_attrs` to `True`. They don't take any space compared to your Stokes data, so keep them.\n", - "\n", - "Note that also the sections are stored as attribute. If you delete the attributes, you would have to redefine the sections." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:59.267698Z", - "iopub.status.busy": "2022-04-06T08:08:59.267493Z", - "iopub.status.idle": "2022-04-06T08:08:59.273319Z", - "shell.execute_reply": "2022-04-06T08:08:59.272886Z" - } - }, - "outputs": [], - "source": [ - "ds_min = ds.mean(\n", - " dim=\"time\", keep_attrs=True\n", - ") # take the minimum of all data variables (e.g., Stokes, Temperature) along the time dimension" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:59.275670Z", - "iopub.status.busy": "2022-04-06T08:08:59.275507Z", - "iopub.status.idle": "2022-04-06T08:08:59.279270Z", - "shell.execute_reply": "2022-04-06T08:08:59.278851Z" - } - }, - "outputs": [], - "source": [ - "ds_max = ds.max(\n", - " dim=\"x\", keep_attrs=True\n", - ") # Take the maximum of all data variables (e.g., Stokes, Temperature) along the x dimension" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:59.281530Z", - "iopub.status.busy": "2022-04-06T08:08:59.281321Z", - "iopub.status.idle": "2022-04-06T08:08:59.287525Z", - "shell.execute_reply": "2022-04-06T08:08:59.286991Z" - } - }, - "outputs": [], - "source": [ - "ds_std = ds.std(\n", - " dim=\"time\", keep_attrs=True\n", - ") # Calculate the standard deviation along the time dimension" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2 Selecting\n", - "What if you would like to get the maximum temperature between $x >= 20$ m and $x < 35$ m over time? We first have to select a section along the cable." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:59.290091Z", - "iopub.status.busy": "2022-04-06T08:08:59.289877Z", - "iopub.status.idle": "2022-04-06T08:08:59.293824Z", - "shell.execute_reply": "2022-04-06T08:08:59.293152Z" - } - }, - "outputs": [], - "source": [ - "section = slice(20.0, 35.0)\n", - "section_of_interest = ds.sel(x=section)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:59.296109Z", - "iopub.status.busy": "2022-04-06T08:08:59.295850Z", - "iopub.status.idle": "2022-04-06T08:08:59.299802Z", - "shell.execute_reply": "2022-04-06T08:08:59.299282Z" - } - }, - "outputs": [], - "source": [ - "section_of_interest_max = section_of_interest.max(dim=\"x\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "What if you would like to have the measurement at approximately $x=20$ m?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:59.302128Z", - "iopub.status.busy": "2022-04-06T08:08:59.301950Z", - "iopub.status.idle": "2022-04-06T08:08:59.306081Z", - "shell.execute_reply": "2022-04-06T08:08:59.305484Z" - } - }, - "outputs": [], - "source": [ - "point_of_interest = ds.sel(x=20.0, method=\"nearest\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3 Selecting by index\n", - "What if you would like to see what the values on the first timestep are? We can use isel (index select) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:59.308603Z", - "iopub.status.busy": "2022-04-06T08:08:59.308281Z", - "iopub.status.idle": "2022-04-06T08:08:59.312353Z", - "shell.execute_reply": "2022-04-06T08:08:59.311877Z" - }, - "scrolled": true - }, - "outputs": [], - "source": [ - "section_of_interest = ds.isel(time=slice(0, 2)) # The first two time steps" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:59.314626Z", - "iopub.status.busy": "2022-04-06T08:08:59.314411Z", - "iopub.status.idle": "2022-04-06T08:08:59.317904Z", - "shell.execute_reply": "2022-04-06T08:08:59.317392Z" - } - }, - "outputs": [], - "source": [ - "section_of_interest = ds.isel(x=0)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4 Downsample (time dimension)\n", - "We currently have measurements at 3 time steps, with 30.001 seconds inbetween. For our next exercise we would like to down sample the measurements to 2 time steps with 47 seconds inbetween. The calculated variances are not valid anymore. We use the function `resample` from xarray." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# We use the logic from xarray to resample. However, it returns an xarray dataset type. Therefore we convert it back to the dtscalibration Datastore type.\n", - "from dtscalibration import DataStore\n", - "\n", - "ds_resampled = DataStore(ds.resample(time=\"47S\").mean())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5 Upsample / Interpolation (length and time dimension)\n", - "So we have measurements every 0.12 cm starting at $x=0$ m. 
What if we would like to change our coordinate system to have a value every 12 cm starting at $x=0.05$ m. We use (linear) interpolation, extrapolation is not supported. The calculated variances are not valid anymore." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:59.344388Z", - "iopub.status.busy": "2022-04-06T08:08:59.344165Z", - "iopub.status.idle": "2022-04-06T08:08:59.353186Z", - "shell.execute_reply": "2022-04-06T08:08:59.352734Z" - } - }, - "outputs": [], - "source": [ - "x_old = ds.x.data\n", - "x_new = x_old[:-1] + 0.05 # no extrapolation\n", - "ds_xinterped = ds.interp(coords={\"x\": x_new})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can do the same in the time dimension" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:08:59.355702Z", - "iopub.status.busy": "2022-04-06T08:08:59.355479Z", - "iopub.status.idle": "2022-04-06T08:08:59.371585Z", - "shell.execute_reply": "2022-04-06T08:08:59.371063Z" - } - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "time_old = ds.time.data\n", - "time_new = time_old + np.timedelta64(10, \"s\")\n", - "ds_tinterped = ds.interp(coords={\"time\": time_new})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.10" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/notebooks/03Define_sections.ipynb b/docs/notebooks/03Define_sections.ipynb index c342cd53..6e390a1f 100644 --- a/docs/notebooks/03Define_sections.ipynb +++ b/docs/notebooks/03Define_sections.ipynb @@ -5,7 +5,7 @@ "metadata": {}, "source": [ "# 3. Define calibration sections\n", - "The goal of this notebook is to show how you can define calibration sections. That means that we define certain parts of the fiber to a timeseries of temperature measurements. Here, we assume the temperature timeseries is already part of the `DataStore` object." + "The goal of this notebook is to show how you can define calibration sections. That means that we define certain parts of the fiber to a timeseries of temperature measurements. Here, we assume the temperature timeseries is already part of the `xarray.Dataset` object." ] }, { @@ -22,7 +22,6 @@ "outputs": [], "source": [ "import os\n", - "\n", "from dtscalibration import read_silixa_files" ] }, @@ -49,7 +48,7 @@ "source": [ "First we have a look at which temperature timeseries are available for calibration. Therefore we access `ds.data_vars` and we find `probe1Temperature` and `probe2Temperature` that refer to the temperature measurement timeseries of the two probes attached to the Ultima.\n", "\n", - "Alternatively, we can access the `ds.timeseries_keys` property to list all timeseries that can be used for calibration." + "Alternatively, we can access the `ds.dts.get_timeseries_keys()` function to list all timeseries that can be used for calibration." 
] }, { @@ -65,9 +64,11 @@ }, "outputs": [], "source": [ - "print(ds.timeseries_keys) # list the available timeseeries\n", - "ds.probe1Temperature.plot(figsize=(12, 8))\n", - "# plot one of the timeseries" + "# The following command adds the .dts accessor to the xarray Dataset.\n", + "from dtscalibration.dts_accessor import DtsAccessor # noqa: E402\n", + "\n", + "print(ds.dts.get_timeseries_keys()) # list the available timeseeries\n", + "ds.probe1Temperature.plot(figsize=(12, 8))" ] }, { diff --git a/docs/notebooks/04Calculate_variance_Stokes.ipynb b/docs/notebooks/04Calculate_variance_Stokes.ipynb index a167e999..ec8cbb78 100644 --- a/docs/notebooks/04Calculate_variance_Stokes.ipynb +++ b/docs/notebooks/04Calculate_variance_Stokes.ipynb @@ -35,6 +35,8 @@ "warnings.simplefilter(\"ignore\") # Hide warnings to avoid clutter in the notebook\n", "\n", "from dtscalibration import read_silixa_files\n", + "from dtscalibration.dts_accessor import DtsAccessor # noqa: E402\n", + "from dtscalibration.variance_stokes import variance_stokes_constant\n", "from matplotlib import pyplot as plt\n", "\n", "%matplotlib inline" @@ -90,10 +92,10 @@ "source": [ "The variance in the Stokes signal will vary along the length of the fiber. There are multiple ways to approach this, each has its own pros and cons. **It is important to consider which model you use for your setup, as this will impact the calibration weights and predicted uncertainty.**\n", "\n", - "- In small setups with small variations in Stokes intensity, `ds.variance_stokes_constant` can be used. This function determines a single (constant) value for the variance. This method is not recommended for larger setups (e.g., >300 m) due to the signal strength dependency of the variance.\n", + "- In small setups with small variations in Stokes intensity, `variance_stokes_constant` can be used. This function determines a single (constant) value for the variance. This method is not recommended for larger setups (e.g., >300 m) due to the signal strength dependency of the variance.\n", "\n", "\n", - "- For larger setups `ds.variance_stokes_linear` should be used. This function assumes a linear relationship between the Stokes signal strength and variance. Tests on Silixa and Sensornet devices indicate this relationship is linear, and (approximately) goes through the origin; i.e. at 0 Stokes intensity, the signal variance is very close to 0.\n", + "- For larger setups `variance_stokes_linear` should be used. This function assumes a linear relationship between the Stokes signal strength and variance. Tests on Silixa and Sensornet devices indicate this relationship is linear, and (approximately) goes through the origin; i.e. at 0 Stokes intensity, the signal variance is very close to 0.\n", "\n", "\n", "- `variance_stokes_exponential` can be used for small setups with very few time steps. Too many degrees of freedom results in an under estimation of the noise variance. Almost never the case, but use when calibrating e.g. a single time step." 
@@ -119,7 +121,9 @@ }, "outputs": [], "source": [ - "I_var, residuals = ds.variance_stokes_constant(sections=sections, st_label=\"st\")\n", + "I_var, residuals = variance_stokes_constant(\n", + " ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=True\n", + ")\n", "print(\n", " \"The variance of the Stokes signal along the reference sections \"\n", " \"is approximately {:.2f} on a {:.1f} sec acquisition time\".format(\n", @@ -213,7 +217,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.11" + "version": "3.10.10" } }, "nbformat": 4, diff --git a/docs/notebooks/07Calibrate_single_ended.ipynb b/docs/notebooks/07Calibrate_single_ended.ipynb index 7edf0a8a..81aeae45 100644 --- a/docs/notebooks/07Calibrate_single_ended.ipynb +++ b/docs/notebooks/07Calibrate_single_ended.ipynb @@ -43,6 +43,8 @@ "import os\n", "\n", "from dtscalibration import read_silixa_files\n", + "from dtscalibration.dts_accessor import DtsAccessor # noqa: E402\n", + "from dtscalibration.variance_stokes import variance_stokes_constant\n", "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline" @@ -119,8 +121,12 @@ }, "outputs": [], "source": [ - "st_var, resid = ds.variance_stokes_constant(sections=sections, st_label=\"st\")\n", - "ast_var, _ = ds.variance_stokes_constant(sections=sections, st_label=\"ast\")" + "st_var, resid = variance_stokes_constant(\n", + " ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=True\n", + ")\n", + "ast_var, _ = variance_stokes_constant(\n", + " ds.dts.ast, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False\n", + ")" ] }, { @@ -160,7 +166,7 @@ }, "outputs": [], "source": [ - "ds.calibration_single_ended(sections=sections, st_var=st_var, ast_var=ast_var)" + "out = ds.dts.calibrate_single_ended(sections=sections, st_var=st_var, ast_var=ast_var)" ] }, { @@ -177,7 +183,7 @@ "metadata": {}, "outputs": [], "source": [ - "ds.tmpf.plot(figsize=(12, 4))" + "out.tmpf.plot(figsize=(12, 4))" ] }, { @@ -186,7 +192,7 @@ "metadata": {}, "outputs": [], "source": [ - "ds1 = ds.isel(time=0)\n", + "ds1 = out.isel(time=0)\n", "ds1.tmpf.plot(figsize=(12, 4))\n", "(ds1.tmpf_var**0.5).plot(figsize=(12, 4))\n", "plt.ylabel(\"$\\sigma$ ($^\\circ$C)\")" @@ -216,8 +222,8 @@ }, "outputs": [], "source": [ - "ds1.st.plot(figsize=(12, 8))\n", - "ds1.ast.plot()" + "ds.isel(time=0).st.plot(figsize=(12, 8))\n", + "ds.isel(time=0).ast.plot()" ] }, { diff --git a/docs/notebooks/08Calibrate_double_ended.ipynb b/docs/notebooks/08Calibrate_double_ended.ipynb index 5f36c954..d0f1603d 100644 --- a/docs/notebooks/08Calibrate_double_ended.ipynb +++ b/docs/notebooks/08Calibrate_double_ended.ipynb @@ -45,6 +45,8 @@ " suggest_cable_shift_double_ended,\n", " shift_double_ended,\n", ")\n", + "from dtscalibration.dts_accessor import DtsAccessor # noqa: E402\n", + "from dtscalibration.variance_stokes import variance_stokes_constant\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", @@ -170,7 +172,9 @@ "## Estimate the variance of the noise in the Stokes and anti-Stokes measurements\n", "First calculate the variance in the measured Stokes and anti-Stokes signals, in the forward and backward direction. See Notebook 4 for more information.\n", "\n", - "The Stokes and anti-Stokes signals should follow a smooth decaying exponential. This function fits a decaying exponential to each reference section for each time step. 
The variance of the residuals between the measured Stokes and anti-Stokes signals and the fitted signals is used as an estimate of the variance in measured signals. This algorithm assumes that the temperature is the same for the entire section but may vary over time and differ per section." + "The Stokes and anti-Stokes signals should follow a smooth decaying exponential. This function fits a decaying exponential to each reference section for each time step. The variance of the residuals between the measured Stokes and anti-Stokes signals and the fitted signals is used as an estimate of the variance in measured signals. This algorithm assumes that the temperature is the same for the entire section but may vary over time and differ per section.\n", + "\n", + "Note that the acquisition time of the backward channel is passed to the variance_stokes function for the latter two function calls." ] }, { @@ -186,10 +190,18 @@ }, "outputs": [], "source": [ - "st_var, resid = ds.variance_stokes_constant(sections=sections, st_label=\"st\")\n", - "ast_var, _ = ds.variance_stokes_constant(sections=sections, st_label=\"ast\")\n", - "rst_var, _ = ds.variance_stokes_constant(sections=sections, st_label=\"rst\")\n", - "rast_var, _ = ds.variance_stokes_constant(sections=sections, st_label=\"rast\")" + "st_var, resid = variance_stokes_constant(\n", + " ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=True\n", + ")\n", + "ast_var, _ = variance_stokes_constant(\n", + " ds.dts.ast, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False\n", + ")\n", + "rst_var, _ = variance_stokes_constant(\n", + " ds.dts.rst, sections, ds.dts.acquisitiontime_bw, reshape_residuals=False\n", + ")\n", + "rast_var, _ = variance_stokes_constant(\n", + " ds.dts.rast, sections, ds.dts.acquisitiontime_bw, reshape_residuals=False\n", + ")" ] }, { @@ -236,7 +248,7 @@ }, "outputs": [], "source": [ - "ds.calibration_double_ended(\n", + "out = ds.dts.calibrate_double_ended(\n", " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", @@ -258,7 +270,7 @@ }, "outputs": [], "source": [ - "ds.tmpw.plot(figsize=(12, 4))" + "out.tmpw.plot(figsize=(12, 4))" ] }, { @@ -293,10 +305,10 @@ "metadata": {}, "outputs": [], "source": [ - "ds.tmpw_var.plot(figsize=(12, 4))\n", - "ds1 = ds.isel(time=-1) # take only the first timestep\n", "(ds1.tmpw_var**0.5).plot(figsize=(12, 4))\n", - "plt.gca().set_ylabel(\"Standard error ($^\\circ$C)\")" + "out.tmpw_var.plot(figsize=(12, 4))\n", + "ds1 = out.isel(time=-1) # take only the first timestep\n", "(ds1.tmpw_var**0.5).plot(figsize=(12, 4))\n", + "plt.gca().set_ylabel(\"Standard error ($^\\circ$C)\");" ] }, { @@ -323,8 +335,8 @@ "metadata": {}, "outputs": [], "source": [ - "ds.conf_int_double_ended(\n", - " sections=sections,\n", + "out2 = ds.dts.monte_carlo_double_ended(\n", + " result=out,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", @@ -333,7 +345,7 @@ " mc_sample_size=500,\n", ") # < increase sample size for better approximation\n", "\n", - "ds.tmpw_mc_var.plot(figsize=(12, 4))" + "out2.tmpw_mc_var.plot(figsize=(12, 4))" ] }, { @@ -349,10 +361,9 @@ }, "outputs": [], "source": [ - "ds1 = ds.isel(time=-1) # take only the first timestep\n", - "ds1.tmpw.plot(linewidth=0.7, figsize=(12, 4))\n", - "ds1.tmpw_mc.isel(CI=0).plot(linewidth=0.7, label=\"CI: 2.5%\")\n", - "ds1.tmpw_mc.isel(CI=1).plot(linewidth=0.7, label=\"CI: 97.5%\")\n", + "out.isel(time=-1).tmpw.plot(linewidth=0.7, figsize=(12, 4))\n", + "out2.isel(time=-1).tmpw_mc.isel(CI=0).plot(linewidth=0.7, label=\"CI: 2.5%\")\n", + 
"out2.isel(time=-1).tmpw_mc.isel(CI=1).plot(linewidth=0.7, label=\"CI: 97.5%\")\n", "plt.legend(fontsize=\"small\")" ] }, @@ -376,12 +387,12 @@ }, "outputs": [], "source": [ - "(ds1.tmpf_mc_var**0.5).plot(figsize=(12, 4))\n", - "(ds1.tmpf_var**0.5).plot()\n", - "(ds1.tmpb_mc_var**0.5).plot()\n", - "(ds1.tmpb_var**0.5).plot()\n", - "(ds1.tmpw_var**0.5).plot()\n", - "(ds1.tmpw_mc_var**0.5).plot()\n", + "(out2.isel(time=-1).tmpf_mc_var ** 0.5).plot(figsize=(12, 4))\n", + "(out.isel(time=-1).tmpf_var ** 0.5).plot()\n", + "(out2.isel(time=-1).tmpb_mc_var ** 0.5).plot()\n", + "(out.isel(time=-1).tmpb_var ** 0.5).plot()\n", + "(out.isel(time=-1).tmpw_var ** 0.5).plot()\n", + "(out2.isel(time=-1).tmpw_mc_var ** 0.5).plot()\n", "plt.ylabel(\"$\\sigma$ ($^\\circ$C)\")" ] }, diff --git a/docs/notebooks/09Import_timeseries.ipynb b/docs/notebooks/09Import_timeseries.ipynb index 39e06c53..c12234c9 100644 --- a/docs/notebooks/09Import_timeseries.ipynb +++ b/docs/notebooks/09Import_timeseries.ipynb @@ -22,10 +22,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2022-04-06T08:10:31.444824Z", - "iopub.status.busy": "2022-04-06T08:10:31.444098Z", - "iopub.status.idle": "2022-04-06T08:10:32.898656Z", - "shell.execute_reply": "2022-04-06T08:10:32.897961Z" + "iopub.execute_input": "2023-10-20T09:16:48.371460Z", + "iopub.status.busy": "2023-10-20T09:16:48.371128Z", + "iopub.status.idle": "2023-10-20T09:16:49.350990Z", + "shell.execute_reply": "2023-10-20T09:16:49.350668Z" } }, "outputs": [], @@ -48,10 +48,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2022-04-06T08:10:32.901423Z", - "iopub.status.busy": "2022-04-06T08:10:32.901062Z", - "iopub.status.idle": "2022-04-06T08:10:32.905451Z", - "shell.execute_reply": "2022-04-06T08:10:32.904987Z" + "iopub.execute_input": "2023-10-20T09:16:49.353033Z", + "iopub.status.busy": "2023-10-20T09:16:49.352815Z", + "iopub.status.idle": "2023-10-20T09:16:49.355625Z", + "shell.execute_reply": "2023-10-20T09:16:49.355358Z" } }, "outputs": [], @@ -77,10 +77,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2022-04-06T08:10:32.928616Z", - "iopub.status.busy": "2022-04-06T08:10:32.927992Z", - "iopub.status.idle": "2022-04-06T08:10:33.076019Z", - "shell.execute_reply": "2022-04-06T08:10:33.075543Z" + "iopub.execute_input": "2023-10-20T09:16:49.375672Z", + "iopub.status.busy": "2023-10-20T09:16:49.375532Z", + "iopub.status.idle": "2023-10-20T09:16:49.399183Z", + "shell.execute_reply": "2023-10-20T09:16:49.398877Z" } }, "outputs": [], @@ -96,10 +96,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2022-04-06T08:10:33.078306Z", - "iopub.status.busy": "2022-04-06T08:10:33.078143Z", - "iopub.status.idle": "2022-04-06T08:10:33.088386Z", - "shell.execute_reply": "2022-04-06T08:10:33.087913Z" + "iopub.execute_input": "2023-10-20T09:16:49.401036Z", + "iopub.status.busy": "2023-10-20T09:16:49.400906Z", + "iopub.status.idle": "2023-10-20T09:16:49.405335Z", + "shell.execute_reply": "2023-10-20T09:16:49.405054Z" } }, "outputs": [], @@ -119,10 +119,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2022-04-06T08:10:33.090805Z", - "iopub.status.busy": "2022-04-06T08:10:33.090642Z", - "iopub.status.idle": "2022-04-06T08:10:33.301302Z", - "shell.execute_reply": "2022-04-06T08:10:33.300636Z" + "iopub.execute_input": "2023-10-20T09:16:49.407001Z", + "iopub.status.busy": "2023-10-20T09:16:49.406895Z", + "iopub.status.idle": 
"2023-10-20T09:16:49.587342Z", + "shell.execute_reply": "2023-10-20T09:16:49.587037Z" } }, "outputs": [], @@ -145,10 +145,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2022-04-06T08:10:33.303735Z", - "iopub.status.busy": "2022-04-06T08:10:33.303562Z", - "iopub.status.idle": "2022-04-06T08:10:33.308068Z", - "shell.execute_reply": "2022-04-06T08:10:33.307543Z" + "iopub.execute_input": "2023-10-20T09:16:49.589047Z", + "iopub.status.busy": "2023-10-20T09:16:49.588934Z", + "iopub.status.idle": "2023-10-20T09:16:49.591317Z", + "shell.execute_reply": "2023-10-20T09:16:49.591061Z" } }, "outputs": [], @@ -168,10 +168,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2022-04-06T08:10:33.310439Z", - "iopub.status.busy": "2022-04-06T08:10:33.310255Z", - "iopub.status.idle": "2022-04-06T08:10:33.315622Z", - "shell.execute_reply": "2022-04-06T08:10:33.315088Z" + "iopub.execute_input": "2023-10-20T09:16:49.592925Z", + "iopub.status.busy": "2023-10-20T09:16:49.592798Z", + "iopub.status.idle": "2023-10-20T09:16:49.594944Z", + "shell.execute_reply": "2023-10-20T09:16:49.594703Z" } }, "outputs": [], @@ -192,10 +192,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2022-04-06T08:10:33.318123Z", - "iopub.status.busy": "2022-04-06T08:10:33.317912Z", - "iopub.status.idle": "2022-04-06T08:10:33.328112Z", - "shell.execute_reply": "2022-04-06T08:10:33.327546Z" + "iopub.execute_input": "2023-10-20T09:16:49.596483Z", + "iopub.status.busy": "2023-10-20T09:16:49.596379Z", + "iopub.status.idle": "2023-10-20T09:16:49.716179Z", + "shell.execute_reply": "2023-10-20T09:16:49.715795Z" } }, "outputs": [], @@ -208,10 +208,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2022-04-06T08:10:33.330579Z", - "iopub.status.busy": "2022-04-06T08:10:33.330405Z", - "iopub.status.idle": "2022-04-06T08:10:33.342398Z", - "shell.execute_reply": "2022-04-06T08:10:33.341846Z" + "iopub.execute_input": "2023-10-20T09:16:49.718112Z", + "iopub.status.busy": "2023-10-20T09:16:49.717968Z", + "iopub.status.idle": "2023-10-20T09:16:49.722423Z", + "shell.execute_reply": "2023-10-20T09:16:49.721693Z" }, "scrolled": true }, diff --git a/docs/notebooks/10Align_double_ended_measurements.ipynb b/docs/notebooks/10Align_double_ended_measurements.ipynb index d80d9f02..911f2dea 100644 --- a/docs/notebooks/10Align_double_ended_measurements.ipynb +++ b/docs/notebooks/10Align_double_ended_measurements.ipynb @@ -38,7 +38,6 @@ " shift_double_ended,\n", ")\n", "import numpy as np\n", - "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline" ] diff --git a/docs/notebooks/12Datastore_from_numpy_arrays.ipynb b/docs/notebooks/12Datastore_from_numpy_arrays.ipynb index 9cd233a5..1d17574c 100644 --- a/docs/notebooks/12Datastore_from_numpy_arrays.ipynb +++ b/docs/notebooks/12Datastore_from_numpy_arrays.ipynb @@ -4,8 +4,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# 12. Creating a DataStore from numpy arrays\n", - "The goal of this notebook is to demonstrate how to create a `DataStore` from scratch. This can be useful if your device is not supported or if you would like to integrate the `dtscalibration` library in your current routine." + "# 12. Creating a Dataset from numpy arrays\n", + "The goal of this notebook is to demonstrate how to create a `xarray.Dataset` from scratch. 
This can be useful if your device is not supported or if you would like to integrate the `dtscalibration` library in your current routine." ] }, { @@ -26,14 +26,16 @@ "import matplotlib.pyplot as plt\n", "import xarray as xr\n", "\n", - "from dtscalibration import DataStore, read_silixa_files" + "from dtscalibration import read_silixa_files\n", + "from dtscalibration.dts_accessor import DtsAccessor # noqa: E402\n", + "from dtscalibration.variance_stokes import variance_stokes_constant" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "For a `DataStore` object, a few things are needed:\n", + "For a `xarray.Dataset` object, a few things are needed:\n", "\n", "- timestamps\n", "\n", @@ -68,7 +70,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We will get all the numpy arrays from this `DataStore` to create a new one from 'scratch'.\n", + "We will get all the numpy arrays from this `xarray.Dataset` to create a new one from 'scratch'.\n", "\n", "Let's start with the most basic data:" ] @@ -132,7 +134,6 @@ }, "outputs": [], "source": [ - "ds = DataStore(ds)\n", "print(ds)" ] }, @@ -148,7 +149,7 @@ "\n", "- a double ended flag\n", "\n", - "We'll put these into the custom `DataStore`:" + "We'll put these into the custom `xarray.Dataset`:" ] }, { @@ -198,28 +199,15 @@ " \"temp2\": [slice(5.5, 15.5)], # cold bath\n", "}\n", "\n", - "st_var, resid = ds.variance_stokes_constant(sections=sections, st_label=\"st\")\n", - "ast_var, _ = ds.variance_stokes_constant(sections=sections, st_label=\"ast\")\n", - "ds.calibration_single_ended(sections=sections, st_var=st_var, ast_var=ast_var)\n", - "\n", - "ds.isel(time=0).tmpf.plot()" + "st_var, resid = variance_stokes_constant(\n", + " ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=True\n", + ")\n", + "ast_var, _ = variance_stokes_constant(\n", + " ds.dts.ast, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False\n", + ")\n", + "out = ds.dts.calibrate_single_ended(sections=sections, st_var=st_var, ast_var=ast_var)\n", + "out.isel(time=0).tmpf.plot()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ds" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/notebooks/13Fixed_parameter_calibration.ipynb b/docs/notebooks/13Fixed_parameter_calibration.ipynb index fc2642ae..8f4f3e0f 100644 --- a/docs/notebooks/13Fixed_parameter_calibration.ipynb +++ b/docs/notebooks/13Fixed_parameter_calibration.ipynb @@ -26,6 +26,8 @@ "import os\n", "\n", "from dtscalibration import read_silixa_files\n", + "from dtscalibration.dts_accessor import DtsAccessor # noqa: E402\n", + "from dtscalibration.variance_stokes import variance_stokes_constant\n", "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline\n", @@ -68,9 +70,13 @@ "fix_gamma = (481.9, 0) # (gamma value, gamma variance)\n", "fix_dalpha = (-2.014e-5, 0) # (alpha value, alpha variance)\n", "\n", - "st_var, resid = ds100.variance_stokes_constant(sections=sections, st_label=\"st\")\n", - "ast_var, _ = ds100.variance_stokes_constant(sections=sections, st_label=\"ast\")\n", - "ds100.calibration_single_ended(\n", + "st_var, resid = variance_stokes_constant(\n", + " ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=True\n", + ")\n", + "ast_var, _ = variance_stokes_constant(\n", + " ds.dts.ast, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False\n", + ")\n", + "out = 
ds100.dts.calibrate_single_ended(\n", " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", @@ -99,8 +105,8 @@ }, "outputs": [], "source": [ - "print(\"gamma used in calibration:\", ds100.gamma.values)\n", - "print(\"dalpha used in calibration:\", ds100.dalpha.values)" + "print(\"gamma used in calibration:\", out.gamma.values)\n", + "print(\"dalpha used in calibration:\", out.dalpha.values)" ] }, { @@ -123,24 +129,15 @@ }, "outputs": [], "source": [ - "ds1 = ds100.isel(time=0) # take only the first timestep\n", - "\n", - "ds1.tmpf.plot(\n", + "out.isel(time=0).tmpf.plot(\n", " linewidth=1, figsize=(12, 8), label=\"User calibrated\"\n", ") # plot the temperature calibrated by us\n", - "ds1.tmp.plot(\n", + "ds100.isel(time=0).tmp.plot(\n", " linewidth=1, label=\"Device calibrated\"\n", ") # plot the temperature calibrated by the device\n", "plt.title(\"Temperature at the first time step\")\n", "plt.legend()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/notebooks/14Lossy_splices.ipynb b/docs/notebooks/14Lossy_splices.ipynb index 977969f3..318d3372 100644 --- a/docs/notebooks/14Lossy_splices.ipynb +++ b/docs/notebooks/14Lossy_splices.ipynb @@ -44,6 +44,8 @@ "import os\n", "\n", "from dtscalibration import read_silixa_files\n", + "from dtscalibration.dts_accessor import DtsAccessor # noqa: E402\n", + "from dtscalibration.variance_stokes import variance_stokes_constant\n", "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline" @@ -144,14 +146,20 @@ }, "outputs": [], "source": [ - "ds_a = ds.copy(deep=True)\n", - "\n", - "st_var, resid = ds_a.variance_stokes(sections=sections, st_label=\"st\")\n", - "ast_var, _ = ds_a.variance_stokes(sections=sections, st_label=\"ast\")\n", - "rst_var, _ = ds_a.variance_stokes(sections=sections, st_label=\"rst\")\n", - "rast_var, _ = ds_a.variance_stokes(sections=sections, st_label=\"rast\")\n", + "st_var, resid = variance_stokes_constant(\n", + " ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=True\n", + ")\n", + "ast_var, _ = variance_stokes_constant(\n", + " ds.dts.ast, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False\n", + ")\n", + "rst_var, _ = variance_stokes_constant(\n", + " ds.dts.rst, sections, ds.dts.acquisitiontime_bw, reshape_residuals=False\n", + ")\n", + "rast_var, _ = variance_stokes_constant(\n", + " ds.dts.rast, sections, ds.dts.acquisitiontime_bw, reshape_residuals=False\n", + ")\n", "\n", - "ds_a.calibration_double_ended(\n", + "out = ds.dts.calibrate_double_ended(\n", " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", @@ -159,7 +167,7 @@ " rast_var=rast_var,\n", ")\n", "\n", - "ds_a.isel(time=0).tmpw.plot(label=\"calibrated\")" + "out.isel(time=0).tmpw.plot(label=\"calibrated\")" ] }, { @@ -184,12 +192,20 @@ }, "outputs": [], "source": [ - "st_var, resid = ds.variance_stokes(sections=sections, st_label=\"st\")\n", - "ast_var, _ = ds.variance_stokes(sections=sections, st_label=\"ast\")\n", - "rst_var, _ = ds.variance_stokes(sections=sections, st_label=\"rst\")\n", - "rast_var, _ = ds.variance_stokes(sections=sections, st_label=\"rast\")\n", + "st_var, resid = variance_stokes_constant(\n", + " ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=True\n", + ")\n", + "ast_var, _ = variance_stokes_constant(\n", + " ds.dts.ast, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False\n", + ")\n", + "rst_var, _ = variance_stokes_constant(\n", + " ds.dts.rst, 
sections, ds.dts.acquisitiontime_bw, reshape_residuals=False\n", + ")\n", + "rast_var, _ = variance_stokes_constant(\n", + " ds.dts.rast, sections, ds.dts.acquisitiontime_bw, reshape_residuals=False\n", + ")\n", "\n", - "ds.calibration_double_ended(\n", + "out2 = ds.dts.calibrate_double_ended(\n", " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", @@ -198,8 +214,8 @@ " trans_att=[50.0],\n", ")\n", "\n", - "ds_a.isel(time=0).tmpw.plot(label=\"no trans. att.\")\n", - "ds.isel(time=0).tmpw.plot(label=\"with trans. att.\")\n", + "out.isel(time=0).tmpw.plot(label=\"no trans. att.\")\n", + "out2.isel(time=0).tmpw.plot(label=\"with trans. att.\")\n", "plt.legend()" ] }, diff --git a/docs/notebooks/15Matching_sections.ipynb b/docs/notebooks/15Matching_sections.ipynb index 3017c04b..26c017a9 100644 --- a/docs/notebooks/15Matching_sections.ipynb +++ b/docs/notebooks/15Matching_sections.ipynb @@ -40,6 +40,9 @@ "import os\n", "\n", "from dtscalibration import read_silixa_files\n", + "from dtscalibration.dts_accessor import DtsAccessor # noqa: E402\n", + "from dtscalibration.variance_stokes import variance_stokes_constant\n", + "\n", "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline" @@ -120,14 +123,20 @@ }, "outputs": [], "source": [ - "ds_a = ds.copy(deep=True)\n", - "\n", - "st_var, resid = ds_a.variance_stokes(sections=sections, st_label=\"st\")\n", - "ast_var, _ = ds_a.variance_stokes(sections=sections, st_label=\"ast\")\n", - "rst_var, _ = ds_a.variance_stokes(sections=sections, st_label=\"rst\")\n", - "rast_var, _ = ds_a.variance_stokes(sections=sections, st_label=\"rast\")\n", + "st_var, resid = variance_stokes_constant(\n", + " ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=True\n", + ")\n", + "ast_var, _ = variance_stokes_constant(\n", + " ds.dts.ast, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False\n", + ")\n", + "rst_var, _ = variance_stokes_constant(\n", + " ds.dts.rst, sections, ds.dts.acquisitiontime_bw, reshape_residuals=False\n", + ")\n", + "rast_var, _ = variance_stokes_constant(\n", + " ds.dts.rast, sections, ds.dts.acquisitiontime_bw, reshape_residuals=False\n", + ")\n", "\n", - "ds_a.calibration_double_ended(\n", + "out = ds.dts.calibrate_double_ended(\n", " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", @@ -135,7 +144,7 @@ " rast_var=rast_var,\n", ")\n", "\n", - "ds_a.isel(time=0).tmpw.plot(label=\"calibrated\")" + "out.isel(time=0).tmpw.plot(label=\"calibrated\")" ] }, { @@ -168,12 +177,7 @@ "source": [ "matching_sections = [(slice(7.5, 17.6), slice(69, 79.1), False)]\n", "\n", - "st_var, resid = ds.variance_stokes(sections=sections, st_label=\"st\")\n", - "ast_var, _ = ds.variance_stokes(sections=sections, st_label=\"ast\")\n", - "rst_var, _ = ds.variance_stokes(sections=sections, st_label=\"rst\")\n", - "rast_var, _ = ds.variance_stokes(sections=sections, st_label=\"rast\")\n", - "\n", - "ds.calibration_double_ended(\n", + "out2 = ds.dts.calibrate_double_ended(\n", " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", @@ -183,8 +187,10 @@ " matching_sections=matching_sections,\n", ")\n", "\n", - "ds_a.isel(time=0).tmpw.plot(label=\"normal calibration\")\n", - "ds.isel(time=0).tmpw.plot(label=\"matching sections\")\n", + "out2.isel(time=0).tmpw.plot(label=\"calibrated\")\n", + "\n", + "out.isel(time=0).tmpw.plot(label=\"normal calibration\")\n", + "out2.isel(time=0).tmpw.plot(label=\"matching sections\")\n", "plt.legend()" ] }, diff --git 
a/docs/notebooks/16Averaging_temperatures.ipynb b/docs/notebooks/16Averaging_temperatures.ipynb index 2d97bb85..2bdd41e8 100644 --- a/docs/notebooks/16Averaging_temperatures.ipynb +++ b/docs/notebooks/16Averaging_temperatures.ipynb @@ -37,6 +37,8 @@ "import os\n", "\n", "from dtscalibration import read_silixa_files\n", + "from dtscalibration.dts_accessor import DtsAccessor # noqa: E402\n", + "from dtscalibration.variance_stokes import variance_stokes_constant\n", "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline" @@ -79,10 +81,18 @@ }, "outputs": [], "source": [ - "st_var, resid = ds.variance_stokes(sections=sections, st_label=\"st\")\n", - "ast_var, _ = ds.variance_stokes(sections=sections, st_label=\"ast\")\n", - "rst_var, _ = ds.variance_stokes(sections=sections, st_label=\"rst\")\n", - "rast_var, _ = ds.variance_stokes(sections=sections, st_label=\"rast\")" + "st_var, resid = variance_stokes_constant(\n", + " ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=True\n", + ")\n", + "ast_var, _ = variance_stokes_constant(\n", + " ds.dts.ast, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False\n", + ")\n", + "rst_var, _ = variance_stokes_constant(\n", + " ds.dts.rst, sections, ds.dts.acquisitiontime_bw, reshape_residuals=False\n", + ")\n", + "rast_var, _ = variance_stokes_constant(\n", + " ds.dts.rast, sections, ds.dts.acquisitiontime_bw, reshape_residuals=False\n", + ")" ] }, { @@ -98,7 +108,7 @@ }, "outputs": [], "source": [ - "ds.calibration_double_ended(\n", + "out = ds.dts.calibrate_double_ended(\n", " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", @@ -178,8 +188,8 @@ }, "outputs": [], "source": [ - "ds.average_double_ended(\n", - " sections=sections,\n", + "out_avg = ds.dts.average_monte_carlo_double_ended(\n", + " result=out,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", @@ -191,7 +201,7 @@ " ci_avg_time_isel=[0, 1, 2, 3, 4, 5],\n", " ci_avg_time_sel=None,\n", ")\n", - "ds.tmpw_mc_avg1.plot(hue=\"CI\", linewidth=0.8)" + "out_avg.tmpw_mc_avg1.plot(hue=\"CI\", linewidth=0.8)" ] }, { @@ -239,8 +249,8 @@ }, "outputs": [], "source": [ - "ds.average_double_ended(\n", - " sections=sections,\n", + "out_avg = ds.dts.average_monte_carlo_double_ended(\n", + " result=out,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", @@ -252,7 +262,7 @@ " ci_avg_time_isel=[0, 1, 2, 3, 4, 5],\n", " ci_avg_time_sel=None,\n", ")\n", - "ds.tmpw_mc_avg2.plot(hue=\"CI\", linewidth=0.8)" + "out_avg.tmpw_mc_avg2.plot(hue=\"CI\", linewidth=0.8)" ] }, { @@ -300,8 +310,8 @@ }, "outputs": [], "source": [ - "ds.average_double_ended(\n", - " sections=sections,\n", + "out_avg = ds.dts.average_monte_carlo_double_ended(\n", + " result=out,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", @@ -313,7 +323,7 @@ " ci_avg_x_sel=slice(7.5, 17.0),\n", " ci_avg_x_isel=None,\n", ")\n", - "ds.tmpw_mc_avgx1.plot(hue=\"CI\", linewidth=0.8)" + "out_avg.tmpw_mc_avgx1.plot(hue=\"CI\", linewidth=0.8)" ] }, { @@ -361,8 +371,8 @@ }, "outputs": [], "source": [ - "ds.average_double_ended(\n", - " sections=sections,\n", + "out_avg = ds.dts.average_monte_carlo_double_ended(\n", + " result=out,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", @@ -374,15 +384,8 @@ " ci_avg_x_sel=slice(7.5, 17.0),\n", " ci_avg_x_isel=None,\n", ")\n", - "ds.tmpw_mc_avgx2.plot(hue=\"CI\", linewidth=0.8)" + "out_avg.tmpw_mc_avgx2.plot(hue=\"CI\", linewidth=0.8)" ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/notebooks/17Temperature_uncertainty_single_ended.ipynb b/docs/notebooks/17Temperature_uncertainty_single_ended.ipynb index 301e4963..a21be0bd 100644 --- a/docs/notebooks/17Temperature_uncertainty_single_ended.ipynb +++ b/docs/notebooks/17Temperature_uncertainty_single_ended.ipynb @@ -24,6 +24,8 @@ "import os\n", "\n", "from dtscalibration import read_silixa_files\n", + "from dtscalibration.dts_accessor import DtsAccessor # noqa: E402\n", + "from dtscalibration.variance_stokes import variance_stokes_constant\n", "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline\n", @@ -36,11 +38,14 @@ " \"probe1Temperature\": [slice(20, 25.5)], # warm bath\n", " \"probe2Temperature\": [slice(5.5, 15.5)], # cold bath\n", "}\n", + "st_var, resid = variance_stokes_constant(\n", + " ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=True\n", + ")\n", + "ast_var, _ = variance_stokes_constant(\n", + " ds.dts.ast, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False\n", + ")\n", "\n", - "st_var, resid = ds.variance_stokes_constant(sections=sections, st_label=\"st\")\n", - "ast_var, _ = ds.variance_stokes_constant(sections=sections, st_label=\"ast\")\n", - "\n", - "ds.calibration_single_ended(sections=sections, st_var=st_var, ast_var=ast_var)" + "out = ds.dts.calibrate_single_ended(sections=sections, st_var=st_var, ast_var=ast_var)" ] }, { @@ -59,7 +64,7 @@ "metadata": {}, "outputs": [], "source": [ - "ds1 = ds.isel(time=0)\n", + "ds1 = out.isel(time=0)\n", "\n", "# Uncertainty from the noise in (anti-) stokes measurements\n", "stast_var = ds1.var_fw_da.sel(comp_fw=[\"dT_dst\", \"dT_dast\"]).sum(dim=\"comp_fw\")\n", @@ -84,7 +89,8 @@ "outputs": [], "source": [ "# The effects of the parameter uncertainty can be further inspected\n", - "ds1.var_fw_da.plot(hue=\"comp_fw\", figsize=(12, 4))" + "# Note that the parameter uncertainty is not constant over the fiber and certain covariations can reduce to temperature uncertainty\n", + "ds1.var_fw_da.plot(hue=\"comp_fw\", figsize=(12, 4));" ] }, { @@ -119,8 +125,12 @@ "metadata": {}, "outputs": [], "source": [ - "ds.conf_int_single_ended(\n", - " st_var=st_var, ast_var=ast_var, conf_ints=[2.5, 97.5], mc_sample_size=500\n", + "out2 = ds.dts.monte_carlo_single_ended(\n", + " result=out,\n", + " st_var=st_var,\n", + " ast_var=ast_var,\n", + " conf_ints=[2.5, 97.5],\n", + " mc_sample_size=500,\n", ")" ] }, @@ -140,10 +150,12 @@ "metadata": {}, "outputs": [], "source": [ - "ds1 = ds.isel(time=0)\n", + "ds1 = out.isel(time=0)\n", "\n", - "(ds1.tmpf_mc_var**0.5).plot(figsize=(12, 4), label=\"Monte Carlo approx.\")\n", - "(ds1.tmpf_var**0.5).plot(label=\"Linear error approx.\")\n", + "(out2.isel(time=0).tmpf_mc_var ** 0.5).plot(\n", + " figsize=(12, 4), label=\"Monte Carlo approx.\"\n", + ")\n", + "(out.isel(time=0).tmpf_var ** 0.5).plot(label=\"Linear error approx.\")\n", "plt.ylabel(\"$\\sigma$ ($^\\circ$C)\")\n", "plt.legend(fontsize=\"small\")" ] @@ -163,9 +175,9 @@ "metadata": {}, "outputs": [], "source": [ - "ds1.tmpf.plot(linewidth=0.7, figsize=(12, 4))\n", - "ds1.tmpf_mc.sel(CI=2.5).plot(linewidth=0.7, label=\"CI: 2.5%\")\n", - "ds1.tmpf_mc.sel(CI=97.5).plot(linewidth=0.7, label=\"CI: 97.5%\")\n", + "out.isel(time=0).tmpf.plot(linewidth=0.7, figsize=(12, 4))\n", + "out2.isel(time=0).tmpf_mc.sel(CI=2.5).plot(linewidth=0.7, label=\"CI: 2.5%\")\n", + "out2.isel(time=0).tmpf_mc.sel(CI=97.5).plot(linewidth=0.7, label=\"CI: 97.5%\")\n", 
"plt.legend(fontsize=\"small\")" ] }, diff --git a/docs/notebooks/A2Load_sensornet_files.ipynb b/docs/notebooks/A2Load_sensornet_files.ipynb index 58069ca1..88fff3f7 100644 --- a/docs/notebooks/A2Load_sensornet_files.ipynb +++ b/docs/notebooks/A2Load_sensornet_files.ipynb @@ -98,7 +98,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The object tries to gather as much metadata from the measurement files as possible (temporal and spatial coordinates, filenames, temperature probes measurements). All other configuration settings are loaded from the first files and stored as attributes of the `DataStore`." + "The object tries to gather as much metadata from the measurement files as possible (temporal and spatial coordinates, filenames, temperature probes measurements). All other configuration settings are loaded from the first files and stored as attributes of the `xarray.Dataset`." ] }, { diff --git a/docs/notebooks/A3Load_ap_sensing_files.ipynb b/docs/notebooks/A3Load_ap_sensing_files.ipynb index 810fc920..91a6e7b6 100644 --- a/docs/notebooks/A3Load_ap_sensing_files.ipynb +++ b/docs/notebooks/A3Load_ap_sensing_files.ipynb @@ -98,7 +98,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The object tries to gather as much metadata from the measurement files as possible (temporal and spatial coordinates, filenames, temperature probes measurements). All other configuration settings are loaded from the first files and stored as attributes of the `DataStore`.\n", + "The object tries to gather as much metadata from the measurement files as possible (temporal and spatial coordinates, filenames, temperature probes measurements). All other configuration settings are loaded from the first files and stored as attributes of the `xarray.Dataset`.\n", "\n", "Calibration follows as usual (see the other notebooks)." ] diff --git a/docs/notebooks/A4Load_sensortran_files.ipynb b/docs/notebooks/A4Load_sensortran_files.ipynb index 1a045a41..1db6488f 100644 --- a/docs/notebooks/A4Load_sensortran_files.ipynb +++ b/docs/notebooks/A4Load_sensortran_files.ipynb @@ -103,7 +103,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The object tries to gather as much metadata from the measurement files as possible (temporal and spatial coordinates, filenames, temperature probes measurements). All other configuration settings are loaded from the first files and stored as attributes of the `DataStore`. Sensortran's data files contain less information than the other manufacturer's devices, one being the acquisition time. The acquisition time is needed for estimating variances, and is set a constant 1s." + "The object tries to gather as much metadata from the measurement files as possible (temporal and spatial coordinates, filenames, temperature probes measurements). All other configuration settings are loaded from the first files and stored as attributes of the `xarray.Dataset`. Sensortran's data files contain less information than the other manufacturer's devices, one being the acquisition time. The acquisition time is needed for estimating variances, and is set a constant 1s." ] }, { diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 9d9e4821..73c3961c 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -1,10 +1,51 @@ Reference ========= -.. toctree:: - :glob: +Load the data +------------- -.. automodapi:: dtscalibration - :skip: plot_dask +See example notebooks 01, A2, A3, and A4. + +.. 
automodule:: dtscalibration.io + :members: dtscalibration.read_apsensing_files + :nosignatures: + +Compute the variance in the Stokes measurements +----------------------------------------------- + +See example notebook 04 and have a look at the docstrings of the dtscalibration.variance_stokes functions. + +.. automodule:: dtscalibration.variance_stokes + :members: + :nosignatures: + + +The DTS Accessor +---------------- + +See example notebooks 07, 08, and 17. + +.. currentmodule:: xarray +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_method.rst + :nosignatures: + + Dataset.dts.sections + Dataset.dts.calibrate_single_ended + Dataset.dts.calibrate_double_ended + Dataset.dts.monte_carlo_single_ended + Dataset.dts.monte_carlo_double_ended + Dataset.dts.average_monte_carlo_single_ended + Dataset.dts.average_monte_carlo_double_ended + Dataset.dts.get_default_encoding + Dataset.dts.get_timeseries_keys + Dataset.dts.matching_sections + Dataset.dts.ufunc_per_section + +Plot the results +---------------- + +.. automodule:: dtscalibration.plot :members: - :no-inheritance-diagram: + :nosignatures: \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 682d0c3e..4fae6751 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,6 +91,7 @@ docs = [ # Required for ReadTheDocs "sphinx_rtd_theme", "sphinx-autoapi", "sphinx-automodapi", + "sphinx-autosummary-accessors", "coverage[toml]", "nbsphinx", "ipykernel", @@ -122,7 +123,7 @@ features = ["docs"] [tool.hatch.envs.docs.scripts] build = [ - "sphinx-build -c docs -b html docs dist/docs", #"python docs/nb_examples_to_docs.py", + "sphinx-build -E -c docs -b html docs dist/docs", #"python docs/nb_examples_to_docs.py", ] [tool.hatch.envs.matrix_test] diff --git a/src/dtscalibration/__init__.py b/src/dtscalibration/__init__.py index cd531011..5bf2ca5b 100644 --- a/src/dtscalibration/__init__.py +++ b/src/dtscalibration/__init__.py @@ -1,13 +1,9 @@ -from dtscalibration.datastore import DataStore from dtscalibration.datastore_utils import check_dims -from dtscalibration.datastore_utils import check_timestep_allclose from dtscalibration.datastore_utils import get_netcdf_encoding from dtscalibration.datastore_utils import merge_double_ended from dtscalibration.datastore_utils import shift_double_ended from dtscalibration.datastore_utils import suggest_cable_shift_double_ended from dtscalibration.io.apsensing import read_apsensing_files -from dtscalibration.io.datastore import open_datastore -from dtscalibration.io.datastore import open_mf_datastore from dtscalibration.io.sensornet import read_sensornet_files from dtscalibration.io.sensortran import read_sensortran_files from dtscalibration.io.silixa import read_silixa_files @@ -19,15 +15,11 @@ __version__ = "2.0.0" __all__ = [ - "DataStore", - "open_datastore", - "open_mf_datastore", "read_apsensing_files", "read_sensornet_files", "read_sensortran_files", "read_silixa_files", "check_dims", - "check_timestep_allclose", "get_netcdf_encoding", "merge_double_ended", "shift_double_ended", @@ -38,18 +30,3 @@ "plot_residuals_reference_sections_single", "plot_sigma_report", ] - -# filenames = ['datastore.py', 'datastore_utils.py', 'calibrate_utils.py', -# 'plot.py', 'io_utils.py'] -# filenames = ['plot.py'] -# -# for filename in filenames: -# with open(join(dirname(__file__), filename)) as file: -# node = ast.parse(file.read()) -# -# functions = [n for n in node.body if isinstance(n, ast.FunctionDef)] -# classes = [n for n in node.body if isinstance(n, ast.ClassDef)] -# 
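Taken together, the hunks above replace the DataStore class with plain xarray Datasets: the file readers return a Dataset, Stokes noise variances come from the standalone dtscalibration.variance_stokes functions, and calibration is exposed through the registered `dts` accessor. A minimal single-ended sketch of that workflow, assembled from the calls shown in the notebook hunks above (the data directory is a placeholder):

    from dtscalibration import read_silixa_files
    from dtscalibration.dts_accessor import DtsAccessor  # noqa: E402, registers the "dts" accessor
    from dtscalibration.variance_stokes import variance_stokes_constant

    ds = read_silixa_files(directory="path/to/silixa_xml_files")  # placeholder path

    sections = {
        "probe1Temperature": [slice(20, 25.5)],  # warm bath
        "probe2Temperature": [slice(5.5, 15.5)],  # cold bath
    }

    # Noise variance of the (anti-)Stokes detector, estimated over the reference sections
    st_var, resid = variance_stokes_constant(
        ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=True
    )
    ast_var, _ = variance_stokes_constant(
        ds.dts.ast, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False
    )

    # Calibration and Monte Carlo confidence intervals are returned as new Datasets
    out = ds.dts.calibrate_single_ended(sections=sections, st_var=st_var, ast_var=ast_var)
    out2 = ds.dts.monte_carlo_single_ended(
        result=out, st_var=st_var, ast_var=ast_var, conf_ints=[2.5, 97.5], mc_sample_size=500
    )
    out.tmpf.plot()  # calibrated forward-channel temperature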
__all__.extend([i.name for i in functions]) -# -# __all__.sort() -# print(__all__) diff --git a/src/dtscalibration/averaging_utils.py b/src/dtscalibration/averaging_utils.py index d17b1f56..662510b5 100644 --- a/src/dtscalibration/averaging_utils.py +++ b/src/dtscalibration/averaging_utils.py @@ -7,7 +7,7 @@ def inverse_variance_weighted_mean( tmpw_store="tmpw", tmpw_var_store="tmpw_var", ): - """Compute inverse variance weighted average, and add result in-place. + """Compute inverse variance weighted average, and add result in-place. Parameters ---------- diff --git a/src/dtscalibration/calibrate_utils.py b/src/dtscalibration/calibrate_utils.py index 960fa220..e1156157 100644 --- a/src/dtscalibration/calibrate_utils.py +++ b/src/dtscalibration/calibrate_utils.py @@ -5,14 +5,16 @@ from scipy.sparse import linalg as ln -def parse_st_var(ds, st_var, st_label="st"): +def parse_st_var(st, st_var): """ Utility function to check the st_var input and to return in DataArray format. Parameters ---------- - ds : DataStore + st : DataArray + Stokes/anti-stokes data variable for which the variance is being parsed. + st_var : float, callable, array-like If `float` the variance of the noise from the Stokes detector is described with a single value. @@ -21,26 +23,24 @@ def parse_st_var(ds, st_var, st_label="st"): Or when the variance is a function of the intensity (Poisson distributed) define a DataArray of the shape as ds.st, where the variance can be a function of time and/or x. - st_label : string - Name of the (reverse) stokes/anti-stokes data variable which is being - parsed. Returns ------- - Parsed st_var + st_var_sec : DataArray + The variance of the noise from the Stokes detector. """ if callable(st_var): - st_var_sec = st_var(ds[st_label]) + st_var_sec = st_var(st) else: - st_var_sec = xr.ones_like(ds[st_label]) * st_var + st_var_sec = xr.ones_like(st) * st_var - assert np.all(np.isfinite(st_var_sec)), ( - "NaN/inf values detected in " + st_label + "_var. Please check input." - ) + assert np.all( + np.isfinite(st_var_sec) + ), "NaN/inf values detected in computed st_var. Please check input." - assert np.all(st_var_sec > 0.0), ( - "Negative values detected in " + st_label + "_var. Please check input." - ) + assert np.all( + st_var_sec > 0.0 + ), "Negative values detected in computed st_var. Please check input." 
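# A hedged illustration (not part of this patch) of the three `st_var` forms the
# docstring above accepts; `ds` is assumed to hold a Stokes DataArray named "st":
#
#   parse_st_var(ds.st, 5.0)                        # one noise variance for the whole fiber
#   parse_st_var(ds.st, lambda st: 0.01 * st)       # variance scaling with intensity (Poisson-like)
#   parse_st_var(ds.st, xr.ones_like(ds.st) * 5.0)  # variance varying over x and/or time
#
# Each form is broadcast to a DataArray with the shape of `st` before the finiteness
# and positivity checks above are applied.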
return st_var_sec @@ -53,17 +53,20 @@ def calibration_single_ended_helper( fix_alpha, fix_dalpha, fix_gamma, - matching_indices, - nt, - nta, - nx, + matching_sections, + trans_att, solver, ): """Only used in `calibration_single_ended()`""" - for input_item in [st_var, ast_var]: - assert input_item is not None, ( - "For wls define all " "variances (`st_var`, " "`ast_var`) " - ) + nt = self.dts.nt + nx = self.dts.nx + nta = len(trans_att) + + if matching_sections: + matching_indices = match_sections(self, matching_sections) + else: + matching_indices = None + calc_cov = True split = calibration_single_ended_solver( self, @@ -73,6 +76,7 @@ def calibration_single_ended_helper( calc_cov=calc_cov, solver="external_split", matching_indices=matching_indices, + trans_att=trans_att, ) y = split["y"] w = split["w"] @@ -80,12 +84,12 @@ def calibration_single_ended_helper( # Stack all X's if fix_alpha: assert not fix_dalpha, "Use either `fix_dalpha` or `fix_alpha`" - assert fix_alpha[0].size == self.x.size, ( - "fix_alpha also needs to be defined outside the reference " "sections" - ) - assert fix_alpha[1].size == self.x.size, ( - "fix_alpha also needs to be defined outside the reference " "sections" - ) + assert ( + fix_alpha[0].size == nx + ), "fix_alpha also needs to be defined outside the reference sections" + assert ( + fix_alpha[1].size == nx + ), "fix_alpha also needs to be defined outside the reference sections" p_val = split["p0_est_alpha"].copy() if np.any(matching_indices): @@ -196,6 +200,7 @@ def calibration_single_ended_solver( # noqa: MC0001 calc_cov=True, solver="sparse", matching_indices=None, + trans_att=[], verbose=False, ): """ @@ -237,7 +242,7 @@ def calibration_single_ended_solver( # noqa: MC0001 """ # get ix_sec argsort so the sections are in order of increasing x - ix_sec = ds.ufunc_per_section(sections=sections, x_indices=True, calc_per="all") + ix_sec = ds.dts.ufunc_per_section(sections=sections, x_indices=True, calc_per="all") ds_sec = ds.isel(x=ix_sec) x_sec = ds_sec["x"].values @@ -245,7 +250,7 @@ def calibration_single_ended_solver( # noqa: MC0001 nx = x_sec.size nt = ds.time.size no = ds.x.size - nta = ds.trans_att.size + nta = len(trans_att) nm = matching_indices.shape[0] if np.any(matching_indices) else 0 if np.any(matching_indices): @@ -256,7 +261,7 @@ def calibration_single_ended_solver( # noqa: MC0001 p0_est_alpha = np.asarray([485.0] + no * [0.0] + nt * [1.4] + nta * nt * [0.0]) # X \gamma # Eq.34 - cal_ref = ds.ufunc_per_section( + cal_ref = ds.dts.ufunc_per_section( sections=sections, label="st", ref_temp_broadcasted=True, calc_per="all" ) # cal_ref = cal_ref # sort by increasing x @@ -287,10 +292,10 @@ def calibration_single_ended_solver( # noqa: MC0001 ) # X ta #not documented - if ds.trans_att.size > 0: + if nta > 0: TA_list = [] - for transient_att_xi in ds.trans_att.values: + for transient_att_xi in trans_att: # first index on the right hand side a the difficult splice # Deal with connector outside of fiber if transient_att_xi >= x_sec[-1]: @@ -339,10 +344,10 @@ def calibration_single_ended_solver( # noqa: MC0001 ) # make TA matrix - if ds.trans_att.size > 0: + if nta > 0: transient_m_data = np.zeros((nm, nta)) for ii, row in enumerate(matching_indices): - for jj, transient_att_xi in enumerate(ds.trans_att.values): + for jj, transient_att_xi in enumerate(trans_att): transient_m_data[ii, jj] = np.logical_and( transient_att_xi > x_all[row[0]], transient_att_xi < x_all[row[1]], @@ -388,8 +393,8 @@ def calibration_single_ended_solver( # noqa: MC0001 # w if st_var 
is not None: - st_var_sec = parse_st_var(ds, st_var, st_label="st").isel(x=ix_sec).values - ast_var_sec = parse_st_var(ds, ast_var, st_label="ast").isel(x=ix_sec).values + st_var_sec = parse_st_var(ds.st, st_var).isel(x=ix_sec).values + ast_var_sec = parse_st_var(ds.ast, ast_var).isel(x=ix_sec).values w = ( 1 @@ -400,24 +405,16 @@ def calibration_single_ended_solver( # noqa: MC0001 if np.any(matching_indices): st_var_ms0 = ( - parse_st_var(ds, st_var, st_label="st") - .isel(x=matching_indices[:, 0]) - .values + parse_st_var(ds.st, st_var).isel(x=matching_indices[:, 0]).values ) st_var_ms1 = ( - parse_st_var(ds, st_var, st_label="st") - .isel(x=matching_indices[:, 1]) - .values + parse_st_var(ds.st, st_var).isel(x=matching_indices[:, 1]).values ) ast_var_ms0 = ( - parse_st_var(ds, ast_var, st_label="ast") - .isel(x=matching_indices[:, 0]) - .values + parse_st_var(ds.ast, ast_var).isel(x=matching_indices[:, 0]).values ) ast_var_ms1 = ( - parse_st_var(ds, ast_var, st_label="ast") - .isel(x=matching_indices[:, 1]) - .values + parse_st_var(ds.ast, ast_var).isel(x=matching_indices[:, 1]).values ) w_ms = ( @@ -485,7 +482,7 @@ def calibration_single_ended_solver( # noqa: MC0001 return (p_sol, p_var, p_cov) if calc_cov else (p_sol, p_var) -def calibration_double_ended_helper( +def calibrate_double_ended_helper( self, sections, st_var, @@ -497,14 +494,24 @@ def calibration_double_ended_helper( nt, nta, nx, - nx_sec, ix_sec, - matching_indices, + matching_sections, + trans_att, solver, verbose, ): + nt = self.dts.nt + nx = self.dts.nx + + nx_sec = ix_sec.size + + if matching_sections: + matching_indices = match_sections(self, matching_sections) + else: + matching_indices = None + if fix_alpha or fix_gamma: - split = calibration_double_ended_solver( + split = calibrate_double_ended_solver( self, sections, st_var, @@ -514,10 +521,12 @@ def calibration_double_ended_helper( calc_cov=True, solver="external_split", matching_indices=matching_indices, + trans_att=trans_att, + nta=nta, verbose=verbose, ) else: - out = calibration_double_ended_solver( + out = calibrate_double_ended_solver( self, sections, st_var, @@ -527,6 +536,8 @@ def calibration_double_ended_helper( calc_cov=True, solver=solver, matching_indices=matching_indices, + trans_att=trans_att, + nta=nta, verbose=verbose, ) @@ -820,7 +831,7 @@ def calibration_double_ended_helper( # put E outside of reference section in solution # concatenating makes a copy of the data instead of using a # pointer - ds_sub = self[["st", "ast", "rst", "rast", "trans_att"]] + ds_sub = self[["st", "ast", "rst", "rast"]] ds_sub["df"] = (("time",), out[0][:nt]) ds_sub["df_var"] = (("time",), out[1][:nt]) ds_sub["db"] = (("time",), out[0][nt : 2 * nt]) @@ -854,6 +865,7 @@ def calibration_double_ended_helper( rst_var=rst_var, rast_var=rast_var, ix_alpha_is_zero=ix_sec[0], + trans_att=trans_att, talpha_fw=talpha_fw, talpha_bw=talpha_bw, talpha_fw_var=talpha_fw_var, @@ -1105,7 +1117,7 @@ def calibration_double_ended_helper( return p_cov, p_val, p_var -def calibration_double_ended_solver( # noqa: MC0001 +def calibrate_double_ended_solver( # noqa: MC0001 ds, sections=None, st_var=None, @@ -1115,6 +1127,8 @@ def calibration_double_ended_solver( # noqa: MC0001 calc_cov=True, solver="sparse", matching_indices=None, + trans_att=None, + nta=None, verbose=False, ): """ @@ -1173,14 +1187,13 @@ def calibration_double_ended_solver( # noqa: MC0001 ------- """ - ix_sec = ds.ufunc_per_section(sections=sections, x_indices=True, calc_per="all") + ix_sec = 
ds.dts.ufunc_per_section(sections=sections, x_indices=True, calc_per="all") ds_sec = ds.isel(x=ix_sec) ix_alpha_is_zero = ix_sec[0] # per definition of E x_sec = ds_sec["x"].values nx_sec = x_sec.size nt = ds.time.size - nta = ds.trans_att.size # Calculate E as initial estimate for the E calibration. # Does not require ta to be passed on @@ -1192,6 +1205,7 @@ def calibration_double_ended_solver( # noqa: MC0001 rst_var=rst_var, rast_var=rast_var, ix_alpha_is_zero=ix_alpha_is_zero, + trans_att=trans_att, ) df_est, db_est = calc_df_db_double_est(ds, sections, ix_alpha_is_zero, 485.0) @@ -1202,17 +1216,17 @@ def calibration_double_ended_solver( # noqa: MC0001 Zero_d, Z_TA_fw, Z_TA_bw, - ) = construct_submatrices(sections, nt, nx_sec, ds, ds.trans_att.values, x_sec) + ) = construct_submatrices(sections, nt, nx_sec, ds, trans_att, x_sec) # y # Eq.41--45 y_F = np.log(ds_sec.st / ds_sec.ast).values.ravel() y_B = np.log(ds_sec.rst / ds_sec.rast).values.ravel() # w - st_var_sec = parse_st_var(ds, st_var, st_label="st").isel(x=ix_sec).values - ast_var_sec = parse_st_var(ds, ast_var, st_label="ast").isel(x=ix_sec).values - rst_var_sec = parse_st_var(ds, rst_var, st_label="rst").isel(x=ix_sec).values - rast_var_sec = parse_st_var(ds, rast_var, st_label="rast").isel(x=ix_sec).values + st_var_sec = parse_st_var(ds.st, st_var).isel(x=ix_sec).values + ast_var_sec = parse_st_var(ds.ast, ast_var).isel(x=ix_sec).values + rst_var_sec = parse_st_var(ds.rst, rst_var).isel(x=ix_sec).values + rast_var_sec = parse_st_var(ds.rast, rast_var).isel(x=ix_sec).values w_F = ( 1 @@ -1261,7 +1275,7 @@ def calibration_double_ended_solver( # noqa: MC0001 matching_indices[:, 0], matching_indices[:, 1], nt, - ds.trans_att.values, + trans_att, ) p0_est = np.concatenate( @@ -1335,28 +1349,20 @@ def calibration_double_ended_solver( # noqa: MC0001 y = np.concatenate((y_F, y_B, y_eq1, y_eq2, y_eq3)) - st_var_hix = parse_st_var(ds, st_var, st_label="st").isel(x=hix).values - ast_var_hix = parse_st_var(ds, ast_var, st_label="ast").isel(x=hix).values - rst_var_hix = parse_st_var(ds, rst_var, st_label="rst").isel(x=hix).values - rast_var_hix = parse_st_var(ds, rast_var, st_label="rast").isel(x=hix).values + st_var_hix = parse_st_var(ds.st, st_var).isel(x=hix).values + ast_var_hix = parse_st_var(ds.ast, ast_var).isel(x=hix).values + rst_var_hix = parse_st_var(ds.rst, rst_var).isel(x=hix).values + rast_var_hix = parse_st_var(ds.rast, rast_var).isel(x=hix).values - st_var_tix = parse_st_var(ds, st_var, st_label="st").isel(x=tix).values - ast_var_tix = parse_st_var(ds, ast_var, st_label="ast").isel(x=tix).values - rst_var_tix = parse_st_var(ds, rst_var, st_label="rst").isel(x=tix).values - rast_var_tix = parse_st_var(ds, rast_var, st_label="rast").isel(x=tix).values + st_var_tix = parse_st_var(ds.st, st_var).isel(x=tix).values + ast_var_tix = parse_st_var(ds.ast, ast_var).isel(x=tix).values + rst_var_tix = parse_st_var(ds.rst, rst_var).isel(x=tix).values + rast_var_tix = parse_st_var(ds.rast, rast_var).isel(x=tix).values - st_var_mnc = ( - parse_st_var(ds, st_var, st_label="st").isel(x=ix_match_not_cal).values - ) - ast_var_mnc = ( - parse_st_var(ds, ast_var, st_label="ast").isel(x=ix_match_not_cal).values - ) - rst_var_mnc = ( - parse_st_var(ds, rst_var, st_label="rst").isel(x=ix_match_not_cal).values - ) - rast_var_mnc = ( - parse_st_var(ds, rast_var, st_label="rast").isel(x=ix_match_not_cal).values - ) + st_var_mnc = parse_st_var(ds.st, st_var).isel(x=ix_match_not_cal).values + ast_var_mnc = parse_st_var(ds.ast, 
ast_var).isel(x=ix_match_not_cal).values + rst_var_mnc = parse_st_var(ds.rst, rst_var).isel(x=ix_match_not_cal).values + rast_var_mnc = parse_st_var(ds.rast, rast_var).isel(x=ix_match_not_cal).values w_eq1 = 1 / ( ( @@ -1445,18 +1451,8 @@ def calibration_double_ended_solver( # noqa: MC0001 elif not calc_cov and not verbose: p_sol, p_var = out - # if verbose: - # from dtscalibration.plot import plot_location_residuals_double_ended - # - # dv = plot_location_residuals_double_ended(ds, werr, hix, tix, ix_sec, - # ix_match_not_cal, nt) - - # p_sol contains the int diff att of all the locations within the - # reference sections. po_sol is its expanded version that contains also - # the int diff att for outside the reference sections. - # calculate talpha_fw and bw for attenuation - if ds.trans_att.size > 0: + if nta > 0: if np.any(matching_indices): ta = p_sol[1 + 2 * nt + ix_from_cal_match_to_glob.size :].reshape( (nt, 2, nta), order="F" @@ -1481,7 +1477,7 @@ def calibration_double_ended_solver( # noqa: MC0001 # put E outside of reference section in solution # concatenating makes a copy of the data instead of using a pointer - ds_sub = ds[["st", "ast", "rst", "rast", "trans_att"]] + ds_sub = ds[["st", "ast", "rst", "rast"]] ds_sub["df"] = (("time",), p_sol[1 : 1 + nt]) ds_sub["df_var"] = (("time",), p_var[1 : 1 + nt]) ds_sub["db"] = (("time",), p_sol[1 + nt : 1 + 2 * nt]) @@ -1494,6 +1490,7 @@ def calibration_double_ended_solver( # noqa: MC0001 rst_var=rst_var, rast_var=rast_var, ix_alpha_is_zero=ix_alpha_is_zero, + trans_att=trans_att, talpha_fw=talpha_fw, talpha_bw=talpha_bw, talpha_fw_var=talpha_fw_var, @@ -1725,7 +1722,7 @@ def construct_submatrices_matching_sections(x, ix_sec, hix, tix, nt, trans_att): Zero_eq3_gamma = sp.coo_matrix(([], ([], [])), shape=(nt * nx_nm, 1)) # TA - if trans_att.size > 0: + if len(trans_att) > 0: # unpublished BdT TA_eq1_list = [] @@ -1846,7 +1843,7 @@ def construct_submatrices(sections, nt, nx, ds, trans_att, x_sec): # Z \gamma # Eq.47 cal_ref = np.array( - ds.ufunc_per_section( + ds.dts.ufunc_per_section( sections=sections, label="st", ref_temp_broadcasted=True, calc_per="all" ) ) @@ -1875,7 +1872,7 @@ def construct_submatrices(sections, nt, nx, ds, trans_att, x_sec): # Zero # Eq.45 Zero_d = sp.coo_matrix(([], ([], [])), shape=(nt * nx, nt)) # Zero_E = sp.coo_matrix(([], ([], [])), shape=(nt * nx, (nx - 1))) - if trans_att.size > 0: + if len(trans_att) > 0: # unpublished BdT TA_fw_list = [] @@ -2110,6 +2107,7 @@ def calc_alpha_double( rst_var=None, rast_var=None, ix_alpha_is_zero=-1, + trans_att=None, talpha_fw=None, talpha_bw=None, talpha_fw_var=None, @@ -2138,8 +2136,8 @@ def calc_alpha_double( else: rast_var_val = np.asarray(rast_var) - i_var_fw = ds.i_var(st_var_val, ast_var_val, st_label="st", ast_label="ast") - i_var_bw = ds.i_var(rst_var_val, rast_var_val, st_label="rst", ast_label="rast") + i_var_fw = ds.st**-2 * st_var_val + ds.ast**-2 * ast_var_val + i_var_bw = ds.rst**-2 * rst_var_val + ds.rast**-2 * rast_var_val i_fw = np.log(ds.st / ds.ast) i_bw = np.log(ds.rst / ds.rast) @@ -2154,15 +2152,13 @@ def calc_alpha_double( D_F_var = ds["df_var"] D_B_var = ds["db_var"] - if ds.trans_att.size > 0: + if len(trans_att) > 0: # Can be improved by including covariances. That reduces the # uncert. 
ta_arr_fw = np.zeros((ds.x.size, ds["time"].size)) ta_arr_fw_var = np.zeros((ds.x.size, ds["time"].size)) - for tai, taxi, tai_var in zip( - talpha_fw.T, ds.trans_att.values, talpha_fw_var.T - ): + for tai, taxi, tai_var in zip(talpha_fw.T, trans_att, talpha_fw_var.T): ta_arr_fw[ds.x.values >= taxi] = ( ta_arr_fw[ds.x.values >= taxi] + tai ) @@ -2172,9 +2168,7 @@ def calc_alpha_double( ta_arr_bw = np.zeros((ds.x.size, ds["time"].size)) ta_arr_bw_var = np.zeros((ds.x.size, ds["time"].size)) - for tai, taxi, tai_var in zip( - talpha_bw.T, ds.trans_att.values, talpha_bw_var.T - ): + for tai, taxi, tai_var in zip(talpha_bw.T, trans_att, talpha_bw_var.T): ta_arr_bw[ds.x.values < taxi] = ta_arr_bw[ds.x.values < taxi] + tai ta_arr_bw_var[ds.x.values < taxi] = ( ta_arr_bw_var[ds.x.values < taxi] + tai_var @@ -2228,10 +2222,10 @@ def calc_df_db_double_est(ds, sections, ix_alpha_is_zero, gamma_est): Ibwx0 = np.log( ds.rst.isel(x=ix_alpha_is_zero) / ds.rast.isel(x=ix_alpha_is_zero) ).values - ref_temps_refs = ds.ufunc_per_section( + ref_temps_refs = ds.dts.ufunc_per_section( sections=sections, label="st", ref_temp_broadcasted=True, calc_per="all" ) - ix_sec = ds.ufunc_per_section(sections=sections, x_indices=True, calc_per="all") + ix_sec = ds.dts.ufunc_per_section(sections=sections, x_indices=True, calc_per="all") ref_temps_x0 = ( ref_temps_refs[ix_sec == ix_alpha_is_zero].flatten().compute() + 273.15 ) @@ -2274,14 +2268,20 @@ def match_sections(ds, matching_sections): txs ) - hix = ds.ufunc_per_section( - sections={0: [i[0] for i in matching_sections]}, x_indices=True, calc_per="all" + hix = ds.dts.ufunc_per_section( + sections={0: [i[0] for i in matching_sections]}, + x_indices=True, + calc_per="all", + suppress_section_validation=True, ) tixl = [] for _, tslice, reverse_flag in matching_sections: - ixi = ds.ufunc_per_section( - sections={0: [tslice]}, x_indices=True, calc_per="all" + ixi = ds.dts.ufunc_per_section( + sections={0: [tslice]}, + x_indices=True, + calc_per="all", + suppress_section_validation=True, ) if reverse_flag: diff --git a/src/dtscalibration/calibration/section_utils.py b/src/dtscalibration/calibration/section_utils.py index 2d1ef100..df9edcb7 100644 --- a/src/dtscalibration/calibration/section_utils.py +++ b/src/dtscalibration/calibration/section_utils.py @@ -5,70 +5,130 @@ from dtscalibration.datastore_utils import ufunc_per_section_helper -def set_sections(ds: xr.Dataset, sections: dict[str, list[slice]]) -> xr.Dataset: - sections_validated = None +def set_sections(ds: xr.Dataset, sections: dict[str, list[slice]]): + ds.attrs["_sections"] = yaml.dump(sections) - if sections is not None: - sections_validated = validate_sections(ds, sections=sections) - ds.attrs["_sections"] = yaml.dump(sections_validated) - return ds +def set_matching_sections(ds: xr.Dataset, matching_sections: dict[str, list[slice]]): + ds.attrs["_matching_sections"] = yaml.dump(matching_sections) -def validate_sections(ds: xr.Dataset, sections: dict[str, list[slice]]): - assert isinstance(sections, dict) +def validate_no_overlapping_sections(sections: dict[str, list[slice]]): + """ + Check if the sections do not overlap. + + Parameters + ---------- + sections : dict[str, list[slice]] + The keys of the dictionary are the names of the sections. + The values are lists of slice objects. 
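# A hedged sketch (not part of this patch) of the mapping these validators expect,
# mirroring the sections dict used in the example notebooks:
#
#   sections = {
#       "probe1Temperature": [slice(20, 25.5)],   # warm bath
#       "probe2Temperature": [slice(5.5, 15.5)],  # cold bath
#   }
#
# validate_no_overlapping_sections() sorts all stretches by their start and asserts
# that the flattened start/stop sequence is monotonic, i.e. no two stretches overlap in x.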
- # be less restrictive for capitalized labels - # find lower cases label - labels = np.reshape([[s.lower(), s] for s in ds.data_vars.keys()], (-1,)).tolist() + Returns + ------- + None + + Raises + ------ + AssertionError + If the sections overlap. + """ + all_stretches = list() - sections_fix = dict() for k, v in sections.items(): - if k.lower() in labels: - i_lower_case = labels.index(k.lower()) - i_normal_case = i_lower_case + 1 - k_normal_case = labels[i_normal_case] - sections_fix[k_normal_case] = v - else: - assert k in ds.data_vars, ( - "The keys of the " - "sections-dictionary should " - "refer to a valid timeserie " - "already stored in " - "ds.data_vars " - ) + for vi in v: + all_stretches.append(vi) + + # Check for overlapping slices + all_start_stop = [[stretch.start, stretch.stop] for stretch in all_stretches] + isorted_start = np.argsort([i[0] for i in all_start_stop]) + all_start_stop_startsort = [all_start_stop[i] for i in isorted_start] + all_start_stop_startsort_flat = sum(all_start_stop_startsort, []) # type: ignore + assert all_start_stop_startsort_flat == sorted( + all_start_stop_startsort_flat + ), "Sections contains overlapping stretches" + pass + + +def validate_sections_definition(sections: dict[str, list[slice]]): + """ + Check if the sections are defined correctly. The sections are defined + correctly if: + - The keys of the sections-dictionary are strings (assertion) + - The values of the sections-dictionary are lists (assertion) + + Parameters + ---------- + sections : dict[str, list[slice]] + The keys of the dictionary are the names of the sections. + The values are lists of slice objects. + + Returns + ------- + None + + Raises + ------ + AssertionError + If the sections are not defined correctly. + """ + assert isinstance(sections, dict) - sections_fix_slice_fixed = dict() + for k, v in sections.items(): + assert isinstance(k, str), ( + "The keys of the " "sections-dictionary should " "be strings" + ) - for k, v in sections_fix.items(): assert isinstance(v, (list, tuple)), ( "The values of the sections-dictionary " "should be lists of slice objects." ) - for vi in v: - assert isinstance(vi, slice), ( - "The values of the sections-dictionary should " - "be lists of slice objects." - ) +def validate_sections(ds: xr.Dataset, sections: dict[str, list[slice]]): + """ + Check if the sections are valid. The sections are valid if: + - The keys of the sections-dictionary refer to a valid timeserie + already stored in ds.data_vars (assertion) + - The values of the sections-dictionary are lists of slice objects. + (assertion) + - The slices are within the x-dimension (assertion) + - The slices do not overlap (assertion) + + Parameters + ---------- + ds : xr.Dataset + The dataset that contains the timeseries that are referred to in + the sections-dictionary. + sections : dict[str, list[slice]] + The keys of the dictionary are the names of the sections. + The values are lists of slice objects. + + Returns + ------- + None + + Raises + ------ + AssertionError + If the sections are not valid. + """ + validate_sections_definition(sections=sections) + validate_no_overlapping_sections(sections=sections) + + for k, v in sections.items(): + assert k in ds.data_vars, ( + "The keys of the " + "sections-dictionary should " + "refer to a valid timeserie " + "already stored in " + "ds.data_vars " + ) + + for vi in v: assert ds.x.sel(x=vi).size > 0, ( f"Better define the {k} section. 
You tried {vi}, " "which is not within the x-dimension" ) - - # sorted stretches - stretch_unsort = [slice(float(vi.start), float(vi.stop)) for vi in v] - stretch_start = [i.start for i in stretch_unsort] - stretch_i_sorted = np.argsort(stretch_start) - sections_fix_slice_fixed[k] = [stretch_unsort[i] for i in stretch_i_sorted] - - # Prevent overlapping slices - ix_sec = ufunc_per_section( - ds, sections=sections_fix_slice_fixed, x_indices=True, calc_per="all" - ) - assert np.unique(ix_sec).size == ix_sec.size, "The sections are overlapping" - - return sections_fix_slice_fixed + pass def ufunc_per_section( diff --git a/src/dtscalibration/calibration/utils.py b/src/dtscalibration/calibration/utils.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/dtscalibration/datastore_utils.py b/src/dtscalibration/datastore_utils.py index 71726888..3fe3555a 100644 --- a/src/dtscalibration/datastore_utils.py +++ b/src/dtscalibration/datastore_utils.py @@ -1,5 +1,4 @@ import warnings -from typing import TYPE_CHECKING from typing import Optional from typing import Union @@ -9,12 +8,9 @@ import numpy.typing as npt import xarray as xr -if TYPE_CHECKING: - from dtscalibration import DataStore - def check_dims( - ds: "DataStore", + ds: xr.Dataset, labels: Union[list[str], tuple[str]], correct_dims: Optional[tuple[str]] = None, ) -> None: @@ -397,113 +393,8 @@ def get_taf_values(self, pval, x, trans_att, axis=""): return arr_out -def check_deprecated_kwargs(kwargs): - """ - Internal function that parses the `kwargs` for depreciated keyword - arguments. - - Depreciated keywords raise an error, pending to be depreciated do not. - But this requires that the code currently deals with those arguments. - - Parameters - ---------- - kwargs : Dict - A dictionary with keyword arguments. - - Returns - ------- - - """ - msg = """Previously, it was possible to manually set the label from - which the Stokes and anti-Stokes were read within the DataStore - object. To reduce the clutter in the code base and be able to - maintain it, this option was removed. - See: https://github.com/dtscalibration/python-dts-calibration/issues/81 - - The new **fixed** names are: st, ast, rst, rast. - - It is still possible to use the previous defaults, for example when - reading stored measurements from netCDF, by renaming the labels. The - old default labels were ST, AST, REV-ST, REV-AST. - - ``` - ds = open_datastore(path_to_old_file) - ds = ds.rename_labels() - ds.calibration_double_ended( - st_var=1.5, - ast_var=1.5, - rst_var=1., - rast_var=1., - method='wls') - ``` - - ds.tmpw.plot() - """ - list_of_depr = [ - "st_label", - "ast_label", - "rst_label", - "rast_label", - "transient_asym_att_x", - "transient_att_x", - ] - list_of_pending_depr = [] - - kwargs = {k: v for k, v in kwargs.items() if k not in list_of_pending_depr} - - for k in kwargs: - if k in list_of_depr: - raise NotImplementedError(msg) - - if len(kwargs) != 0: - raise NotImplementedError( - "The following keywords are not " + "supported: " + ", ".join(kwargs.keys()) - ) - - pass - - -def check_timestep_allclose(ds: "DataStore", eps: float = 0.01) -> None: - """ - Check if all timesteps are of equal size. For now it is not possible to calibrate - over timesteps if the acquisition time of timesteps varies, as the Stokes variance - would change over time. - - The acquisition time is stored for single ended measurements in userAcquisitionTime, - for double ended measurements in userAcquisitionTimeFW and userAcquisitionTimeBW. 
- - Parameters - ---------- - ds : DataStore - eps : float - Default accepts 1% of relative variation between min and max acquisition time. - - Returns - ------- - - """ - dim = ds.channel_configuration["chfw"]["acquisitiontime_label"] - dt = ds[dim].data - dtmin = dt.min() - dtmax = dt.max() - dtavg = (dtmin + dtmax) / 2 - assert (dtmax - dtmin) / dtavg < eps, ( - "Acquisition time is Forward channel not equal for all " "time steps" - ) - - if ds.is_double_ended: - dim = ds.channel_configuration["chbw"]["acquisitiontime_label"] - dt = ds[dim].data - dtmin = dt.min() - dtmax = dt.max() - dtavg = (dtmin + dtmax) / 2 - assert (dtmax - dtmin) / dtavg < eps, ( - "Acquisition time Backward channel is not equal " "for all time steps" - ) - - def get_netcdf_encoding( - ds: "DataStore", zlib: bool = True, complevel: int = 5, **kwargs + ds: xr.Dataset, zlib: bool = True, complevel: int = 5, **kwargs ) -> dict: """Get default netcdf compression parameters. The same for each data variable. @@ -699,7 +590,14 @@ def get_params_from_pval_double_ended(ip, coords, p_val=None, p_cov=None): def get_params_from_pval_single_ended( ip, coords, p_val=None, p_var=None, p_cov=None, fix_alpha=None ): - assert len(p_val) == ip.npar, "Length of p_val is incorrect" + if p_val is not None: + assert len(p_val) == ip.npar, "Length of p_val is incorrect" + + if p_var is not None: + assert len(p_var) == ip.npar, "Length of p_var is incorrect" + + if p_cov is not None: + assert p_cov.shape == (ip.npar, ip.npar), "Shape of p_cov is incorrect" params = xr.Dataset(coords=coords) param_covs = xr.Dataset(coords=coords) @@ -787,12 +685,12 @@ def get_params_from_pval_single_ended( def merge_double_ended( - ds_fw: "DataStore", - ds_bw: "DataStore", + ds_fw: xr.Dataset, + ds_bw: xr.Dataset, cable_length: float, plot_result: bool = True, verbose: bool = True, -) -> "DataStore": +) -> xr.Dataset: """ Some measurements are not set up on the DTS-device as double-ended meausurements. This means that the two channels have to be merged manually. @@ -857,11 +755,11 @@ def merge_double_ended( def merge_double_ended_times( - ds_fw: "DataStore", - ds_bw: "DataStore", + ds_fw: xr.Dataset, + ds_bw: xr.Dataset, verify_timedeltas: bool = True, verbose: bool = True, -) -> tuple["DataStore", "DataStore"]: +) -> tuple[xr.Dataset, xr.Dataset]: """Helper for `merge_double_ended()` to deal with missing measurements. The number of measurements of the forward and backward channels might get out of sync if the device shuts down before the measurement of the last channel @@ -996,8 +894,8 @@ def merge_double_ended_times( def shift_double_ended( - ds: "DataStore", i_shift: int, verbose: bool = True -) -> "DataStore": + ds: xr.Dataset, i_shift: int, verbose: bool = True +) -> xr.Dataset: """ The cable length was initially configured during the DTS measurement. 
For double ended measurements it is important to enter the correct length so that the forward channel and the @@ -1030,8 +928,6 @@ def shift_double_ended( ds2 : DataStore object With a shifted x-axis """ - from dtscalibration import DataStore - assert isinstance(i_shift, (int, np.integer)) nx = ds.x.size @@ -1073,11 +969,11 @@ def shift_double_ended( if not_included and verbose: print("I dont know what to do with the following data", not_included) - return DataStore(data_vars=d2_data, coords=d2_coords, attrs=ds.attrs) + return xr.Dataset(data_vars=d2_data, coords=d2_coords, attrs=ds.attrs) def suggest_cable_shift_double_ended( - ds: "DataStore", + ds: xr.Dataset, irange: npt.NDArray[np.int_], plot_result: bool = True, **fig_kwargs, @@ -1106,8 +1002,7 @@ def suggest_cable_shift_double_ended( Parameters ---------- - ds : DataSore object - DataStore object that needs to be shifted + ds : Xarray Dataset irange : array-like a numpy array with data of type int. Containing all the shift index that are tested. @@ -1334,7 +1229,7 @@ def ufunc_per_section_helper( 7. x-coordinate index - >>> ix_loc = ufunc_per_section_helperx_coords=d.x) + >>> ix_loc = ufunc_per_section_helper(x_coords=d.x) Note @@ -1376,6 +1271,7 @@ def func(a): assert callable(func) assert calc_per in ["all", "section", "stretch"] + assert "x_indices" not in func_kwargs, "pass x_coords arg instead" if x_coords is None and ( (dataarray is not None and hasattr(dataarray.data, "chunks")) @@ -1395,6 +1291,8 @@ def func(a): assert subtract_from_dataarray is None assert not subtract_reference_from_dataarray assert not ref_temp_broadcasted + assert not func_kwargs, "Unsupported kwargs" + # so it is slicable with x-indices _x_indices = x_coords.astype(int) * 0 + np.arange(x_coords.size) arg1 = _x_indices.sel(x=stretch).data diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/dts_accessor.py similarity index 57% rename from src/dtscalibration/datastore.py rename to src/dtscalibration/dts_accessor.py index 2b1a6064..b841b4c3 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/dts_accessor.py @@ -1,161 +1,66 @@ -import os -import warnings - -import dask import dask.array as da import numpy as np import scipy.stats as sst import xarray as xr import yaml -from dtscalibration.calibrate_utils import calibration_double_ended_helper +from dtscalibration.calibrate_utils import calibrate_double_ended_helper from dtscalibration.calibrate_utils import calibration_single_ended_helper -from dtscalibration.calibrate_utils import match_sections from dtscalibration.calibrate_utils import parse_st_var +from dtscalibration.calibration.section_utils import set_matching_sections from dtscalibration.calibration.section_utils import set_sections +from dtscalibration.calibration.section_utils import validate_no_overlapping_sections from dtscalibration.calibration.section_utils import validate_sections +from dtscalibration.calibration.section_utils import validate_sections_definition from dtscalibration.datastore_utils import ParameterIndexDoubleEnded from dtscalibration.datastore_utils import ParameterIndexSingleEnded -from dtscalibration.datastore_utils import check_deprecated_kwargs -from dtscalibration.datastore_utils import check_timestep_allclose from dtscalibration.datastore_utils import get_params_from_pval_double_ended from dtscalibration.datastore_utils import get_params_from_pval_single_ended from dtscalibration.datastore_utils import ufunc_per_section_helper -from dtscalibration.io.utils import _dim_attrs -from 
dtscalibration.variance_helpers import variance_stokes_constant_helper -from dtscalibration.variance_helpers import variance_stokes_exponential_helper -from dtscalibration.variance_helpers import variance_stokes_linear_helper - -dtsattr_namelist = ["double_ended_flag"] -dim_attrs = {k: v for kl, v in _dim_attrs.items() for k in kl} -warnings.filterwarnings( - "ignore", message="xarray subclass DataStore should explicitly define __slots__" -) - - -class DataStore(xr.Dataset): - """The data class that stores the measurements, contains calibration - methods to relate Stokes and anti-Stokes to temperature. The user should - never initiate this class directly, but use open_datastore - functions instead. - - Parameters - ---------- - data_vars : dict-like, optional - A mapping from variable names to :py:class:`~xarray.DataArray` - objects, :py:class:`~xarray.Variable` objects or tuples of the - form ``(dims, data[, attrs])`` which can be used as arguments to - create a new ``Variable``. Each dimension must have the same length - in all variables in which it appears. - coords : dict-like, optional - Another mapping in the same form as the `variables` argument, - except the each item is saved on the datastore as a "coordinate". - These variables have an associated meaning: they describe - constant/fixed/independent quantities, unlike the - varying/measured/dependent quantities that belong in `variables`. - Coordinates values may be given by 1-dimensional arrays or scalars, - in which case `dims` do not need to be supplied: 1D arrays will be - assumed to give index values along the dimension with the same - name. - attrs : dict-like, optional - Global attributes to save on this datastore. - sections : Dict[str, List[slice]], optional - Sections for calibration. The dictionary should contain key-var - couples in which the key is the name of the calibration temp time - series. And the var is a list of slice objects as 'slice(start, - stop)'; start and stop in meter (float). - compat : {'broadcast_equals', 'equals', 'identical'}, optional - String indicating how to compare variables of the same name for - potential conflicts when initializing this datastore: - - 'broadcast_equals': all values must be equal when variables are - broadcast against each other to ensure common dimensions. - - 'equals': all values and dimensions must be the same. - - 'identical': all values, dimensions and attributes must be the - same. - - See Also - -------- - dtscalibration.open_datastore : Load (calibrated) measurements from - netCDF-like file - """ - - def __init__(self, *args, autofill_dim_attrs=True, **kwargs): - with warnings.catch_warnings(): - # Filter out nanosecond precision warning: no good way to avoid ATM. 
- warnings.filterwarnings( - "ignore", - message="Converting non-nanosecond precision timedelta values to nanosecond precision.", - ) - super().__init__(*args, **kwargs) - - # check order of the dimensions of the data_vars - # first 'x' (if in initiated DataStore), then 'time', then the rest - ideal_dim = [] # perfect order dims - all_dim = list(self.dims) - - if all_dim: - if "x" in all_dim: - ideal_dim.append("x") - all_dim.pop(all_dim.index("x")) - - if "time": - if "time" in all_dim: - ideal_dim.append("time") - all_dim.pop(all_dim.index("time")) - - ideal_dim += all_dim - - for name, var in self._variables.items(): - var_dims = tuple( - dim for dim in ideal_dim if dim in (var.dims + (...,)) - ) - self._variables[name] = var.transpose(*var_dims) +from dtscalibration.io.utils import dim_attrs - if "trans_att" not in self.coords: - self.set_trans_att(trans_att=[]) - # Get attributes from dataset - for arg in args: - if isinstance(arg, xr.Dataset): - self.attrs = arg.attrs +@xr.register_dataset_accessor("dts") +class DtsAccessor: + def __init__(self, xarray_obj): + # cache xarray_obj + self._obj = xarray_obj + self.attrs = xarray_obj.attrs - # Add attributes to loaded dimensions - if autofill_dim_attrs: - for name, data_arri in self.coords.items(): - if name in dim_attrs and not self.coords[name].attrs: - self.coords[name].attrs = dim_attrs[name] + # alias commonly used variables + self.x = xarray_obj.x + self.nx = self.x.size + self.time = xarray_obj.time + self.nt = self.time.size - if "_sections" not in self.attrs: - self.attrs["_sections"] = yaml.dump(None) + # None if doesn't exist + self.st = xarray_obj.get("st") + self.ast = xarray_obj.get("ast") + self.rst = xarray_obj.get("rst") + self.rast = xarray_obj.get("rast") - if "sections" in kwargs: - self = set_sections(self, kwargs["sections"]) - - pass + self.acquisitiontime_fw = xarray_obj.get("userAcquisitionTimeFW") + self.acquisitiontime_bw = xarray_obj.get("userAcquisitionTimeBW") def __repr__(self): # __repr__ from xarray is used and edited. # 'xarray' is prepended. 
so we remove it and add 'dtscalibration' - s = xr.core.formatting.dataset_repr(self) - name_module = type(self).__name__ - preamble_new = "" % name_module + s = xr.core.formatting.dataset_repr(self._obj) + name_module = type(self._obj).__name__ + preamble_new = f"" # Add sections to new preamble preamble_new += "\nSections:" - if hasattr(self, "_sections") and self.sections: + if hasattr(self._obj, "_sections") and self.sections: preamble_new += "\n" - - if "units" in self.x: - unit = self.x.units - else: - unit = "" + unit = self.x.attrs.get("unit", "") for k, v in self.sections.items(): preamble_new += f" {k: <23}" # Compute statistics reference section timeseries - sec_stat = f"({float(self[k].mean()):6.2f}" - sec_stat += f" +/-{float(self[k].std()):5.2f}" + sec_stat = f"({float(self._obj[k].mean()):6.2f}" + sec_stat += f" +/-{float(self._obj[k].std()):5.2f}" sec_stat += "\N{DEGREE SIGN}C)\t" preamble_new += sec_stat @@ -209,14 +114,14 @@ def sections(self): ------- """ - if "_sections" not in self.attrs: - self.attrs["_sections"] = yaml.dump(None) + if "_sections" not in self._obj.attrs: + self._obj.attrs["_sections"] = yaml.dump(None) - return yaml.load(self.attrs["_sections"], Loader=yaml.UnsafeLoader) + return yaml.load(self._obj.attrs["_sections"], Loader=yaml.UnsafeLoader) @sections.deleter def sections(self): - self.attrs["_sections"] = yaml.dump(None) + self._obj.attrs["_sections"] = yaml.dump(None) @sections.setter def sections(self, value): @@ -224,390 +129,49 @@ def sections(self, value): "Not possible anymore. Instead, pass the sections as an argument to \n" "ds.dts.calibrate_single_ended() or ds.dts.calibrate_double_ended()." ) - raise DeprecationWarning(msg) - - def check_reference_section_values(self): - """ - Checks if the values of the used sections are of the right datatype - (floats), if there are finite number (no NaN/inf), and if the time - dimension corresponds with the time dimension of the st/ast data. - - Parameters - ---------- - - Returns - ------- - - """ - for key in self.sections.keys(): - if not np.issubdtype(self[key].dtype, np.floating): - raise ValueError( - 'Data of reference temperature "' - + key - + '" does not have a float data type. Please ensure that ' - "the data is of a valid type (e.g. np.float32)" - ) - - if np.any(~np.isfinite(self[key].values)): - raise ValueError( - 'NaN/inf value(s) found in reference temperature "' + key + '"' - ) - - if self[key].dims != ("time",): - raise ValueError( - "Time dimension of the reference temperature timeseries " - + key - + "is not the same as the time dimension" - + " of the Stokes measurement. See examples/notebooks/09" - + "Import_timeseries.ipynb for more info" - ) - - @property - def is_double_ended(self) -> float: - """ - Whether or not the data is loaded from a double-ended setup. - - Returns - ------- - - """ - if "isDoubleEnded" in self.attrs: - return bool(int(self.attrs["isDoubleEnded"])) - elif "customData:isDoubleEnded" in self.attrs: - # backward compatible to when only silixa files were supported - return bool(int(self.attrs["customData:isDoubleEnded"])) - else: - raise ValueError( - "Could not determine if the data was from a double-ended setup." 
- ) - - @is_double_ended.setter - def is_double_ended(self, flag: bool): - self.attrs["isDoubleEnded"] = flag - pass - - @property - def chfw(self) -> float: - """ - Zero based channel index of the forward measurements - - Returns - ------- - - """ - return int(self.attrs["forwardMeasurementChannel"]) - 1 # zero-based + raise NotImplementedError(msg) + # noinspection PyIncorrectDocstring @property - def chbw(self): - """ - Zero based channel index of the backward measurements - - Returns - ------- - + def matching_sections(self): """ - if self.is_double_ended: - return int(self.attrs["reverseMeasurementChannel"]) - 1 # zero-based - else: - return None + Define calibration sections. Each matching_section requires a reference + temperature time series, such as the temperature measured by an + external temperature sensor. They should already be part of the + DataStore object. - @property - def channel_configuration(self): - """ - Renaming conversion dictionary + Please look at the example notebook on `matching_sections` if you encounter + difficulties. + Parameters + ---------- + matching_sections : List[Tuple[slice, slice, bool]], optional + Provide a list of tuples. A tuple per matching section. Each tuple + has three items. The first two items are the slices of the sections + that are matched. The third item is a boolean and is True if the two + sections have a reverse direction ("J-configuration"). Returns ------- """ - d = { - "chfw": { - "st_label": "st", - "ast_label": "ast", - "acquisitiontime_label": "userAcquisitionTimeFW", - "time_start_label": "timeFWstart", - "time_label": "timeFW", - "time_end_label": "timeFWend", - }, - "chbw": { - "st_label": "rst", - "ast_label": "rast", - "acquisitiontime_label": "userAcquisitionTimeBW", - "time_start_label": "timeBWstart", - "time_label": "timeBW", - "time_end_label": "timeBWend", - }, - } - return d - - @property - def timeseries_keys(self): - """ - Returns the keys of all timeseires that can be used for calibration. - """ - return [k for k, v in self.data_vars.items() if v.dims == ("time",)] - - def to_netcdf( - self, - path=None, - mode="w", - format=None, - group=None, - engine=None, - encoding=None, - unlimited_dims=None, - compute=True, - ): - """Write datastore contents to a netCDF file. + if "_matching_sections" not in self._obj.attrs: + self._obj.attrs["_matching_sections"] = yaml.dump(None) - Parameters - ---------- - path : str, Path or file-like object, optional - Path to which to save this dataset. File-like objects are only - supported by the scipy engine. If no path is provided, this - function returns the resulting netCDF file as bytes; in this case, - we need to use scipy, which does not support netCDF version 4 (the - default format becomes NETCDF3_64BIT). - mode : {'w', 'a'}, optional - Write ('w') or append ('a') mode. If mode='w', any existing file at - this location will be overwritten. If mode='a', existing variables - will be overwritten. - format : {'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', - 'NETCDF3_CLASSIC'}, optional - File format for the resulting netCDF file: - * NETCDF4: Data is stored in an HDF5 file, using netCDF4 API - features. - * NETCDF4_CLASSIC: Data is stored in an HDF5 file, using only - netCDF 3 compatible API features. - * NETCDF3_64BIT: 64-bit offset version of the netCDF 3 file format, - which fully supports 2+ GB files, but is only compatible with - clients linked against netCDF version 3.6.0 or later. - * NETCDF3_CLASSIC: The classic netCDF 3 file format. 
It does not - handle 2+ GB files very well. - All formats are supported by the netCDF4-python library. - scipy.io.netcdf only supports the last two formats. - The default format is NETCDF4 if you are saving a file to disk and - have the netCDF4-python library available. Otherwise, xarray falls - back to using scipy to write netCDF files and defaults to the - NETCDF3_64BIT format (scipy does not support netCDF4). - group : str, optional - Path to the netCDF4 group in the given file to open (only works for - format='NETCDF4'). The group(s) will be created if necessary. - engine : {'netcdf4', 'scipy', 'h5netcdf'}, optional - Engine to use when writing netCDF files. If not provided, the - default engine is chosen based on available dependencies, with a - preference for 'netcdf4' if writing to a file on disk. - encoding : dict, optional - defaults to reasonable compression. Use encoding={} to disable - encoding. - Nested dictionary with variable names as keys and dictionaries of - variable specific encodings as values, e.g., - ``{'my_variable': {'dtype': 'int16', 'scale_factor': 0.1, - 'zlib': True}, ...}`` - The `h5netcdf` engine supports both the NetCDF4-style compression - encoding parameters ``{'zlib': True, 'complevel': 9}`` and the h5py - ones ``{'compression': 'gzip', 'compression_opts': 9}``. - This allows using any compression plugin installed in the HDF5 - library, e.g. LZF. - unlimited_dims : sequence of str, optional - Dimension(s) that should be serialized as unlimited dimensions. - By default, no dimensions are treated as unlimited dimensions. - Note that unlimited_dims may also be set via - ``dataset.encoding['unlimited_dims']``. - compute: boolean - If true compute immediately, otherwise return a - ``dask.delayed.Delayed`` object that can be computed later. - """ - if encoding is None: - encoding = self.get_default_encoding() - - if engine is None: - engine = "netcdf4" - - # Fix Bart Schilperoort: netCDF doesn't like None's - for attribute, value in self.attrs.items(): - if value is None: - self.attrs[attribute] = "" - - return super().to_netcdf( - path, - mode, - format=format, - group=group, - engine=engine, - encoding=encoding, - unlimited_dims=unlimited_dims, - compute=compute, + return yaml.load( + self._obj.attrs["_matching_sections"], Loader=yaml.UnsafeLoader ) - def to_mf_netcdf( - self, - folder_path=None, - filename_preamble="file_", - filename_extension=".nc", - format="netCDF4", - engine="netcdf4", - encoding=None, - mode="w", - compute=True, - time_chunks_from_key="st", - ): - """Write DataStore to multiple netCDF files. - - Splits the DataStore along the time dimension using the chunks. It - first checks if all chunks in `ds` are time aligned. If this is not - the case, calculate optimal chunk sizes using the - `time_chunks_from_key` array. The files are written per time-chunk to - disk. - - Almost similar to xarray.save_mfdataset, - - Parameters - ---------- - folder_path : str, Path - Folder to place the files - filename_preamble : str - Filename is `filename_preamble + '0000' + filename_extension - filename_extension : str - Filename is `filename_preamble + '0000' + filename_extension - mode : {'w', 'a'}, optional - Write ('w') or append ('a') mode. If mode='w', any existing file at - these locations will be overwritten. - format : {'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', - 'NETCDF3_CLASSIC'}, optional - File format for the resulting netCDF file: - * NETCDF4: Data is stored in an HDF5 file, using netCDF4 API - features. 
- * NETCDF4_CLASSIC: Data is stored in an HDF5 file, using only - netCDF 3 compatible API features. - * NETCDF3_64BIT: 64-bit offset version of the netCDF 3 file format, - which fully supports 2+ GB files, but is only compatible with - clients linked against netCDF version 3.6.0 or later. - * NETCDF3_CLASSIC: The classic netCDF 3 file format. It does not - handle 2+ GB files very well. - All formats are supported by the netCDF4-python library. - scipy.io.netcdf only supports the last two formats. - The default format is NETCDF4 if you are saving a file to disk and - have the netCDF4-python library available. Otherwise, xarray falls - back to using scipy to write netCDF files and defaults to the - NETCDF3_64BIT format (scipy does not support netCDF4). - engine : {'netcdf4', 'scipy', 'h5netcdf'}, optional - Engine to use when writing netCDF files. If not provided, the - default engine is chosen based on available dependencies, with a - preference for 'netcdf4' if writing to a file on disk. - See `Dataset.to_netcdf` for additional information. - encoding : list of dict, optional - Defaults to reasonable compression/encoding. - If you want to define your own encoding, you first needs to know the - time-chunk sizes this routine will write to disk. After which you - need to provide a list with the encoding specified for each chunk. - Use a list of empty dicts to disable encoding. - Nested dictionary with variable names as keys and dictionaries of - variable specific encodings as values, e.g., - ``{'my_variable': {'dtype': 'int16', 'scale_factor': 0.1, - 'zlib': True}, ...}`` - The `h5netcdf` engine supports both the NetCDF4-style compression - encoding parameters ``{'zlib': True, 'complevel': 9}`` and the h5py - ones ``{'compression': 'gzip', 'compression_opts': 9}``. - This allows using any compression plugin installed in the HDF5 - library, e.g. LZF. - compute: boolean - If true compute immediately, otherwise return a - ``dask.delayed.Delayed`` object that can be computed later. - time_chunks_from_key: str - - Examples - -------- - ds.to_mf_netcdf(folder_path='.') - - See Also - -------- - dtscalibration.open_mf_datastore - xarray.save_mfdataset - - """ - - try: - # This fails if not all chunks of the data_vars are time aligned. - # In case we let Dask estimate an optimal chunk size. 
- t_chunks = self.chunks["time"] - - except: # noqa: E722 - if self[time_chunks_from_key].dims == ("x", "time"): - _, t_chunks = da.ones( - self[time_chunks_from_key].shape, - chunks=(-1, "auto"), - dtype="float64", - ).chunks - - elif self[time_chunks_from_key].dims == ("time", "x"): - _, t_chunks = da.ones( - self[time_chunks_from_key].shape, - chunks=("auto", -1), - dtype="float64", - ).chunks - else: - assert 0, "something went wrong with your Stokes dimensions" - - bnds = np.cumsum((0,) + t_chunks) - x = [range(bu, bd) for bu, bd in zip(bnds[:-1], bnds[1:])] - - datasets = [self.isel(time=xi) for xi in x] - paths = [ - os.path.join( - folder_path, - filename_preamble + f"{ix:04d}" + filename_extension, - ) - for ix in range(len(x)) - ] - - encodings = [] - for ids, ds in enumerate(datasets): - if encoding is None: - encodings.append( - ds.get_default_encoding(time_chunks_from_key=time_chunks_from_key) - ) + @matching_sections.deleter + def matching_sections(self): + self._obj.attrs["_matching_sections"] = yaml.dump(None) - else: - encodings.append(encoding[ids]) - - writers, stores = zip( - *[ - xr.backends.api.to_netcdf( - ds, - path, - mode, - format, - None, - engine, - compute=compute, - multifile=True, - encoding=enc, - ) - for ds, path, enc in zip(datasets, paths, encodings) - ] + @matching_sections.setter + def matching_sections(self, value): + msg = ( + "Not possible anymore. Instead, pass the matching_sections as an argument to \n" + "ds.dts.calibrate_single_ended() or ds.dts.calibrate_double_ended()." ) - - try: - writes = [w.sync(compute=compute) for w in writers] - finally: - if compute: - for store in stores: - store.close() - - if not compute: - - def _finalize_store(write, store): - """Finalize this store by explicitly syncing and closing""" - del write # ensure writing is done first - store.close() - pass - - return dask.delayed( - [dask.delayed(_finalize_store)(w, s) for w, s in zip(writes, stores)] - ) - - pass + raise NotImplementedError(msg) def get_default_encoding(self, time_chunks_from_key=None): """ @@ -649,8 +213,8 @@ def get_default_encoding(self, time_chunks_from_key=None): compcoords = dict(zlib=True, complevel=4) # construct encoding dict - encoding = {var: compdata.copy() for var in self.data_vars} - encoding.update({var: compcoords.copy() for var in self.coords}) + encoding = {var: compdata.copy() for var in self._obj.data_vars} + encoding.update({var: compcoords.copy() for var in self._obj.coords}) for k, v in encoding.items(): if k in float32l: @@ -660,8 +224,8 @@ def get_default_encoding(self, time_chunks_from_key=None): v["dtype"] = "int32" # v['_FillValue'] = -9999 # Int does not support NaN - if np.issubdtype(self[k].dtype, str) or np.issubdtype( - self[k].dtype, object + if np.issubdtype(self._obj[k].dtype, str) or np.issubdtype( + self._obj[k].dtype, object ): # Compression not supported for variable length strings # https://github.com/Unidata/netcdf4-python/issues/1205 @@ -669,18 +233,18 @@ def get_default_encoding(self, time_chunks_from_key=None): if time_chunks_from_key is not None: # obtain optimal chunk sizes in time and x dim - if self[time_chunks_from_key].dims == ("x", "time"): + if self._obj[time_chunks_from_key].dims == ("x", "time"): x_chunk, t_chunk = da.ones( - self[time_chunks_from_key].shape, + self._obj[time_chunks_from_key].shape, chunks=(-1, "auto"), - dtype="float64", + dtype="float32", ).chunks - elif self[time_chunks_from_key].dims == ("time", "x"): + elif self._obj[time_chunks_from_key].dims == ("time", "x"): x_chunk, 
t_chunk = da.ones( - self[time_chunks_from_key].shape, + self._obj[time_chunks_from_key].shape, chunks=("auto", -1), - dtype="float64", + dtype="float32", ).chunks else: assert 0, "something went wrong with your Stokes dimensions" @@ -688,16 +252,16 @@ def get_default_encoding(self, time_chunks_from_key=None): for k, v in encoding.items(): # By writing and compressing the data in chunks, some sort of # parallism is possible. - if self[k].dims == ("x", "time"): + if self._obj[k].dims == ("x", "time"): chunks = (x_chunk[0], t_chunk[0]) - elif self[k].dims == ("time", "x"): + elif self._obj[k].dims == ("time", "x"): chunks = (t_chunk[0], x_chunk[0]) - elif self[k].dims == ("x",): + elif self._obj[k].dims == ("x",): chunks = (x_chunk[0],) - elif self[k].dims == ("time",): + elif self._obj[k].dims == ("time",): chunks = (t_chunk[0],) else: @@ -707,140 +271,36 @@ def get_default_encoding(self, time_chunks_from_key=None): return encoding - def get_section_indices(self, sec): - """Returns the x-indices of the section. `sec` is a slice.""" - xis = self.x.astype(int) * 0 + np.arange(self.x.size, dtype=int) - return xis.sel(x=sec).values - - def rename_labels(self, assertion=True): + def get_timeseries_keys(self): """ - Renames the `ST` DataArrays (old convention) to `st` (new convention). - The new naming convention simplifies the notation of the reverse Stokes - `ds['REV-ST']` becomes `ds.rst`. Plus the parameter-naming convention in - Python in lowercase. - - Parameters - ---------- - assertion : bool - If set to `True`, raises an error if complications occur. - - Returns - ------- - + Returns a list of the keys of the time series variables. """ - re_dict = { - "ST": "st", - "AST": "ast", - "REV-ST": "rst", - "REV-AST": "rast", - "TMP": "tmp", - "TMPF": "tmpf", - "TMPB": "tmpb", - "TMPW": "tmpw", - } - - re_dict_err = { - k: v - for k, v in re_dict.items() - if k in self.data_vars and v in self.data_vars - } - - msg = ( - "Unable to rename the st_labels automagically. \n" - "Please manually rename ST->st and REV-ST->rst. The \n" - f"parameters {re_dict_err.values()} were already present" - ) - - if assertion: - assert len(re_dict_err) == 0, msg - elif len(re_dict_err) != 0: - print(msg) - for v in re_dict_err.values(): - print(f"Variable {v} was not renamed") + return [k for k, v in self._obj.data_vars.items() if v.dims == ("time",)] - re_dict2 = { - k: v - for k, v in re_dict.items() - if k in self.data_vars and v not in self.data_vars - } - - return self.rename(re_dict2) - - def variance_stokes(self, *args, **kwargs): - """Backwards compatibility. See `ds.variance_stokes_constant()`""" - return self.variance_stokes_constant(*args, **kwargs) - - def variance_stokes_constant(self, st_label, sections=None, reshape_residuals=True): + def ufunc_per_section( + self, + sections=None, + func=None, + label=None, + subtract_from_label=None, + temp_err=False, + x_indices=False, + ref_temp_broadcasted=False, + calc_per="stretch", + suppress_section_validation=False, + **func_kwargs, + ): """ - Approximate the variance of the noise in Stokes intensity measurements - with one value, suitable for small setups. - - * `ds.variance_stokes_constant()` for small setups with small variations in\ - intensity. Variance of the Stokes measurements is assumed to be the same\ - along the entire fiber. - - * `ds.variance_stokes_exponential()` for small setups with very few time\ - steps. Too many degrees of freedom results in an under estimation of the\ - noise variance. 
Almost never the case, but use when calibrating pre time\ - step. - - * `ds.variance_stokes_linear()` for larger setups with more time steps.\ - Assumes Poisson distributed noise with the following model:: - - st_var = a * ds.st + b - - - where `a` and `b` are constants. Requires reference sections at - beginning and end of the fiber, to have residuals at high and low - intensity measurements. - - The Stokes and anti-Stokes intensities are measured with detectors, - which inherently introduce noise to the measurements. Knowledge of the - distribution of the measurement noise is needed for a calibration with - weighted observations (Sections 5 and 6 of [1]_) - and to project the associated uncertainty to the temperature confidence - intervals (Section 7 of [1]_). Two sources dominate the noise - in the Stokes and anti-Stokes intensity measurements - (Hartog, 2017, p.125). Close to the laser, noise from the conversion of - backscatter to electricity dominates the measurement noise. The - detecting component, an avalanche photodiode, produces Poisson- - distributed noise with a variance that increases linearly with the - intensity. The Stokes and anti-Stokes intensities are commonly much - larger than the standard deviation of the noise, so that the Poisson - distribution can be approximated with a Normal distribution with a mean - of zero and a variance that increases linearly with the intensity. At - the far-end of the fiber, noise from the electrical circuit dominates - the measurement noise. It produces Normal-distributed noise with a mean - of zero and a variance that is independent of the intensity. - - Calculates the variance between the measurements and a best fit - at each reference section. This fits a function to the nt * nx - measurements with ns * nt + nx parameters, where nx are the total - number of reference locations along all sections. The temperature is - constant along the reference sections, so the expression of the - Stokes power can be split in a time series per reference section and - a constant per observation location. - - Idea from Discussion at page 127 in Richter, P. H. (1995). Estimating - errors in least-squares fitting. - - The timeseries and the constant are, of course, highly correlated - (Equations 20 and 21 in [1]_), but that is not relevant here as only the - product is of interest. The residuals between the fitted product and the - Stokes intensity measurements are attributed to the - noise from the detector. The variance of the residuals is used as a - proxy for the variance of the noise in the Stokes and anti-Stokes - intensity measurements. A non-uniform temperature of - the reference sections results in an over estimation of the noise - variance estimate because all temperature variation is attributed to - the noise. + User function applied to parts of the cable. Super useful, + many options and slightly + complicated. + + The function `func` is taken over all the timesteps and calculated + per `calc_per`. This + is returned as a dictionary Parameters ---------- - reshape_residuals - st_label : str - label of the Stokes, anti-Stokes measurement. - E.g., st, ast, rst, rast sections : Dict[str, List[slice]], optional If `None` is supplied, `ds.sections` is used. Define calibration sections. Each section requires a reference temperature time series, @@ -851,546 +311,153 @@ def variance_stokes_constant(self, st_label, sections=None, reshape_residuals=Tr lists of slice objects, where each slice object is a fiber stretch that has the reference temperature. 
Afterwards, `sections` is stored under `ds.sections`. + func : callable, str + A numpy function, or lambda function to apply to each 'calc_per'. + label + subtract_from_label + temp_err : bool + The argument of the function is label minus the reference + temperature. + x_indices : bool + To retrieve an integer array with the indices of the + x-coordinates in the section/stretch. The indices are sorted. + ref_temp_broadcasted : bool + calc_per : {'all', 'section', 'stretch'} + func_kwargs : dict + Dictionary with options that are passed to func - Returns - ------- - I_var : float - Variance of the residuals between measured and best fit - resid : array_like - Residuals between measured and best fit - - Notes - ----- - - * Because there are a large number of unknowns, spend time on\ - calculating an initial estimate. Can be turned off by setting to False. + TODO: Spend time on creating a slice instead of appending everything\ + to a list and concatenating after. - * It is often not needed to use measurements from all time steps. If\ - your variance estimate does not change when including measurements from\ - more time steps, you have included enough measurements. - References - ---------- - .. [1] des Tombe, B., Schilperoort, B., & Bakker, M. (2020). Estimation - of Temperature and Associated Uncertainty from Fiber-Optic Raman- - Spectrum Distributed Temperature Sensing. Sensors, 20(8), 2235. - https://doi.org/10.3390/s20082235 + Returns + ------- Examples -------- - - `Example notebook 4: Calculate variance Stokes intensity measurements\ - `_ - """ - if sections is None: - sections = self.sections - else: - sections = validate_sections(self, sections) - assert self[st_label].dims[0] == "x", f"{st_label} are transposed" - check_timestep_allclose(self, eps=0.01) + 1. Calculate the variance of the residuals along ALL the\ + reference sections wrt the temperature of the water baths - # should maybe be per section. But then residuals - # seem to be correlated between stretches. I don't know why.. BdT. - data_dict = da.compute( - self.ufunc_per_section( - sections=sections, label=st_label, calc_per="stretch" - ) - )[0] + >>> tmpf_var = d.ufunc_per_section( + >>> sections=sections, + >>> func='var', + >>> calc_per='all', + >>> label='tmpf', + >>> temp_err=True) - var_I, resid = variance_stokes_constant_helper(data_dict) + 2. Calculate the variance of the residuals PER\ + reference section wrt the temperature of the water baths - if not reshape_residuals: - return var_I, resid + >>> tmpf_var = d.ufunc_per_section( + >>> sections=sections, + >>> func='var', + >>> calc_per='stretch', + >>> label='tmpf', + >>> temp_err=True) - else: - ix_resid = self.ufunc_per_section( - sections=sections, x_indices=True, calc_per="all" - ) + 3. Calculate the variance of the residuals PER\ + water bath wrt the temperature of the water baths - resid_sorted = np.full(shape=self[st_label].shape, fill_value=np.nan) - resid_sorted[ix_resid, :] = resid - resid_da = xr.DataArray(data=resid_sorted, coords=self[st_label].coords) + >>> tmpf_var = d.ufunc_per_section( + >>> sections=sections, + >>> func='var', + >>> calc_per='section', + >>> label='tmpf', + >>> temp_err=True) - return var_I, resid_da + 4.
Obtain the coordinates of the measurements per section - def variance_stokes_exponential( - self, - st_label, - sections=None, - use_statsmodels=False, - suppress_info=True, - reshape_residuals=True, - ): - """ - Approximate the variance of the noise in Stokes intensity measurements - with one value, suitable for small setups with measurements from only - a few times. - - * `ds.variance_stokes_constant()` for small setups with small variations in\ - intensity. Variance of the Stokes measurements is assumed to be the same\ - along the entire fiber. - - * `ds.variance_stokes_exponential()` for small setups with very few time\ - steps. Too many degrees of freedom results in an under estimation of the\ - noise variance. Almost never the case, but use when calibrating pre time\ - step. - - * `ds.variance_stokes_linear()` for larger setups with more time steps.\ - Assumes Poisson distributed noise with the following model:: - - st_var = a * ds.st + b - - - where `a` and `b` are constants. Requires reference sections at - beginning and end of the fiber, to have residuals at high and low - intensity measurements. - - The Stokes and anti-Stokes intensities are measured with detectors, - which inherently introduce noise to the measurements. Knowledge of the - distribution of the measurement noise is needed for a calibration with - weighted observations (Sections 5 and 6 of [1]_) - and to project the associated uncertainty to the temperature confidence - intervals (Section 7 of [1]_). Two sources dominate the noise - in the Stokes and anti-Stokes intensity measurements - (Hartog, 2017, p.125). Close to the laser, noise from the conversion of - backscatter to electricity dominates the measurement noise. The - detecting component, an avalanche photodiode, produces Poisson- - distributed noise with a variance that increases linearly with the - intensity. The Stokes and anti-Stokes intensities are commonly much - larger than the standard deviation of the noise, so that the Poisson - distribution can be approximated with a Normal distribution with a mean - of zero and a variance that increases linearly with the intensity. At - the far-end of the fiber, noise from the electrical circuit dominates - the measurement noise. It produces Normal-distributed noise with a mean - of zero and a variance that is independent of the intensity. - - Calculates the variance between the measurements and a best fit - at each reference section. This fits a function to the nt * nx - measurements with ns * nt + nx parameters, where nx are the total - number of reference locations along all sections. The temperature is - constant along the reference sections. This fits a two-parameter - exponential to the stokes measurements. The temperature is constant - and there are no splices/sharp bends in each reference section. - Therefore all signal decrease is due to differential attenuation, - which is the same for each reference section. The scale of the - exponential does differ per reference section. - - Assumptions: 1) the temperature is the same along a reference - section. 2) no sharp bends and splices in the reference sections. 3) - Same type of optical cable in each reference section. - - Idea from discussion at page 127 in Richter, P. H. (1995). Estimating - errors in least-squares fitting. 
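A short sketch of how the noise-variance estimators documented just above are typically called for both detectors (`variance_stokes_constant` shown; the exponential and linear variants take the same `st_label` and `sections` arguments, `sections` as defined earlier):

# One noise variance per channel, estimated from the residuals over all
# reference sections; the residuals are returned for inspection.
st_var, st_resid = ds.variance_stokes_constant(st_label="st", sections=sections)
ast_var, ast_resid = ds.variance_stokes_constant(st_label="ast", sections=sections)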
For weights used error propagation: - w^2 = 1/sigma(lny)^2 = y^2/sigma(y)^2 = y^2 - - The timeseries and the constant are, of course, highly correlated - (Equations 20 and 21 in [1]_), but that is not relevant here as only the - product is of interest. The residuals between the fitted product and the - Stokes intensity measurements are attributed to the - noise from the detector. The variance of the residuals is used as a - proxy for the variance of the noise in the Stokes and anti-Stokes - intensity measurements. A non-uniform temperature of - the reference sections results in an over estimation of the noise - variance estimate because all temperature variation is attributed to - the noise. + >>> locs = d.ufunc_per_section( + >>> sections=sections, + >>> func=None, + >>> label='x', + >>> temp_err=False, + >>> ref_temp_broadcasted=False, + >>> calc_per='stretch') - Parameters - ---------- - suppress_info : bool, optional - Suppress print statements. - use_statsmodels : bool, optional - Use statsmodels to fit the exponential. If `False`, use scipy. - reshape_residuals : bool, optional - Reshape the residuals to the shape of the Stokes intensity - st_label : str - label of the Stokes, anti-Stokes measurement. - E.g., st, ast, rst, rast - sections : Dict[str, List[slice]], optional - If `None` is supplied, `ds.sections` is used. Define calibration - sections. Each section requires a reference temperature time series, - such as the temperature measured by an external temperature sensor. - They should already be part of the DataStore object. `sections` - is defined with a dictionary with its keywords of the - names of the reference temperature time series. Its values are - lists of slice objects, where each slice object is a fiber stretch - that has the reference temperature. Afterwards, `sections` is stored - under `ds.sections`. + 5. Number of observations per stretch - Returns - ------- - I_var : float - Variance of the residuals between measured and best fit - resid : array_like - Residuals between measured and best fit + >>> nlocs = d.ufunc_per_section( + >>> sections=sections, + >>> func=len, + >>> label='x', + >>> temp_err=False, + >>> ref_temp_broadcasted=False, + >>> calc_per='stretch') - Notes - ----- + 6. broadcast the temperature of the reference sections to\ + stretch/section/all dimensions. The value of the reference\ + temperature (a timeseries) is broadcasted to the shape of self[\ + label]. The self[label] is not used for anything else. - * Because there are a large number of unknowns, spend time on\ - calculating an initial estimate. Can be turned off by setting to False. + >>> temp_ref = d.ufunc_per_section( + >>> label='st', + >>> ref_temp_broadcasted=True, + >>> calc_per='all') - * It is often not needed to use measurements from all time steps. If\ - your variance estimate does not change when including measurements from\ - more time steps, you have included enough measurements. + 7. x-coordinate index - References - ---------- - .. [1] des Tombe, B., Schilperoort, B., & Bakker, M. (2020). Estimation - of Temperature and Associated Uncertainty from Fiber-Optic Raman- - Spectrum Distributed Temperature Sensing. Sensors, 20(8), 2235. 
- https://doi.org/10.3390/s20082235 - - Examples - -------- - - `Example notebook 4: Calculate variance Stokes intensity measurements\ - `_ - """ - if sections is None: - sections = self.sections - else: - sections = validate_sections(self, sections) - - assert self[st_label].dims[0] == "x", "Stokes are transposed" - - check_timestep_allclose(self, eps=0.01) - - nt = self.time.size - - len_stretch_list = [] # number of reference points per section ( - # spatial) - y_list = [] # intensities of stokes - x_list = [] # length rel to start of section. for alpha - - for k, stretches in sections.items(): - for stretch in stretches: - y_list.append(self[st_label].sel(x=stretch).data.T.reshape(-1)) - _x = self.x.sel(x=stretch).data.copy() - _x -= _x[0] - x_list.append(da.tile(_x, nt)) - len_stretch_list.append(_x.size) - - x = np.concatenate(x_list) # coordinates are already in memory - y = np.concatenate(y_list) - - var_I, resid = variance_stokes_exponential_helper( - nt, x, y, len_stretch_list, use_statsmodels, suppress_info - ) - - if not reshape_residuals: - return var_I, resid - - else: - # restructure the residuals, such that they can be plotted and - # added to ds - resid_res = [] - for leni, lenis, lenie in zip( - len_stretch_list, - nt * np.cumsum([0] + len_stretch_list[:-1]), - nt * np.cumsum(len_stretch_list), - ): - try: - resid_res.append(resid[lenis:lenie].reshape((leni, nt), order="F")) - except: # noqa: E722 - # Dask array does not support order - resid_res.append(resid[lenis:lenie].T.reshape((nt, leni)).T) - - _resid = np.concatenate(resid_res) - _resid_x = self.ufunc_per_section( - sections=sections, label="x", calc_per="all" - ) - isort = np.argsort(_resid_x) - resid_x = _resid_x[isort] # get indices from ufunc directly - resid = _resid[isort, :] - - ix_resid = np.array([np.argmin(np.abs(ai - self.x.data)) for ai in resid_x]) - - resid_sorted = np.full(shape=self[st_label].shape, fill_value=np.nan) - resid_sorted[ix_resid, :] = resid - resid_da = xr.DataArray(data=resid_sorted, coords=self[st_label].coords) + >>> ix_loc = d.ufunc_per_section(sections=sections, x_indices=True) - return var_I, resid_da - def variance_stokes_linear( - self, st_label, sections=None, nbin=50, through_zero=False, plot_fit=False - ): + Note + ---- + If `self[label]` or `self[subtract_from_label]` is a Dask array, a Dask + array is returned else a numpy array is returned """ - Approximate the variance of the noise in Stokes intensity measurements - with a linear function of the intensity, suitable for large setups. - - * `ds.variance_stokes_constant()` for small setups with small variations in\ - intensity. Variance of the Stokes measurements is assumed to be the same\ - along the entire fiber. - - * `ds.variance_stokes_exponential()` for small setups with very few time\ - steps. Too many degrees of freedom results in an under estimation of the\ - noise variance. Almost never the case, but use when calibrating pre time\ - step. - - * `ds.variance_stokes_linear()` for larger setups with more time steps.\ - Assumes Poisson distributed noise with the following model:: - - st_var = a * ds.st + b - - - where `a` and `b` are constants. Requires reference sections at - beginning and end of the fiber, to have residuals at high and low - intensity measurements. - - The Stokes and anti-Stokes intensities are measured with detectors, - which inherently introduce noise to the measurements. 
Knowledge of the - distribution of the measurement noise is needed for a calibration with - weighted observations (Sections 5 and 6 of [1]_) - and to project the associated uncertainty to the temperature confidence - intervals (Section 7 of [1]_). Two sources dominate the noise - in the Stokes and anti-Stokes intensity measurements - (Hartog, 2017, p.125). Close to the laser, noise from the conversion of - backscatter to electricity dominates the measurement noise. The - detecting component, an avalanche photodiode, produces Poisson- - distributed noise with a variance that increases linearly with the - intensity. The Stokes and anti-Stokes intensities are commonly much - larger than the standard deviation of the noise, so that the Poisson - distribution can be approximated with a Normal distribution with a mean - of zero and a variance that increases linearly with the intensity. At - the far-end of the fiber, noise from the electrical circuit dominates - the measurement noise. It produces Normal-distributed noise with a mean - of zero and a variance that is independent of the intensity. - - Calculates the variance between the measurements and a best fit - at each reference section. This fits a function to the nt * nx - measurements with ns * nt + nx parameters, where nx are the total - number of reference locations along all sections. The temperature is - constant along the reference sections, so the expression of the - Stokes power can be split in a time series per reference section and - a constant per observation location. - - Idea from Discussion at page 127 in Richter, P. H. (1995). Estimating - errors in least-squares fitting. - - The timeseries and the constant are, of course, highly correlated - (Equations 20 and 21 in [1]_), but that is not relevant here as only the - product is of interest. The residuals between the fitted product and the - Stokes intensity measurements are attributed to the - noise from the detector. The variance of the residuals is used as a - proxy for the variance of the noise in the Stokes and anti-Stokes - intensity measurements. A non-uniform temperature of - the reference sections results in an over estimation of the noise - variance estimate because all temperature variation is attributed to - the noise. - - Notes - ----- - - * Because there are a large number of unknowns, spend time on\ - calculating an initial estimate. Can be turned off by setting to False. - - * It is often not needed to use measurements from all time steps. If\ - your variance estimate does not change when including measurements \ - from more time steps, you have included enough measurements. - - References - ---------- - .. [1] des Tombe, B., Schilperoort, B., & Bakker, M. (2020). Estimation - of Temperature and Associated Uncertainty from Fiber-Optic Raman- - Spectrum Distributed Temperature Sensing. Sensors, 20(8), 2235. - https://doi.org/10.3390/s20082235 - - Examples - -------- - - `Example notebook 4: Calculate variance Stokes intensity \ - measurements `_ + if not suppress_section_validation: + validate_sections_definition(sections=sections) + validate_no_overlapping_sections(sections=sections) - Parameters - ---------- - st_label : str - Key under which the Stokes DataArray is stored. E.g., 'st', 'rst' - sections : dict, optional - Define sections. See documentation - nbin : int - Number of bins to compute the variance for, through which the - linear function is fitted. Make sure that that are at least 50 - residuals per bin to compute the variance from. 
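For larger setups the linear noise model can be fitted and turned into a callable, as in the hedged sketch below; the returned `var_fun` can then be passed wherever a Stokes variance is expected (a float, a callable, or a DataArray are all accepted).

# Bin the residuals, fit VAR(st) = slope * st + offset over the reference
# sections, and return var_fun, a callable mapping intensity to noise variance.
slope, offset, st_mean, st_var_bins, resid, var_fun = ds.variance_stokes_linear(
    st_label="st",
    sections=sections,
    nbin=50,
    through_zero=False,  # keep the offset when low-intensity sections are available
    plot_fit=True,
)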
- through_zero : bool - If True, the variance is computed as: VAR(Stokes) = slope * Stokes - If False, VAR(Stokes) = slope * Stokes + offset. - From what we can tell from our inital trails, is that the offset - seems relatively small, so that True seems a better option for - setups where a reference section with very low Stokes intensities - is missing. If data with low Stokes intensities available, it is - better to not fit through zero, but determine the offset from - the data. - plot_fit : bool - If True plot the variances for each bin and plot the fitted - linear function - """ - import matplotlib.pyplot as plt + if temp_err or ref_temp_broadcasted: + for k in sections: + assert ( + k in self._obj + ), f"{k} is not in the Dataset but is in `sections` and is required to compute temp_err" - if sections is None: - sections = self.sections + if label is None: + dataarray = None else: - sections = validate_sections(self, sections) - - assert self[st_label].dims[0] == "x", "Stokes are transposed" - _, resid = self.variance_stokes( - sections=sections, st_label=st_label, reshape_residuals=False - ) - - ix_sec = self.ufunc_per_section( - sections=sections, x_indices=True, calc_per="all" - ) - - st = self[st_label].isel(x=ix_sec).values.ravel() - diff_st = resid.ravel() - - ( - slope, - offset, - st_sort_mean, - st_sort_var, - resid, - var_fun, - ) = variance_stokes_linear_helper(st, diff_st, nbin, through_zero) - - if plot_fit: - plt.figure() - plt.scatter(st_sort_mean, st_sort_var, marker=".", c="black") - plt.plot( - [0.0, st_sort_mean[-1]], - [var_fun(0.0), var_fun(st_sort_mean[-1])], - c="white", - lw=1.3, - ) - plt.plot( - [0.0, st_sort_mean[-1]], - [var_fun(0.0), var_fun(st_sort_mean[-1])], - c="black", - lw=0.8, - ) - plt.xlabel(st_label + " intensity") - plt.ylabel(st_label + " intensity variance") - - return slope, offset, st_sort_mean, st_sort_var, resid, var_fun - - def i_var(self, st_var, ast_var, st_label="st", ast_label="ast"): - r""" - Compute the variance of an observation given the stokes and anti-Stokes - intensities and their variance. - The variance, :math:`\sigma^2_{I_{m,n}}`, of the distribution of the - noise in the observation at location :math:`m`, time :math:`n`, is a - function of the variance of the noise in the Stokes and anti-Stokes - intensity measurements (:math:`\sigma_{P_+}^2` and - :math:`\sigma_{P_-}^2`), and is approximated with (Ku et al., 1966): - - .. math:: - - \sigma^2_{I_{m,n}} \\approx \left[\\frac{\partial I_{m,n}}{\partial\ - P_{m,n+}}\\right]^2\sigma^2_{P_{+}} + \left[\\frac{\partial\ - I_{m,n}}{\partial\ - P_{m,n-}}\\right]^2\sigma^2_{P_{-}} - - .. math:: - - \sigma^2_{I_{m,n}} \\approx \\frac{1}{P_{m,n+}^2}\sigma^2_{P_{+}} +\ - \\frac{1}{P_{m,n-}^2}\sigma^2_{P_{-}} - - The variance of the noise in the Stokes and anti-Stokes intensity - measurements is estimated directly from Stokes and anti-Stokes intensity - measurements using the steps outlined in Section 4. - - Parameters - ---------- - st_var, ast_var : float, callable, array-like, optional - The variance of the measurement noise of the Stokes signals in the - forward direction. If `float` the variance of the noise from the - Stokes detector is described with a single value. - If `callable` the variance of the noise from the Stokes detector is - a function of the intensity, as defined in the callable function. - Or manually define a variance with a DataArray of the shape - `ds.st.shape`, where the variance can be a function of time and/or - x. 
- st_label : {'st', 'rst'} - ast_label : {'ast', 'rast'} - - Returns - ------- + dataarray = self._obj[label] - """ - st = self[st_label] - ast = self[ast_label] - - if callable(st_var): - st_var = st_var(self[st_label]).values - else: - st_var = np.asarray(st_var, dtype=float) + if x_indices: + x_coords = self.x + reference_dataset = None - if callable(ast_var): - ast_var = ast_var(self[ast_label]).values else: - ast_var = np.asarray(ast_var, dtype=float) - - return st**-2 * st_var + ast**-2 * ast_var - - def set_trans_att(self, trans_att=None): - """Gracefully set the locations that introduce directional differential - attenuation - - Parameters - ---------- - trans_att : iterable, optional - Splices can cause jumps in differential attenuation. Normal single - ended calibration assumes these are not present. An additional loss - term is added in the 'shadow' of the splice. Each location - introduces an additional nt parameters to solve for. Requiring - either an additional calibration section or matching sections. - If multiple locations are defined, the losses are added. - - """ - if "trans_att" in self.coords and self["trans_att"].size > 0: - raise_warning = 0 - - del_keys = [] - for k, v in self.data_vars.items(): - if "trans_att" in v.dims: - del_keys.append(k) - - for del_key in del_keys: - del self[del_key] - - if raise_warning: - m = ( - "trans_att was set before. All `data_vars` that make use " - "of the `trans_att` coordinates were deleted: " + str(del_keys) - ) - warnings.warn(m) - - if trans_att is None: - trans_att = [] + x_coords = None + reference_dataset = {k: self._obj[k] for k in sections} - self["trans_att"] = trans_att - self["trans_att"].attrs = dim_attrs["trans_att"] - pass + out = ufunc_per_section_helper( + x_coords=x_coords, + sections=sections, + func=func, + dataarray=dataarray, + subtract_from_dataarray=subtract_from_label, + reference_dataset=reference_dataset, + subtract_reference_from_dataarray=temp_err, + ref_temp_broadcasted=ref_temp_broadcasted, + calc_per=calc_per, + **func_kwargs, + ) + return out - def calibration_single_ended( + def calibrate_single_ended( self, sections, - st_var=None, - ast_var=None, + st_var, + ast_var, method="wls", solver="sparse", p_val=None, p_var=None, p_cov=None, matching_sections=None, - trans_att=None, + trans_att=[], fix_gamma=None, fix_dalpha=None, fix_alpha=None, - **kwargs, ): r""" Calibrate the Stokes (`ds.st`) and anti-Stokes (`ds.ast`) data to @@ -1452,9 +519,8 @@ def calibration_single_ended( If set to False, no uncertainty in the parameters is propagated into the confidence intervals. Similar to the spec sheets of the DTS manufacturers. And similar to passing an array filled with zeros. - sections : Dict[str, List[slice]], optional - If `None` is supplied, `ds.sections` is used. Define calibration - sections. Each section requires a reference temperature time series, + sections : Dict[str, List[slice]] + Each section requires a reference temperature time series, such as the temperature measured by an external temperature sensor. They should already be part of the DataStore object. `sections` is defined with a dictionary with its keywords of the @@ -1524,28 +590,23 @@ def calibration_single_ended( 07Calibrate_single_wls.ipynb>`_ """ - check_deprecated_kwargs(kwargs) - self.set_trans_att(trans_att=trans_att, **kwargs) - - self = set_sections(self, sections) # TODO: don't change object in-place. 
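The error propagation described above reduces to a one-liner; a minimal sketch, assuming `st_var` and `ast_var` were estimated as before:

# sigma^2_I ~= st_var / st**2 + ast_var / ast**2, evaluated per (x, time)
i_var = ds.i_var(st_var, ast_var, st_label="st", ast_label="ast")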
- - if method == "wls": - assert st_var is not None and ast_var is not None, "Set `st_var`" + # out contains the state + out = xr.Dataset( + coords={"x": self.x, "time": self.time, "trans_att": trans_att} + ).copy() + out.coords["x"].attrs = dim_attrs["x"] + out.coords["trans_att"].attrs = dim_attrs["trans_att"] - self.check_reference_section_values() + nta = len(trans_att) - nx = self.x.size - nt = self["time"].size - nta = self.trans_att.size + # check and store sections and matching_sections + validate_sections(self._obj, sections=sections) + set_sections(out, sections) + set_matching_sections(out, matching_sections) - assert self["st"].dims[0] == "x", "Stokes are transposed" + assert self.st.dims[0] == "x", "Stokes are transposed" assert self.ast.dims[0] == "x", "Stokes are transposed" - if matching_sections: - matching_indices = match_sections(self, matching_sections) - else: - matching_indices = None - ix_sec = self.ufunc_per_section( sections=sections, x_indices=True, calc_per="all" ) @@ -1560,17 +621,15 @@ def calibration_single_ended( if method == "wls": p_cov, p_val, p_var = calibration_single_ended_helper( - self, + self._obj, sections, st_var, ast_var, fix_alpha, fix_dalpha, fix_gamma, - matching_indices, - nt, - nta, - nx, + matching_sections, + trans_att, solver, ) @@ -1586,11 +645,11 @@ def calibration_single_ended( # all below require the following solution sizes if fix_alpha: ip = ParameterIndexSingleEnded( - nt, nx, nta, includes_alpha=True, includes_dalpha=False + self.nt, self.nx, nta, includes_alpha=True, includes_dalpha=False ) else: ip = ParameterIndexSingleEnded( - nt, nx, nta, includes_alpha=False, includes_dalpha=True + self.nt, self.nx, nta, includes_alpha=False, includes_dalpha=True ) # npar = 1 + 1 + nt + nta * nt @@ -1599,17 +658,17 @@ def calibration_single_ended( assert p_cov.shape == (ip.npar, ip.npar) # store calibration parameters in DataStore - coords = {"x": self["x"], "time": self["time"], "trans_att": self["trans_att"]} params, param_covs = get_params_from_pval_single_ended( - ip, coords, p_val=p_val, p_var=p_var, p_cov=p_cov, fix_alpha=fix_alpha + ip, out.coords, p_val=p_val, p_var=p_var, p_cov=p_cov, fix_alpha=fix_alpha ) tmpf = params["gamma"] / ( (np.log(self.st / self.ast) + (params["c"] + params["talpha_fw_full"])) + params["alpha"] ) - out = xr.Dataset({"tmpf": tmpf - 273.15}) - out["tmpf"].attrs.update(_dim_attrs[("tmpf",)]) + + out["tmpf"] = tmpf - 273.15 + out["tmpf"].attrs.update(dim_attrs["tmpf"]) # tmpf_var deriv_dict = dict( @@ -1624,9 +683,8 @@ def calibration_single_ended( deriv_ds = xr.Dataset(deriv_dict) var_fw_dict = dict( - dT_dst=deriv_ds.T_st_fw**2 * parse_st_var(self, st_var, st_label="st"), - dT_dast=deriv_ds.T_ast_fw**2 - * parse_st_var(self, ast_var, st_label="ast"), + dT_dst=deriv_ds.T_st_fw**2 * parse_st_var(self.st, st_var), + dT_dast=deriv_ds.T_ast_fw**2 * parse_st_var(self.ast, ast_var), dT_gamma=deriv_ds.T_gamma_fw**2 * param_covs["gamma"], dT_dc=deriv_ds.T_c_fw**2 * param_covs["c"], dT_ddalpha=deriv_ds.T_alpha_fw**2 @@ -1668,14 +726,7 @@ def calibration_single_ended( out["var_fw_da"] = xr.Dataset(var_fw_dict).to_array(dim="comp_fw") out["tmpf_var"] = out["var_fw_da"].sum(dim="comp_fw") - out["tmpf_var"].attrs.update(_dim_attrs[("tmpf_var",)]) - - drop_vars = [ - k for k, v in self.items() if {"params1", "params2"}.intersection(v.dims) - ] - - for k in drop_vars: - del self[k] + out["tmpf_var"].attrs.update(dim_attrs["tmpf_var"]) out["p_val"] = (("params1",), p_val) out["p_cov"] = (("params1", "params2"), p_cov) @@ 
-1684,28 +735,25 @@ def calibration_single_ended( for key, dataarray in param_covs.data_vars.items(): out[key + "_var"] = dataarray - self.update(out) - pass + return out - def calibration_double_ended( + def calibrate_double_ended( self, sections, - st_var=None, - ast_var=None, - rst_var=None, - rast_var=None, + st_var, + ast_var, + rst_var, + rast_var, method="wls", solver="sparse", p_val=None, p_var=None, p_cov=None, - trans_att=None, + trans_att=[], fix_gamma=None, fix_alpha=None, matching_sections=None, - matching_indices=None, verbose=False, - **kwargs, ): r""" See example notebook 8 for an explanation on how to use this function. @@ -1909,28 +957,29 @@ def calibration_double_ended( dtscalibration/python-dts-calibration/blob/master/examples/notebooks/\ 08Calibrate_double_wls.ipynb>`_ """ - # TODO: confidence intervals using the variance approximated by linear error propagation - check_deprecated_kwargs(kwargs) - - self.set_trans_att(trans_att=trans_att, **kwargs) - - self = set_sections(self, sections) # TODO: don't change object in-place. + # out contains the state + out = xr.Dataset( + coords={"x": self.x, "time": self.time, "trans_att": trans_att} + ).copy() + out.coords["x"].attrs = dim_attrs["x"] + out.coords["trans_att"].attrs = dim_attrs["trans_att"] - self.check_reference_section_values() + nta = len(trans_att) - nx = self.x.size - nt = self["time"].size - nta = self.trans_att.size - ix_sec = self.ufunc_per_section( - sections=sections, x_indices=True, calc_per="all" - ) - nx_sec = ix_sec.size + # check and store sections and matching_sections + validate_sections(self._obj, sections=sections) + set_sections(out, sections) + set_matching_sections(out, matching_sections) + # TODO: confidence intervals using the variance approximated by linear error propagation assert self.st.dims[0] == "x", "Stokes are transposed" assert self.ast.dims[0] == "x", "Stokes are transposed" assert self.rst.dims[0] == "x", "Stokes are transposed" assert self.rast.dims[0] == "x", "Stokes are transposed" + ix_sec = self.ufunc_per_section( + sections=sections, x_indices=True, calc_per="all" + ) assert not np.any(self.st.isel(x=ix_sec) <= 0.0), ( "There is uncontrolled noise in the ST signal. Are your sections" "correctly defined?" 
@@ -1949,26 +998,8 @@ def calibration_double_ended( ) if method == "wls": - for input_item in [st_var, ast_var, rst_var, rast_var]: - assert input_item is not None, ( - "For wls define all variances (`st_var`, `ast_var`," - + " `rst_var`, `rast_var`)" - ) - - if np.any(matching_indices): - assert ( - not matching_sections - ), "Either define `matching_sections` or `matching_indices`" - - if matching_sections: - assert ( - not matching_indices - ), "Either define `matching_sections` or `matching_indices" - matching_indices = match_sections(self, matching_sections) - - if method == "wls": - p_cov, p_val, p_var = calibration_double_ended_helper( - self, + p_cov, p_val, p_var = calibrate_double_ended_helper( + self._obj, sections, st_var, ast_var, @@ -1976,12 +1007,12 @@ def calibration_double_ended( rast_var, fix_alpha, fix_gamma, - nt, + self.nt, nta, - nx, - nx_sec, + self.nx, ix_sec, - matching_indices, + matching_sections, + trans_att, solver, verbose, ) @@ -1997,42 +1028,32 @@ def calibration_double_ended( raise ValueError("Choose a valid method") # all below require the following solution sizes - ip = ParameterIndexDoubleEnded(nt, nx, nta) + ip = ParameterIndexDoubleEnded(self.nt, self.nx, nta) # npar = 1 + 2 * nt + nx + 2 * nt * nta assert p_val.size == ip.npar assert p_var.size == ip.npar assert p_cov.shape == (ip.npar, ip.npar) - coords = {"x": self["x"], "time": self["time"], "trans_att": self["trans_att"]} - params = get_params_from_pval_double_ended(ip, coords, p_val=p_val) + params = get_params_from_pval_double_ended(ip, out.coords, p_val=p_val) param_covs = get_params_from_pval_double_ended( - ip, coords, p_val=p_var, p_cov=p_cov + ip, out.coords, p_val=p_var, p_cov=p_cov ) - out = xr.Dataset( - { - "tmpf": params["gamma"] - / ( - np.log(self.st / self.ast) - + params["df"] - + params["alpha"] - + params["talpha_fw_full"] - ) - - 273.15, - "tmpb": params["gamma"] - / ( - np.log(self.rst / self.rast) - + params["db"] - - params["alpha"] - + params["talpha_bw_full"] - ) - - 273.15, - } + tmpf = params["gamma"] / ( + np.log(self.st / self.ast) + + params["df"] + + params["alpha"] + + params["talpha_fw_full"] ) - - tmpf = out["tmpf"] + 273.15 - tmpb = out["tmpb"] + 273.15 + tmpb = params["gamma"] / ( + np.log(self.rst / self.rast) + + params["db"] + - params["alpha"] + + params["talpha_bw_full"] + ) + out["tmpf"] = tmpf - 273.15 + out["tmpb"] = tmpb - 273.15 deriv_dict = dict( T_gamma_fw=tmpf / params["gamma"], @@ -2052,9 +1073,8 @@ def calibration_double_ended( out["deriv"] = deriv_ds.to_array(dim="com2") var_fw_dict = dict( - dT_dst=deriv_ds.T_st_fw**2 * parse_st_var(self, st_var, st_label="st"), - dT_dast=deriv_ds.T_ast_fw**2 - * parse_st_var(self, ast_var, st_label="ast"), + dT_dst=deriv_ds.T_st_fw**2 * parse_st_var(self.st, st_var), + dT_dast=deriv_ds.T_ast_fw**2 * parse_st_var(self.ast, ast_var), dT_gamma=deriv_ds.T_gamma_fw**2 * param_covs["gamma"], dT_ddf=deriv_ds.T_df_fw**2 * param_covs["df"], dT_dalpha=deriv_ds.T_alpha_fw**2 * param_covs["alpha"], @@ -2080,10 +1100,8 @@ def calibration_double_ended( ), ) var_bw_dict = dict( - dT_drst=deriv_ds.T_rst_bw**2 - * parse_st_var(self, rst_var, st_label="rst"), - dT_drast=deriv_ds.T_rast_bw**2 - * parse_st_var(self, rast_var, st_label="rast"), + dT_drst=deriv_ds.T_rst_bw**2 * parse_st_var(self.rst, rst_var), + dT_drast=deriv_ds.T_rast_bw**2 * parse_st_var(self.rast, rast_var), dT_gamma=deriv_ds.T_gamma_bw**2 * param_covs["gamma"], dT_ddb=deriv_ds.T_db_bw**2 * param_covs["db"], dT_dalpha=deriv_ds.T_alpha_bw**2 * param_covs["alpha"], 
@@ -2143,13 +1161,10 @@ def calibration_double_ended( # TODO: sigma2_tafw_tabw var_w_dict = dict( - dT_dst=deriv_ds2.T_st_w**2 * parse_st_var(self, st_var, st_label="st"), - dT_dast=deriv_ds2.T_ast_w**2 - * parse_st_var(self, ast_var, st_label="ast"), - dT_drst=deriv_ds2.T_rst_w**2 - * parse_st_var(self, rst_var, st_label="rst"), - dT_drast=deriv_ds2.T_rast_w**2 - * parse_st_var(self, rast_var, st_label="rast"), + dT_dst=deriv_ds2.T_st_w**2 * parse_st_var(self.st, st_var), + dT_dast=deriv_ds2.T_ast_w**2 * parse_st_var(self.ast, ast_var), + dT_drst=deriv_ds2.T_rst_w**2 * parse_st_var(self.rst, rst_var), + dT_drast=deriv_ds2.T_rast_w**2 * parse_st_var(self.rast, rast_var), dT_gamma=deriv_ds2.T_gamma_w**2 * param_covs["gamma"], dT_ddf=deriv_ds2.T_df_w**2 * param_covs["df"], dT_ddb=deriv_ds2.T_db_w**2 * param_covs["db"], @@ -2203,22 +1218,14 @@ def calibration_double_ended( ) out["tmpw_var" + "_lower"] = 1 / (1 / tmpf_var_excl_par + 1 / tmpb_var_excl_par) - out["tmpf"].attrs.update(_dim_attrs[("tmpf",)]) - out["tmpb"].attrs.update(_dim_attrs[("tmpb",)]) - out["tmpw"].attrs.update(_dim_attrs[("tmpw",)]) - out["tmpf_var"].attrs.update(_dim_attrs[("tmpf_var",)]) - out["tmpb_var"].attrs.update(_dim_attrs[("tmpb_var",)]) - out["tmpw_var"].attrs.update(_dim_attrs[("tmpw_var",)]) - out["tmpw_var" + "_approx"].attrs.update(_dim_attrs[("tmpw_var_approx",)]) - out["tmpw_var" + "_lower"].attrs.update(_dim_attrs[("tmpw_var_lower",)]) - - drop_vars = [ - k for k, v in self.items() if {"params1", "params2"}.intersection(v.dims) - ] - - for k in drop_vars: - print(f"removing {k}") - del self[k] + out["tmpf"].attrs.update(dim_attrs["tmpf"]) + out["tmpb"].attrs.update(dim_attrs["tmpb"]) + out["tmpw"].attrs.update(dim_attrs["tmpw"]) + out["tmpf_var"].attrs.update(dim_attrs["tmpf_var"]) + out["tmpb_var"].attrs.update(dim_attrs["tmpb_var"]) + out["tmpw_var"].attrs.update(dim_attrs["tmpw_var"]) + out["tmpw_var" + "_approx"].attrs.update(dim_attrs["tmpw_var_approx"]) + out["tmpw_var" + "_lower"].attrs.update(dim_attrs["tmpw_var_lower"]) out["p_val"] = (("params1",), p_val) out["p_cov"] = (("params1", "params2"), p_cov) @@ -2227,493 +1234,307 @@ def calibration_double_ended( for key, dataarray in param_covs.data_vars.items(): out[key + "_var"] = dataarray - self.update(out) - pass + return out - def average_single_ended( + def monte_carlo_single_ended( self, - p_val="p_val", - p_cov="p_cov", - st_var=None, - ast_var=None, - conf_ints=None, + result, + st_var, + ast_var, + conf_ints=[], mc_sample_size=100, - ci_avg_time_flag1=False, - ci_avg_time_flag2=False, - ci_avg_time_sel=None, - ci_avg_time_isel=None, - ci_avg_x_flag1=False, - ci_avg_x_flag2=False, - ci_avg_x_sel=None, - ci_avg_x_isel=None, - var_only_sections=None, da_random_state=None, - mc_remove_set_flag=True, reduce_memory_usage=False, - **kwargs, + mc_remove_set_flag=True, ): + """The result object is what comes out of the single_ended_calibration routine) + + TODO: Use get_params_from_pval_single_ended() to extract parameter sets from mc """ - Average temperatures from single-ended setups. + assert self.st.dims[0] == "x", "Stokes are transposed" + assert self.ast.dims[0] == "x", "Stokes are transposed" - Four types of averaging are implemented. Please see Example Notebook 16. 
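Putting the accessor-style API introduced by this change together, a hedged end-to-end sketch for a single-ended setup (variable names are illustrative; the double-ended routines follow the same pattern with the backward-channel variances added):

# Calibrate, then propagate parameter and detector noise with Monte Carlo.
out = ds.dts.calibrate_single_ended(sections=sections, st_var=st_var, ast_var=ast_var)
ci = ds.dts.monte_carlo_single_ended(
    result=out,
    st_var=st_var,
    ast_var=ast_var,
    conf_ints=[2.5, 97.5],  # percentiles passed on to np.percentile
    mc_sample_size=500,
)
# ci["tmpf_mc_var"] holds the variance of tmpf; ci["tmpf_mc"] the requested quantiles.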
+ if da_random_state: + state = da_random_state + else: + state = da.random.RandomState() + # out contains the state + out = xr.Dataset( + coords={"x": self.x, "time": self.time, "trans_att": result["trans_att"]} + ).copy() + out.coords["x"].attrs = dim_attrs["x"] + out.coords["trans_att"].attrs = dim_attrs["trans_att"] + out.coords["CI"] = conf_ints - Parameters - ---------- - p_val : array-like, optional - Define `p_val`, `p_var`, `p_cov` if you used an external function - for calibration. Has size 2 + `nt`. First value is :math:`\\gamma`, - second is :math:`\\Delta \\alpha`, others are :math:`C` for each - timestep. - If set to False, no uncertainty in the parameters is propagated - into the confidence intervals. Similar to the spec sheets of the DTS - manufacturers. And similar to passing an array filled with zeros - p_cov : array-like, optional - The covariances of `p_val`. - st_var, ast_var : float, callable, array-like, optional - The variance of the measurement noise of the Stokes signals in the - forward direction. If `float` the variance of the noise from the - Stokes detector is described with a single value. - If `callable` the variance of the noise from the Stokes detector is - a function of the intensity, as defined in the callable function. - Or manually define a variance with a DataArray of the shape - `ds.st.shape`, where the variance can be a function of time and/or - x. Required if method is wls. - conf_ints : iterable object of float - A list with the confidence boundaries that are calculated. Valid - values are between - [0, 1]. - mc_sample_size : int - Size of the monte carlo parameter set used to calculate the - confidence interval - ci_avg_time_flag1 : bool - The confidence intervals differ each time step. Assumes the - temperature varies during the measurement period. Computes the - arithmic temporal mean. If you would like to know the confidence - interfal of: - (1) a single additional measurement. So you can state "if another - measurement were to be taken, it would have this ci" - (2) all measurements. So you can state "The temperature remained - during the entire measurement period between these ci bounds". - Adds "tmpw" + '_avg1' and "tmpw" + '_mc_avg1_var' to the - DataStore. If `conf_ints` are set, also the confidence intervals - `_mc_avg1` are added to the DataStore. Works independently of the - ci_avg_time_flag2 and ci_avg_x_flag. - ci_avg_time_flag2 : bool - The confidence intervals differ each time step. Assumes the - temperature remains constant during the measurement period. - Computes the inverse-variance-weighted-temporal-mean temperature - and its uncertainty. - If you would like to know the confidence interfal of: - (1) I want to estimate a background temperature with confidence - intervals. I hereby assume the temperature does not change over - time and average all measurements to get a better estimate of the - background temperature. - Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the - DataStore. If `conf_ints` are set, also the confidence intervals - `_mc_avg2` are added to the DataStore. Works independently of the - ci_avg_time_flag1 and ci_avg_x_flag. - ci_avg_time_sel : slice - Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a - selection of the data - ci_avg_time_isel : iterable of int - Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a - selection of the data - ci_avg_x_flag1 : bool - The confidence intervals differ at each location. Assumes the - temperature varies over `x` and over time. 
Computes the - arithmic spatial mean. If you would like to know the confidence - interfal of: - (1) a single additional measurement location. So you can state "if - another measurement location were to be taken, - it would have this ci" - (2) all measurement locations. So you can state "The temperature - along the fiber remained between these ci bounds". - Adds "tmpw" + '_avgx1' and "tmpw" + '_mc_avgx1_var' to the - DataStore. If `conf_ints` are set, also the confidence intervals - `_mc_avgx1` are added to the DataStore. Works independently of the - ci_avg_time_flag1, ci_avg_time_flag2 and ci_avg_x2_flag. - ci_avg_x_flag2 : bool - The confidence intervals differ at each location. Assumes the - temperature is the same at each location but varies over time. - Computes the inverse-variance-weighted-spatial-mean temperature - and its uncertainty. - If you would like to know the confidence interfal of: - (1) I have put a lot of fiber in water, and I know that the - temperature variation in the water is much smaller than along - other parts of the fiber. And I would like to average the - measurements from multiple locations to improve the estimated - temperature. - Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the - DataStore. If `conf_ints` are set, also the confidence intervals - `_mc_avg2` are added to the DataStore. Works independently of the - ci_avg_time_flag1 and ci_avg_x_flag. - ci_avg_x_sel : slice - Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a - selection of the data - ci_avg_x_isel : iterable of int - Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a - selection of the data - var_only_sections : bool - useful if using the ci_avg_x_flag. Only calculates the var over the - sections, so that the values can be compared with accuracy along the - reference sections. Where the accuracy is the variance of the - residuals between the estimated temperature and temperature of the - water baths. - da_random_state - For testing purposes. Similar to random seed. The seed for dask. - Makes random not so random. To produce reproducable results for - testing environments. - mc_remove_set_flag : bool - Remove the monte carlo data set, from which the CI and the - variance are calculated. 
- reduce_memory_usage : bool - Use less memory but at the expense of longer computation time + set_sections(out, result.dts.sections) + set_matching_sections(out, result.dts.matching_sections) - Returns - ------- - - """ - check_deprecated_kwargs(kwargs) - - if var_only_sections is not None: - raise NotImplementedError() - - out = xr.Dataset() - - mcparams = self.conf_int_single_ended( - p_val=p_val, - p_cov=p_cov, - st_var=st_var, - ast_var=ast_var, - conf_ints=None, - mc_sample_size=mc_sample_size, - da_random_state=da_random_state, - mc_remove_set_flag=False, - reduce_memory_usage=reduce_memory_usage, - **kwargs, - ) - mcparams["tmpf"] = self["tmpf"] + params = out.copy() + params.coords["mc"] = range(mc_sample_size) - if ci_avg_time_sel is not None: - time_dim2 = "time" + "_avg" - x_dim2 = "x" - mcparams.coords[time_dim2] = ( - (time_dim2,), - mcparams["time"].sel(**{"time": ci_avg_time_sel}).data, - ) - mcparams["tmpf_avgsec"] = ( - ("x", time_dim2), - mcparams["tmpf"].sel(**{"time": ci_avg_time_sel}).data, - ) - mcparams["tmpf_mc_set"] = ( - ("mc", "x", time_dim2), - mcparams["tmpf" + "_mc_set"].sel(**{"time": ci_avg_time_sel}).data, - ) + no, nt = self.st.shape + nta = result["trans_att"].size - elif ci_avg_time_isel is not None: - time_dim2 = "time" + "_avg" - x_dim2 = "x" - mcparams.coords[time_dim2] = ( - (time_dim2,), - mcparams["time"].isel(**{"time": ci_avg_time_isel}).data, - ) - mcparams["tmpf_avgsec"] = ( - ("x", time_dim2), - mcparams["tmpf"].isel(**{"time": ci_avg_time_isel}).data, - ) - mcparams["tmpf_mc_set"] = ( - ("mc", "x", time_dim2), - mcparams["tmpf" + "_mc_set"].isel(**{"time": ci_avg_time_isel}).data, - ) + p_val = result["p_val"].data + p_cov = result["p_cov"].data - elif ci_avg_x_sel is not None: - time_dim2 = "time" - x_dim2 = "x_avg" - mcparams.coords[x_dim2] = ((x_dim2,), mcparams.x.sel(x=ci_avg_x_sel).data) - mcparams["tmpf_avgsec"] = ( - (x_dim2, "time"), - mcparams["tmpf"].sel(x=ci_avg_x_sel).data, - ) - mcparams["tmpf_mc_set"] = ( - ("mc", x_dim2, "time"), - mcparams["tmpf_mc_set"].sel(x=ci_avg_x_sel).data, - ) + npar = p_val.size - elif ci_avg_x_isel is not None: - time_dim2 = "time" - x_dim2 = "x_avg" - mcparams.coords[x_dim2] = ((x_dim2,), mcparams.x.isel(x=ci_avg_x_isel).data) - mcparams["tmpf_avgsec"] = ( - (x_dim2, time_dim2), - mcparams["tmpf"].isel(x=ci_avg_x_isel).data, - ) - mcparams["tmpf_mc_set"] = ( - ("mc", x_dim2, time_dim2), - mcparams["tmpf_mc_set"].isel(x=ci_avg_x_isel).data, - ) + # check number of parameters + if npar == nt + 2 + nt * nta: + fixed_alpha = False + elif npar == 1 + no + nt + nt * nta: + fixed_alpha = True else: - mcparams["tmpf_avgsec"] = mcparams["tmpf"] - x_dim2 = "x" - time_dim2 = "time" - - # subtract the mean temperature - q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] - out["tmpf_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) - - if ci_avg_x_flag1: - # unweighted mean - out["tmpf_avgx1"] = mcparams["tmpf" + "_avgsec"].mean(dim=x_dim2) - - q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] - qvar = q.var(dim=["mc", x_dim2], ddof=1) - out["tmpf_mc_avgx1_var"] = qvar - - if conf_ints: - new_chunks = (len(conf_ints), mcparams["tmpf_mc_set"].chunks[2]) - avg_axis = mcparams["tmpf_mc_set"].get_axis_num(["mc", x_dim2]) - q = mcparams["tmpf_mc_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks, # - drop_axis=avg_axis, - # avg dimensions are dropped from input arr - new_axis=0, - ) # The new CI dim is added as firsaxis + raise Exception("The size of `p_val` is 
not what I expected") - out["tmpf_mc_avgx1"] = (("CI", time_dim2), q) + p_mc = sst.multivariate_normal.rvs(mean=p_val, cov=p_cov, size=mc_sample_size) - if ci_avg_x_flag2: - q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] + if fixed_alpha: + params["alpha_mc"] = (("mc", "x"), p_mc[:, 1 : no + 1]) + params["c_mc"] = (("mc", "time"), p_mc[:, 1 + no : 1 + no + nt]) + else: + params["dalpha_mc"] = (("mc",), p_mc[:, 1]) + params["c_mc"] = (("mc", "time"), p_mc[:, 2 : nt + 2]) - qvar = q.var(dim=["mc"], ddof=1) + params["gamma_mc"] = (("mc",), p_mc[:, 0]) + if nta: + params["ta_mc"] = ( + ("mc", "trans_att", "time"), + np.reshape(p_mc[:, -nt * nta :], (mc_sample_size, nta, nt)), + ) - # Inverse-variance weighting - avg_x_var = 1 / (1 / qvar).sum(dim=x_dim2) + rsize = (params.mc.size, params.x.size, params.time.size) - out["tmpf_mc_avgx2_var"] = avg_x_var + if reduce_memory_usage: + memchunk = da.ones( + (mc_sample_size, no, nt), chunks={0: -1, 1: 1, 2: "auto"} + ).chunks + else: + memchunk = da.ones( + (mc_sample_size, no, nt), chunks={0: -1, 1: "auto", 2: "auto"} + ).chunks - mcparams["tmpf" + "_mc_avgx2_set"] = (mcparams["tmpf_mc_set"] / qvar).sum( - dim=x_dim2 - ) * avg_x_var - out["tmpf" + "_avgx2"] = mcparams["tmpf" + "_mc_avgx2_set"].mean(dim="mc") + # Draw from the normal distributions for the Stokes intensities + for key_mc, sti, st_vari in zip( + ["r_st", "r_ast"], [self.st, self.ast], [st_var, ast_var] + ): + # Load the mean as chunked Dask array, otherwise eats memory + if type(sti.data) == da.core.Array: + loc = da.asarray(sti.data, chunks=memchunk[1:]) + else: + loc = da.from_array(sti.data, chunks=memchunk[1:]) - if conf_ints: - new_chunks = (len(conf_ints), mcparams["tmpf_mc_set"].chunks[2]) - avg_axis_avgx = mcparams["tmpf_mc_set"].get_axis_num("mc") + # Make sure variance is of size (no, nt) + if np.size(st_vari) > 1: + if st_vari.shape == sti.shape: + pass + else: + st_vari = np.broadcast_to(st_vari, (no, nt)) + else: + pass - qq = mcparams["tmpf_mc_avgx2_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx), - chunks=new_chunks, # - drop_axis=avg_axis_avgx, - # avg dimensions are dropped from input arr - new_axis=0, - dtype=float, - ) # The new CI dimension is added as - # firsaxis - out["tmpf_mc_avgx2"] = (("CI", time_dim2), qq) + # Load variance as chunked Dask array, otherwise eats memory + if type(st_vari) == da.core.Array: + st_vari_da = da.asarray(st_vari, chunks=memchunk[1:]) - if ci_avg_time_flag1 is not None: - # unweighted mean - out["tmpf_avg1"] = mcparams["tmpf_avgsec"].mean(dim=time_dim2) + elif callable(st_vari) and type(sti.data) == da.core.Array: + st_vari_da = da.asarray(st_vari(sti).data, chunks=memchunk[1:]) - q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] - qvar = q.var(dim=["mc", time_dim2], ddof=1) - out["tmpf_mc_avg1_var"] = qvar + elif callable(st_vari) and type(sti.data) != da.core.Array: + st_vari_da = da.from_array(st_vari(sti).data, chunks=memchunk[1:]) - if conf_ints: - new_chunks = (len(conf_ints), mcparams["tmpf_mc_set"].chunks[1]) - avg_axis = mcparams["tmpf_mc_set"].get_axis_num(["mc", time_dim2]) - q = mcparams["tmpf_mc_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks, # - drop_axis=avg_axis, - # avg dimensions are dropped from input arr - new_axis=0, - ) # The new CI dim is added as firsaxis + else: + st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) - out["tmpf_mc_avg1"] = (("CI", x_dim2), q) + params[key_mc] = ( + ("mc", "x", "time"), + 
state.normal( + loc=loc, # has chunks=memchunk[1:] + scale=st_vari_da**0.5, + size=rsize, + chunks=memchunk, + ), + ) - if ci_avg_time_flag2: - q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] + ta_arr = np.zeros((mc_sample_size, no, nt)) - qvar = q.var(dim=["mc"], ddof=1) + if nta: + for ii, ta in enumerate(params["ta_mc"]): + for tai, taxi in zip(ta.values, result["trans_att"].values): + ta_arr[ii, self.x.values >= taxi] = ( + ta_arr[ii, self.x.values >= taxi] + tai + ) + params["ta_mc_arr"] = (("mc", "x", "time"), ta_arr) - # Inverse-variance weighting - avg_time_var = 1 / (1 / qvar).sum(dim=time_dim2) + if fixed_alpha: + params["tmpf_mc_set"] = ( + params["gamma_mc"] + / ( + ( + np.log(params["r_st"]) + - np.log(params["r_ast"]) + + (params["c_mc"] + params["ta_mc_arr"]) + ) + + params["alpha_mc"] + ) + - 273.15 + ) + else: + params["tmpf_mc_set"] = ( + params["gamma_mc"] + / ( + ( + np.log(params["r_st"]) + - np.log(params["r_ast"]) + + (params["c_mc"] + params["ta_mc_arr"]) + ) + + (params["dalpha_mc"] * params.x) + ) + - 273.15 + ) - out["tmpf_mc_avg2_var"] = avg_time_var + avg_dims = ["mc"] + avg_axis = params["tmpf_mc_set"].get_axis_num(avg_dims) + out["tmpf_mc_var"] = (params["tmpf_mc_set"] - result["tmpf"]).var( + dim=avg_dims, ddof=1 + ) - mcparams["tmpf" + "_mc_avg2_set"] = (mcparams["tmpf_mc_set"] / qvar).sum( - dim=time_dim2 - ) * avg_time_var - out["tmpf_avg2"] = mcparams["tmpf" + "_mc_avg2_set"].mean(dim="mc") + if conf_ints: + new_chunks = ((len(conf_ints),),) + params["tmpf_mc_set"].chunks[1:] - if conf_ints: - new_chunks = (len(conf_ints), mcparams["tmpf_mc_set"].chunks[1]) - avg_axis_avg2 = mcparams["tmpf_mc_set"].get_axis_num("mc") + qq = params["tmpf_mc_set"] - qq = mcparams["tmpf_mc_avg2_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), - chunks=new_chunks, # - drop_axis=avg_axis_avg2, - # avg dimensions are dropped from input arr - new_axis=0, - dtype=float, - ) # The new CI dimension is added as - # firsaxis - out["tmpf_mc_avg2"] = (("CI", x_dim2), qq) + q = qq.data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), + chunks=new_chunks, # + drop_axis=avg_axis, # avg dimesnions are dropped from input arr + new_axis=0, + ) # The new CI dimension is added as first axis - # Clean up the garbage. All arrays with a Monte Carlo dimension. 
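For reference, the `tmpf_mc_set` realizations assembled in the added code above follow the single-ended calibration equation. Writing :math:`P^+` and :math:`P^-` for the sampled Stokes and anti-Stokes intensities (`r_st`, `r_ast`), :math:`C` for `c_mc`, :math:`TA` for the summed transient attenuation `ta_mc_arr`, and :math:`\Delta\alpha` for `dalpha_mc`, each realization is

.. math::
    T_{m,n,i} = \frac{\gamma_i}{\ln P^+_{m,n,i} - \ln P^-_{m,n,i} + C_{n,i} + TA_{m,n,i} + \Delta\alpha_i\, x_m} - 273.15

with the :math:`\Delta\alpha_i\, x_m` term replaced by the sampled per-location :math:`\alpha_{m,i}` when a fixed-alpha parameter set is detected from the size of `p_val`. A minimal usage sketch of the routine (assuming it is called on the object holding the raw Stokes measurements, here called `dts`, i.e. `self` in the code above; `out`, `st_var`, `ast_var` and `ci` are illustrative names, with `out` the dataset returned by `calibrate_single_ended()`):

    >>> ci = dts.monte_carlo_single_ended(
    ...     result=out,
    ...     st_var=st_var,
    ...     ast_var=ast_var,
    ...     conf_ints=[2.5, 97.5],
    ...     mc_sample_size=500,
    ... )
    >>> ci["tmpf_mc_var"]  # Monte Carlo variance of the forward temperature
    >>> ci["tmpf_mc"]      # requested percentiles along the new "CI" dimension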
- if mc_remove_set_flag: - remove_mc_set = [ - "r_st", - "r_ast", - "gamma_mc", - "dalpha_mc", - "c_mc", - "x_avg", - "time_avg", - "mc", - "ta_mc_arr", - ] - remove_mc_set.append("tmpf_avgsec") - remove_mc_set.append("tmpf_mc_set") - remove_mc_set.append("tmpf_mc_avg2_set") - remove_mc_set.append("tmpf_mc_avgx2_set") - remove_mc_set.append("tmpf_mc_avgsec_var") + out["tmpf_mc"] = (("CI", "x", "time"), q) - for k in remove_mc_set: - if k in out: - del out[k] + if not mc_remove_set_flag: + out.update(params) - self.update(out) - pass + return out - def average_double_ended( + def monte_carlo_double_ended( self, - sections=None, - p_val="p_val", - p_cov="p_cov", - st_var=None, - ast_var=None, - rst_var=None, - rast_var=None, - conf_ints=None, + result, + st_var, + ast_var, + rst_var, + rast_var, + conf_ints, mc_sample_size=100, - ci_avg_time_flag1=False, - ci_avg_time_flag2=False, - ci_avg_time_sel=None, - ci_avg_time_isel=None, - ci_avg_x_flag1=False, - ci_avg_x_flag2=False, - ci_avg_x_sel=None, - ci_avg_x_isel=None, + var_only_sections=False, + exclude_parameter_uncertainty=False, da_random_state=None, mc_remove_set_flag=True, reduce_memory_usage=False, - **kwargs, ): - """ - Average temperatures from double-ended setups. + r""" + Estimation of the confidence intervals for the temperatures measured + with a double-ended setup. + Double-ended setups require four additional steps to estimate the + confidence intervals for the temperature. First, the variances of the + Stokes and anti-Stokes intensity measurements of the forward and + backward channels are estimated following the steps in + Section 4 [1]_. See `variance_stokes_constant()`. + A Normal distribution is assigned to each + intensity measurement that is centered at the measurement and using the + estimated variance. Second, a multi-variate Normal distribution is + assigned to the estimated parameters using the covariance matrix from + the calibration procedure presented in Section 6 [1]_ (`p_cov`). Third, + Normal distributions are assigned for :math:`A` (`ds.alpha`) + for each location + outside of the reference sections. These distributions are centered + around :math:`A_p` and have variance :math:`\sigma^2\left[A_p\\right]` + given by Equations 44 and 45. Fourth, the distributions are sampled + and :math:`T_{\mathrm{F},m,n}` and :math:`T_{\mathrm{B},m,n}` are + computed with Equations 16 and 17, respectively. Fifth, step four is repeated to + compute, e.g., 10,000 realizations (`mc_sample_size`) of :math:`T_{\mathrm{F},m,n}` and + :math:`T_{\mathrm{B},m,n}` to approximate their probability density + functions. Sixth, the standard uncertainties of + :math:`T_{\mathrm{F},m,n}` and :math:`T_{\mathrm{B},m,n}` + (:math:`\sigma\left[T_{\mathrm{F},m,n}\\right]` and + :math:`\sigma\left[T_{\mathrm{B},m,n}\\right]`) are estimated with the + standard deviation of their realizations. Seventh, for each realization + :math:`i` the temperature :math:`T_{m,n,i}` is computed as the weighted + average of :math:`T_{\mathrm{F},m,n,i}` and + :math:`T_{\mathrm{B},m,n,i}`: - Four types of averaging are implemented. Please see Example Notebook 16. + .. math:: - Parameters - ---------- - p_val : array-like, optional - Define `p_val`, `p_var`, `p_cov` if you used an external function - for calibration. Has size 2 + `nt`. First value is :math:`\\gamma`, - second is :math:`\\Delta \\alpha`, others are :math:`C` for each - timestep. - If set to False, no uncertainty in the parameters is propagated - into the confidence intervals. 
Similar to the spec sheets of the DTS - manufacturers. And similar to passing an array filled with zeros - p_cov : array-like, optional - The covariances of `p_val`. - st_var, ast_var, rst_var, rast_var : float, callable, array-like, optional - The variance of the measurement noise of the Stokes signals in the - forward direction. If `float` the variance of the noise from the - Stokes detector is described with a single value. - If `callable` the variance of the noise from the Stokes detector is - a function of the intensity, as defined in the callable function. - Or manually define a variance with a DataArray of the shape - `ds.st.shape`, where the variance can be a function of time and/or - x. Required if method is wls. - conf_ints : iterable object of float - A list with the confidence boundaries that are calculated. Valid - values are between - [0, 1]. - mc_sample_size : int - Size of the monte carlo parameter set used to calculate the - confidence interval - ci_avg_time_flag1 : bool - The confidence intervals differ each time step. Assumes the - temperature varies during the measurement period. Computes the - arithmic temporal mean. If you would like to know the confidence - interfal of: - (1) a single additional measurement. So you can state "if another - measurement were to be taken, it would have this ci" - (2) all measurements. So you can state "The temperature remained - during the entire measurement period between these ci bounds". - Adds "tmpw" + '_avg1' and "tmpw" + '_mc_avg1_var' to the - DataStore. If `conf_ints` are set, also the confidence intervals - `_mc_avg1` are added to the DataStore. Works independently of the - ci_avg_time_flag2 and ci_avg_x_flag. - ci_avg_time_flag2 : bool - The confidence intervals differ each time step. Assumes the - temperature remains constant during the measurement period. - Computes the inverse-variance-weighted-temporal-mean temperature - and its uncertainty. - If you would like to know the confidence interfal of: - (1) I want to estimate a background temperature with confidence - intervals. I hereby assume the temperature does not change over - time and average all measurements to get a better estimate of the - background temperature. - Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the - DataStore. If `conf_ints` are set, also the confidence intervals - `_mc_avg2` are added to the DataStore. Works independently of the - ci_avg_time_flag1 and ci_avg_x_flag. - ci_avg_time_sel : slice - Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a - selection of the data - ci_avg_time_isel : iterable of int - Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a - selection of the data - ci_avg_x_flag1 : bool - The confidence intervals differ at each location. Assumes the - temperature varies over `x` and over time. Computes the - arithmic spatial mean. If you would like to know the confidence - interfal of: - (1) a single additional measurement location. So you can state "if - another measurement location were to be taken, - it would have this ci" - (2) all measurement locations. So you can state "The temperature - along the fiber remained between these ci bounds". - Adds "tmpw" + '_avgx1' and "tmpw" + '_mc_avgx1_var' to the - DataStore. If `conf_ints` are set, also the confidence intervals - `_mc_avgx1` are added to the DataStore. Works independently of the - ci_avg_time_flag1, ci_avg_time_flag2 and ci_avg_x2_flag. - ci_avg_x_flag2 : bool - The confidence intervals differ at each location. 
Assumes the - temperature is the same at each location but varies over time. - Computes the inverse-variance-weighted-spatial-mean temperature - and its uncertainty. - If you would like to know the confidence interfal of: - (1) I have put a lot of fiber in water, and I know that the - temperature variation in the water is much smaller than along - other parts of the fiber. And I would like to average the - measurements from multiple locations to improve the estimated - temperature. - Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the - DataStore. If `conf_ints` are set, also the confidence intervals - `_mc_avg2` are added to the DataStore. Works independently of the - ci_avg_time_flag1 and ci_avg_x_flag. - ci_avg_x_sel : slice - Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a - selection of the data - ci_avg_x_isel : iterable of int - Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a - selection of the data + T_{m,n,i} =\ + \sigma^2\left[T_{m,n}\\right]\left({\\frac{T_{\mathrm{F},m,n,i}}{\ + \sigma^2\left[T_{\mathrm{F},m,n}\\right]} +\ + \\frac{T_{\mathrm{B},m,n,i}}{\ + \sigma^2\left[T_{\mathrm{B},m,n}\\right]}}\\right) + + where + + .. math:: + + \sigma^2\left[T_{m,n}\\right] = \\frac{1}{1 /\ + \sigma^2\left[T_{\mathrm{F},m,n}\\right] + 1 /\ + \sigma^2\left[T_{\mathrm{B},m,n}\\right]} + + The best estimate of the temperature :math:`T_{m,n}` is computed + directly from the best estimates of :math:`T_{\mathrm{F},m,n}` and + :math:`T_{\mathrm{B},m,n}` as: + + .. math:: + T_{m,n} =\ + \sigma^2\left[T_{m,n}\\right]\left({\\frac{T_{\mathrm{F},m,n}}{\ + \sigma^2\left[T_{\mathrm{F},m,n}\\right]} + \\frac{T_{\mathrm{B},m,n}}{\ + \sigma^2\left[T_{\mathrm{B},m,n}\\right]}}\\right) + + Alternatively, the best estimate of :math:`T_{m,n}` can be approximated + with the mean of the :math:`T_{m,n,i}` values. Finally, the 95\% + confidence interval for :math:`T_{m,n}` are estimated with the 2.5\% and + 97.5\% percentiles of :math:`T_{m,n,i}`. + + Assumes sections are set. + + Parameters + ---------- + p_val : array-like, optional + Define `p_val`, `p_var`, `p_cov` if you used an external function + for calibration. Has size `1 + 2 * nt + nx + 2 * nt * nta`. + First value is :math:`\gamma`, then `nt` times + :math:`D_\mathrm{F}`, then `nt` times + :math:`D_\mathrm{B}`, then for each location :math:`D_\mathrm{B}`, + then for each connector that introduces directional attenuation two + parameters per time step. + p_cov : array-like, optional + The covariances of `p_val`. Square matrix. + If set to False, no uncertainty in the parameters is propagated + into the confidence intervals. Similar to the spec sheets of the DTS + manufacturers. And similar to passing an array filled with zeros. + st_var, ast_var, rst_var, rast_var : float, callable, array-like, optional + The variance of the measurement noise of the Stokes signals in the + forward direction. If `float` the variance of the noise from the + Stokes detector is described with a single value. + If `callable` the variance of the noise from the Stokes detector is + a function of the intensity, as defined in the callable function. + Or manually define a variance with a DataArray of the shape + `ds.st.shape`, where the variance can be a function of time and/or + x. Required if method is wls. + conf_ints : iterable object of float + A list with the confidence boundaries that are calculated. Valid + values are between [0, 1]. 
+ mc_sample_size : int + Size of the monte carlo parameter set used to calculate the + confidence interval + var_only_sections : bool + useful if using the ci_avg_x_flag. Only calculates the var over the + sections, so that the values can be compared with accuracy along the + reference sections. Where the accuracy is the variance of the + residuals between the estimated temperature and temperature of the + water baths. da_random_state For testing purposes. Similar to random seed. The seed for dask. Makes random not so random. To produce reproducable results for @@ -2727,6 +1548,14 @@ def average_double_ended( Returns ------- + References + ---------- + .. [1] des Tombe, B., Schilperoort, B., & Bakker, M. (2020). Estimation + of Temperature and Associated Uncertainty from Fiber-Optic Raman- + Spectrum Distributed Temperature Sensing. Sensors, 20(8), 2235. + https://doi.org/10.3390/s20082235 + + TODO: Use get_params_from_pval_double_ended() to extract parameter sets from mc """ def create_da_ta2(no, i_splice, direction="fw", chunks=None): @@ -2758,346 +1587,307 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): ).rechunk((1, chunks[1], 1)) return arr - check_deprecated_kwargs(kwargs) - - if (ci_avg_x_flag1 or ci_avg_x_flag2) and ( - ci_avg_time_flag1 or ci_avg_time_flag2 - ): - raise NotImplementedError( - "Incompatible flags. Can not pick " "the right chunks" - ) - - elif not ( - ci_avg_x_flag1 or ci_avg_x_flag2 or ci_avg_time_flag1 or ci_avg_time_flag2 - ): - raise NotImplementedError("Pick one of the averaging options") - + if da_random_state: + # In testing environments + assert isinstance(da_random_state, da.random.RandomState) + state = da_random_state else: - pass - - out = xr.Dataset() - - mcparams = self.conf_int_double_ended( - sections=sections, - p_val=p_val, - p_cov=p_cov, - st_var=st_var, - ast_var=ast_var, - rst_var=rst_var, - rast_var=rast_var, - conf_ints=None, - mc_sample_size=mc_sample_size, - da_random_state=da_random_state, - mc_remove_set_flag=False, - reduce_memory_usage=reduce_memory_usage, - **kwargs, - ) - - for label in ["tmpf", "tmpb"]: - if ci_avg_time_sel is not None: - time_dim2 = "time" + "_avg" - x_dim2 = "x" - mcparams.coords[time_dim2] = ( - (time_dim2,), - mcparams["time"].sel(**{"time": ci_avg_time_sel}).data, - ) - mcparams[label + "_avgsec"] = ( - ("x", time_dim2), - self[label].sel(**{"time": ci_avg_time_sel}).data, - ) - mcparams[label + "_mc_set"] = ( - ("mc", "x", time_dim2), - mcparams[label + "_mc_set"].sel(**{"time": ci_avg_time_sel}).data, - ) - - elif ci_avg_time_isel is not None: - time_dim2 = "time" + "_avg" - x_dim2 = "x" - mcparams.coords[time_dim2] = ( - (time_dim2,), - mcparams["time"].isel(**{"time": ci_avg_time_isel}).data, - ) - mcparams[label + "_avgsec"] = ( - ("x", time_dim2), - self[label].isel(**{"time": ci_avg_time_isel}).data, - ) - mcparams[label + "_mc_set"] = ( - ("mc", "x", time_dim2), - mcparams[label + "_mc_set"].isel(**{"time": ci_avg_time_isel}).data, - ) - - elif ci_avg_x_sel is not None: - time_dim2 = "time" - x_dim2 = "x_avg" - mcparams.coords[x_dim2] = ( - (x_dim2,), - mcparams.x.sel(x=ci_avg_x_sel).data, - ) - mcparams[label + "_avgsec"] = ( - (x_dim2, "time"), - self[label].sel(x=ci_avg_x_sel).data, - ) - mcparams[label + "_mc_set"] = ( - ("mc", x_dim2, "time"), - mcparams[label + "_mc_set"].sel(x=ci_avg_x_sel).data, - ) - - elif ci_avg_x_isel is not None: - time_dim2 = "time" - x_dim2 = "x_avg" - mcparams.coords[x_dim2] = ( - (x_dim2,), - mcparams.x.isel(x=ci_avg_x_isel).data, - ) - mcparams[label + 
"_avgsec"] = ( - (x_dim2, time_dim2), - self[label].isel(x=ci_avg_x_isel).data, - ) - mcparams[label + "_mc_set"] = ( - ("mc", x_dim2, time_dim2), - mcparams[label + "_mc_set"].isel(x=ci_avg_x_isel).data, - ) - else: - mcparams[label + "_avgsec"] = self[label] - x_dim2 = "x" - time_dim2 = "time" + state = da.random.RandomState() - memchunk = mcparams[label + "_mc_set"].chunks + out = xr.Dataset( + coords={"x": self.x, "time": self.time, "trans_att": result["trans_att"]} + ).copy() + out.coords["x"].attrs = dim_attrs["x"] + out.coords["trans_att"].attrs = dim_attrs["trans_att"] + out.coords["CI"] = conf_ints - # subtract the mean temperature - q = mcparams[label + "_mc_set"] - mcparams[label + "_avgsec"] - out[label + "_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) + set_sections(out, result.dts.sections) + set_matching_sections(out, result.dts.matching_sections) - if ci_avg_x_flag1: - # unweighted mean - out[label + "_avgx1"] = mcparams[label + "_avgsec"].mean(dim=x_dim2) + params = out.copy() # Contains all mc sampled parameters + params.coords["mc"] = range(mc_sample_size) - q = mcparams[label + "_mc_set"] - mcparams[label + "_avgsec"] - qvar = q.var(dim=["mc", x_dim2], ddof=1) - out[label + "_mc_avgx1_var"] = qvar + no, nt = self.st.shape + nta = result["trans_att"].size - if conf_ints: - new_chunks = (len(conf_ints), mcparams[label + "_mc_set"].chunks[2]) - avg_axis = mcparams[label + "_mc_set"].get_axis_num(["mc", x_dim2]) - q = mcparams[label + "_mc_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks, # - drop_axis=avg_axis, - # avg dimensions are dropped from input arr - new_axis=0, - ) # The new CI dim is added as firsaxis + p_val = result["p_val"].data + p_cov = result["p_cov"].data - out[label + "_mc_avgx1"] = (("CI", time_dim2), q) + npar = p_val.size + npar_valid = 1 + 2 * nt + no + nt * 2 * nta + assert npar == npar_valid, "Inconsistent result object" - if ci_avg_x_flag2: - q = mcparams[label + "_mc_set"] - mcparams[label + "_avgsec"] + rsize = (mc_sample_size, no, nt) - qvar = q.var(dim=["mc"], ddof=1) + if reduce_memory_usage: + memchunk = da.ones( + (mc_sample_size, no, nt), chunks={0: -1, 1: 1, 2: "auto"} + ).chunks + else: + memchunk = da.ones( + (mc_sample_size, no, nt), chunks={0: -1, 1: "auto", 2: "auto"} + ).chunks - # Inverse-variance weighting - avg_x_var = 1 / (1 / qvar).sum(dim=x_dim2) + if exclude_parameter_uncertainty: + # Exclude parameter uncertainty if p_cov == False + gamma = p_val[0] + d_fw = p_val[1 : nt + 1] + d_bw = p_val[1 + nt : 2 * nt + 1] + alpha = p_val[2 * nt + 1 : 2 * nt + 1 + no] - out[label + "_mc_avgx2_var"] = avg_x_var + params["gamma_mc"] = (tuple(), gamma) + params["alpha_mc"] = (("x",), alpha) + params["df_mc"] = (("time",), d_fw) + params["db_mc"] = (("time",), d_bw) - mcparams[label + "_mc_avgx2_set"] = ( - mcparams[label + "_mc_set"] / qvar - ).sum(dim=x_dim2) * avg_x_var - out[label + "_avgx2"] = mcparams[label + "_mc_avgx2_set"].mean(dim="mc") + if nta: + ta = p_val[2 * nt + 1 + no :].reshape((nt, 2, nta), order="F") + ta_fw = ta[:, 0, :] + ta_bw = ta[:, 1, :] - if conf_ints: - new_chunks = (len(conf_ints), mcparams[label + "_mc_set"].chunks[2]) - avg_axis_avgx = mcparams[label + "_mc_set"].get_axis_num("mc") + ta_fw_arr = np.zeros((no, nt)) + for tai, taxi in zip(ta_fw.T, params.coords["trans_att"].values): + ta_fw_arr[params.x.values >= taxi] = ( + ta_fw_arr[params.x.values >= taxi] + tai + ) - qq = mcparams[label + "_mc_avgx2_set"].data.map_blocks( - lambda x: np.percentile(x, 
q=conf_ints, axis=avg_axis_avgx), - chunks=new_chunks, # - drop_axis=avg_axis_avgx, - # avg dimensions are dropped from input arr - new_axis=0, - dtype=float, - ) # The new CI dimension is added as - # firsaxis - out[label + "_mc_avgx2"] = (("CI", time_dim2), qq) + ta_bw_arr = np.zeros((no, nt)) + for tai, taxi in zip(ta_bw.T, params.coords["trans_att"].values): + ta_bw_arr[params.x.values < taxi] = ( + ta_bw_arr[params.x.values < taxi] + tai + ) - if ci_avg_time_flag1 is not None: - # unweighted mean - out[label + "_avg1"] = mcparams[label + "_avgsec"].mean(dim=time_dim2) + params["talpha_fw_mc"] = (("x", "time"), ta_fw_arr) + params["talpha_bw_mc"] = (("x", "time"), ta_bw_arr) - q = mcparams[label + "_mc_set"] - mcparams[label + "_avgsec"] - qvar = q.var(dim=["mc", time_dim2], ddof=1) - out[label + "_mc_avg1_var"] = qvar + else: + sections = result.dts.sections + ix_sec = self.ufunc_per_section( + sections=sections, x_indices=True, calc_per="all" + ) + nx_sec = ix_sec.size + from_i = np.concatenate( + ( + np.arange(1 + 2 * nt), + 1 + 2 * nt + ix_sec, + np.arange(1 + 2 * nt + no, 1 + 2 * nt + no + nt * 2 * nta), + ) + ) + iox_sec1, iox_sec2 = np.meshgrid(from_i, from_i, indexing="ij") + po_val = p_val[from_i] + po_cov = p_cov[iox_sec1, iox_sec2] - if conf_ints: - new_chunks = (len(conf_ints), mcparams[label + "_mc_set"].chunks[1]) - avg_axis = mcparams[label + "_mc_set"].get_axis_num( - ["mc", time_dim2] - ) - q = mcparams[label + "_mc_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks, # - drop_axis=avg_axis, - # avg dimensions are dropped from input arr - new_axis=0, - ) # The new CI dim is added as firsaxis + po_mc = sst.multivariate_normal.rvs( + mean=po_val, cov=po_cov, size=mc_sample_size + ) - out[label + "_mc_avg1"] = (("CI", x_dim2), q) + gamma = po_mc[:, 0] + d_fw = po_mc[:, 1 : nt + 1] + d_bw = po_mc[:, 1 + nt : 2 * nt + 1] - if ci_avg_time_flag2: - q = mcparams[label + "_mc_set"] - mcparams[label + "_avgsec"] + params["gamma_mc"] = (("mc",), gamma) + params["df_mc"] = (("mc", "time"), d_fw) + params["db_mc"] = (("mc", "time"), d_bw) - qvar = q.var(dim=["mc"], ddof=1) + # calculate alpha seperately + alpha = np.zeros((mc_sample_size, no), dtype=float) + alpha[:, ix_sec] = po_mc[:, 1 + 2 * nt : 1 + 2 * nt + nx_sec] - # Inverse-variance weighting - avg_time_var = 1 / (1 / qvar).sum(dim=time_dim2) + not_ix_sec = np.array([i for i in range(no) if i not in ix_sec]) - out[label + "_mc_avg2_var"] = avg_time_var + if np.any(not_ix_sec): + not_alpha_val = p_val[2 * nt + 1 + not_ix_sec] + not_alpha_var = p_cov[2 * nt + 1 + not_ix_sec, 2 * nt + 1 + not_ix_sec] - mcparams[label + "_mc_avg2_set"] = ( - mcparams[label + "_mc_set"] / qvar - ).sum(dim=time_dim2) * avg_time_var - out[label + "_avg2"] = mcparams[label + "_mc_avg2_set"].mean(dim="mc") + not_alpha_mc = np.random.normal( + loc=not_alpha_val, + scale=not_alpha_var**0.5, + size=(mc_sample_size, not_alpha_val.size), + ) - if conf_ints: - new_chunks = (len(conf_ints), mcparams[label + "_mc_set"].chunks[1]) - avg_axis_avg2 = mcparams[label + "_mc_set"].get_axis_num("mc") + alpha[:, not_ix_sec] = not_alpha_mc - qq = mcparams[label + "_mc_avg2_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), - chunks=new_chunks, # - drop_axis=avg_axis_avg2, - # avg dimensions are dropped from input arr - new_axis=0, - dtype=float, - ) # The new CI dimension is added as - # firsaxis - out[label + "_mc_avg2"] = (("CI", x_dim2), qq) + params["alpha_mc"] = (("mc", "x"), 
alpha) - # Weighted mean of the forward and backward - tmpw_var = 1 / ( - 1 / out["tmpf_mc" + "_avgsec_var"] + 1 / out["tmpb_mc" + "_avgsec_var"] - ) + if nta: + ta = po_mc[:, 2 * nt + 1 + nx_sec :].reshape( + (mc_sample_size, nt, 2, nta), order="F" + ) + ta_fw = ta[:, :, 0, :] + ta_bw = ta[:, :, 1, :] - q = ( - mcparams["tmpf_mc_set"] / out["tmpf_mc" + "_avgsec_var"] - + mcparams["tmpb_mc_set"] / out["tmpb_mc" + "_avgsec_var"] - ) * tmpw_var + ta_fw_arr = da.zeros( + (mc_sample_size, no, nt), chunks=memchunk, dtype=float + ) + for tai, taxi in zip( + ta_fw.swapaxes(0, 2), params.coords["trans_att"].values + ): + # iterate over the splices + i_splice = sum(params.x.values < taxi) + mask = create_da_ta2(no, i_splice, direction="fw", chunks=memchunk) - mcparams["tmpw" + "_mc_set"] = q # + ta_fw_arr += mask * tai.T[:, None, :] - # out["tmpw"] = out["tmpw" + '_mc_set'].mean(dim='mc') - out["tmpw" + "_avgsec"] = ( - mcparams["tmpf_avgsec"] / out["tmpf_mc" + "_avgsec_var"] - + mcparams["tmpb_avgsec"] / out["tmpb_mc" + "_avgsec_var"] - ) * tmpw_var + ta_bw_arr = da.zeros( + (mc_sample_size, no, nt), chunks=memchunk, dtype=float + ) + for tai, taxi in zip( + ta_bw.swapaxes(0, 2), params.coords["trans_att"].values + ): + i_splice = sum(params.x.values < taxi) + mask = create_da_ta2(no, i_splice, direction="bw", chunks=memchunk) - q = mcparams["tmpw" + "_mc_set"] - out["tmpw_avgsec"] - out["tmpw" + "_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) + ta_bw_arr += mask * tai.T[:, None, :] - if ci_avg_time_flag1: - out["tmpw" + "_avg1"] = out["tmpw" + "_avgsec"].mean(dim=time_dim2) + params["talpha_fw_mc"] = (("mc", "x", "time"), ta_fw_arr) + params["talpha_bw_mc"] = (("mc", "x", "time"), ta_bw_arr) - out["tmpw" + "_mc_avg1_var"] = mcparams["tmpw" + "_mc_set"].var( - dim=["mc", time_dim2] - ) + # Draw from the normal distributions for the Stokes intensities + for k, sti, st_vari in zip( + ["r_st", "r_ast", "r_rst", "r_rast"], + [self.st, self.ast, self.rst, self.rast], + [st_var, ast_var, rst_var, rast_var], + ): + # Load the mean as chunked Dask array, otherwise eats memory + if type(sti.data) == da.core.Array: + loc = da.asarray(sti.data, chunks=memchunk[1:]) + else: + loc = da.from_array(sti.data, chunks=memchunk[1:]) - if conf_ints: - new_chunks_weighted = ((len(conf_ints),),) + (memchunk[1],) - avg_axis = mcparams["tmpw" + "_mc_set"].get_axis_num(["mc", time_dim2]) - q2 = mcparams["tmpw" + "_mc_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks_weighted, - # Explicitly define output chunks - drop_axis=avg_axis, # avg dimensions are dropped - new_axis=0, - dtype=float, - ) # The new CI dimension is added as - # first axis - out["tmpw" + "_mc_avg1"] = (("CI", x_dim2), q2) + # Make sure variance is of size (no, nt) + if np.size(st_vari) > 1: + if st_vari.shape == sti.shape: + pass + else: + st_vari = np.broadcast_to(st_vari, (no, nt)) + else: + pass - if ci_avg_time_flag2: - tmpw_var_avg2 = 1 / ( - 1 / out["tmpf_mc_avg2_var"] + 1 / out["tmpb_mc_avg2_var"] - ) + # Load variance as chunked Dask array, otherwise eats memory + if type(st_vari) == da.core.Array: + st_vari_da = da.asarray(st_vari, chunks=memchunk[1:]) - q = ( - mcparams["tmpf_mc_avg2_set"] / out["tmpf_mc_avg2_var"] - + mcparams["tmpb_mc_avg2_set"] / out["tmpb_mc_avg2_var"] - ) * tmpw_var_avg2 + elif callable(st_vari) and type(sti.data) == da.core.Array: + st_vari_da = da.asarray(st_vari(sti).data, chunks=memchunk[1:]) - mcparams["tmpw" + "_mc_avg2_set"] = q # + elif callable(st_vari) and 
type(sti.data) != da.core.Array: + st_vari_da = da.from_array(st_vari(sti).data, chunks=memchunk[1:]) - out["tmpw" + "_avg2"] = ( - out["tmpf_avg2"] / out["tmpf_mc_avg2_var"] - + out["tmpb_avg2"] / out["tmpb_mc_avg2_var"] - ) * tmpw_var_avg2 + else: + st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) - out["tmpw" + "_mc_avg2_var"] = tmpw_var_avg2 + params[k] = ( + ("mc", "x", "time"), + state.normal( + loc=loc, # has chunks=memchunk[1:] + scale=st_vari_da**0.5, + size=rsize, + chunks=memchunk, + ), + ) - if conf_ints: - # We first need to know the x-dim-chunk-size - new_chunks_weighted = ((len(conf_ints),),) + (memchunk[1],) - avg_axis_avg2 = mcparams["tmpw" + "_mc_avg2_set"].get_axis_num("mc") - q2 = mcparams["tmpw" + "_mc_avg2_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), - chunks=new_chunks_weighted, - # Explicitly define output chunks - drop_axis=avg_axis_avg2, # avg dimensions are dropped - new_axis=0, - dtype=float, - ) # The new CI dimension is added as firstax - out["tmpw" + "_mc_avg2"] = (("CI", x_dim2), q2) + for label in ["tmpf", "tmpb"]: + if label == "tmpf": + if nta: + params["tmpf_mc_set"] = ( + params["gamma_mc"] + / ( + np.log(params["r_st"] / params["r_ast"]) + + params["df_mc"] + + params["alpha_mc"] + + params["talpha_fw_mc"] + ) + - 273.15 + ) + else: + params["tmpf_mc_set"] = ( + params["gamma_mc"] + / ( + np.log(params["r_st"] / params["r_ast"]) + + params["df_mc"] + + params["alpha_mc"] + ) + - 273.15 + ) + else: + if nta: + params["tmpb_mc_set"] = ( + params["gamma_mc"] + / ( + np.log(params["r_rst"] / params["r_rast"]) + + params["db_mc"] + - params["alpha_mc"] + + params["talpha_bw_mc"] + ) + - 273.15 + ) + else: + params["tmpb_mc_set"] = ( + params["gamma_mc"] + / ( + np.log(params["r_rst"] / params["r_rast"]) + + params["db_mc"] + - params["alpha_mc"] + ) + - 273.15 + ) - if ci_avg_x_flag1: - out["tmpw" + "_avgx1"] = out["tmpw" + "_avgsec"].mean(dim=x_dim2) + if var_only_sections: + # sets the values outside the reference sections to NaN + xi = self.ufunc_per_section( + sections=sections, x_indices=True, calc_per="all" + ) + x_mask_ = [True if ix in xi else False for ix in range(params.x.size)] + x_mask = np.reshape(x_mask_, (1, -1, 1)) + params[label + "_mc_set"] = params[label + "_mc_set"].where(x_mask) - out["tmpw" + "_mc_avgx1_var"] = mcparams["tmpw" + "_mc_set"].var(dim=x_dim2) + # subtract the mean temperature + q = params[label + "_mc_set"] - result[label] + out[label + "_mc_var"] = q.var(dim="mc", ddof=1) if conf_ints: - new_chunks_weighted = ((len(conf_ints),),) + (memchunk[2],) - avg_axis = mcparams["tmpw" + "_mc_set"].get_axis_num(["mc", x_dim2]) - q2 = mcparams["tmpw" + "_mc_set"].data.map_blocks( + new_chunks = list(params[label + "_mc_set"].chunks) + new_chunks[0] = (len(conf_ints),) + avg_axis = params[label + "_mc_set"].get_axis_num("mc") + q = params[label + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks_weighted, - # Explicitly define output chunks - drop_axis=avg_axis, # avg dimensions are dropped + chunks=new_chunks, # + drop_axis=avg_axis, + # avg dimensions are dropped from input arr new_axis=0, - dtype=float, - ) # The new CI dimension is added as - # first axis - out["tmpw" + "_mc_avgx1"] = (("CI", time_dim2), q2) + ) # The new CI dimension is added as firsaxis - if ci_avg_x_flag2: - tmpw_var_avgx2 = 1 / ( - 1 / out["tmpf_mc_avgx2_var"] + 1 / out["tmpb_mc_avgx2_var"] - ) + out[label + "_mc"] = (("CI", "x", "time"), q) - q = ( - 
mcparams["tmpf_mc_avgx2_set"] / out["tmpf_mc_avgx2_var"] - + mcparams["tmpb_mc_avgx2_set"] / out["tmpb_mc_avgx2_var"] - ) * tmpw_var_avgx2 + # Weighted mean of the forward and backward + tmpw_var = 1 / (1 / out["tmpf_mc_var"] + 1 / out["tmpb_mc_var"]) - mcparams["tmpw" + "_mc_avgx2_set"] = q # + q = ( + params["tmpf_mc_set"] / out["tmpf_mc_var"] + + params["tmpb_mc_set"] / out["tmpb_mc_var"] + ) * tmpw_var - out["tmpw" + "_avgx2"] = ( - out["tmpf_avgx2"] / out["tmpf_mc_avgx2_var"] - + out["tmpb_avgx2"] / out["tmpb_mc_avgx2_var"] - ) * tmpw_var_avgx2 + params["tmpw" + "_mc_set"] = q # - out["tmpw" + "_mc_avgx2_var"] = tmpw_var_avgx2 + out["tmpw"] = ( + result["tmpf"] / out["tmpf_mc_var"] + result["tmpb"] / out["tmpb_mc_var"] + ) * tmpw_var - if conf_ints: - # We first need to know the x-dim-chunk-size - new_chunks_weighted = ((len(conf_ints),),) + (memchunk[2],) - avg_axis_avgx2 = mcparams["tmpw" + "_mc_avgx2_set"].get_axis_num("mc") - q2 = mcparams["tmpw" + "_mc_avgx2_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx2), - chunks=new_chunks_weighted, - # Explicitly define output chunks - drop_axis=avg_axis_avgx2, # avg dimensions are dropped - new_axis=0, - dtype=float, - ) # The new CI dimension is added as firstax - out["tmpw" + "_mc_avgx2"] = (("CI", time_dim2), q2) + q = params["tmpw" + "_mc_set"] - result["tmpw"] + out["tmpw" + "_mc_var"] = q.var(dim="mc", ddof=1) + + # Calculate the CI of the weighted MC_set + if conf_ints: + new_chunks_weighted = ((len(conf_ints),),) + memchunk[1:] + avg_axis = params["tmpw" + "_mc_set"].get_axis_num("mc") + q2 = params["tmpw" + "_mc_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), + chunks=new_chunks_weighted, # Explicitly define output chunks + drop_axis=avg_axis, # avg dimensions are dropped + new_axis=0, + dtype=float, + ) # The new CI dimension is added as first axis + out["tmpw" + "_mc"] = (("CI", "x", "time"), q2) # Clean up the garbage. All arrays with a Monte Carlo dimension. if mc_remove_set_flag: @@ -3110,76 +1900,54 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): "alpha_mc", "df_mc", "db_mc", - "x_avg", - "time_avg", - "mc", ] for i in ["tmpf", "tmpb", "tmpw"]: - remove_mc_set.append(i + "_avgsec") remove_mc_set.append(i + "_mc_set") - remove_mc_set.append(i + "_mc_avg2_set") - remove_mc_set.append(i + "_mc_avgx2_set") - remove_mc_set.append(i + "_mc_avgsec_var") - if "trans_att" in mcparams and mcparams.trans_att.size: + if nta: remove_mc_set.append('talpha"_fw_mc') remove_mc_set.append('talpha"_bw_mc') for k in remove_mc_set: if k in out: - print(f"Removed from results: {k}") del out[k] - self.update(out) - pass + if not mc_remove_set_flag: + out.update(params) + + return out - def conf_int_single_ended( + def average_monte_carlo_single_ended( self, - p_val="p_val", - p_cov="p_cov", - st_var=None, - ast_var=None, + result, + st_var, + ast_var, conf_ints=None, mc_sample_size=100, + ci_avg_time_flag1=False, + ci_avg_time_flag2=False, + ci_avg_time_sel=None, + ci_avg_time_isel=None, + ci_avg_x_flag1=False, + ci_avg_x_flag2=False, + ci_avg_x_sel=None, + ci_avg_x_isel=None, da_random_state=None, mc_remove_set_flag=True, reduce_memory_usage=False, - **kwargs, ): - r""" - Estimation of the confidence intervals for the temperatures measured - with a single-ended setup. It consists of five steps. First, the variances - of the Stokes and anti-Stokes intensity measurements are estimated - following the steps in Section 4 [1]_. 
A Normal - distribution is assigned to each intensity measurement that is centered - at the measurement and using the estimated variance. Second, a multi- - variate Normal distribution is assigned to the estimated parameters - using the covariance matrix from the calibration procedure presented in - Section 5 [1]_. Third, the distributions are sampled, and the - temperature is computed with Equation 12 [1]_. Fourth, step - three is repeated, e.g., 10,000 times for each location and for each - time. The resulting 10,000 realizations of the temperatures - approximate the probability density functions of the estimated - temperature at that location and time. Fifth, the standard uncertainties - are computed with the standard deviations of the realizations of the - temperatures, and the 95\% confidence intervals are computed from the - 2.5\% and 97.5\% percentiles of the realizations of the temperatures. + """ + Average temperatures from single-ended setups. + + Four types of averaging are implemented. Please see Example Notebook 16. Parameters ---------- - p_val : array-like, optional - Define `p_val`, `p_var`, `p_cov` if you used an external function - for calibration. Has size 2 + `nt`. First value is :math:`\gamma`, - second is :math:`\Delta \\alpha`, others are :math:`C` for each - timestep. - If set to False, no uncertainty in the parameters is propagated - into the confidence intervals. Similar to the spec sheets of the DTS - manufacturers. And similar to passing an array filled with zeros - p_cov : array-like, optional - The covariances of `p_val`. - st_var, ast_var : float, callable, array-like, optional + result : xr.Dataset + The result from the `calibrate_single_ended()` method. + st_var, ast_var : float, callable, array-like The variance of the measurement noise of the Stokes signals in the forward direction. If `float` the variance of the noise from the Stokes detector is described with a single value. @@ -3195,6 +1963,74 @@ def conf_int_single_ended( mc_sample_size : int Size of the monte carlo parameter set used to calculate the confidence interval + ci_avg_time_flag1 : bool + The confidence intervals differ each time step. Assumes the + temperature varies during the measurement period. Computes the + arithmic temporal mean. If you would like to know the confidence + interfal of: + (1) a single additional measurement. So you can state "if another + measurement were to be taken, it would have this ci" + (2) all measurements. So you can state "The temperature remained + during the entire measurement period between these ci bounds". + Adds "tmpw" + '_avg1' and "tmpw" + '_mc_avg1_var' to the + DataStore. If `conf_ints` are set, also the confidence intervals + `_mc_avg1` are added to the DataStore. Works independently of the + ci_avg_time_flag2 and ci_avg_x_flag. + ci_avg_time_flag2 : bool + The confidence intervals differ each time step. Assumes the + temperature remains constant during the measurement period. + Computes the inverse-variance-weighted-temporal-mean temperature + and its uncertainty. + If you would like to know the confidence interfal of: + (1) I want to estimate a background temperature with confidence + intervals. I hereby assume the temperature does not change over + time and average all measurements to get a better estimate of the + background temperature. + Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the + DataStore. If `conf_ints` are set, also the confidence intervals + `_mc_avg2` are added to the DataStore. 
Works independently of the
+ ci_avg_time_flag1 and ci_avg_x_flag.
+ ci_avg_time_sel : slice
+ Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a
+ selection of the data
+ ci_avg_time_isel : iterable of int
+ Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a
+ selection of the data
+ ci_avg_x_flag1 : bool
+ The confidence intervals differ at each location. Assumes the
+ temperature varies over `x` and over time. Computes the
+ arithmetic spatial mean. If you would like to know the confidence
+ interval of:
+ (1) a single additional measurement location. So you can state "if
+ another measurement location were to be taken,
+ it would have this ci"
+ (2) all measurement locations. So you can state "The temperature
+ along the fiber remained between these ci bounds".
+ Adds "tmpw" + '_avgx1' and "tmpw" + '_mc_avgx1_var' to the
+ DataStore. If `conf_ints` are set, also the confidence intervals
+ `_mc_avgx1` are added to the DataStore. Works independently of the
+ ci_avg_time_flag1, ci_avg_time_flag2 and ci_avg_x_flag2.
+ ci_avg_x_flag2 : bool
+ The confidence intervals differ at each location. Assumes the
+ temperature is the same at each location but varies over time.
+ Computes the inverse-variance-weighted-spatial-mean temperature
+ and its uncertainty.
+ If you would like to know the confidence interval of:
+ (1) I have put a lot of fiber in water, and I know that the
+ temperature variation in the water is much smaller than along
+ other parts of the fiber. And I would like to average the
+ measurements from multiple locations to improve the estimated
+ temperature.
+ Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the
+ DataStore. If `conf_ints` are set, also the confidence intervals
+ `_mc_avg2` are added to the DataStore. Works independently of the
+ ci_avg_time_flag1 and ci_avg_x_flag.
+ ci_avg_x_sel : slice
+ Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a
+ selection of the data
+ ci_avg_x_isel : iterable of int
+ Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a
+ selection of the data
da_random_state
For testing purposes. Similar to random seed. The seed for dask.
Makes random not so random. To produce reproducable results for
@@ -3205,291 +2041,255 @@ def conf_int_single_ended(
reduce_memory_usage : bool
Use less memory but at the expense of longer computation time
+ Returns
+ -------
- References
- ----------
- .. [1] des Tombe, B., Schilperoort, B., & Bakker, M. (2020). Estimation
- of Temperature and Associated Uncertainty from Fiber-Optic Raman-
- Spectrum Distributed Temperature Sensing. Sensors, 20(8), 2235.
- https://doi.org/10.3390/s20082235 """ - check_deprecated_kwargs(kwargs) - - out = xr.Dataset() - params = xr.Dataset() - - if da_random_state: - state = da_random_state - else: - state = da.random.RandomState() - - no, nt = self.st.data.shape - if "trans_att" in self.keys(): - nta = self.trans_att.size + # out contains the state + out = xr.Dataset( + coords={"x": self.x, "time": self.time, "trans_att": result["trans_att"]} + ).copy() + out.coords["x"].attrs = dim_attrs["x"] + out.coords["trans_att"].attrs = dim_attrs["trans_att"] + out.coords["CI"] = conf_ints + + mcparams = self.monte_carlo_single_ended( + result=result, + st_var=st_var, + ast_var=ast_var, + conf_ints=None, + mc_sample_size=mc_sample_size, + da_random_state=da_random_state, + mc_remove_set_flag=False, + reduce_memory_usage=reduce_memory_usage, + ) + mcparams["tmpf"] = result["tmpf"] - else: - nta = 0 + if ci_avg_time_sel is not None: + time_dim2 = "time" + "_avg" + x_dim2 = "x" + mcparams.coords[time_dim2] = ( + (time_dim2,), + mcparams["time"].sel(**{"time": ci_avg_time_sel}).data, + ) + mcparams["tmpf_avgsec"] = ( + ("x", time_dim2), + mcparams["tmpf"].sel(**{"time": ci_avg_time_sel}).data, + ) + mcparams["tmpf_mc_set"] = ( + ("mc", "x", time_dim2), + mcparams["tmpf" + "_mc_set"].sel(**{"time": ci_avg_time_sel}).data, + ) - assert isinstance(p_val, (str, np.ndarray, np.generic)) - if isinstance(p_val, str): - p_val = self[p_val].data + elif ci_avg_time_isel is not None: + time_dim2 = "time" + "_avg" + x_dim2 = "x" + mcparams.coords[time_dim2] = ( + (time_dim2,), + mcparams["time"].isel(**{"time": ci_avg_time_isel}).data, + ) + mcparams["tmpf_avgsec"] = ( + ("x", time_dim2), + mcparams["tmpf"].isel(**{"time": ci_avg_time_isel}).data, + ) + mcparams["tmpf_mc_set"] = ( + ("mc", "x", time_dim2), + mcparams["tmpf" + "_mc_set"].isel(**{"time": ci_avg_time_isel}).data, + ) - npar = p_val.size + elif ci_avg_x_sel is not None: + time_dim2 = "time" + x_dim2 = "x_avg" + mcparams.coords[x_dim2] = ((x_dim2,), mcparams.x.sel(x=ci_avg_x_sel).data) + mcparams["tmpf_avgsec"] = ( + (x_dim2, "time"), + mcparams["tmpf"].sel(x=ci_avg_x_sel).data, + ) + mcparams["tmpf_mc_set"] = ( + ("mc", x_dim2, "time"), + mcparams["tmpf_mc_set"].sel(x=ci_avg_x_sel).data, + ) - # number of parameters - if npar == nt + 2 + nt * nta: - fixed_alpha = False - elif npar == 1 + no + nt + nt * nta: - fixed_alpha = True + elif ci_avg_x_isel is not None: + time_dim2 = "time" + x_dim2 = "x_avg" + mcparams.coords[x_dim2] = ((x_dim2,), mcparams.x.isel(x=ci_avg_x_isel).data) + mcparams["tmpf_avgsec"] = ( + (x_dim2, time_dim2), + mcparams["tmpf"].isel(x=ci_avg_x_isel).data, + ) + mcparams["tmpf_mc_set"] = ( + ("mc", x_dim2, time_dim2), + mcparams["tmpf_mc_set"].isel(x=ci_avg_x_isel).data, + ) else: - raise Exception("The size of `p_val` is not what I expected") + mcparams["tmpf_avgsec"] = mcparams["tmpf"] + x_dim2 = "x" + time_dim2 = "time" - params.coords["mc"] = range(mc_sample_size) - params.coords["x"] = self.x - params.coords["time"] = self.time + # subtract the mean temperature + q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] + out["tmpf_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) - if conf_ints: - out.coords["CI"] = conf_ints - params.coords["CI"] = conf_ints + if ci_avg_x_flag1: + # unweighted mean + out["tmpf_avgx1"] = mcparams["tmpf" + "_avgsec"].mean(dim=x_dim2) - # WLS - if isinstance(p_cov, str): - p_cov = self[p_cov].data - assert p_cov.shape == (npar, npar) + q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] + qvar = q.var(dim=["mc", 
x_dim2], ddof=1) + out["tmpf_mc_avgx1_var"] = qvar - p_mc = sst.multivariate_normal.rvs(mean=p_val, cov=p_cov, size=mc_sample_size) + if conf_ints: + new_chunks = (len(conf_ints), mcparams["tmpf_mc_set"].chunks[2]) + avg_axis = mcparams["tmpf_mc_set"].get_axis_num(["mc", x_dim2]) + q = mcparams["tmpf_mc_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), + chunks=new_chunks, # + drop_axis=avg_axis, + # avg dimensions are dropped from input arr + new_axis=0, + ) # The new CI dim is added as firsaxis - if fixed_alpha: - params["alpha_mc"] = (("mc", "x"), p_mc[:, 1 : no + 1]) - params["c_mc"] = (("mc", "time"), p_mc[:, 1 + no : 1 + no + nt]) - else: - params["dalpha_mc"] = (("mc",), p_mc[:, 1]) - params["c_mc"] = (("mc", "time"), p_mc[:, 2 : nt + 2]) + out["tmpf_mc_avgx1"] = (("CI", time_dim2), q) - params["gamma_mc"] = (("mc",), p_mc[:, 0]) - if nta: - params["ta_mc"] = ( - ("mc", "trans_att", "time"), - np.reshape(p_mc[:, -nt * nta :], (mc_sample_size, nta, nt)), - ) + if ci_avg_x_flag2: + q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] - rsize = (params.mc.size, params.x.size, params.time.size) + qvar = q.var(dim=["mc"], ddof=1) - if reduce_memory_usage: - memchunk = da.ones( - (mc_sample_size, no, nt), chunks={0: -1, 1: 1, 2: "auto"} - ).chunks - else: - memchunk = da.ones( - (mc_sample_size, no, nt), chunks={0: -1, 1: "auto", 2: "auto"} - ).chunks + # Inverse-variance weighting + avg_x_var = 1 / (1 / qvar).sum(dim=x_dim2) - # Draw from the normal distributions for the Stokes intensities - for k, st_labeli, st_vari in zip( - ["r_st", "r_ast"], ["st", "ast"], [st_var, ast_var] - ): - # Load the mean as chunked Dask array, otherwise eats memory - if type(self[st_labeli].data) == da.core.Array: - loc = da.asarray(self[st_labeli].data, chunks=memchunk[1:]) - else: - loc = da.from_array(self[st_labeli].data, chunks=memchunk[1:]) + out["tmpf_mc_avgx2_var"] = avg_x_var - # Make sure variance is of size (no, nt) - if np.size(st_vari) > 1: - if st_vari.shape == self[st_labeli].shape: - pass - else: - st_vari = np.broadcast_to(st_vari, (no, nt)) - else: - pass + mcparams["tmpf" + "_mc_avgx2_set"] = (mcparams["tmpf_mc_set"] / qvar).sum( + dim=x_dim2 + ) * avg_x_var + out["tmpf" + "_avgx2"] = mcparams["tmpf" + "_mc_avgx2_set"].mean(dim="mc") - # Load variance as chunked Dask array, otherwise eats memory - if type(st_vari) == da.core.Array: - st_vari_da = da.asarray(st_vari, chunks=memchunk[1:]) + if conf_ints: + new_chunks = (len(conf_ints), mcparams["tmpf_mc_set"].chunks[2]) + avg_axis_avgx = mcparams["tmpf_mc_set"].get_axis_num("mc") - elif callable(st_vari) and type(self[st_labeli].data) == da.core.Array: - st_vari_da = da.asarray( - st_vari(self[st_labeli]).data, chunks=memchunk[1:] - ) + qq = mcparams["tmpf_mc_avgx2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx), + chunks=new_chunks, # + drop_axis=avg_axis_avgx, + # avg dimensions are dropped from input arr + new_axis=0, + dtype=float, + ) # The new CI dimension is added as + # firsaxis + out["tmpf_mc_avgx2"] = (("CI", time_dim2), qq) - elif callable(st_vari) and type(self[st_labeli].data) != da.core.Array: - st_vari_da = da.from_array( - st_vari(self[st_labeli]).data, chunks=memchunk[1:] - ) + if ci_avg_time_flag1 is not None: + # unweighted mean + out["tmpf_avg1"] = mcparams["tmpf_avgsec"].mean(dim=time_dim2) - else: - st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) + q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] + qvar = q.var(dim=["mc", time_dim2], 
ddof=1) + out["tmpf_mc_avg1_var"] = qvar - params[k] = ( - ("mc", "x", "time"), - state.normal( - loc=loc, # has chunks=memchunk[1:] - scale=st_vari_da**0.5, - size=rsize, - chunks=memchunk, - ), - ) + if conf_ints: + new_chunks = (len(conf_ints), mcparams["tmpf_mc_set"].chunks[1]) + avg_axis = mcparams["tmpf_mc_set"].get_axis_num(["mc", time_dim2]) + q = mcparams["tmpf_mc_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), + chunks=new_chunks, # + drop_axis=avg_axis, + # avg dimensions are dropped from input arr + new_axis=0, + ) # The new CI dim is added as firsaxis - ta_arr = np.zeros((mc_sample_size, no, nt)) + out["tmpf_mc_avg1"] = (("CI", x_dim2), q) - if nta: - for ii, ta in enumerate(params["ta_mc"]): - for tai, taxi in zip(ta.values, self.trans_att.values): - ta_arr[ii, self.x.values >= taxi] = ( - ta_arr[ii, self.x.values >= taxi] + tai - ) - params["ta_mc_arr"] = (("mc", "x", "time"), ta_arr) + if ci_avg_time_flag2: + q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] - if fixed_alpha: - params["tmpf_mc_set"] = ( - params["gamma_mc"] - / ( - ( - np.log(params["r_st"]) - - np.log(params["r_ast"]) - + (params["c_mc"] + params["ta_mc_arr"]) - ) - + params["alpha_mc"] - ) - - 273.15 - ) - else: - params["tmpf_mc_set"] = ( - params["gamma_mc"] - / ( - ( - np.log(params["r_st"]) - - np.log(params["r_ast"]) - + (params["c_mc"] + params["ta_mc_arr"]) - ) - + (params["dalpha_mc"] * params.x) - ) - - 273.15 - ) + qvar = q.var(dim=["mc"], ddof=1) - avg_dims = ["mc"] - avg_axis = params["tmpf_mc_set"].get_axis_num(avg_dims) - out["tmpf_mc_var"] = (params["tmpf_mc_set"] - self["tmpf"]).var( - dim=avg_dims, ddof=1 - ) + # Inverse-variance weighting + avg_time_var = 1 / (1 / qvar).sum(dim=time_dim2) - if conf_ints: - new_chunks = ((len(conf_ints),),) + params["tmpf_mc_set"].chunks[1:] + out["tmpf_mc_avg2_var"] = avg_time_var - qq = params["tmpf_mc_set"] + mcparams["tmpf" + "_mc_avg2_set"] = (mcparams["tmpf_mc_set"] / qvar).sum( + dim=time_dim2 + ) * avg_time_var + out["tmpf_avg2"] = mcparams["tmpf" + "_mc_avg2_set"].mean(dim="mc") - q = qq.data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks, # - drop_axis=avg_axis, # avg dimesnions are dropped from input arr - new_axis=0, - ) # The new CI dimension is added as first axis + if conf_ints: + new_chunks = (len(conf_ints), mcparams["tmpf_mc_set"].chunks[1]) + avg_axis_avg2 = mcparams["tmpf_mc_set"].get_axis_num("mc") - out["tmpf_mc"] = (("CI", "x", "time"), q) + qq = mcparams["tmpf_mc_avg2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), + chunks=new_chunks, # + drop_axis=avg_axis_avg2, + # avg dimensions are dropped from input arr + new_axis=0, + dtype=float, + ) # The new CI dimension is added as + # firsaxis + out["tmpf_mc_avg2"] = (("CI", x_dim2), qq) - if not mc_remove_set_flag: - out.update(params) + # Clean up the garbage. All arrays with a Monte Carlo dimension. 
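The `_avg2` and `_avgx2` quantities computed above are inverse-variance weighted means of the Monte Carlo set. With :math:`\sigma^2\left[T_n\right]` the variance of the realizations at step :math:`n` (`qvar`), the reported uncertainty and weighted mean are

.. math::
    \sigma^2\left[\bar{T}\right] = \frac{1}{\sum_n 1 / \sigma^2\left[T_n\right]},
    \qquad
    \bar{T} = \sigma^2\left[\bar{T}\right] \sum_n \frac{T_n}{\sigma^2\left[T_n\right]},

where the weighted mean is evaluated for each realization (`tmpf_mc_avg2_set`) and its Monte Carlo average is reported as `tmpf_avg2`; the sum runs over time steps for the `_avg2` variables (`ci_avg_time_flag2`) and over locations for the `_avgx2` variables (`ci_avg_x_flag2`). A minimal usage sketch, under the same naming assumptions as the earlier sketches (`dts`, `out`, `st_var`, `ast_var` are illustrative):

    >>> avg = dts.average_monte_carlo_single_ended(
    ...     result=out,
    ...     st_var=st_var,
    ...     ast_var=ast_var,
    ...     conf_ints=[2.5, 97.5],
    ...     mc_sample_size=500,
    ...     ci_avg_time_flag2=True,
    ... )
    >>> avg["tmpf_avg2"]         # inverse-variance weighted temporal mean
    >>> avg["tmpf_mc_avg2_var"]  # its variance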
+ if mc_remove_set_flag: + remove_mc_set = [ + "r_st", + "r_ast", + "gamma_mc", + "dalpha_mc", + "c_mc", + "x_avg", + "time_avg", + "mc", + "ta_mc_arr", + ] + remove_mc_set.append("tmpf_avgsec") + remove_mc_set.append("tmpf_mc_set") + remove_mc_set.append("tmpf_mc_avg2_set") + remove_mc_set.append("tmpf_mc_avgx2_set") + remove_mc_set.append("tmpf_mc_avgsec_var") + + for k in remove_mc_set: + if k in out: + del out[k] - self.update(out) return out - def conf_int_double_ended( + def average_monte_carlo_double_ended( self, - sections=None, - p_val="p_val", - p_cov="p_cov", - st_var=None, - ast_var=None, - rst_var=None, - rast_var=None, + result, + st_var, + ast_var, + rst_var, + rast_var, conf_ints=None, mc_sample_size=100, - var_only_sections=False, + ci_avg_time_flag1=False, + ci_avg_time_flag2=False, + ci_avg_time_sel=None, + ci_avg_time_isel=None, + ci_avg_x_flag1=False, + ci_avg_x_flag2=False, + ci_avg_x_sel=None, + ci_avg_x_isel=None, da_random_state=None, mc_remove_set_flag=True, reduce_memory_usage=False, **kwargs, ): - r""" - Estimation of the confidence intervals for the temperatures measured - with a double-ended setup. - Double-ended setups require four additional steps to estimate the - confidence intervals for the temperature. First, the variances of the - Stokes and anti-Stokes intensity measurements of the forward and - backward channels are estimated following the steps in - Section 4 [1]_. See `ds.variance_stokes_constant()`. - A Normal distribution is assigned to each - intensity measurement that is centered at the measurement and using the - estimated variance. Second, a multi-variate Normal distribution is - assigned to the estimated parameters using the covariance matrix from - the calibration procedure presented in Section 6 [1]_ (`p_cov`). Third, - Normal distributions are assigned for :math:`A` (`ds.alpha`) - for each location - outside of the reference sections. These distributions are centered - around :math:`A_p` and have variance :math:`\sigma^2\left[A_p\\right]` - given by Equations 44 and 45. Fourth, the distributions are sampled - and :math:`T_{\mathrm{F},m,n}` and :math:`T_{\mathrm{B},m,n}` are - computed with Equations 16 and 17, respectively. Fifth, step four is repeated to - compute, e.g., 10,000 realizations (`mc_sample_size`) of :math:`T_{\mathrm{F},m,n}` and - :math:`T_{\mathrm{B},m,n}` to approximate their probability density - functions. Sixth, the standard uncertainties of - :math:`T_{\mathrm{F},m,n}` and :math:`T_{\mathrm{B},m,n}` - (:math:`\sigma\left[T_{\mathrm{F},m,n}\\right]` and - :math:`\sigma\left[T_{\mathrm{B},m,n}\\right]`) are estimated with the - standard deviation of their realizations. Seventh, for each realization - :math:`i` the temperature :math:`T_{m,n,i}` is computed as the weighted - average of :math:`T_{\mathrm{F},m,n,i}` and - :math:`T_{\mathrm{B},m,n,i}`: - - .. math:: - - T_{m,n,i} =\ - \sigma^2\left[T_{m,n}\\right]\left({\\frac{T_{\mathrm{F},m,n,i}}{\ - \sigma^2\left[T_{\mathrm{F},m,n}\\right]} +\ - \\frac{T_{\mathrm{B},m,n,i}}{\ - \sigma^2\left[T_{\mathrm{B},m,n}\\right]}}\\right) - - where - - .. math:: - - \sigma^2\left[T_{m,n}\\right] = \\frac{1}{1 /\ - \sigma^2\left[T_{\mathrm{F},m,n}\\right] + 1 /\ - \sigma^2\left[T_{\mathrm{B},m,n}\\right]} - - The best estimate of the temperature :math:`T_{m,n}` is computed - directly from the best estimates of :math:`T_{\mathrm{F},m,n}` and - :math:`T_{\mathrm{B},m,n}` as: - - .. 
math:: - T_{m,n} =\ - \sigma^2\left[T_{m,n}\\right]\left({\\frac{T_{\mathrm{F},m,n}}{\ - \sigma^2\left[T_{\mathrm{F},m,n}\\right]} + \\frac{T_{\mathrm{B},m,n}}{\ - \sigma^2\left[T_{\mathrm{B},m,n}\\right]}}\\right) - - Alternatively, the best estimate of :math:`T_{m,n}` can be approximated - with the mean of the :math:`T_{m,n,i}` values. Finally, the 95\% - confidence interval for :math:`T_{m,n}` are estimated with the 2.5\% and - 97.5\% percentiles of :math:`T_{m,n,i}`. + """ + Average temperatures from double-ended setups. - Assumes sections are set. + Four types of averaging are implemented. Please see Example Notebook 16. Parameters ---------- - p_val : array-like, optional - Define `p_val`, `p_var`, `p_cov` if you used an external function - for calibration. Has size `1 + 2 * nt + nx + 2 * nt * nta`. - First value is :math:`\gamma`, then `nt` times - :math:`D_\mathrm{F}`, then `nt` times - :math:`D_\mathrm{B}`, then for each location :math:`D_\mathrm{B}`, - then for each connector that introduces directional attenuation two - parameters per time step. - p_cov : array-like, optional - The covariances of `p_val`. Square matrix. - If set to False, no uncertainty in the parameters is propagated - into the confidence intervals. Similar to the spec sheets of the DTS - manufacturers. And similar to passing an array filled with zeros. - st_var, ast_var, rst_var, rast_var : float, callable, array-like, optional + result : xr.Dataset + The result from the `calibrate_double_ended()` method. + st_var, ast_var, rst_var, rast_var : float, callable, array-like The variance of the measurement noise of the Stokes signals in the forward direction. If `float` the variance of the noise from the Stokes detector is described with a single value. @@ -3500,16 +2300,79 @@ def conf_int_double_ended( x. Required if method is wls. conf_ints : iterable object of float A list with the confidence boundaries that are calculated. Valid - values are between [0, 1]. - mc_sample_size : int - Size of the monte carlo parameter set used to calculate the - confidence interval - var_only_sections : bool - useful if using the ci_avg_x_flag. Only calculates the var over the - sections, so that the values can be compared with accuracy along the - reference sections. Where the accuracy is the variance of the - residuals between the estimated temperature and temperature of the - water baths. + values are between + [0, 1]. + mc_sample_size : int + Size of the monte carlo parameter set used to calculate the + confidence interval + ci_avg_time_flag1 : bool + The confidence intervals differ each time step. Assumes the + temperature varies during the measurement period. Computes the + arithmic temporal mean. If you would like to know the confidence + interfal of: + (1) a single additional measurement. So you can state "if another + measurement were to be taken, it would have this ci" + (2) all measurements. So you can state "The temperature remained + during the entire measurement period between these ci bounds". + Adds "tmpw" + '_avg1' and "tmpw" + '_mc_avg1_var' to the + DataStore. If `conf_ints` are set, also the confidence intervals + `_mc_avg1` are added to the DataStore. Works independently of the + ci_avg_time_flag2 and ci_avg_x_flag. + ci_avg_time_flag2 : bool + The confidence intervals differ each time step. Assumes the + temperature remains constant during the measurement period. + Computes the inverse-variance-weighted-temporal-mean temperature + and its uncertainty. 
+ If you would like to know the confidence interval of: + (1) I want to estimate a background temperature with confidence + intervals. I hereby assume the temperature does not change over + time and average all measurements to get a better estimate of the + background temperature. + Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the + DataStore. If `conf_ints` are set, also the confidence intervals + `_mc_avg2` are added to the DataStore. Works independently of the + ci_avg_time_flag1 and ci_avg_x_flag. + ci_avg_time_sel : slice + Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a + selection of the data + ci_avg_time_isel : iterable of int + Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a + selection of the data + ci_avg_x_flag1 : bool + The confidence intervals differ at each location. Assumes the + temperature varies over `x` and over time. Computes the + arithmetic spatial mean. If you would like to know the confidence + interval of: + (1) a single additional measurement location. So you can state "if + another measurement location were to be taken, + it would have this ci" + (2) all measurement locations. So you can state "The temperature + along the fiber remained between these ci bounds". + Adds "tmpw" + '_avgx1' and "tmpw" + '_mc_avgx1_var' to the + DataStore. If `conf_ints` are set, also the confidence intervals + `_mc_avgx1` are added to the DataStore. Works independently of the + ci_avg_time_flag1, ci_avg_time_flag2 and ci_avg_x_flag2. + ci_avg_x_flag2 : bool + The confidence intervals differ at each location. Assumes the + temperature is the same at each location but varies over time. + Computes the inverse-variance-weighted-spatial-mean temperature + and its uncertainty. + If you would like to know the confidence interval of: + (1) I have put a lot of fiber in water, and I know that the + temperature variation in the water is much smaller than along + other parts of the fiber. And I would like to average the + measurements from multiple locations to improve the estimated + temperature. + Adds "tmpw" + '_avgx2' and "tmpw" + '_mc_avgx2_var' to the + DataStore. If `conf_ints` are set, also the confidence intervals + `_mc_avgx2` are added to the DataStore. Works independently of the + ci_avg_time_flag1 and ci_avg_x_flag. + ci_avg_x_sel : slice + Compute ci_avg_x_flag1 and ci_avg_x_flag2 using only a + selection of the data + ci_avg_x_isel : iterable of int + Compute ci_avg_x_flag1 and ci_avg_x_flag2 using only a + selection of the data da_random_state For testing purposes. Similar to random seed. The seed for dask. Makes random not so random. To produce reproducable results for @@ -3523,656 +2386,409 @@ def conf_int_double_ended( Returns ------- - References - ---------- - .. [1] des Tombe, B., Schilperoort, B., & Bakker, M. (2020). Estimation - of Temperature and Associated Uncertainty from Fiber-Optic Raman- - Spectrum Distributed Temperature Sensing. Sensors, 20(8), 2235. 
- https://doi.org/10.3390/s20082235 - """ - def create_da_ta2(no, i_splice, direction="fw", chunks=None): - """create mask array mc, o, nt""" - - if direction == "fw": - arr = da.concatenate( - ( - da.zeros((1, i_splice, 1), chunks=(1, i_splice, 1), dtype=bool), - da.ones( - (1, no - i_splice, 1), - chunks=(1, no - i_splice, 1), - dtype=bool, - ), - ), - axis=1, - ).rechunk((1, chunks[1], 1)) - else: - arr = da.concatenate( - ( - da.ones((1, i_splice, 1), chunks=(1, i_splice, 1), dtype=bool), - da.zeros( - (1, no - i_splice, 1), - chunks=(1, no - i_splice, 1), - dtype=bool, - ), - ), - axis=1, - ).rechunk((1, chunks[1], 1)) - return arr - - check_deprecated_kwargs(kwargs) - - out = xr.Dataset() - params = xr.Dataset() + # def create_da_ta2(no, i_splice, direction="fw", chunks=None): + # """create mask array mc, o, nt""" + + # if direction == "fw": + # arr = da.concatenate( + # ( + # da.zeros((1, i_splice, 1), chunks=(1, i_splice, 1), dtype=bool), + # da.ones( + # (1, no - i_splice, 1), + # chunks=(1, no - i_splice, 1), + # dtype=bool, + # ), + # ), + # axis=1, + # ).rechunk((1, chunks[1], 1)) + # else: + # arr = da.concatenate( + # ( + # da.ones((1, i_splice, 1), chunks=(1, i_splice, 1), dtype=bool), + # da.zeros( + # (1, no - i_splice, 1), + # chunks=(1, no - i_splice, 1), + # dtype=bool, + # ), + # ), + # axis=1, + # ).rechunk((1, chunks[1], 1)) + # return arr - if da_random_state: - # In testing environments - assert isinstance(da_random_state, da.random.RandomState) - state = da_random_state - else: - state = da.random.RandomState() + out = xr.Dataset( + coords={"x": self.x, "time": self.time, "trans_att": result["trans_att"]} + ).copy() + out.coords["x"].attrs = dim_attrs["x"] + out.coords["trans_att"].attrs = dim_attrs["trans_att"] + out.coords["CI"] = conf_ints - if conf_ints: - assert "tmpw", ( - "Current implementation requires you to " - 'define "tmpw" when estimating confidence ' - "intervals" + if (ci_avg_x_flag1 or ci_avg_x_flag2) and ( + ci_avg_time_flag1 or ci_avg_time_flag2 + ): + raise NotImplementedError( + "Incompatible flags. Can not pick " "the right chunks" ) - no, nt = self.st.shape - nta = self.trans_att.size - npar = 1 + 2 * nt + no + nt * 2 * nta # number of parameters - - rsize = (mc_sample_size, no, nt) + elif not ( + ci_avg_x_flag1 or ci_avg_x_flag2 or ci_avg_time_flag1 or ci_avg_time_flag2 + ): + raise NotImplementedError("Pick one of the averaging options") - if reduce_memory_usage: - memchunk = da.ones( - (mc_sample_size, no, nt), chunks={0: -1, 1: 1, 2: "auto"} - ).chunks else: - memchunk = da.ones( - (mc_sample_size, no, nt), chunks={0: -1, 1: "auto", 2: "auto"} - ).chunks - - params.coords["mc"] = range(mc_sample_size) - params.coords["x"] = self.x - params.coords["time"] = self.time + pass - if conf_ints: - self.coords["CI"] = conf_ints - params.coords["CI"] = conf_ints - - assert isinstance(p_val, (str, np.ndarray, np.generic)) - if isinstance(p_val, str): - p_val = self[p_val].values - assert p_val.shape == (npar,), ( - "Did you set 'talpha' as " - "keyword argument of the " - "conf_int_double_ended() function?" 
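A minimal usage sketch of the averaging routine defined above. The `ds.dts` accessor spelling is an assumption based on other parts of this changeset, and `ds`, `sections` and the four noise variances are assumed to come from the preceding reading and variance-estimation steps; the keyword values are illustrative only::

    out = ds.dts.calibrate_double_ended(
        sections=sections,
        st_var=st_var, ast_var=ast_var, rst_var=rst_var, rast_var=rast_var,
    )
    avg = ds.dts.average_monte_carlo_double_ended(
        result=out,
        st_var=st_var, ast_var=ast_var, rst_var=rst_var, rast_var=rast_var,
        conf_ints=[0.025, 0.975],      # 95% interval
        mc_sample_size=500,
        ci_avg_time_flag1=True,        # arithmetic temporal mean per location
    )
    avg["tmpw_avg1"]       # time-averaged weighted temperature
    avg["tmpw_mc_avg1"]    # its 2.5% and 97.5% bounds
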
+ mcparams = self.monte_carlo_double_ended( + result=result, + st_var=st_var, + ast_var=ast_var, + rst_var=rst_var, + rast_var=rast_var, + conf_ints=None, + mc_sample_size=mc_sample_size, + da_random_state=da_random_state, + mc_remove_set_flag=False, + reduce_memory_usage=reduce_memory_usage, + **kwargs, ) - assert isinstance(p_cov, (str, np.ndarray, np.generic, bool)) - - if isinstance(p_cov, bool) and not p_cov: - # Exclude parameter uncertainty if p_cov == False - gamma = p_val[0] - d_fw = p_val[1 : nt + 1] - d_bw = p_val[1 + nt : 2 * nt + 1] - alpha = p_val[2 * nt + 1 : 2 * nt + 1 + no] - - params["gamma_mc"] = (tuple(), gamma) - params["alpha_mc"] = (("x",), alpha) - params["df_mc"] = (("time",), d_fw) - params["db_mc"] = (("time",), d_bw) - - if nta: - ta = p_val[2 * nt + 1 + no :].reshape((nt, 2, nta), order="F") - ta_fw = ta[:, 0, :] - ta_bw = ta[:, 1, :] - - ta_fw_arr = np.zeros((no, nt)) - for tai, taxi in zip(ta_fw.T, params.coords["trans_att"].values): - ta_fw_arr[params.x.values >= taxi] = ( - ta_fw_arr[params.x.values >= taxi] + tai - ) - - ta_bw_arr = np.zeros((no, nt)) - for tai, taxi in zip(ta_bw.T, params.coords["trans_att"].values): - ta_bw_arr[params.x.values < taxi] = ( - ta_bw_arr[params.x.values < taxi] + tai - ) - - params["talpha_fw_mc"] = (("x", "time"), ta_fw_arr) - params["talpha_bw_mc"] = (("x", "time"), ta_bw_arr) - - elif isinstance(p_cov, bool) and p_cov: - raise NotImplementedError("Not an implemented option. Check p_cov argument") - - else: - # WLS - if isinstance(p_cov, str): - p_cov = self[p_cov].values - assert p_cov.shape == (npar, npar) - - assert sections is not None, "Define sections" - ix_sec = self.ufunc_per_section( - sections=sections, x_indices=True, calc_per="all" - ) - nx_sec = ix_sec.size - from_i = np.concatenate( - ( - np.arange(1 + 2 * nt), - 1 + 2 * nt + ix_sec, - np.arange(1 + 2 * nt + no, 1 + 2 * nt + no + nt * 2 * nta), + for label in ["tmpf", "tmpb"]: + if ci_avg_time_sel is not None: + time_dim2 = "time" + "_avg" + x_dim2 = "x" + mcparams.coords[time_dim2] = ( + (time_dim2,), + mcparams["time"].sel(**{"time": ci_avg_time_sel}).data, ) - ) - iox_sec1, iox_sec2 = np.meshgrid(from_i, from_i, indexing="ij") - po_val = p_val[from_i] - po_cov = p_cov[iox_sec1, iox_sec2] - - po_mc = sst.multivariate_normal.rvs( - mean=po_val, cov=po_cov, size=mc_sample_size - ) - - gamma = po_mc[:, 0] - d_fw = po_mc[:, 1 : nt + 1] - d_bw = po_mc[:, 1 + nt : 2 * nt + 1] - - params["gamma_mc"] = (("mc",), gamma) - params["df_mc"] = (("mc", "time"), d_fw) - params["db_mc"] = (("mc", "time"), d_bw) - - # calculate alpha seperately - alpha = np.zeros((mc_sample_size, no), dtype=float) - alpha[:, ix_sec] = po_mc[:, 1 + 2 * nt : 1 + 2 * nt + nx_sec] - - not_ix_sec = np.array([i for i in range(no) if i not in ix_sec]) - - if np.any(not_ix_sec): - not_alpha_val = p_val[2 * nt + 1 + not_ix_sec] - not_alpha_var = p_cov[2 * nt + 1 + not_ix_sec, 2 * nt + 1 + not_ix_sec] - - not_alpha_mc = np.random.normal( - loc=not_alpha_val, - scale=not_alpha_var**0.5, - size=(mc_sample_size, not_alpha_val.size), + mcparams[label + "_avgsec"] = ( + ("x", time_dim2), + result[label].sel(**{"time": ci_avg_time_sel}).data, + ) + mcparams[label + "_mc_set"] = ( + ("mc", "x", time_dim2), + mcparams[label + "_mc_set"].sel(**{"time": ci_avg_time_sel}).data, ) - alpha[:, not_ix_sec] = not_alpha_mc - - params["alpha_mc"] = (("mc", "x"), alpha) - - if nta: - ta = po_mc[:, 2 * nt + 1 + nx_sec :].reshape( - (mc_sample_size, nt, 2, nta), order="F" + elif ci_avg_time_isel is not None: + time_dim2 
= "time" + "_avg" + x_dim2 = "x" + mcparams.coords[time_dim2] = ( + (time_dim2,), + mcparams["time"].isel(**{"time": ci_avg_time_isel}).data, + ) + mcparams[label + "_avgsec"] = ( + ("x", time_dim2), + result[label].isel(**{"time": ci_avg_time_isel}).data, + ) + mcparams[label + "_mc_set"] = ( + ("mc", "x", time_dim2), + mcparams[label + "_mc_set"].isel(**{"time": ci_avg_time_isel}).data, ) - ta_fw = ta[:, :, 0, :] - ta_bw = ta[:, :, 1, :] - ta_fw_arr = da.zeros( - (mc_sample_size, no, nt), chunks=memchunk, dtype=float + elif ci_avg_x_sel is not None: + time_dim2 = "time" + x_dim2 = "x_avg" + mcparams.coords[x_dim2] = ( + (x_dim2,), + mcparams.x.sel(x=ci_avg_x_sel).data, ) - for tai, taxi in zip( - ta_fw.swapaxes(0, 2), params.coords["trans_att"].values - ): - # iterate over the splices - i_splice = sum(params.x.values < taxi) - mask = create_da_ta2(no, i_splice, direction="fw", chunks=memchunk) - - ta_fw_arr += mask * tai.T[:, None, :] - - ta_bw_arr = da.zeros( - (mc_sample_size, no, nt), chunks=memchunk, dtype=float + mcparams[label + "_avgsec"] = ( + (x_dim2, "time"), + result[label].sel(x=ci_avg_x_sel).data, ) - for tai, taxi in zip( - ta_bw.swapaxes(0, 2), params.coords["trans_att"].values - ): - i_splice = sum(params.x.values < taxi) - mask = create_da_ta2(no, i_splice, direction="bw", chunks=memchunk) - - ta_bw_arr += mask * tai.T[:, None, :] - - params["talpha_fw_mc"] = (("mc", "x", "time"), ta_fw_arr) - params["talpha_bw_mc"] = (("mc", "x", "time"), ta_bw_arr) - - # Draw from the normal distributions for the Stokes intensities - for k, st_labeli, st_vari in zip( - ["r_st", "r_ast", "r_rst", "r_rast"], - ["st", "ast", "rst", "rast"], - [st_var, ast_var, rst_var, rast_var], - ): - # Load the mean as chunked Dask array, otherwise eats memory - if type(self[st_labeli].data) == da.core.Array: - loc = da.asarray(self[st_labeli].data, chunks=memchunk[1:]) - else: - loc = da.from_array(self[st_labeli].data, chunks=memchunk[1:]) - - # Make sure variance is of size (no, nt) - if np.size(st_vari) > 1: - if st_vari.shape == self[st_labeli].shape: - pass - else: - st_vari = np.broadcast_to(st_vari, (no, nt)) - else: - pass - - # Load variance as chunked Dask array, otherwise eats memory - if type(st_vari) == da.core.Array: - st_vari_da = da.asarray(st_vari, chunks=memchunk[1:]) - - elif callable(st_vari) and type(self[st_labeli].data) == da.core.Array: - st_vari_da = da.asarray( - st_vari(self[st_labeli]).data, chunks=memchunk[1:] + mcparams[label + "_mc_set"] = ( + ("mc", x_dim2, "time"), + mcparams[label + "_mc_set"].sel(x=ci_avg_x_sel).data, ) - elif callable(st_vari) and type(self[st_labeli].data) != da.core.Array: - st_vari_da = da.from_array( - st_vari(self[st_labeli]).data, chunks=memchunk[1:] + elif ci_avg_x_isel is not None: + time_dim2 = "time" + x_dim2 = "x_avg" + mcparams.coords[x_dim2] = ( + (x_dim2,), + mcparams.x.isel(x=ci_avg_x_isel).data, + ) + mcparams[label + "_avgsec"] = ( + (x_dim2, time_dim2), + result[label].isel(x=ci_avg_x_isel).data, + ) + mcparams[label + "_mc_set"] = ( + ("mc", x_dim2, time_dim2), + mcparams[label + "_mc_set"].isel(x=ci_avg_x_isel).data, ) - else: - st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) + mcparams[label + "_avgsec"] = result[label] + x_dim2 = "x" + time_dim2 = "time" - params[k] = ( - ("mc", "x", "time"), - state.normal( - loc=loc, # has chunks=memchunk[1:] - scale=st_vari_da**0.5, - size=rsize, - chunks=memchunk, - ), - ) + memchunk = mcparams[label + "_mc_set"].chunks - for label in ["tmpf", "tmpb"]: - if "tmpw" or label: - if 
label == "tmpf": - if nta: - params["tmpf_mc_set"] = ( - params["gamma_mc"] - / ( - np.log(params["r_st"] / params["r_ast"]) - + params["df_mc"] - + params["alpha_mc"] - + params["talpha_fw_mc"] - ) - - 273.15 - ) - else: - params["tmpf_mc_set"] = ( - params["gamma_mc"] - / ( - np.log(params["r_st"] / params["r_ast"]) - + params["df_mc"] - + params["alpha_mc"] - ) - - 273.15 - ) - else: - if nta: - params["tmpb_mc_set"] = ( - params["gamma_mc"] - / ( - np.log(params["r_rst"] / params["r_rast"]) - + params["db_mc"] - - params["alpha_mc"] - + params["talpha_bw_mc"] - ) - - 273.15 - ) - else: - params["tmpb_mc_set"] = ( - params["gamma_mc"] - / ( - np.log(params["r_rst"] / params["r_rast"]) - + params["db_mc"] - - params["alpha_mc"] - ) - - 273.15 - ) + # subtract the mean temperature + q = mcparams[label + "_mc_set"] - mcparams[label + "_avgsec"] + out[label + "_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) - if var_only_sections: - # sets the values outside the reference sections to NaN - xi = self.ufunc_per_section( - sections=sections, x_indices=True, calc_per="all" - ) - x_mask_ = [ - True if ix in xi else False for ix in range(params.x.size) - ] - x_mask = np.reshape(x_mask_, (1, -1, 1)) - params[label + "_mc_set"] = params[label + "_mc_set"].where(x_mask) + if ci_avg_x_flag1: + # unweighted mean + out[label + "_avgx1"] = mcparams[label + "_avgsec"].mean(dim=x_dim2) - # subtract the mean temperature - q = params[label + "_mc_set"] - self[label] - out[label + "_mc_var"] = q.var(dim="mc", ddof=1) + q = mcparams[label + "_mc_set"] - mcparams[label + "_avgsec"] + qvar = q.var(dim=["mc", x_dim2], ddof=1) + out[label + "_mc_avgx1_var"] = qvar if conf_ints: - new_chunks = list(params[label + "_mc_set"].chunks) - new_chunks[0] = (len(conf_ints),) - avg_axis = params[label + "_mc_set"].get_axis_num("mc") - q = params[label + "_mc_set"].data.map_blocks( + new_chunks = (len(conf_ints), mcparams[label + "_mc_set"].chunks[2]) + avg_axis = mcparams[label + "_mc_set"].get_axis_num(["mc", x_dim2]) + q = mcparams[label + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks, # drop_axis=avg_axis, # avg dimensions are dropped from input arr new_axis=0, - ) # The new CI dimension is added as firsaxis - - out[label + "_mc"] = (("CI", "x", "time"), q) - - # Weighted mean of the forward and backward - tmpw_var = 1 / (1 / out["tmpf_mc_var"] + 1 / out["tmpb_mc_var"]) - - q = ( - params["tmpf_mc_set"] / out["tmpf_mc_var"] - + params["tmpb_mc_set"] / out["tmpb_mc_var"] - ) * tmpw_var - - params["tmpw" + "_mc_set"] = q # - - out["tmpw"] = ( - self["tmpf"] / out["tmpf_mc_var"] + self["tmpb"] / out["tmpb_mc_var"] - ) * tmpw_var - - q = params["tmpw" + "_mc_set"] - self["tmpw"] - out["tmpw" + "_mc_var"] = q.var(dim="mc", ddof=1) - - # Calculate the CI of the weighted MC_set - if conf_ints: - new_chunks_weighted = ((len(conf_ints),),) + memchunk[1:] - avg_axis = params["tmpw" + "_mc_set"].get_axis_num("mc") - q2 = params["tmpw" + "_mc_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks_weighted, # Explicitly define output chunks - drop_axis=avg_axis, # avg dimensions are dropped - new_axis=0, - dtype=float, - ) # The new CI dimension is added as first axis - out["tmpw" + "_mc"] = (("CI", "x", "time"), q2) - - # Clean up the garbage. All arrays with a Monte Carlo dimension. 
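The Monte Carlo sets built here evaluate the forward- and backward-channel temperature equations for every sampled parameter set and intensity realization. A toy evaluation of the forward equation with made-up parameter values, purely for illustration::

    import numpy as np

    gamma = 482.6            # calibrated gamma [K] (toy value)
    d_f = 1.464              # forward parameter D_F for one time step (toy value)
    alpha = 3.2e-4           # integrated differential attenuation at this x (toy value)
    st, ast = 1.85, 1.62     # one sampled Stokes / anti-Stokes intensity pair

    tmpf = gamma / (np.log(st / ast) + d_f + alpha) - 273.15
    # roughly 29 degrees C for these made-up numbers
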
- if mc_remove_set_flag: - remove_mc_set = [ - "r_st", - "r_ast", - "r_rst", - "r_rast", - "gamma_mc", - "alpha_mc", - "df_mc", - "db_mc", - ] - - for i in ["tmpf", "tmpb", "tmpw"]: - remove_mc_set.append(i + "_mc_set") + ) # The new CI dim is added as firsaxis - if nta: - remove_mc_set.append('talpha"_fw_mc') - remove_mc_set.append('talpha"_bw_mc') + out[label + "_mc_avgx1"] = (("CI", time_dim2), q) - for k in remove_mc_set: - if k in out: - del out[k] + if ci_avg_x_flag2: + q = mcparams[label + "_mc_set"] - mcparams[label + "_avgsec"] - if not mc_remove_set_flag: - out.update(params) + qvar = q.var(dim=["mc"], ddof=1) - self.update(out) - return out + # Inverse-variance weighting + avg_x_var = 1 / (1 / qvar).sum(dim=x_dim2) - def in_confidence_interval(self, ci_label, conf_ints=None, sections=None): - """ - Returns an array with bools wether the temperature of the reference - sections are within the confidence intervals + out[label + "_mc_avgx2_var"] = avg_x_var - Parameters - ---------- - sections : Dict[str, List[slice]] - ci_label : str - The label of the data containing the confidence intervals. - conf_ints : Tuple - A tuple containing two floats between 0 and 1, representing the - levels between which the reference temperature should lay. + mcparams[label + "_mc_avgx2_set"] = ( + mcparams[label + "_mc_set"] / qvar + ).sum(dim=x_dim2) * avg_x_var + out[label + "_avgx2"] = mcparams[label + "_mc_avgx2_set"].mean(dim="mc") - Returns - ------- + if conf_ints: + new_chunks = (len(conf_ints), mcparams[label + "_mc_set"].chunks[2]) + avg_axis_avgx = mcparams[label + "_mc_set"].get_axis_num("mc") - """ - if sections is None: - sections = self.sections - else: - sections = validate_sections(self, sections) + qq = mcparams[label + "_mc_avgx2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx), + chunks=new_chunks, # + drop_axis=avg_axis_avgx, + # avg dimensions are dropped from input arr + new_axis=0, + dtype=float, + ) # The new CI dimension is added as + # firsaxis + out[label + "_mc_avgx2"] = (("CI", time_dim2), qq) - if conf_ints is None: - conf_ints = self[ci_label].values + if ci_avg_time_flag1 is not None: + # unweighted mean + out[label + "_avg1"] = mcparams[label + "_avgsec"].mean(dim=time_dim2) - assert len(conf_ints) == 2, "Please define conf_ints" + q = mcparams[label + "_mc_set"] - mcparams[label + "_avgsec"] + qvar = q.var(dim=["mc", time_dim2], ddof=1) + out[label + "_mc_avg1_var"] = qvar - tmp_dn = self[ci_label].sel(CI=conf_ints[0], method="nearest") - tmp_up = self[ci_label].sel(CI=conf_ints[1], method="nearest") + if conf_ints: + new_chunks = (len(conf_ints), mcparams[label + "_mc_set"].chunks[1]) + avg_axis = mcparams[label + "_mc_set"].get_axis_num( + ["mc", time_dim2] + ) + q = mcparams[label + "_mc_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), + chunks=new_chunks, # + drop_axis=avg_axis, + # avg dimensions are dropped from input arr + new_axis=0, + ) # The new CI dim is added as firsaxis - ref = self.ufunc_per_section( - sections=sections, label="st", ref_temp_broadcasted=True, calc_per="all" - ) - ix_resid = self.ufunc_per_section( - sections=sections, x_indices=True, calc_per="all" - ) - ref_sorted = np.full(shape=tmp_dn.shape, fill_value=np.nan) - ref_sorted[ix_resid, :] = ref - ref_da = xr.DataArray(data=ref_sorted, coords=tmp_dn.coords) + out[label + "_mc_avg1"] = (("CI", x_dim2), q) - mask_dn = ref_da >= tmp_dn - mask_up = ref_da <= tmp_up + if ci_avg_time_flag2: + q = mcparams[label + "_mc_set"] - 
mcparams[label + "_avgsec"] - return np.logical_and(mask_dn, mask_up) + qvar = q.var(dim=["mc"], ddof=1) - def temperature_residuals(self, label=None, sections=None): - """ - Compute the temperature residuals, between the known temperature of the - reference sections and the DTS temperature. + # Inverse-variance weighting + avg_time_var = 1 / (1 / qvar).sum(dim=time_dim2) - Parameters - ---------- - label : str - The key of the temperature DataArray - sections : Dict[str, List[slice]], optional - If `None` is supplied, `ds.sections` is used. Define calibration - sections. Each section requires a reference temperature time series, - such as the temperature measured by an external temperature sensor. - They should already be part of the DataStore object. `sections` - is defined with a dictionary with its keywords of the - names of the reference temperature time series. Its values are - lists of slice objects, where each slice object is a fiber stretch - that has the reference temperature. Afterwards, `sections` is stored - under `ds.sections`. + out[label + "_mc_avg2_var"] = avg_time_var - Returns - ------- - resid_da : xarray.DataArray - The residuals as DataArray - """ - if sections is None: - sections = self.sections - else: - sections = validate_sections(self, sections) + mcparams[label + "_mc_avg2_set"] = ( + mcparams[label + "_mc_set"] / qvar + ).sum(dim=time_dim2) * avg_time_var + out[label + "_avg2"] = mcparams[label + "_mc_avg2_set"].mean(dim="mc") - resid_temp = self.ufunc_per_section( - sections=sections, label=label, temp_err=True, calc_per="all" - ) - resid_x = self.ufunc_per_section(sections=sections, label="x", calc_per="all") + if conf_ints: + new_chunks = (len(conf_ints), mcparams[label + "_mc_set"].chunks[1]) + avg_axis_avg2 = mcparams[label + "_mc_set"].get_axis_num("mc") - resid_ix = np.array([np.argmin(np.abs(ai - self.x.data)) for ai in resid_x]) + qq = mcparams[label + "_mc_avg2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), + chunks=new_chunks, # + drop_axis=avg_axis_avg2, + # avg dimensions are dropped from input arr + new_axis=0, + dtype=float, + ) # The new CI dimension is added as + # firsaxis + out[label + "_mc_avg2"] = (("CI", x_dim2), qq) - resid_sorted = np.full(shape=self[label].shape, fill_value=np.nan) - resid_sorted[resid_ix, :] = resid_temp - resid_da = xr.DataArray( - data=resid_sorted, - dims=("x", "time"), - coords={"x": self.x, "time": self.time}, + # Weighted mean of the forward and backward + tmpw_var = 1 / ( + 1 / out["tmpf_mc" + "_avgsec_var"] + 1 / out["tmpb_mc" + "_avgsec_var"] ) - return resid_da - def ufunc_per_section( - self, - sections=None, - func=None, - label=None, - subtract_from_label=None, - temp_err=False, - x_indices=False, - ref_temp_broadcasted=False, - calc_per="stretch", - **func_kwargs, - ): - """ - User function applied to parts of the cable. Super useful, - many options and slightly - complicated. + q = ( + mcparams["tmpf_mc_set"] / out["tmpf_mc" + "_avgsec_var"] + + mcparams["tmpb_mc_set"] / out["tmpb_mc" + "_avgsec_var"] + ) * tmpw_var - The function `func` is taken over all the timesteps and calculated - per `calc_per`. This - is returned as a dictionary + mcparams["tmpw" + "_mc_set"] = q # - Parameters - ---------- - sections : Dict[str, List[slice]], optional - If `None` is supplied, `ds.sections` is used. Define calibration - sections. Each section requires a reference temperature time series, - such as the temperature measured by an external temperature sensor. 
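The combination of the forward ("tmpf") and backward ("tmpb") estimates used here follows the usual inverse-variance weighting. A toy calculation with made-up numbers::

    tmpf, tmpf_var = 20.3, 0.04   # forward estimate and its variance
    tmpb, tmpb_var = 20.7, 0.09   # backward estimate and its variance

    tmpw_var = 1 / (1 / tmpf_var + 1 / tmpb_var)           # about 0.028
    tmpw = (tmpf / tmpf_var + tmpb / tmpb_var) * tmpw_var  # about 20.42
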
- They should already be part of the DataStore object. `sections` - is defined with a dictionary with its keywords of the - names of the reference temperature time series. Its values are - lists of slice objects, where each slice object is a fiber stretch - that has the reference temperature. Afterwards, `sections` is stored - under `ds.sections`. - func : callable, str - A numpy function, or lambda function to apple to each 'calc_per'. - label - subtract_from_label - temp_err : bool - The argument of the function is label minus the reference - temperature. - x_indices : bool - To retreive an integer array with the indices of the - x-coordinates in the section/stretch. The indices are sorted. - ref_temp_broadcasted : bool - calc_per : {'all', 'section', 'stretch'} - func_kwargs : dict - Dictionary with options that are passed to func + # out["tmpw"] = out["tmpw" + '_mc_set'].mean(dim='mc') + out["tmpw" + "_avgsec"] = ( + mcparams["tmpf_avgsec"] / out["tmpf_mc" + "_avgsec_var"] + + mcparams["tmpb_avgsec"] / out["tmpb_mc" + "_avgsec_var"] + ) * tmpw_var - TODO: Spend time on creating a slice instead of appendng everything\ - to a list and concatenating after. + q = mcparams["tmpw" + "_mc_set"] - out["tmpw_avgsec"] + out["tmpw" + "_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) + if ci_avg_time_flag1: + out["tmpw" + "_avg1"] = out["tmpw" + "_avgsec"].mean(dim=time_dim2) - Returns - ------- + out["tmpw" + "_mc_avg1_var"] = mcparams["tmpw" + "_mc_set"].var( + dim=["mc", time_dim2] + ) - Examples - -------- + if conf_ints: + new_chunks_weighted = ((len(conf_ints),),) + (memchunk[1],) + avg_axis = mcparams["tmpw" + "_mc_set"].get_axis_num(["mc", time_dim2]) + q2 = mcparams["tmpw" + "_mc_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), + chunks=new_chunks_weighted, + # Explicitly define output chunks + drop_axis=avg_axis, # avg dimensions are dropped + new_axis=0, + dtype=float, + ) # The new CI dimension is added as + # first axis + out["tmpw" + "_mc_avg1"] = (("CI", x_dim2), q2) - 1. Calculate the variance of the residuals in the along ALL the\ - reference sections wrt the temperature of the water baths + if ci_avg_time_flag2: + tmpw_var_avg2 = 1 / ( + 1 / out["tmpf_mc_avg2_var"] + 1 / out["tmpb_mc_avg2_var"] + ) - >>> tmpf_var = d.ufunc_per_section( - >>> sections=sections, - >>> func='var', - >>> calc_per='all', - >>> label='tmpf', - >>> temp_err=True) + q = ( + mcparams["tmpf_mc_avg2_set"] / out["tmpf_mc_avg2_var"] + + mcparams["tmpb_mc_avg2_set"] / out["tmpb_mc_avg2_var"] + ) * tmpw_var_avg2 - 2. Calculate the variance of the residuals in the along PER\ - reference section wrt the temperature of the water baths + mcparams["tmpw" + "_mc_avg2_set"] = q # - >>> tmpf_var = d.ufunc_per_section( - >>> sections=sections, - >>> func='var', - >>> calc_per='stretch', - >>> label='tmpf', - >>> temp_err=True) + out["tmpw" + "_avg2"] = ( + out["tmpf_avg2"] / out["tmpf_mc_avg2_var"] + + out["tmpb_avg2"] / out["tmpb_mc_avg2_var"] + ) * tmpw_var_avg2 - 3. 
Calculate the variance of the residuals in the along PER\ - water bath wrt the temperature of the water baths + out["tmpw" + "_mc_avg2_var"] = tmpw_var_avg2 - >>> tmpf_var = d.ufunc_per_section( - >>> sections=sections, - >>> func='var', - >>> calc_per='section', - >>> label='tmpf', - >>> temp_err=True) + if conf_ints: + # We first need to know the x-dim-chunk-size + new_chunks_weighted = ((len(conf_ints),),) + (memchunk[1],) + avg_axis_avg2 = mcparams["tmpw" + "_mc_avg2_set"].get_axis_num("mc") + q2 = mcparams["tmpw" + "_mc_avg2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), + chunks=new_chunks_weighted, + # Explicitly define output chunks + drop_axis=avg_axis_avg2, # avg dimensions are dropped + new_axis=0, + dtype=float, + ) # The new CI dimension is added as firstax + out["tmpw" + "_mc_avg2"] = (("CI", x_dim2), q2) - 4. Obtain the coordinates of the measurements per section + if ci_avg_x_flag1: + out["tmpw" + "_avgx1"] = out["tmpw" + "_avgsec"].mean(dim=x_dim2) - >>> locs = d.ufunc_per_section( - >>> sections=sections, - >>> func=None, - >>> label='x', - >>> temp_err=False, - >>> ref_temp_broadcasted=False, - >>> calc_per='stretch') + out["tmpw" + "_mc_avgx1_var"] = mcparams["tmpw" + "_mc_set"].var(dim=x_dim2) - 5. Number of observations per stretch + if conf_ints: + new_chunks_weighted = ((len(conf_ints),),) + (memchunk[2],) + avg_axis = mcparams["tmpw" + "_mc_set"].get_axis_num(["mc", x_dim2]) + q2 = mcparams["tmpw" + "_mc_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), + chunks=new_chunks_weighted, + # Explicitly define output chunks + drop_axis=avg_axis, # avg dimensions are dropped + new_axis=0, + dtype=float, + ) # The new CI dimension is added as + # first axis + out["tmpw" + "_mc_avgx1"] = (("CI", time_dim2), q2) - >>> nlocs = d.ufunc_per_section( - >>> sections=sections, - >>> func=len, - >>> label='x', - >>> temp_err=False, - >>> ref_temp_broadcasted=False, - >>> calc_per='stretch') + if ci_avg_x_flag2: + tmpw_var_avgx2 = 1 / ( + 1 / out["tmpf_mc_avgx2_var"] + 1 / out["tmpb_mc_avgx2_var"] + ) - 6. broadcast the temperature of the reference sections to\ - stretch/section/all dimensions. The value of the reference\ - temperature (a timeseries) is broadcasted to the shape of self[\ - label]. The self[label] is not used for anything else. + q = ( + mcparams["tmpf_mc_avgx2_set"] / out["tmpf_mc_avgx2_var"] + + mcparams["tmpb_mc_avgx2_set"] / out["tmpb_mc_avgx2_var"] + ) * tmpw_var_avgx2 - >>> temp_ref = d.ufunc_per_section( - >>> label='st', - >>> ref_temp_broadcasted=True, - >>> calc_per='all') + mcparams["tmpw" + "_mc_avgx2_set"] = q # - 7. 
x-coordinate index + out["tmpw" + "_avgx2"] = ( + out["tmpf_avgx2"] / out["tmpf_mc_avgx2_var"] + + out["tmpb_avgx2"] / out["tmpb_mc_avgx2_var"] + ) * tmpw_var_avgx2 - >>> ix_loc = d.ufunc_per_section(sections=sections, x_indices=True) + out["tmpw" + "_mc_avgx2_var"] = tmpw_var_avgx2 + if conf_ints: + # We first need to know the x-dim-chunk-size + new_chunks_weighted = ((len(conf_ints),),) + (memchunk[2],) + avg_axis_avgx2 = mcparams["tmpw" + "_mc_avgx2_set"].get_axis_num("mc") + q2 = mcparams["tmpw" + "_mc_avgx2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx2), + chunks=new_chunks_weighted, + # Explicitly define output chunks + drop_axis=avg_axis_avgx2, # avg dimensions are dropped + new_axis=0, + dtype=float, + ) # The new CI dimension is added as firstax + out["tmpw" + "_mc_avgx2"] = (("CI", time_dim2), q2) - Note - ---- - If `self[label]` or `self[subtract_from_label]` is a Dask array, a Dask - array is returned else a numpy array is returned - """ - # if sections is None: - # sections = self.sections - if label is None: - dataarray = None - else: - dataarray = self[label] + # Clean up the garbage. All arrays with a Monte Carlo dimension. + if mc_remove_set_flag: + remove_mc_set = [ + "r_st", + "r_ast", + "r_rst", + "r_rast", + "gamma_mc", + "alpha_mc", + "df_mc", + "db_mc", + "x_avg", + "time_avg", + "mc", + ] - if x_indices: - x_coords = self.x - reference_dataset = None + for i in ["tmpf", "tmpb", "tmpw"]: + remove_mc_set.append(i + "_avgsec") + remove_mc_set.append(i + "_mc_set") + remove_mc_set.append(i + "_mc_avg2_set") + remove_mc_set.append(i + "_mc_avgx2_set") + remove_mc_set.append(i + "_mc_avgsec_var") - else: - sections = validate_sections(self, sections) + if "trans_att" in mcparams and mcparams.trans_att.size: + remove_mc_set.append('talpha"_fw_mc') + remove_mc_set.append('talpha"_bw_mc') - x_coords = None - reference_dataset = {k: self[k] for k in sections} + for k in remove_mc_set: + if k in out: + print(f"Removed from results: {k}") + del out[k] - out = ufunc_per_section_helper( - x_coords=x_coords, - sections=sections, - func=func, - dataarray=dataarray, - subtract_from_dataarray=subtract_from_label, - reference_dataset=reference_dataset, - subtract_reference_from_dataarray=temp_err, - ref_temp_broadcasted=ref_temp_broadcasted, - calc_per=calc_per, - **func_kwargs, - ) return out - - def resample_datastore(*args, **kwargs): - raise ( - "ds.resample_datastore() is deprecated. Use from dtscalibration import DataStore; " - "DataStore(ds.resample()) instead. See example notebook 2." 
- ) diff --git a/src/dtscalibration/io/apsensing.py b/src/dtscalibration/io/apsensing.py index ec013bc6..f2164d9f 100644 --- a/src/dtscalibration/io/apsensing.py +++ b/src/dtscalibration/io/apsensing.py @@ -7,8 +7,8 @@ import dask.array as da import numpy as np import pandas as pd +import xarray as xr -from dtscalibration import DataStore from dtscalibration.io.utils import dim_attrs from dtscalibration.io.utils import get_xml_namespace from dtscalibration.io.utils import open_file @@ -103,7 +103,7 @@ def read_apsensing_files( load_in_memory=load_in_memory, ) - ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) + ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) return ds diff --git a/src/dtscalibration/io/datastore.py b/src/dtscalibration/io/datastore.py deleted file mode 100644 index 174b84fd..00000000 --- a/src/dtscalibration/io/datastore.py +++ /dev/null @@ -1,188 +0,0 @@ -import glob -import inspect - -import xarray as xr - -from dtscalibration import DataStore - - -def open_datastore( - filename_or_obj, - group=None, - decode_cf=True, - mask_and_scale=None, - decode_times=True, - concat_characters=True, - decode_coords=True, - engine=None, - chunks=None, - lock=None, - cache=None, - drop_variables=None, - backend_kwargs=None, - load_in_memory=False, - **kwargs, -): - """Load and decode a datastore from a file or file-like object. Most - arguments are passed to xarray.open_dataset(). - - Parameters - ---------- - filename_or_obj : str, Path, file or xarray.backends.*DataStore - Strings and Path objects are interpreted as a path to a netCDF file - or an OpenDAP URL and opened with python-netCDF4, unless the filename - ends with .gz, in which case the file is gunzipped and opened with - scipy.io.netcdf (only netCDF3 supported). File-like objects are opened - with scipy.io.netcdf (only netCDF3 supported). - group : str, optional - Path to the netCDF4 group in the given file to open (only works for - netCDF4 files). - decode_cf : bool, optional - Whether to decode these variables, assuming they were saved according - to CF conventions. - mask_and_scale : bool, optional - If True, replace array values equal to `_FillValue` with NA and scale - values according to the formula `original_values * scale_factor + - add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are - taken from variable attributes (if they exist). If the `_FillValue` or - `missing_value` attribute contains multiple values a warning will be - issued and all array values matching one of the multiple values will - be replaced by NA. mask_and_scale defaults to True except for the - pseudonetcdf backend. - decode_times : bool, optional - If True, decode times encoded in the standard NetCDF datetime format - into datetime objects. Otherwise, leave them encoded as numbers. - concat_characters : bool, optional - If True, concatenate along the last dimension of character arrays to - form string arrays. Dimensions will only be concatenated over (and - removed) if they have no corresponding variable and if they are only - used as the last dimension of character arrays. - decode_coords : bool, optional - If True, decode the 'coordinates' attribute to identify coordinates in - the resulting dataset. - engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio', - 'pseudonetcdf'}, optional - Engine to use when reading files. If not provided, the default engine - is chosen based on available dependencies, with a preference for - 'netcdf4'. 
- chunks : int or dict, optional - If chunks is provided, it used to load the new dataset into dask - arrays. ``chunks={}`` loads the dataset with dask using a single - chunk for all arrays. - lock : False, True or threading.Lock, optional - If chunks is provided, this argument is passed on to - :py:func:`dask.array.from_array`. By default, a global lock is - used when reading data from netCDF files with the netcdf4 and h5netcdf - engines to avoid issues with concurrent access when using dask's - multithreaded backend. - cache : bool, optional - If True, cache data loaded from the underlying datastore in memory as - NumPy arrays when accessed to avoid reading from the underlying data- - store multiple times. Defaults to True unless you specify the `chunks` - argument to use dask, in which case it defaults to False. Does not - change the behavior of coordinates corresponding to dimensions, which - always load their data from disk into a ``pandas.Index``. - drop_variables: string or iterable, optional - A variable or list of variables to exclude from being parsed from the - dataset. This may be useful to drop variables with problems or - inconsistent values. - backend_kwargs: dictionary, optional - A dictionary of keyword arguments to pass on to the backend. This - may be useful when backend options would improve performance or - allow user control of dataset processing. - - Returns - ------- - dataset : Dataset - The newly created dataset. - - See Also - -------- - xarray.open_dataset - xarray.load_dataset - """ - - xr_kws = inspect.signature(xr.open_dataset).parameters.keys() - - ds_kwargs = {k: v for k, v in kwargs.items() if k not in xr_kws} - - if chunks is None: - chunks = {} - - with xr.open_dataset( - filename_or_obj, - group=group, - decode_cf=decode_cf, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - engine=engine, - chunks=chunks, - lock=lock, - cache=cache, - drop_variables=drop_variables, - backend_kwargs=backend_kwargs, - ) as ds_xr: - ds = DataStore( - data_vars=ds_xr.data_vars, - coords=ds_xr.coords, - attrs=ds_xr.attrs, - **ds_kwargs, - ) - - # to support deprecated st_labels - ds = ds.rename_labels(assertion=False) - - if load_in_memory: - if "cache" in kwargs: - raise TypeError("cache has no effect in this context") - return ds.load() - - else: - return ds - - -def open_mf_datastore( - path=None, paths=None, combine="by_coords", load_in_memory=False, **kwargs -): - """ - Open a datastore from multiple netCDF files. This script assumes the - datastore was split along the time dimension. But only variables with a - time dimension should be concatenated in the time dimension. Other - options from xarray do not support this. - - Parameters - ---------- - combine : {'by_coords', 'nested'}, optional - Leave it at by_coords - path : str - A file path to the stored netcdf files with an asterisk in the - filename to list all. Ensure you have leading zeros in the file - numbering. - paths : list - Define you own list of file paths. - Returns - ------- - dataset : Dataset - The newly created dataset. 
- """ - from xarray.backends.api import open_mfdataset - - if paths is None: - paths = sorted(glob.glob(path)) - assert paths, "No files match found with: " + path - - with open_mfdataset(paths=paths, combine=combine, **kwargs) as xds: - ds = DataStore(data_vars=xds.data_vars, coords=xds.coords, attrs=xds.attrs) - - # to support deprecated st_labels - ds = ds.rename_labels(assertion=False) - - if load_in_memory: - if "cache" in kwargs: - raise TypeError("cache has no effect in this context") - return ds.load() - - else: - return ds diff --git a/src/dtscalibration/io/sensornet.py b/src/dtscalibration/io/sensornet.py index e887f16b..ccac51c9 100644 --- a/src/dtscalibration/io/sensornet.py +++ b/src/dtscalibration/io/sensornet.py @@ -5,8 +5,8 @@ import numpy as np import pandas as pd +import xarray as xr -from dtscalibration import DataStore from dtscalibration.io.utils import coords_time from dtscalibration.io.utils import dim_attrs from dtscalibration.io.utils import open_file @@ -130,7 +130,7 @@ def read_sensornet_files( flip_reverse_measurements=flip_reverse_measurements, ) - ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) + ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) return ds diff --git a/src/dtscalibration/io/sensortran.py b/src/dtscalibration/io/sensortran.py index 0d2ff728..b1077897 100644 --- a/src/dtscalibration/io/sensortran.py +++ b/src/dtscalibration/io/sensortran.py @@ -4,8 +4,8 @@ from typing import Union import numpy as np +import xarray as xr -from dtscalibration import DataStore from dtscalibration.io.utils import coords_time from dtscalibration.io.utils import dim_attrs @@ -16,7 +16,7 @@ def read_sensortran_files( timezone_netcdf: str = "UTC", silent: bool = False, **kwargs, -) -> DataStore: +) -> xr.Dataset: """Read a folder with measurement files from a device of the Sensortran brand. Each measurement file contains values for a single timestep. Remember to check which timezone you are working in. @@ -77,7 +77,7 @@ def read_sensortran_files( "Sensortran binary version " + f"{version} not implemented" ) - ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) + ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) return ds diff --git a/src/dtscalibration/io/silixa.py b/src/dtscalibration/io/silixa.py index f89a672e..025b7b1f 100644 --- a/src/dtscalibration/io/silixa.py +++ b/src/dtscalibration/io/silixa.py @@ -6,8 +6,8 @@ import dask.array as da import numpy as np import pandas as pd +import xarray as xr -from dtscalibration import DataStore from dtscalibration.io.utils import coords_time from dtscalibration.io.utils import dim_attrs from dtscalibration.io.utils import get_xml_namespace @@ -98,7 +98,7 @@ def read_silixa_files( "Silixa xml version " + f"{xml_version} not implemented" ) - ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) + ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) return ds diff --git a/src/dtscalibration/plot.py b/src/dtscalibration/plot.py old mode 100755 new mode 100644 index 13c0c111..d864cafe --- a/src/dtscalibration/plot.py +++ b/src/dtscalibration/plot.py @@ -26,7 +26,7 @@ def plot_residuals_reference_sections( plot_avg_std resid : DataArray The residuals of the fit to estimate the noise in the measured - Stokes signal. is returned by `ds.variance_stokes` + Stokes signal. 
It is returned by `variance_stokes_*()` sections : Dict[str, List[slice]] The sections obj is normally used to set DataStore.sections, now is used toobtain the @@ -254,7 +254,7 @@ def plot_residuals_reference_sections_single( plot_avg_std resid : DataArray The residuals of the fit to estimate the noise in the measured - Stokes signal. is returned by `ds.variance_stokes` + Stokes signal. It is returned by `variance_stokes_*()` fig : Figurehandle, optional title : str, optional Adds a title to the plot @@ -389,7 +389,7 @@ def plot_accuracy( plot_avg_std resid : DataArray The residuals of the fit to estimate the noise in the measured - Stokes signal. is returned by `ds.variance_stokes` + Stokes signal. It is returned by `variance_stokes_*()` fig : Figurehandle, optional title : str, optional Adds a title to the plot @@ -572,7 +572,7 @@ def plot_sigma_report( # calc_per='stretch', # temp_err=True, # axis=0) - sigma_est = ds.ufunc_per_section( + sigma_est = ds.dts.ufunc_per_section( sections=sections, label=temp_label, func=np.std, @@ -581,7 +581,7 @@ def plot_sigma_report( axis=0, ) else: - sigma_est = ds.ufunc_per_section( + sigma_est = ds.dts.ufunc_per_section( sections=sections, label=temp_label, func=np.std, @@ -635,14 +635,14 @@ def plot_sigma_report( ax1.legend() ax1.set_ylabel(r"Temperature [$^\circ$C]") - err_ref = ds.ufunc_per_section( + err_ref = ds.dts.ufunc_per_section( sections=sections, label=temp_label, func=None, temp_err=True, calc_per="stretch", ) - x_ref = ds.ufunc_per_section(sections=sections, label="x", calc_per="stretch") + x_ref = ds.dts.ufunc_per_section(sections=sections, label="x", calc_per="stretch") for (k, v), (k_se, v_se), (kx, vx) in zip( ds.sections.items(), err_ref.items(), x_ref.items() diff --git a/src/dtscalibration/variance_helpers.py b/src/dtscalibration/variance_helpers.py index e1b930c3..9ea6c834 100644 --- a/src/dtscalibration/variance_helpers.py +++ b/src/dtscalibration/variance_helpers.py @@ -158,7 +158,7 @@ def variance_stokes_linear_helper(st_sec, resid_sec, nbin, through_zero): "not possible. Most likely, your Stokes intensities do " "not vary enough to fit a linear curve. Either " "use `through_zero` option or use " - "`ds.variance_stokes_constant()`. Another reason " + "`variance_stokes_constant()`. Another reason " "could be that your sections are defined to be " "wider than they actually are." ) @@ -167,3 +167,30 @@ def var_fun(stokes): return slope * stokes + offset return slope, offset, st_sort_mean, st_sort_var, resid_sec, var_fun + + +def check_allclose_acquisitiontime(acquisitiontime, eps: float = 0.05) -> None: + """ + Check if all acquisition times are of equal duration. For now it is not possible to calibrate + over timesteps if the acquisition time of timesteps varies, as the Stokes variance + would change over time. + + The acquisition time is stored for single ended measurements in userAcquisitionTime, + for double ended measurements in userAcquisitionTimeFW and userAcquisitionTimeBW. + + Parameters + ---------- + acquisitiontime : array-like + eps : float + Default accepts 5% of relative variation between min and max acquisition time. 
+ + Returns + ------- + """ + dtmin = acquisitiontime.min() + dtmax = acquisitiontime.max() + dtavg = (dtmin + dtmax) / 2 + assert ( + dtmax - dtmin + ) / dtavg < eps, "Acquisition time is Forward channel not equal for all time steps" + pass diff --git a/src/dtscalibration/variance_stokes.py b/src/dtscalibration/variance_stokes.py new file mode 100644 index 00000000..59819b87 --- /dev/null +++ b/src/dtscalibration/variance_stokes.py @@ -0,0 +1,509 @@ +import dask.array as da +import numpy as np +import xarray as xr + +from dtscalibration.calibration.section_utils import validate_no_overlapping_sections +from dtscalibration.calibration.section_utils import validate_sections_definition +from dtscalibration.datastore_utils import ufunc_per_section_helper +from dtscalibration.variance_helpers import check_allclose_acquisitiontime +from dtscalibration.variance_helpers import variance_stokes_constant_helper +from dtscalibration.variance_helpers import variance_stokes_exponential_helper +from dtscalibration.variance_helpers import variance_stokes_linear_helper + + +def variance_stokes_constant(st, sections, acquisitiontime, reshape_residuals=True): + """ + Approximate the variance of the noise in Stokes intensity measurements + with one value, suitable for small setups. + + * `variance_stokes_constant()` for small setups with small variations in\ + intensity. Variance of the Stokes measurements is assumed to be the same\ + along the entire fiber. + + * `variance_stokes_exponential()` for small setups with very few time\ + steps. Too many degrees of freedom results in an under estimation of the\ + noise variance. Almost never the case, but use when calibrating pre time\ + step. + + * `variance_stokes_linear()` for larger setups with more time steps.\ + Assumes Poisson distributed noise with the following model:: + + st_var = a * ds.st + b + + + where `a` and `b` are constants. Requires reference sections at + beginning and end of the fiber, to have residuals at high and low + intensity measurements. + + The Stokes and anti-Stokes intensities are measured with detectors, + which inherently introduce noise to the measurements. Knowledge of the + distribution of the measurement noise is needed for a calibration with + weighted observations (Sections 5 and 6 of [1]_) + and to project the associated uncertainty to the temperature confidence + intervals (Section 7 of [1]_). Two sources dominate the noise + in the Stokes and anti-Stokes intensity measurements + (Hartog, 2017, p.125). Close to the laser, noise from the conversion of + backscatter to electricity dominates the measurement noise. The + detecting component, an avalanche photodiode, produces Poisson- + distributed noise with a variance that increases linearly with the + intensity. The Stokes and anti-Stokes intensities are commonly much + larger than the standard deviation of the noise, so that the Poisson + distribution can be approximated with a Normal distribution with a mean + of zero and a variance that increases linearly with the intensity. At + the far-end of the fiber, noise from the electrical circuit dominates + the measurement noise. It produces Normal-distributed noise with a mean + of zero and a variance that is independent of the intensity. + + Calculates the variance between the measurements and a best fit + at each reference section. This fits a function to the nt * nx + measurements with ns * nt + nx parameters, where nx are the total + number of reference locations along all sections. 
The temperature is + constant along the reference sections, so the expression of the + Stokes power can be split in a time series per reference section and + a constant per observation location. + + Idea from Discussion at page 127 in Richter, P. H. (1995). Estimating + errors in least-squares fitting. + + The timeseries and the constant are, of course, highly correlated + (Equations 20 and 21 in [1]_), but that is not relevant here as only the + product is of interest. The residuals between the fitted product and the + Stokes intensity measurements are attributed to the + noise from the detector. The variance of the residuals is used as a + proxy for the variance of the noise in the Stokes and anti-Stokes + intensity measurements. A non-uniform temperature of + the reference sections results in an over estimation of the noise + variance estimate because all temperature variation is attributed to + the noise. + + Parameters + ---------- + reshape_residuals + st : DataArray + sections : Dict[str, List[slice]] + + Returns + ------- + I_var : float + Variance of the residuals between measured and best fit + resid : array_like + Residuals between measured and best fit + + Notes + ----- + + * Because there are a large number of unknowns, spend time on\ + calculating an initial estimate. Can be turned off by setting to False. + + * It is often not needed to use measurements from all time steps. If\ + your variance estimate does not change when including measurements\ + additional time steps, you have included enough measurements. + + References + ---------- + .. [1] des Tombe, B., Schilperoort, B., & Bakker, M. (2020). Estimation + of Temperature and Associated Uncertainty from Fiber-Optic Raman- + Spectrum Distributed Temperature Sensing. Sensors, 20(8), 2235. + https://doi.org/10.3390/s20082235 + + Examples + -------- + - `Example notebook 4: Calculate variance Stokes intensity measurements\ + `_ + + TODO: Account for varying acquisition times + """ + validate_sections_definition(sections=sections) + validate_no_overlapping_sections(sections=sections) + check_allclose_acquisitiontime(acquisitiontime=acquisitiontime) + + assert st.dims[0] == "x", "DataArray is transposed" + + # should maybe be per section. But then residuals + # seem to be correlated between stretches. I don't know why.. BdT. + data_dict = da.compute( + ufunc_per_section_helper(sections=sections, dataarray=st, calc_per="stretch") + )[0] + + var_I, resid = variance_stokes_constant_helper(data_dict) + + if not reshape_residuals: + return var_I, resid + + else: + ix_resid = ufunc_per_section_helper( + sections=sections, x_coords=st.x, calc_per="all" + ) + + resid_sorted = np.full(shape=st.shape, fill_value=np.nan) + resid_sorted[ix_resid, :] = resid + resid_da = xr.DataArray(data=resid_sorted, coords=st.coords) + + return var_I, resid_da + + +def variance_stokes_exponential( + st, + sections, + acquisitiontime, + use_statsmodels=False, + suppress_info=True, + reshape_residuals=True, +): + """ + Approximate the variance of the noise in Stokes intensity measurements + with one value, suitable for small setups with measurements from only + a few times. + + * `variance_stokes_constant()` for small setups with small variations in\ + intensity. Variance of the Stokes measurements is assumed to be the same\ + along the entire fiber. + + * `variance_stokes_exponential()` for small setups with very few time\ + steps. Too many degrees of freedom results in an under estimation of the\ + noise variance. 
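A short sketch of how the module-level `variance_stokes_constant()` introduced above can be called. The reading function, the section definition and the acquisition-time variable name are assumptions based on other parts of this changeset and on the example notebooks, so adjust them to the dataset at hand::

    from dtscalibration import read_silixa_files
    from dtscalibration.variance_stokes import variance_stokes_constant

    ds = read_silixa_files(directory="data/double_ended2", file_ext="*.xml")
    sections = {"probe1Temperature": [slice(7.5, 17.0)]}   # reference bath along x

    st_var, resid = variance_stokes_constant(
        st=ds["st"],
        sections=sections,
        acquisitiontime=ds["userAcquisitionTimeFW"],
    )
    # st_var is a single noise variance; resid is NaN outside the reference sections
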
Almost never the case, but use when calibrating pre time\ + step. + + * `variance_stokes_linear()` for larger setups with more time steps.\ + Assumes Poisson distributed noise with the following model:: + + st_var = a * ds.st + b + + + where `a` and `b` are constants. Requires reference sections at + beginning and end of the fiber, to have residuals at high and low + intensity measurements. + + The Stokes and anti-Stokes intensities are measured with detectors, + which inherently introduce noise to the measurements. Knowledge of the + distribution of the measurement noise is needed for a calibration with + weighted observations (Sections 5 and 6 of [1]_) + and to project the associated uncertainty to the temperature confidence + intervals (Section 7 of [1]_). Two sources dominate the noise + in the Stokes and anti-Stokes intensity measurements + (Hartog, 2017, p.125). Close to the laser, noise from the conversion of + backscatter to electricity dominates the measurement noise. The + detecting component, an avalanche photodiode, produces Poisson- + distributed noise with a variance that increases linearly with the + intensity. The Stokes and anti-Stokes intensities are commonly much + larger than the standard deviation of the noise, so that the Poisson + distribution can be approximated with a Normal distribution with a mean + of zero and a variance that increases linearly with the intensity. At + the far-end of the fiber, noise from the electrical circuit dominates + the measurement noise. It produces Normal-distributed noise with a mean + of zero and a variance that is independent of the intensity. + + Calculates the variance between the measurements and a best fit + at each reference section. This fits a function to the nt * nx + measurements with ns * nt + nx parameters, where nx are the total + number of reference locations along all sections. The temperature is + constant along the reference sections. This fits a two-parameter + exponential to the stokes measurements. The temperature is constant + and there are no splices/sharp bends in each reference section. + Therefore all signal decrease is due to differential attenuation, + which is the same for each reference section. The scale of the + exponential does differ per reference section. + + Assumptions: 1) the temperature is the same along a reference + section. 2) no sharp bends and splices in the reference sections. 3) + Same type of optical cable in each reference section. + + Idea from discussion at page 127 in Richter, P. H. (1995). Estimating + errors in least-squares fitting. For weights used error propagation: + w^2 = 1/sigma(lny)^2 = y^2/sigma(y)^2 = y^2 + + The timeseries and the constant are, of course, highly correlated + (Equations 20 and 21 in [1]_), but that is not relevant here as only the + product is of interest. The residuals between the fitted product and the + Stokes intensity measurements are attributed to the + noise from the detector. The variance of the residuals is used as a + proxy for the variance of the noise in the Stokes and anti-Stokes + intensity measurements. A non-uniform temperature of + the reference sections results in an over estimation of the noise + variance estimate because all temperature variation is attributed to + the noise. + + Parameters + ---------- + suppress_info : bool, optional + Suppress print statements. + use_statsmodels : bool, optional + Use statsmodels to fit the exponential. If `False`, use scipy. 
+ reshape_residuals : bool, optional + Reshape the residuals to the shape of the Stokes intensity + st_label : str + label of the Stokes, anti-Stokes measurement. + E.g., st, ast, rst, rast + sections : Dict[str, List[slice]], optional + If `None` is supplied, `ds.sections` is used. Define calibration + sections. Each section requires a reference temperature time series, + such as the temperature measured by an external temperature sensor. + They should already be part of the DataStore object. `sections` + is defined with a dictionary with its keywords of the + names of the reference temperature time series. Its values are + lists of slice objects, where each slice object is a fiber stretch + that has the reference temperature. Afterwards, `sections` is stored + under `ds.sections`. + + Returns + ------- + I_var : float + Variance of the residuals between measured and best fit + resid : array_like + Residuals between measured and best fit + + Notes + ----- + + * Because there are a large number of unknowns, spend time on\ + calculating an initial estimate. Can be turned off by setting to False. + + * It is often not needed to use measurements from all time steps. If\ + your variance estimate does not change when including measurements from\ + more time steps, you have included enough measurements. + + References + ---------- + .. [1] des Tombe, B., Schilperoort, B., & Bakker, M. (2020). Estimation + of Temperature and Associated Uncertainty from Fiber-Optic Raman- + Spectrum Distributed Temperature Sensing. Sensors, 20(8), 2235. + https://doi.org/10.3390/s20082235 + + Examples + -------- + - `Example notebook 4: Calculate variance Stokes intensity measurements\ + `_ + """ + validate_sections_definition(sections=sections) + validate_no_overlapping_sections(sections=sections) + check_allclose_acquisitiontime(acquisitiontime=acquisitiontime) + + assert st.dims[0] == "x", "Stokes are transposed" + nt = st.coords["time"].size + + # number of reference points per section (spatial) + len_stretch_list = [] + y_list = [] # intensities of stokes + x_list = [] # length rel to start of section. 
for alpha + + for k, stretches in sections.items(): + for stretch in stretches: + y_list.append(st.sel(x=stretch).data.T.reshape(-1)) + _x = st.coords["x"].sel(x=stretch).data.copy() + _x -= _x[0] + x_list.append(da.tile(_x, nt)) + len_stretch_list.append(_x.size) + + x = np.concatenate(x_list) + y = np.concatenate(y_list) + + var_I, resid = variance_stokes_exponential_helper( + nt, x, y, len_stretch_list, use_statsmodels, suppress_info + ) + + if not reshape_residuals: + return var_I, resid + + else: + # restructure the residuals, such that they can be plotted and + # added to ds + resid_res = [] + for leni, lenis, lenie in zip( + len_stretch_list, + nt * np.cumsum([0] + len_stretch_list[:-1]), + nt * np.cumsum(len_stretch_list), + ): + try: + resid_res.append(resid[lenis:lenie].reshape((leni, nt), order="F")) + except: # noqa: E722 + # Dask array does not support order + resid_res.append(resid[lenis:lenie].T.reshape((nt, leni)).T) + + _resid = np.concatenate(resid_res) + # _resid_x = self.ufunc_per_section( + # sections=sections, label="x", calc_per="all" + # ) + _resid_x = ufunc_per_section_helper( + sections=sections, dataarray=st.coords["x"], calc_per="all" + ) + isort = np.argsort(_resid_x) + resid_x = _resid_x[isort] # get indices from ufunc directly + resid = _resid[isort, :] + + ix_resid = np.array( + [np.argmin(np.abs(ai - st.coords["x"].data)) for ai in resid_x] + ) + + resid_sorted = np.full(shape=st.shape, fill_value=np.nan) + resid_sorted[ix_resid, :] = resid + resid_da = xr.DataArray(data=resid_sorted, coords=st.coords) + + return var_I, resid_da + + +def variance_stokes_linear( + st, sections, acquisitiontime, nbin=50, through_zero=False, plot_fit=False +): + """ + Approximate the variance of the noise in Stokes intensity measurements + with a linear function of the intensity, suitable for large setups. + + * `variance_stokes_constant()` for small setups with small variations in\ + intensity. Variance of the Stokes measurements is assumed to be the same\ + along the entire fiber. + + * `variance_stokes_exponential()` for small setups with very few time\ + steps. Too many degrees of freedom result in an underestimation of the\ + noise variance. This is almost never the case, but use it when calibrating per time\ + step. + + * `variance_stokes_linear()` for larger setups with more time steps.\ + Assumes Poisson-distributed noise with the following model:: + + st_var = a * ds.st + b + + + where `a` and `b` are constants. Requires reference sections at the + beginning and end of the fiber, so that residuals are available at high and low + intensity measurements. + + The Stokes and anti-Stokes intensities are measured with detectors, + which inherently introduce noise to the measurements. Knowledge of the + distribution of the measurement noise is needed for a calibration with + weighted observations (Sections 5 and 6 of [1]_) + and to project the associated uncertainty to the temperature confidence + intervals (Section 7 of [1]_). Two sources dominate the noise + in the Stokes and anti-Stokes intensity measurements + (Hartog, 2017, p.125). Close to the laser, noise from the conversion of + backscatter to electricity dominates the measurement noise. The + detecting component, an avalanche photodiode, produces Poisson- + distributed noise with a variance that increases linearly with the + intensity. The Stokes and anti-Stokes intensities are commonly much + larger than the standard deviation of the noise, so that the Poisson + distribution can be approximated with a Normal distribution with a mean + of zero and a variance that increases linearly with the intensity. At + the far end of the fiber, noise from the electrical circuit dominates + the measurement noise. It produces Normal-distributed noise with a mean + of zero and a variance that is independent of the intensity.
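A minimal sketch of the linear noise model described above and of how `variance_stokes_linear()` recovers it: synthetic Stokes data are generated with a noise variance that grows linearly with intensity, and the fitted slope should reproduce the chosen value. It mirrors the synthetic test removed further down in this diff; the import path, the reference-temperature values, and the keyword-argument call are illustrative assumptions, not part of the change itself.

import numpy as np
import xarray as xr
from scipy import stats

# Assumed import path, based on the module this diff edits.
from dtscalibration.variance_stokes import variance_stokes_linear

# Synthetic Stokes signal whose noise variance grows linearly with intensity,
# i.e. st_var = a * st + b with a = 0.01 and b = 0.
nx, nt = 500, 200
x = np.linspace(0.0, 20.0, nx)
G = np.linspace(500, 4000, nt)[None]        # time-varying source strength
st_clean = G * np.exp(-0.001 * x[:, None])  # noise-free Stokes signal
var_slope = 0.01
st_noisy = stats.norm.rvs(loc=st_clean, scale=(var_slope * st_clean) ** 0.5)

ds = xr.Dataset(
    {
        "st": (("x", "time"), st_noisy),
        "probe1Temperature": (("time",), np.arange(nt, dtype=float)),
        "userAcquisitionTimeFW": (("time",), np.ones(nt)),
    },
    coords={"x": x, "time": np.arange(nt)},
)
sections = {"probe1Temperature": [slice(0.0, 20.0)]}  # whole fiber as reference

slope, offset, st_sort_mean, st_sort_var, resid, var_fun = variance_stokes_linear(
    st=ds["st"],
    sections=sections,
    acquisitiontime=ds["userAcquisitionTimeFW"],
    nbin=10,
    through_zero=True,  # fit VAR(Stokes) = slope * Stokes
    plot_fit=False,
)
print(slope)            # should be close to var_slope (0.01)
print(var_fun(1000.0))  # estimated noise variance at a Stokes intensity of 1000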
+ + Calculates the variance between the measurements and a best fit + at each reference section. This fits a function to the nt * nx + measurements with ns * nt + nx parameters, where nx is the total + number of reference locations along all sections. The temperature is + constant along the reference sections, so the expression of the + Stokes power can be split into a time series per reference section and + a constant per observation location. + + Idea from the discussion on page 127 of Richter, P. H. (1995), Estimating + errors in least-squares fitting. + + The time series and the constant are, of course, highly correlated + (Equations 20 and 21 in [1]_), but that is not relevant here as only the + product is of interest. The residuals between the fitted product and the + Stokes intensity measurements are attributed to the + noise from the detector. The variance of the residuals is used as a + proxy for the variance of the noise in the Stokes and anti-Stokes + intensity measurements. A non-uniform temperature of + the reference sections results in an overestimation of the noise + variance because all temperature variation is attributed to + the noise. + + Notes + ----- + + * Because there are a large number of unknowns, spend time on\ + calculating an initial estimate. Can be turned off by setting to False. + + * It is often not necessary to use measurements from all time steps. If\ + your variance estimate does not change when including measurements \ + from more time steps, you have included enough measurements. + + References + ---------- + .. [1] des Tombe, B., Schilperoort, B., & Bakker, M. (2020). Estimation + of Temperature and Associated Uncertainty from Fiber-Optic Raman- + Spectrum Distributed Temperature Sensing. Sensors, 20(8), 2235. + https://doi.org/10.3390/s20082235 + + Examples + -------- + - `Example notebook 4: Calculate variance Stokes intensity \ + measurements `_ + + Parameters + ---------- + st : DataArray + Stokes intensity measurements (e.g., ds.st or ds.rst), with the + fiber ('x') as the first dimension. + sections : dict + Define sections. See documentation. + acquisitiontime : DataArray + Acquisition time per time step. Used to check that all time steps + have (approximately) the same acquisition time. + nbin : int + Number of bins to compute the variance for, through which the + linear function is fitted. Make sure that there are at least 50 + residuals per bin to compute the variance from. + through_zero : bool + If True, the variance is computed as VAR(Stokes) = slope * Stokes. + If False, VAR(Stokes) = slope * Stokes + offset. + From what we can tell from our initial trials, the offset + seems relatively small, so True seems a better option for + setups where a reference section with very low Stokes intensities + is missing. If data with low Stokes intensities are available, it is + better not to fit through zero, but to determine the offset from + the data.
+ plot_fit : bool + If True plot the variances for each bin and plot the fitted + linear function + """ + validate_sections_definition(sections=sections) + validate_no_overlapping_sections(sections=sections) + check_allclose_acquisitiontime(acquisitiontime=acquisitiontime) + + assert st.dims[0] == "x", "Stokes are transposed" + _, resid = variance_stokes_constant( + sections=sections, + st=st, + acquisitiontime=acquisitiontime, + reshape_residuals=False, + ) + ix_sec = ufunc_per_section_helper( + sections=sections, x_coords=st.coords["x"], calc_per="all" + ) + + st = st.isel(x=ix_sec).values.ravel() + diff_st = resid.ravel() + + ( + slope, + offset, + st_sort_mean, + st_sort_var, + resid, + var_fun, + ) = variance_stokes_linear_helper(st, diff_st, nbin, through_zero) + + if plot_fit: + import matplotlib.pyplot as plt + + plt.figure() + plt.scatter(st_sort_mean, st_sort_var, marker=".", c="black") + plt.plot( + [0.0, st_sort_mean[-1]], + [var_fun(0.0), var_fun(st_sort_mean[-1])], + c="white", + lw=1.3, + ) + plt.plot( + [0.0, st_sort_mean[-1]], + [var_fun(0.0), var_fun(st_sort_mean[-1])], + c="black", + lw=0.8, + ) + plt.xlabel("intensity") + plt.ylabel("intensity variance") + + return slope, offset, st_sort_mean, st_sort_var, resid, var_fun diff --git a/tests/data/docs_notebooks/01Not_working.ipynb b/tests/data/docs_notebooks/01Not_working.ipynb new file mode 100644 index 00000000..cf95764e --- /dev/null +++ b/tests/data/docs_notebooks/01Not_working.ipynb @@ -0,0 +1,34 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import not_existing_package" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/test_averaging.py b/tests/test_averaging.py new file mode 100644 index 00000000..36ecf408 --- /dev/null +++ b/tests/test_averaging.py @@ -0,0 +1,197 @@ +import os + +import numpy as np + +from dtscalibration import read_silixa_files +from dtscalibration.dts_accessor import DtsAccessor # noqa: F401 + +np.random.seed(0) + +fn = [ + "channel 1_20170921112245510.xml", + "channel 1_20170921112746818.xml", + "channel 1_20170921112746818.xml", +] +fn_single = [ + "channel 2_20180504132202074.xml", + "channel 2_20180504132232903.xml", + "channel 2_20180504132303723.xml", +] + +if 1: + # working dir is tests + wd = os.path.dirname(os.path.abspath(__file__)) + data_dir_single_ended = os.path.join(wd, "data", "single_ended") + data_dir_double_ended = os.path.join(wd, "data", "double_ended") + data_dir_double_ended2 = os.path.join(wd, "data", "double_ended2") + +else: + # working dir is src + data_dir_single_ended = os.path.join("..", "..", "tests", "data", "single_ended") + data_dir_double_ended = os.path.join("..", "..", "tests", "data", "double_ended") + data_dir_double_ended2 = os.path.join("..", "..", "tests", "data", "double_ended2") + + +def assert_almost_equal_verbose(actual, desired, verbose=False, **kwargs): + """Print the actual precision decimals""" + err = np.abs(actual - desired).max() + dec = -np.ceil(np.log10(err)) + + if not (np.isfinite(dec)): + dec = 18.0 + + m = "\n>>>>>The actual precision is: " + str(float(dec)) + + if 
verbose: + print(m) + + desired2 = np.broadcast_to(desired, actual.shape) + np.testing.assert_almost_equal(actual, desired2, err_msg=m, **kwargs) + pass + + +def test_average_measurements_single_ended(): + filepath = data_dir_single_ended + + ds_ = read_silixa_files(directory=filepath, timezone_netcdf="UTC", file_ext="*.xml") + + ds = ds_.sel(x=slice(0, 100)) # only calibrate parts of the fiber + sections = {"probe2Temperature": [slice(6.0, 14.0)]} # warm bath + + st_var, ast_var = 5.0, 5.0 + + out = ds.dts.calibrate_single_ended( + sections=sections, st_var=st_var, ast_var=ast_var, method="wls", solver="sparse" + ) + ds.dts.average_monte_carlo_single_ended( + result=out, + st_var=st_var, + ast_var=ast_var, + conf_ints=[2.5, 97.5], + mc_sample_size=50, # <- choose a much larger sample size + ci_avg_x_flag1=True, + ci_avg_x_sel=slice(6.0, 14.0), + ) + + def get_section_indices(x_da, sec): + """Returns the x-indices of the section. `sec` is a slice.""" + xis = x_da.astype(int) * 0 + np.arange(x_da.size, dtype=int) + return xis.sel(x=sec).values + + ix = get_section_indices(ds.x, slice(6, 14)) + ds.dts.average_monte_carlo_single_ended( + result=out, + st_var=st_var, + ast_var=ast_var, + conf_ints=[2.5, 97.5], + mc_sample_size=50, # <- choose a much larger sample size + ci_avg_x_flag2=True, + ci_avg_x_isel=ix, + ) + sl = slice( + np.datetime64("2018-05-04T12:22:17.710000000"), + np.datetime64("2018-05-04T12:22:47.702000000"), + ) + ds.dts.average_monte_carlo_single_ended( + result=out, + st_var=st_var, + ast_var=ast_var, + conf_ints=[2.5, 97.5], + mc_sample_size=50, # <- choose a much larger sample size + ci_avg_time_flag1=True, + ci_avg_time_flag2=False, + ci_avg_time_sel=sl, + ) + ds.dts.average_monte_carlo_single_ended( + result=out, + st_var=st_var, + ast_var=ast_var, + conf_ints=[2.5, 97.5], + mc_sample_size=50, # <- choose a much larger sample size + ci_avg_time_flag1=False, + ci_avg_time_flag2=True, + ci_avg_time_isel=range(3), + ) + pass + + +def test_average_measurements_double_ended(): + filepath = data_dir_double_ended2 + + ds_ = read_silixa_files(directory=filepath, timezone_netcdf="UTC", file_ext="*.xml") + + ds = ds_.sel(x=slice(0, 100)) # only calibrate parts of the fiber + sections = { + "probe1Temperature": [slice(7.5, 17.0), slice(70.0, 80.0)], # cold bath + "probe2Temperature": [slice(24.0, 34.0), slice(85.0, 95.0)], # warm bath + } + + st_var, ast_var, rst_var, rast_var = 5.0, 5.0, 5.0, 5.0 + + out = ds.dts.calibrate_double_ended( + sections=sections, + st_var=st_var, + ast_var=ast_var, + rst_var=rst_var, + rast_var=rast_var, + method="wls", + solver="sparse", + ) + ds.dts.average_monte_carlo_double_ended( + result=out, + st_var=st_var, + ast_var=ast_var, + rst_var=rst_var, + rast_var=rast_var, + conf_ints=[2.5, 97.5], + mc_sample_size=50, # <- choose a much larger sample size + ci_avg_x_flag1=True, + ci_avg_x_sel=slice(6, 10), + ) + + def get_section_indices(x_da, sec): + """Returns the x-indices of the section. 
`sec` is a slice.""" + xis = x_da.astype(int) * 0 + np.arange(x_da.size, dtype=int) + return xis.sel(x=sec).values + + ix = get_section_indices(ds.x, slice(6, 10)) + ds.dts.average_monte_carlo_double_ended( + result=out, + st_var=st_var, + ast_var=ast_var, + rst_var=rst_var, + rast_var=rast_var, + conf_ints=[2.5, 97.5], + mc_sample_size=50, # <- choose a much larger sample size + ci_avg_x_flag2=True, + ci_avg_x_isel=ix, + ) + sl = slice( + np.datetime64("2018-03-28T00:40:54.097000000"), + np.datetime64("2018-03-28T00:41:12.084000000"), + ) + ds.dts.average_monte_carlo_double_ended( + result=out, + st_var=st_var, + ast_var=ast_var, + rst_var=rst_var, + rast_var=rast_var, + conf_ints=[2.5, 97.5], + mc_sample_size=50, # <- choose a much larger sample size + ci_avg_time_flag1=True, + ci_avg_time_flag2=False, + ci_avg_time_sel=sl, + ) + ds.dts.average_monte_carlo_double_ended( + result=out, + st_var=st_var, + ast_var=ast_var, + rst_var=rst_var, + rast_var=rast_var, + conf_ints=[2.5, 97.5], + mc_sample_size=50, # <- choose a much larger sample size + ci_avg_time_flag1=False, + ci_avg_time_flag2=True, + ci_avg_time_isel=range(3), + ) + pass diff --git a/tests/test_datastore.py b/tests/test_datastore.py index 15a6cc38..03fa60f3 100644 --- a/tests/test_datastore.py +++ b/tests/test_datastore.py @@ -1,17 +1,15 @@ import hashlib import os import tempfile -import time import warnings from zipfile import ZipFile import dask.array as da import numpy as np import pytest +import xarray as xr +from xarray import Dataset -from dtscalibration import DataStore -from dtscalibration import open_datastore -from dtscalibration import open_mf_datastore from dtscalibration import read_apsensing_files from dtscalibration import read_sensornet_files from dtscalibration import read_sensortran_files @@ -20,6 +18,7 @@ from dtscalibration.datastore_utils import merge_double_ended from dtscalibration.datastore_utils import shift_double_ended from dtscalibration.datastore_utils import suggest_cable_shift_double_ended +from dtscalibration.dts_accessor import DtsAccessor # noqa: F401 np.random.seed(0) @@ -121,24 +120,23 @@ def test_read_data_from_single_file_single_ended(): assert actual_hash == desired_hash, "The data is not read correctly" -def test_empty_construction(): - ds = DataStore() # noqa: F841 - - def test_repr(): - ds = DataStore() - assert "dtscalibration" in str(ds) - assert "Sections" in str(ds) - + ds = Dataset( + { + "st": (["x", "time"], np.ones((100, 5))), + "ast": (["x", "time"], np.ones((100, 5))), + "probe1Temperature": (["time"], range(5)), + "probe2Temperature": (["time"], range(5)), + }, + coords={"x": range(100), "time": range(5)}, + ) -def test_has_sectionattr_upon_creation(): - ds = DataStore() - assert hasattr(ds, "_sections") - assert isinstance(ds._sections, str) + assert "dtscalibration" in str(ds.dts) + assert "Sections" in str(ds.dts) def test_sections_property(): - ds = DataStore( + ds = Dataset( { "st": (["x", "time"], np.ones((100, 5))), "ast": (["x", "time"], np.ones((100, 5))), @@ -156,15 +154,15 @@ def test_sections_property(): "probe1Temperature": [slice(0.0, 17.0), slice(70.0, 80.0)], # cold bath "probe2Temperature": [slice(24.0, 34.0), slice(85.0, 95.0)], # warm bath } - ds = set_sections(ds, sections1) + set_sections(ds, sections1) assert isinstance(ds.attrs["_sections"], str) - assert ds.sections == sections1 - assert ds.sections != sections2 + assert ds.dts.sections == sections1 + assert ds.dts.sections != sections2 # test if accepts singleton numpy arrays - ds = set_sections( + 
set_sections( + ds, + { + "probe1Temperature": [ @@ -176,7 +174,7 @@ def test_io_sections_property(): - ds = DataStore( + ds = Dataset( { "st": (["x", "time"], np.ones((100, 5))), "ast": (["x", "time"], np.ones((100, 5))), @@ -192,7 +190,7 @@ } ds["x"].attrs["units"] = "m" - ds = set_sections(ds, sections) + set_sections(ds, sections) # Create a temporary file to write data to. # 'with' method is used so the file is closed by tempfile @@ -204,14 +202,14 @@ ds.to_netcdf(path=temppath) try: - ds2 = open_datastore(temppath) + ds2 = xr.open_dataset(temppath) except ValueError as e: if str(e) != "cannot guess the engine, try passing one explicitly": raise warnings.warn("Could not guess engine, defaulted to netcdf4") - ds2 = open_datastore(temppath, engine="netcdf4") + ds2 = xr.open_dataset(temppath, engine="netcdf4") - assert ds.sections == ds2.sections + assert ds.dts.sections == ds2.dts.sections # Close the datastore so the temp file can be removed ds2.close() @@ -494,41 +492,6 @@ def test_read_sensortran_files(): ) -def test_to_mf_netcdf_open_mf_datastore(): - filepath = data_dir_single_ended - ds = read_silixa_files(directory=filepath, file_ext="*.xml") - - with tempfile.TemporaryDirectory() as tmpdirname: - print("created temporary directory", tmpdirname) - - # work around the effects of deafault encoding. - path = os.path.join(tmpdirname, "ds_merged.nc") - - with read_silixa_files(directory=filepath, file_ext="*.xml") as ds: - ds.to_netcdf(path) - - time.sleep(5) # to ensure all is written on Windows and file released - - with open_datastore(path, load_in_memory=True) as ds1: - # Test saving - ds1 = ds1.chunk({"time": 1}) - ds1.to_mf_netcdf( - folder_path=tmpdirname, - filename_preamble="file_", - filename_extension=".nc", - ) - correct_val = float(ds1.st.sum()) - - time.sleep(2) # to ensure all is written on Windows and file released - - # Test loading - path = os.path.join(tmpdirname, "file_*.nc") - - with open_mf_datastore(path=path, load_in_memory=True) as ds2: - test_val = float(ds2.st.sum()) - np.testing.assert_equal(correct_val, test_val) - - def read_data_from_fp_numpy(fp): """ Read the data from a single Silixa xml file.
Using a simple approach @@ -566,7 +529,7 @@ def test_resample_datastore(): ds = read_silixa_files(directory=filepath, timezone_netcdf="UTC", file_ext="*.xml") assert ds.time.size == 3 - ds_resampled = DataStore(ds.resample(time="47S").mean()) + ds_resampled = Dataset(ds.resample(time="47S").mean()) assert ds_resampled.time.size == 2 assert ds_resampled.st.dims == ("x", "time"), ( @@ -581,7 +544,7 @@ def test_timeseries_keys(): filepath = data_dir_single_ended ds = read_silixa_files(directory=filepath, timezone_netcdf="UTC", file_ext="*.xml") - k = ds.timeseries_keys + k = ds.dts.get_timeseries_keys() # no false positive for ki in k: diff --git a/tests/test_dtscalibration.py b/tests/test_dtscalibration.py index 24f92951..ec951835 100644 --- a/tests/test_dtscalibration.py +++ b/tests/test_dtscalibration.py @@ -5,11 +5,12 @@ import pytest import scipy.sparse as sp from scipy import stats +from xarray import Dataset -from dtscalibration import DataStore -from dtscalibration import read_silixa_files from dtscalibration.calibrate_utils import wls_sparse from dtscalibration.calibrate_utils import wls_stats +from dtscalibration.dts_accessor import DtsAccessor # noqa: F401 +from dtscalibration.variance_stokes import variance_stokes_exponential np.random.seed(0) @@ -60,897 +61,12 @@ def assert_almost_equal_verbose(actual, desired, verbose=False, **kwargs): pass -@pytest.mark.slow # Execution time ~20 seconds -def test_variance_input_types_single(): - import dask.array as da - - from src.dtscalibration import DataStore - - state = da.random.RandomState(0) - - stokes_m_var = 40.0 - cable_len = 100.0 - nt = 500 - time = np.arange(nt) - x = np.linspace(0.0, cable_len, 100) - ts_cold = np.ones(nt) * 4.0 - ts_warm = np.ones(nt) * 20.0 - - C_p = 15246 - C_m = 2400.0 - dalpha_r = 0.005284 - dalpha_m = 0.004961 - dalpha_p = 0.005607 - gamma = 482.6 - cold_mask = x < 0.5 * cable_len - warm_mask = np.invert(cold_mask) # == False - temp_real = np.ones((len(x), nt)) - temp_real[cold_mask] *= ts_cold + 273.15 - temp_real[warm_mask] *= ts_warm + 273.15 - - st = ( - C_p - * np.exp(-dalpha_r * x[:, None]) - * np.exp(-dalpha_p * x[:, None]) - * np.exp(-gamma / temp_real) - / (1 - np.exp(-gamma / temp_real)) - ) - ast = ( - C_m - * np.exp(-dalpha_r * x[:, None]) - * np.exp(-dalpha_m * x[:, None]) - / (1 - np.exp(-gamma / temp_real)) - ) - - st_m = st + stats.norm.rvs(size=st.shape, scale=stokes_m_var**0.5) - ast_m = ast + stats.norm.rvs(size=ast.shape, scale=1.1 * stokes_m_var**0.5) - - print("alphaint", cable_len * (dalpha_p - dalpha_m)) - print("alpha", dalpha_p - dalpha_m) - print("C", np.log(C_p / C_m)) - print("x0", x.max()) - - ds = DataStore( - { - "st": (["x", "time"], st_m), - "ast": (["x", "time"], ast_m), - "userAcquisitionTimeFW": (["time"], np.ones(nt)), - "cold": (["time"], ts_cold), - "warm": (["time"], ts_warm), - }, - coords={"x": x, "time": time}, - attrs={"isDoubleEnded": "0"}, - ) - - sections = { - "cold": [slice(0.0, 0.4 * cable_len)], - "warm": [slice(0.6 * cable_len, cable_len)], - } - - # Test float input - st_var = 5.0 - - ds.calibration_single_ended( - sections=sections, st_var=st_var, ast_var=st_var, method="wls", solver="sparse" - ) - - ds.conf_int_single_ended( - st_var=st_var, - ast_var=st_var, - mc_sample_size=100, - da_random_state=state, - mc_remove_set_flag=False, - ) - - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(x=slice(0, 10)).mean(), 0.044361, decimal=2 - ) - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(x=slice(90, 100)).mean(), 0.242028, decimal=2 - ) - - # Test 
callable input - def callable_st_var(stokes): - slope = 0.01 - offset = 0 - return slope * stokes + offset - - ds.calibration_single_ended( - sections=sections, - st_var=callable_st_var, - ast_var=callable_st_var, - method="wls", - solver="sparse", - ) - - ds.conf_int_single_ended( - st_var=callable_st_var, - ast_var=callable_st_var, - mc_sample_size=100, - da_random_state=state, - ) - - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(x=slice(0, 10)).mean(), 0.184753, decimal=2 - ) - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(x=slice(90, 100)).mean(), 0.545186, decimal=2 - ) - - # Test input with shape of (ntime, nx) - st_var = ds.st.values * 0 + 20.0 - ds.calibration_single_ended( - sections=sections, st_var=st_var, ast_var=st_var, method="wls", solver="sparse" - ) - - ds.conf_int_single_ended( - st_var=st_var, ast_var=st_var, mc_sample_size=100, da_random_state=state - ) - - assert_almost_equal_verbose(ds.tmpf_mc_var.mean(), 0.418098, decimal=2) - - # Test input with shape (nx, 1) - st_var = np.vstack( - ds.st.mean(dim="time").values * 0 + np.linspace(10, 50, num=ds.st.x.size) - ) - - ds.calibration_single_ended( - sections=sections, st_var=st_var, ast_var=st_var, method="wls", solver="sparse" - ) - - ds.conf_int_single_ended( - st_var=st_var, ast_var=st_var, mc_sample_size=100, da_random_state=state - ) - - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(x=slice(0, 50)).mean().values, 0.2377, decimal=2 - ) - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(x=slice(50, 100)).mean().values, 1.3203, decimal=2 - ) - - # Test input with shape (ntime) - st_var = ds.st.mean(dim="x").values * 0 + np.linspace(5, 200, num=nt) - - ds.calibration_single_ended( - sections=sections, st_var=st_var, ast_var=st_var, method="wls", solver="sparse" - ) - - ds.conf_int_single_ended( - st_var=st_var, ast_var=st_var, mc_sample_size=100, da_random_state=state - ) - - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(time=slice(0, nt // 2)).mean().values, 1.0908, decimal=2 - ) - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(time=slice(nt // 2, None)).mean().values, 3.0759, decimal=2 - ) - - pass - - -@pytest.mark.slow # Execution time ~0.5 minute -def test_variance_input_types_double(): - import dask.array as da - - from src.dtscalibration import DataStore - - state = da.random.RandomState(0) - - stokes_m_var = 40.0 - cable_len = 100.0 - nt = 500 - time = np.arange(nt) - x = np.linspace(0.0, cable_len, 100) - ts_cold = np.ones(nt) * 4.0 - ts_warm = np.ones(nt) * 20.0 - - C_p = 15246 - C_m = 2400.0 - dalpha_r = 0.005284 - dalpha_m = 0.004961 - dalpha_p = 0.005607 - gamma = 482.6 - cold_mask = x < 0.5 * cable_len - warm_mask = np.invert(cold_mask) # == False - temp_real = np.ones((len(x), nt)) - temp_real[cold_mask] *= ts_cold + 273.15 - temp_real[warm_mask] *= ts_warm + 273.15 - - st = ( - C_p - * np.exp(-dalpha_r * x[:, None]) - * np.exp(-dalpha_p * x[:, None]) - * np.exp(-gamma / temp_real) - / (1 - np.exp(-gamma / temp_real)) - ) - ast = ( - C_m - * np.exp(-dalpha_r * x[:, None]) - * np.exp(-dalpha_m * x[:, None]) - / (1 - np.exp(-gamma / temp_real)) - ) - rst = ( - C_p - * np.exp(-dalpha_r * (-x[:, None] + 100)) - * np.exp(-dalpha_p * (-x[:, None] + 100)) - * np.exp(-gamma / temp_real) - / (1 - np.exp(-gamma / temp_real)) - ) - rast = ( - C_m - * np.exp(-dalpha_r * (-x[:, None] + 100)) - * np.exp(-dalpha_m * (-x[:, None] + 100)) - / (1 - np.exp(-gamma / temp_real)) - ) - - st_m = st + stats.norm.rvs(size=st.shape, scale=stokes_m_var**0.5) - ast_m = ast + stats.norm.rvs(size=ast.shape, 
scale=1.1 * stokes_m_var**0.5) - rst_m = rst + stats.norm.rvs(size=rst.shape, scale=0.9 * stokes_m_var**0.5) - rast_m = rast + stats.norm.rvs(size=rast.shape, scale=0.8 * stokes_m_var**0.5) - - print("alphaint", cable_len * (dalpha_p - dalpha_m)) - print("alpha", dalpha_p - dalpha_m) - print("C", np.log(C_p / C_m)) - print("x0", x.max()) - - ds = DataStore( - { - "st": (["x", "time"], st_m), - "ast": (["x", "time"], ast_m), - "rst": (["x", "time"], rst_m), - "rast": (["x", "time"], rast_m), - "userAcquisitionTimeFW": (["time"], np.ones(nt)), - "userAcquisitionTimeBW": (["time"], np.ones(nt)), - "cold": (["time"], ts_cold), - "warm": (["time"], ts_warm), - }, - coords={"x": x, "time": time}, - attrs={"isDoubleEnded": "1"}, - ) - - sections = { - "cold": [slice(0.0, 0.4 * cable_len)], - "warm": [slice(0.6 * cable_len, cable_len)], - } - - # Test float input - st_var = 5.0 - - ds.calibration_double_ended( - sections=sections, - st_var=st_var, - ast_var=st_var, - rst_var=st_var, - rast_var=st_var, - method="wls", - solver="sparse", - ) - - ds.conf_int_double_ended( - sections=sections, - st_var=st_var, - ast_var=st_var, - rst_var=st_var, - rast_var=st_var, - mc_sample_size=100, - da_random_state=state, - ) - - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(x=slice(0, 10)).mean(), 0.03584935, decimal=2 - ) - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(x=slice(90, 100)).mean(), 0.22982146, decimal=2 - ) - - # Test callable input - def st_var_callable(stokes): - slope = 0.01 - offset = 0 - return slope * stokes + offset - - ds.calibration_double_ended( - sections=sections, - st_var=st_var_callable, - ast_var=st_var_callable, - rst_var=st_var_callable, - rast_var=st_var_callable, - method="wls", - solver="sparse", - ) - - ds.conf_int_double_ended( - sections=sections, - st_var=st_var_callable, - ast_var=st_var_callable, - rst_var=st_var_callable, - rast_var=st_var_callable, - mc_sample_size=100, - da_random_state=state, - ) - - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(x=slice(0, 10)).mean(), 0.18058514, decimal=2 - ) - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(x=slice(90, 100)).mean(), 0.53862813, decimal=2 - ) - - # Test input with shape of (ntime, nx) - st_var = ds.st.values * 0 + 20.0 - - ds.calibration_double_ended( - sections=sections, - st_var=st_var, - ast_var=st_var, - rst_var=st_var, - rast_var=st_var, - method="wls", - solver="sparse", - ) - - ds.conf_int_double_ended( - sections=sections, - st_var=st_var, - ast_var=st_var, - rst_var=st_var, - rast_var=st_var, - mc_sample_size=100, - da_random_state=state, - ) - - assert_almost_equal_verbose(ds.tmpf_mc_var.mean(), 0.40725674, decimal=2) - - # Test input with shape (nx, 1) - st_var = np.vstack( - ds.st.mean(dim="time").values * 0 + np.linspace(10, 50, num=ds.st.x.size) - ) - - ds.calibration_double_ended( - sections=sections, - st_var=st_var, - ast_var=st_var, - rst_var=st_var, - rast_var=st_var, - method="wls", - solver="sparse", - ) - - ds.conf_int_double_ended( - sections=sections, - st_var=st_var, - ast_var=st_var, - rst_var=st_var, - rast_var=st_var, - mc_sample_size=100, - da_random_state=state, - ) - - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(x=slice(0, 50)).mean().values, 0.21163704, decimal=2 - ) - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(x=slice(50, 100)).mean().values, 1.28247762, decimal=2 - ) - - # Test input with shape (ntime) - st_var = ds.st.mean(dim="x").values * 0 + np.linspace(5, 200, num=nt) - - ds.calibration_double_ended( - sections=sections, - st_var=st_var, - 
ast_var=st_var, - rst_var=st_var, - rast_var=st_var, - method="wls", - solver="sparse", - ) - - ds.conf_int_double_ended( - sections=sections, - st_var=st_var, - ast_var=st_var, - rst_var=st_var, - rast_var=st_var, - mc_sample_size=100, - da_random_state=state, - ) - - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(time=slice(0, nt // 2)).mean().values, 1.090, decimal=2 - ) - assert_almost_equal_verbose( - ds.tmpf_mc_var.sel(time=slice(nt // 2, None)).mean().values, 3.06, decimal=2 - ) - - pass - - -@pytest.mark.slow # Execution time ~0.5 minute -def test_double_ended_variance_estimate_synthetic(): - import dask.array as da - - from src.dtscalibration import DataStore - - state = da.random.RandomState(0) - - stokes_m_var = 40.0 - cable_len = 100.0 - nt = 500 - time = np.arange(nt) - x = np.linspace(0.0, cable_len, 100) - ts_cold = np.ones(nt) * 4.0 - ts_warm = np.ones(nt) * 20.0 - - C_p = 15246 - C_m = 2400.0 - dalpha_r = 0.0005284 - dalpha_m = 0.0004961 - dalpha_p = 0.0005607 - gamma = 482.6 - cold_mask = x < 0.5 * cable_len - warm_mask = np.invert(cold_mask) # == False - temp_real = np.ones((len(x), nt)) - temp_real[cold_mask] *= ts_cold + 273.15 - temp_real[warm_mask] *= ts_warm + 273.15 - - st = ( - C_p - * np.exp(-dalpha_r * x[:, None]) - * np.exp(-dalpha_p * x[:, None]) - * np.exp(-gamma / temp_real) - / (1 - np.exp(-gamma / temp_real)) - ) - ast = ( - C_m - * np.exp(-dalpha_r * x[:, None]) - * np.exp(-dalpha_m * x[:, None]) - / (1 - np.exp(-gamma / temp_real)) - ) - rst = ( - C_p - * np.exp(-dalpha_r * (-x[:, None] + 100)) - * np.exp(-dalpha_p * (-x[:, None] + 100)) - * np.exp(-gamma / temp_real) - / (1 - np.exp(-gamma / temp_real)) - ) - rast = ( - C_m - * np.exp(-dalpha_r * (-x[:, None] + 100)) - * np.exp(-dalpha_m * (-x[:, None] + 100)) - / (1 - np.exp(-gamma / temp_real)) - ) - - st_m = st + stats.norm.rvs(size=st.shape, scale=stokes_m_var**0.5) - ast_m = ast + stats.norm.rvs(size=ast.shape, scale=1.1 * stokes_m_var**0.5) - rst_m = rst + stats.norm.rvs(size=rst.shape, scale=0.9 * stokes_m_var**0.5) - rast_m = rast + stats.norm.rvs(size=rast.shape, scale=0.8 * stokes_m_var**0.5) - - print("alphaint", cable_len * (dalpha_p - dalpha_m)) - print("alpha", dalpha_p - dalpha_m) - print("C", np.log(C_p / C_m)) - print("x0", x.max()) - - ds = DataStore( - { - "st": (["x", "time"], st_m), - "ast": (["x", "time"], ast_m), - "rst": (["x", "time"], rst_m), - "rast": (["x", "time"], rast_m), - "userAcquisitionTimeFW": (["time"], np.ones(nt)), - "userAcquisitionTimeBW": (["time"], np.ones(nt)), - "cold": (["time"], ts_cold), - "warm": (["time"], ts_warm), - }, - coords={"x": x, "time": time}, - attrs={"isDoubleEnded": "1"}, - ) - - sections = { - "cold": [slice(0.0, 0.5 * cable_len)], - "warm": [slice(0.5 * cable_len, cable_len)], - } - - mst_var, _ = ds.variance_stokes(st_label="st", sections=sections) - mast_var, _ = ds.variance_stokes(st_label="ast", sections=sections) - mrst_var, _ = ds.variance_stokes(st_label="rst", sections=sections) - mrast_var, _ = ds.variance_stokes(st_label="rast", sections=sections) - - mst_var = float(mst_var) - mast_var = float(mast_var) - mrst_var = float(mrst_var) - mrast_var = float(mrast_var) - - # MC variance - ds.calibration_double_ended( - sections=sections, - st_var=mst_var, - ast_var=mast_var, - rst_var=mrst_var, - rast_var=mrast_var, - method="wls", - solver="sparse", - ) - - assert_almost_equal_verbose(ds.tmpf.mean(), 12.0, decimal=2) - assert_almost_equal_verbose(ds.tmpb.mean(), 12.0, decimal=3) - - ds.conf_int_double_ended( - sections=sections, - 
p_val="p_val", - p_cov="p_cov", - st_var=mst_var, - ast_var=mast_var, - rst_var=mrst_var, - rast_var=mrast_var, - conf_ints=[2.5, 50.0, 97.5], - mc_sample_size=100, - da_random_state=state, - ) - - # Calibrated variance - stdsf1 = ds.ufunc_per_section( - sections=sections, label="tmpf", func=np.std, temp_err=True, calc_per="stretch" - ) - stdsb1 = ds.ufunc_per_section( - sections=sections, label="tmpb", func=np.std, temp_err=True, calc_per="stretch" - ) - - # Use a single timestep to better check if the parameter uncertainties propagate - ds1 = ds.isel(time=1) - # Estimated VAR - stdsf2 = ds1.ufunc_per_section( - sections=sections, - label="tmpf_mc_var", - func=np.mean, - temp_err=False, - calc_per="stretch", - ) - stdsb2 = ds1.ufunc_per_section( - sections=sections, - label="tmpb_mc_var", - func=np.mean, - temp_err=False, - calc_per="stretch", - ) - - for (_, v1), (_, v2) in zip(stdsf1.items(), stdsf2.items()): - for v1i, v2i in zip(v1, v2): - print("Real VAR: ", v1i**2, "Estimated VAR: ", float(v2i)) - assert_almost_equal_verbose(v1i**2, v2i, decimal=2) - - for (_, v1), (_, v2) in zip(stdsb1.items(), stdsb2.items()): - for v1i, v2i in zip(v1, v2): - print("Real VAR: ", v1i**2, "Estimated VAR: ", float(v2i)) - assert_almost_equal_verbose(v1i**2, v2i, decimal=2) - - pass - - -def test_single_ended_variance_estimate_synthetic(): - import dask.array as da - - from src.dtscalibration import DataStore - - state = da.random.RandomState(0) - - stokes_m_var = 40.0 - astokes_m_var = 60.0 - cable_len = 100.0 - nt = 50 - time = np.arange(nt) - x = np.linspace(0.0, cable_len, 500) - ts_cold = np.ones(nt) * 4.0 - ts_warm = np.ones(nt) * 20.0 - - C_p = 15246 - C_m = 2400.0 - dalpha_r = 0.0005284 - dalpha_m = 0.0004961 - dalpha_p = 0.0005607 - gamma = 482.6 - cold_mask = x < 0.5 * cable_len - warm_mask = np.invert(cold_mask) # == False - temp_real = np.ones((len(x), nt)) - temp_real[cold_mask] *= ts_cold + 273.15 - temp_real[warm_mask] *= ts_warm + 273.15 - - st = ( - C_p - * np.exp(-dalpha_r * x[:, None]) - * np.exp(-dalpha_p * x[:, None]) - * np.exp(-gamma / temp_real) - / (1 - np.exp(-gamma / temp_real)) - ) - ast = ( - C_m - * np.exp(-dalpha_r * x[:, None]) - * np.exp(-dalpha_m * x[:, None]) - / (1 - np.exp(-gamma / temp_real)) - ) - st_m = st + stats.norm.rvs(size=st.shape, scale=stokes_m_var**0.5) - ast_m = ast + stats.norm.rvs(size=ast.shape, scale=astokes_m_var**0.5) - - print("alphaint", cable_len * (dalpha_p - dalpha_m)) - print("alpha", dalpha_p - dalpha_m) - print("C", np.log(C_p / C_m)) - print("x0", x.max()) - - ds = DataStore( - { - "st": (["x", "time"], st_m), - "ast": (["x", "time"], ast_m), - "userAcquisitionTimeFW": (["time"], np.ones(nt)), - "cold": (["time"], ts_cold), - "warm": (["time"], ts_warm), - }, - coords={"x": x, "time": time}, - attrs={"isDoubleEnded": "0"}, - ) - - sections = { - "cold": [slice(0.0, 0.5 * cable_len)], - "warm": [slice(0.5 * cable_len, cable_len)], - } - - st_label = "st" - ast_label = "ast" - - mst_var, _ = ds.variance_stokes(st_label=st_label, sections=sections) - mast_var, _ = ds.variance_stokes(st_label=ast_label, sections=sections) - mst_var = float(mst_var) - mast_var = float(mast_var) - - # MC variqnce - ds.calibration_single_ended( - sections=sections, - st_var=mst_var, - ast_var=mast_var, - method="wls", - solver="sparse", - ) - - ds.conf_int_single_ended( - p_val="p_val", - p_cov="p_cov", - st_var=mst_var, - ast_var=mast_var, - conf_ints=[2.5, 50.0, 97.5], - mc_sample_size=50, - da_random_state=state, - ) - - # Calibrated variance - stdsf1 = 
ds.ufunc_per_section( - sections=sections, - label="tmpf", - func=np.std, - temp_err=True, - calc_per="stretch", - ddof=1, - ) - - # Use a single timestep to better check if the parameter uncertainties propagate - ds1 = ds.isel(time=1) - # Estimated VAR - stdsf2 = ds1.ufunc_per_section( - sections=sections, - label="tmpf_mc_var", - func=np.mean, - temp_err=False, - calc_per="stretch", - ) - - for (_, v1), (_, v2) in zip(stdsf1.items(), stdsf2.items()): - for v1i, v2i in zip(v1, v2): - print("Real VAR: ", v1i**2, "Estimated VAR: ", float(v2i)) - assert_almost_equal_verbose(v1i**2, v2i, decimal=2) - - pass - - -@pytest.mark.skip(reason="Not enough measurements in time. Use exponential " "instead.") -def test_variance_of_stokes(): - correct_var = 9.045 - filepath = data_dir_double_ended2 - ds = read_silixa_files(directory=filepath, timezone_netcdf="UTC", file_ext="*.xml") - sections = { - "probe1Temperature": [slice(7.5, 17.0), slice(70.0, 80.0)], # cold bath - "probe2Temperature": [slice(24.0, 34.0), slice(85.0, 95.0)], # warm bath - } - - I_var, _ = ds.variance_stokes(st_label="st", sections=sections) - assert_almost_equal_verbose(I_var, correct_var, decimal=1) - - ds_dask = ds.chunk(chunks={}) - I_var, _ = ds_dask.variance_stokes(st_label="st", sections=sections) - assert_almost_equal_verbose(I_var, correct_var, decimal=1) - - pass - - -def test_variance_of_stokes_synthetic(): - """ - Produces a synthetic Stokes measurement with a known noise distribution. Check if same - variance is obtained. - - Returns - ------- - - """ - yvar = 5.0 - - nx = 500 - x = np.linspace(0.0, 20.0, nx) - - nt = 200 - G = np.linspace(3000, 4000, nt)[None] - - y = G * np.exp(-0.001 * x[:, None]) - - y += stats.norm.rvs(size=y.size, scale=yvar**0.5).reshape(y.shape) - - ds = DataStore( - { - "st": (["x", "time"], y), - "probe1Temperature": (["time"], range(nt)), - "userAcquisitionTimeFW": (["time"], np.ones(nt)), - }, - coords={"x": x, "time": range(nt)}, - attrs={"isDoubleEnded": "0"}, - ) - - sections = {"probe1Temperature": [slice(0.0, 20.0)]} - test_st_var, _ = ds.variance_stokes(st_label="st", sections=sections) - - assert_almost_equal_verbose(test_st_var, yvar, decimal=1) - - test_st_var, _ = ds.variance_stokes(st_label="st", sections=sections) - - assert_almost_equal_verbose(test_st_var, yvar, decimal=1) - pass - - -@pytest.mark.slow # Execution time ~20 seconds -def test_variance_of_stokes_linear_synthetic(): - """ - Produces a synthetic Stokes measurement with a known noise distribution. - Check if same variance is obtained. - - Returns - ------- - - """ - var_slope = 0.01 - - nx = 500 - x = np.linspace(0.0, 20.0, nx) - - nt = 200 - G = np.linspace(500, 4000, nt)[None] - c_no_noise = G * np.exp(-0.001 * x[:, None]) - - c_lin_var_through_zero = stats.norm.rvs( - loc=c_no_noise, - # size=y.size, - scale=(var_slope * c_no_noise) ** 0.5, - ) - ds = DataStore( - { - "st": (["x", "time"], c_no_noise), - "c_lin_var_through_zero": (["x", "time"], c_lin_var_through_zero), - "probe1Temperature": (["time"], range(nt)), - "userAcquisitionTimeFW": (["time"], np.ones(nt)), - }, - coords={"x": x, "time": range(nt)}, - attrs={"isDoubleEnded": "0"}, - ) - - sections = {"probe1Temperature": [slice(0.0, 20.0)]} - test_st_var, _ = ds.variance_stokes(st_label="st", sections=sections) - - # If fit is forced through zero. 
Only Poisson distributed noise - ( - slope, - offset, - st_sort_mean, - st_sort_var, - resid, - var_fun, - ) = ds.variance_stokes_linear( - "c_lin_var_through_zero", - sections=sections, - nbin=10, - through_zero=True, - plot_fit=False, - ) - assert_almost_equal_verbose(slope, var_slope, decimal=3) - - # Fit accounts for Poisson noise plus white noise - ( - slope, - offset, - st_sort_mean, - st_sort_var, - resid, - var_fun, - ) = ds.variance_stokes_linear( - "c_lin_var_through_zero", sections=sections, nbin=100, through_zero=False - ) - assert_almost_equal_verbose(slope, var_slope, decimal=3) - assert_almost_equal_verbose(offset, 0.0, decimal=0) - - pass - - -@pytest.mark.slow # Execution time ~20 seconds -def test_exponential_variance_of_stokes(): - correct_var = 11.86535 - filepath = data_dir_double_ended2 - ds = read_silixa_files(directory=filepath, timezone_netcdf="UTC", file_ext="*.xml") - sections = { - "probe1Temperature": [slice(7.5, 17.0), slice(70.0, 80.0)], # cold bath - "probe2Temperature": [slice(24.0, 34.0), slice(85.0, 95.0)], # warm bath - } - - I_var, _ = ds.variance_stokes_exponential(st_label="st", sections=sections) - assert_almost_equal_verbose(I_var, correct_var, decimal=5) - - ds_dask = ds.chunk(chunks={}) - I_var, _ = ds_dask.variance_stokes_exponential(st_label="st", sections=sections) - assert_almost_equal_verbose(I_var, correct_var, decimal=5) - - pass - - -def test_exponential_variance_of_stokes_synthetic(): - """ - Produces a synthetic Stokes measurement with a known noise distribution. Check if same - variance is obtained. - - Returns - ------- - - """ - yvar = 5.0 - - nx = 500 - x = np.linspace(0.0, 20.0, nx) - - nt = 200 - beta = np.linspace(3000, 4000, nt)[None] - - y = beta * np.exp(-0.001 * x[:, None]) - - y += stats.norm.rvs(size=y.size, scale=yvar**0.5).reshape(y.shape) - - ds = DataStore( - { - "st": (["x", "time"], y), - "probe1Temperature": (["time"], range(nt)), - "userAcquisitionTimeFW": (["time"], np.ones(nt)), - }, - coords={"x": x, "time": range(nt)}, - attrs={"isDoubleEnded": "0"}, - ) - - sections = {"probe1Temperature": [slice(0.0, 20.0)]} - test_st_var, _ = ds.variance_stokes_exponential(st_label="st", sections=sections) - - assert_almost_equal_verbose(test_st_var, yvar, decimal=1) - pass - - def test_double_ended_wls_estimate_synthetic(): """Checks whether the coefficients are correctly defined by creating a synthetic measurement set, and derive the parameters from this set. Without variance. 
They should be the same as the parameters used to create the synthetic measurment set""" - from dtscalibration import DataStore - cable_len = 100.0 nt = 50 time = np.arange(nt) @@ -996,7 +112,7 @@ def test_double_ended_wls_estimate_synthetic(): alpha = np.mean(np.log(rst / rast) - np.log(st / ast), axis=1) / 2 alpha -= alpha[0] # the first x-index is where to start counting - ds = DataStore( + ds = Dataset( { "st": (["x", "time"], st), "ast": (["x", "time"], ast), @@ -1017,7 +133,7 @@ def test_double_ended_wls_estimate_synthetic(): } # WLS - ds.calibration_double_ended( + out = ds.dts.calibrate_double_ended( sections=sections, st_var=1e-7, ast_var=1e-7, @@ -1027,11 +143,11 @@ def test_double_ended_wls_estimate_synthetic(): solver="sparse", ) - assert_almost_equal_verbose(ds.gamma.values, gamma, decimal=10) - assert_almost_equal_verbose(ds.alpha.values, alpha, decimal=8) - assert_almost_equal_verbose(ds.tmpf.values, temp_real - 273.15, decimal=6) - assert_almost_equal_verbose(ds.tmpb.values, temp_real - 273.15, decimal=6) - assert_almost_equal_verbose(ds.tmpw.values, temp_real - 273.15, decimal=6) + assert_almost_equal_verbose(out["gamma"].values, gamma, decimal=10) + assert_almost_equal_verbose(out["alpha"].values, alpha, decimal=8) + assert_almost_equal_verbose(out["tmpf"].values, temp_real - 273.15, decimal=6) + assert_almost_equal_verbose(out["tmpb"].values, temp_real - 273.15, decimal=6) + assert_almost_equal_verbose(out["tmpw"].values, temp_real - 273.15, decimal=6) def test_double_ended_wls_estimate_synthetic_df_and_db_are_different(): @@ -1041,7 +157,6 @@ def test_double_ended_wls_estimate_synthetic_df_and_db_are_different(): They should be the same as the parameters used to create the synthetic measurment set. This one has a different D for the forward channel than for the backward channel.""" - from dtscalibration import DataStore cable_len = 100.0 nt = 3 @@ -1110,7 +225,7 @@ def test_double_ended_wls_estimate_synthetic_df_and_db_are_different(): E_real = (i_bw - i_fw) / 2 + (db - df) / 2 - ds = DataStore( + ds = Dataset( { "st": (["x", "time"], st), "ast": (["x", "time"], ast), @@ -1132,126 +247,7 @@ def test_double_ended_wls_estimate_synthetic_df_and_db_are_different(): real_ans2 = np.concatenate(([gamma], df, db, E_real[:, 0])) - ds.calibration_double_ended( - sections=sections, - st_var=1.5, - ast_var=1.5, - rst_var=1.0, - rast_var=1.0, - method="wls", - solver="sparse", - fix_gamma=(gamma, 0.0), - ) - - assert_almost_equal_verbose(df, ds.df.values, decimal=14) - assert_almost_equal_verbose(db, ds.db.values, decimal=13) - assert_almost_equal_verbose( - x * (dalpha_p - dalpha_m), ds.alpha.values - ds.alpha.values[0], decimal=13 - ) - assert np.all(np.abs(real_ans2 - ds.p_val.values) < 1e-10) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpf.values, decimal=10) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpb.values, decimal=10) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpw.values, decimal=10) - pass - - -def test_reneaming_old_default_labels_to_new_fixed_labels(): - """Same as - `test_double_ended_wls_estimate_synthetic_df_and_db_are_different` - Which runs fast, but using the renaming function.""" - from dtscalibration import DataStore - - cable_len = 100.0 - nt = 3 - time = np.arange(nt) - x = np.linspace(0.0, cable_len, 8) - ts_cold = np.ones(nt) * 4.0 + np.cos(time) * 4 - ts_warm = np.ones(nt) * 20.0 + -np.sin(time) * 4 - - C_p = 1324 # 1/2 * E0 * v * K_+/lam_+^4 - eta_pf = np.cos(time) / 10 + 1 # eta_+ (gain factor forward channel) - eta_pb = 
np.sin(time) / 10 + 1 # eta_- (gain factor backward channel) - C_m = 5000.0 - eta_mf = np.cos(time + np.pi / 8) / 10 + 1 - eta_mb = np.sin(time + np.pi / 8) / 10 + 1 - dalpha_r = 0.005284 - dalpha_m = 0.004961 - dalpha_p = 0.005607 - gamma = 482.6 - - temp_real_kelvin = np.zeros((len(x), nt)) + 273.15 - temp_real_kelvin[x < 0.2 * cable_len] += ts_cold[None] - temp_real_kelvin[x > 0.85 * cable_len] += ts_warm[None] - temp_real_celsius = temp_real_kelvin - 273.15 - - st = ( - eta_pf[None] - * C_p - * np.exp(-dalpha_r * x[:, None]) - * np.exp(-dalpha_p * x[:, None]) - * np.exp(gamma / temp_real_kelvin) - / (np.exp(gamma / temp_real_kelvin) - 1) - ) - ast = ( - eta_mf[None] - * C_m - * np.exp(-dalpha_r * x[:, None]) - * np.exp(-dalpha_m * x[:, None]) - / (np.exp(gamma / temp_real_kelvin) - 1) - ) - rst = ( - eta_pb[None] - * C_p - * np.exp(-dalpha_r * (-x[:, None] + cable_len)) - * np.exp(-dalpha_p * (-x[:, None] + cable_len)) - * np.exp(gamma / temp_real_kelvin) - / (np.exp(gamma / temp_real_kelvin) - 1) - ) - rast = ( - eta_mb[None] - * C_m - * np.exp(-dalpha_r * (-x[:, None] + cable_len)) - * np.exp(-dalpha_m * (-x[:, None] + cable_len)) - / (np.exp(gamma / temp_real_kelvin) - 1) - ) - - c_f = np.log(eta_mf * C_m / (eta_pf * C_p)) - c_b = np.log(eta_mb * C_m / (eta_pb * C_p)) - - dalpha = dalpha_p - dalpha_m # \Delta\alpha - alpha_int = cable_len * dalpha - - df = c_f # reference section starts at first x-index - db = c_b + alpha_int - i_fw = np.log(st / ast) - i_bw = np.log(rst / rast) - - E_real = (i_bw - i_fw) / 2 + (db - df) / 2 - - ds = DataStore( - { - "ST": (["x", "time"], st), - "AST": (["x", "time"], ast), - "REV-ST": (["x", "time"], rst), - "REV-AST": (["x", "time"], rast), - "userAcquisitionTimeFW": (["time"], np.ones(nt)), - "userAcquisitionTimeBW": (["time"], np.ones(nt)), - "cold": (["time"], ts_cold), - "warm": (["time"], ts_warm), - }, - coords={"x": x, "time": time}, - attrs={"isDoubleEnded": "1"}, - ) - ds = ds.rename_labels() - - sections = { - "cold": [slice(0.0, 0.09 * cable_len)], - "warm": [slice(0.9 * cable_len, cable_len)], - } - - real_ans2 = np.concatenate(([gamma], df, db, E_real[:, 0])) - - ds.calibration_double_ended( + out = ds.dts.calibrate_double_ended( sections=sections, st_var=1.5, ast_var=1.5, @@ -1262,15 +258,17 @@ def test_reneaming_old_default_labels_to_new_fixed_labels(): fix_gamma=(gamma, 0.0), ) - assert_almost_equal_verbose(df, ds.df.values, decimal=14) - assert_almost_equal_verbose(db, ds.db.values, decimal=13) + assert_almost_equal_verbose(df, out["df"].values, decimal=14) + assert_almost_equal_verbose(db, out["db"].values, decimal=13) assert_almost_equal_verbose( - x * (dalpha_p - dalpha_m), ds.alpha.values - ds.alpha.values[0], decimal=13 - ) - assert np.all(np.abs(real_ans2 - ds.p_val.values) < 1e-10) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpf.values, decimal=10) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpb.values, decimal=10) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpw.values, decimal=10) + x * (dalpha_p - dalpha_m), + out["alpha"].values - out["alpha"].values[0], + decimal=13, + ) + assert np.all(np.abs(real_ans2 - out["p_val"].values) < 1e-10) + assert_almost_equal_verbose(temp_real_celsius, out["tmpf"].values, decimal=10) + assert_almost_equal_verbose(temp_real_celsius, out["tmpb"].values, decimal=10) + assert_almost_equal_verbose(temp_real_celsius, out["tmpw"].values, decimal=10) pass @@ -1279,7 +277,6 @@ def test_fail_if_st_labels_are_passed_to_calibration_function(): """Same as 
`test_double_ended_wls_estimate_synthetic_df_and_db_are_different` Which runs fast.""" - from dtscalibration import DataStore cable_len = 100.0 nt = 3 @@ -1334,7 +331,7 @@ def test_fail_if_st_labels_are_passed_to_calibration_function(): / (np.exp(gamma / temp_real_kelvin) - 1) ) - ds = DataStore( + ds = Dataset( { "ST": (["x", "time"], st), "AST": (["x", "time"], ast), @@ -1355,7 +352,7 @@ def test_fail_if_st_labels_are_passed_to_calibration_function(): "warm": [slice(0.9 * cable_len, cable_len)], } - ds.calibration_double_ended( + ds.dts.calibrate_double_ended( sections=sections, st_label="ST", ast_label="AST", @@ -1375,8 +372,6 @@ def test_fail_if_st_labels_are_passed_to_calibration_function(): def test_double_ended_asymmetrical_attenuation(): - from dtscalibration import DataStore - cable_len = 100.0 nt = 3 time = np.arange(nt) @@ -1441,7 +436,7 @@ def test_double_ended_asymmetrical_attenuation(): / (np.exp(gamma / temp_real_kelvin) - 1) ) - ds = DataStore( + ds = Dataset( { "TMPR": (["x", "time"], temp_real_celsius), "st": (["x", "time"], st), @@ -1454,7 +449,6 @@ def test_double_ended_asymmetrical_attenuation(): "warm": (["time"], ts_warm), }, coords={"x": x, "time": time}, - attrs={"isDoubleEnded": "1"}, ) sections = { @@ -1465,37 +459,7 @@ def test_double_ended_asymmetrical_attenuation(): ], } - ds.calibration_double_ended( - sections=sections, - st_var=1.5, - ast_var=1.5, - rst_var=1.0, - rast_var=1.0, - method="wls", - solver="sparse", - trans_att=[50.0], - ) - - assert_almost_equal_verbose(temp_real_celsius, ds.tmpf.values, decimal=7) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpb.values, decimal=7) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpw.values, decimal=7) - - # test `trans_att` related functions - - # Clear out old results - ds.set_trans_att([]) - - assert ds.trans_att.size == 0, "clear out trans_att config" - - del_keys = [] - for k, v in ds.data_vars.items(): - if "trans_att" in v.dims: - del_keys.append(k) - - assert len(del_keys) == 0, "clear out trans_att config" - - # About to be depreciated - ds.calibration_double_ended( + out = ds.dts.calibrate_double_ended( sections=sections, st_var=1.5, ast_var=1.5, @@ -1506,15 +470,13 @@ def test_double_ended_asymmetrical_attenuation(): trans_att=[50.0], ) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpf.values, decimal=7) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpb.values, decimal=7) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpw.values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out.tmpf.values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out.tmpb.values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out.tmpw.values, decimal=7) pass def test_double_ended_one_matching_section_and_one_asym_att(): - from dtscalibration import DataStore - cable_len = 100.0 nt = 3 time = np.arange(nt) @@ -1579,7 +541,7 @@ def test_double_ended_one_matching_section_and_one_asym_att(): / (np.exp(gamma / temp_real_kelvin) - 1) ) - ds = DataStore( + ds = Dataset( { "TMPR": (["x", "time"], temp_real_celsius), "st": (["x", "time"], st), @@ -1600,7 +562,7 @@ def test_double_ended_one_matching_section_and_one_asym_att(): "warm": [slice(x[nx_per_sec], x[2 * nx_per_sec - 1])], } - ds.calibration_double_ended( + out = ds.dts.calibrate_double_ended( sections=sections, st_var=1.5, ast_var=1.5, @@ -1618,15 +580,14 @@ def test_double_ended_one_matching_section_and_one_asym_att(): ], ) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpf.values, decimal=7) - 
assert_almost_equal_verbose(temp_real_celsius, ds.tmpb.values, decimal=7) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpw.values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpf"].values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpb"].values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpw"].values, decimal=7) def test_double_ended_two_matching_sections_and_two_asym_atts(): """Setup contains two matching sections and two connectors that introduce asymmetrical attenuation. Solves beautifully.""" - from dtscalibration import DataStore cable_len = 100.0 nt = 5 @@ -1698,7 +659,7 @@ def test_double_ended_two_matching_sections_and_two_asym_atts(): / (np.exp(gamma / temp_real_kelvin) - 1) ) - ds = DataStore( + ds = Dataset( { "TMPR": (["x", "time"], temp_real_celsius), "st": (["x", "time"], st), @@ -1731,7 +692,7 @@ def test_double_ended_two_matching_sections_and_two_asym_atts(): ), ] - ds.calibration_double_ended( + out = ds.dts.calibrate_double_ended( sections=sections, st_var=0.5, ast_var=0.5, @@ -1743,9 +704,9 @@ def test_double_ended_two_matching_sections_and_two_asym_atts(): matching_sections=ms, ) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpf.values, decimal=7) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpb.values, decimal=7) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpw.values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpf"].values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpb"].values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpw"].values, decimal=7) pass @@ -1755,7 +716,6 @@ def test_double_ended_wls_fix_gamma_estimate_synthetic(): Without variance. They should be the same as the parameters used to create the synthetic measurment set""" - from dtscalibration import DataStore cable_len = 100.0 nt = 500 @@ -1807,7 +767,7 @@ def test_double_ended_wls_fix_gamma_estimate_synthetic(): # to ensure the st, rst, ast, rast were correctly defined. np.testing.assert_allclose(alpha2, alpha, atol=1e-15, rtol=0) - ds = DataStore( + ds = Dataset( { "st": (["x", "time"], st), "ast": (["x", "time"], ast), @@ -1828,7 +788,7 @@ def test_double_ended_wls_fix_gamma_estimate_synthetic(): } # WLS - ds.calibration_double_ended( + out = ds.dts.calibrate_double_ended( sections=sections, st_var=1e-12, ast_var=1e-12, @@ -1839,11 +799,11 @@ def test_double_ended_wls_fix_gamma_estimate_synthetic(): fix_gamma=(gamma, 0.0), ) - assert_almost_equal_verbose(ds.gamma.values, gamma, decimal=18) - assert_almost_equal_verbose(ds.alpha.values, alpha, decimal=9) - assert_almost_equal_verbose(ds.tmpf.values, temp_real - 273.15, decimal=6) - assert_almost_equal_verbose(ds.tmpb.values, temp_real - 273.15, decimal=6) - assert_almost_equal_verbose(ds.tmpw.values, temp_real - 273.15, decimal=6) + assert_almost_equal_verbose(out["gamma"].values, gamma, decimal=18) + assert_almost_equal_verbose(out["alpha"].values, alpha, decimal=9) + assert_almost_equal_verbose(out["tmpf"].values, temp_real - 273.15, decimal=6) + assert_almost_equal_verbose(out["tmpb"].values, temp_real - 273.15, decimal=6) + assert_almost_equal_verbose(out["tmpw"].values, temp_real - 273.15, decimal=6) pass @@ -1854,7 +814,6 @@ def test_double_ended_wls_fix_alpha_estimate_synthetic(): Without variance. 
They should be the same as the parameters used to create the synthetic measurment set""" - from dtscalibration import DataStore cable_len = 100.0 nt = 500 @@ -1901,7 +860,7 @@ def test_double_ended_wls_fix_alpha_estimate_synthetic(): alpha = np.mean(np.log(rst / rast) - np.log(st / ast), axis=1) / 2 alpha -= alpha[0] # the first x-index is where to start counting - ds = DataStore( + ds = Dataset( { "st": (["x", "time"], st), "ast": (["x", "time"], ast), @@ -1922,7 +881,7 @@ def test_double_ended_wls_fix_alpha_estimate_synthetic(): } # WLS - ds.calibration_double_ended( + out = ds.dts.calibrate_double_ended( sections=sections, st_var=1e-7, ast_var=1e-7, @@ -1933,11 +892,11 @@ def test_double_ended_wls_fix_alpha_estimate_synthetic(): fix_alpha=(alpha, np.zeros_like(alpha)), ) - assert_almost_equal_verbose(ds.gamma.values, gamma, decimal=8) - assert_almost_equal_verbose(ds.alpha.values, alpha, decimal=18) - assert_almost_equal_verbose(ds.tmpf.values, temp_real - 273.15, decimal=7) - assert_almost_equal_verbose(ds.tmpb.values, temp_real - 273.15, decimal=7) - assert_almost_equal_verbose(ds.tmpw.values, temp_real - 273.15, decimal=7) + assert_almost_equal_verbose(out["gamma"].values, gamma, decimal=8) + assert_almost_equal_verbose(out["alpha"].values, alpha, decimal=18) + assert_almost_equal_verbose(out["tmpf"].values, temp_real - 273.15, decimal=7) + assert_almost_equal_verbose(out["tmpb"].values, temp_real - 273.15, decimal=7) + assert_almost_equal_verbose(out["tmpw"].values, temp_real - 273.15, decimal=7) pass @@ -1948,7 +907,6 @@ def test_double_ended_wls_fix_alpha_fix_gamma_estimate_synthetic(): Without variance. They should be the same as the parameters used to create the synthetic measurment set""" - from dtscalibration import DataStore cable_len = 100.0 nt = 500 @@ -1995,7 +953,7 @@ def test_double_ended_wls_fix_alpha_fix_gamma_estimate_synthetic(): alpha = np.mean(np.log(rst / rast) - np.log(st / ast), axis=1) / 2 alpha -= alpha[0] # the first x-index is where to start counting - ds = DataStore( + ds = Dataset( { "st": (["x", "time"], st), "ast": (["x", "time"], ast), @@ -2016,7 +974,7 @@ def test_double_ended_wls_fix_alpha_fix_gamma_estimate_synthetic(): } # WLS - ds.calibration_double_ended( + out = ds.dts.calibrate_double_ended( sections=sections, st_var=1e-7, ast_var=1e-7, @@ -2028,18 +986,16 @@ def test_double_ended_wls_fix_alpha_fix_gamma_estimate_synthetic(): fix_alpha=(alpha, np.zeros_like(alpha)), ) - assert_almost_equal_verbose(ds.gamma.values, gamma, decimal=18) - assert_almost_equal_verbose(ds.alpha.values, alpha, decimal=18) - assert_almost_equal_verbose(ds.tmpf.values, temp_real - 273.15, decimal=11) - assert_almost_equal_verbose(ds.tmpb.values, temp_real - 273.15, decimal=11) - assert_almost_equal_verbose(ds.tmpw.values, temp_real - 273.15, decimal=11) + assert_almost_equal_verbose(out["gamma"].values, gamma, decimal=18) + assert_almost_equal_verbose(out["alpha"].values, alpha, decimal=18) + assert_almost_equal_verbose(out["tmpf"].values, temp_real - 273.15, decimal=11) + assert_almost_equal_verbose(out["tmpb"].values, temp_real - 273.15, decimal=11) + assert_almost_equal_verbose(out["tmpw"].values, temp_real - 273.15, decimal=11) pass def test_double_ended_fix_alpha_matching_sections_and_one_asym_att(): - from dtscalibration import DataStore - cable_len = 100.0 nt = 3 time = np.arange(nt) @@ -2104,7 +1060,7 @@ def test_double_ended_fix_alpha_matching_sections_and_one_asym_att(): / (np.exp(gamma / temp_real_kelvin) - 1) ) - ds = DataStore( + ds = Dataset( { "TMPR": 
(["x", "time"], temp_real_celsius), "st": (["x", "time"], st), @@ -2125,7 +1081,7 @@ def test_double_ended_fix_alpha_matching_sections_and_one_asym_att(): "warm": [slice(x[nx_per_sec], x[2 * nx_per_sec - 1])], } - ds.calibration_double_ended( + out = ds.dts.calibrate_double_ended( sections=sections, st_var=1.5, ast_var=1.5, @@ -2147,12 +1103,12 @@ def test_double_ended_fix_alpha_matching_sections_and_one_asym_att(): k = ["talpha_fw", "talpha_bw", "trans_att"] for ki in k: - del ds[ki] + del out[ki] - alpha_adj = ds.alpha.values.copy() - alpha_var_adj = ds.alpha_var.values.copy() + alpha_adj = out["alpha"].values.copy() + alpha_var_adj = out["alpha_var"].values.copy() - ds.calibration_double_ended( + out = ds.dts.calibrate_double_ended( sections=sections, st_var=1.5, ast_var=1.5, @@ -2171,15 +1127,13 @@ def test_double_ended_fix_alpha_matching_sections_and_one_asym_att(): ], ) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpf.values, decimal=7) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpb.values, decimal=7) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpw.values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpf"].values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpb"].values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpw"].values, decimal=7) pass def test_double_ended_fix_alpha_gamma_matching_sections_and_one_asym_att(): - from dtscalibration import DataStore - cable_len = 100.0 nt = 3 time = np.arange(nt) @@ -2244,7 +1198,7 @@ def test_double_ended_fix_alpha_gamma_matching_sections_and_one_asym_att(): / (np.exp(gamma / temp_real_kelvin) - 1) ) - ds = DataStore( + ds = Dataset( { "TMPR": (["x", "time"], temp_real_celsius), "st": (["x", "time"], st), @@ -2265,7 +1219,7 @@ def test_double_ended_fix_alpha_gamma_matching_sections_and_one_asym_att(): "warm": [slice(x[nx_per_sec], x[2 * nx_per_sec - 1])], } - ds.calibration_double_ended( + out = ds.dts.calibrate_double_ended( sections=sections, st_var=1.5, ast_var=1.5, @@ -2287,12 +1241,12 @@ def test_double_ended_fix_alpha_gamma_matching_sections_and_one_asym_att(): k = ["talpha_fw", "talpha_bw", "trans_att"] for ki in k: - del ds[ki] + del out[ki] - alpha_adj = ds.alpha.values.copy() - alpha_var_adj = ds.alpha_var.values.copy() + alpha_adj = out["alpha"].values.copy() + alpha_var_adj = out["alpha_var"].values.copy() - ds.calibration_double_ended( + out = ds.dts.calibrate_double_ended( sections=sections, st_var=1.5, ast_var=1.5, @@ -2312,15 +1266,13 @@ def test_double_ended_fix_alpha_gamma_matching_sections_and_one_asym_att(): ], ) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpf.values, decimal=7) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpb.values, decimal=7) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpw.values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpf"].values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpb"].values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpw"].values, decimal=7) pass def test_double_ended_fix_gamma_matching_sections_and_one_asym_att(): - from dtscalibration import DataStore - cable_len = 100.0 nt = 3 time = np.arange(nt) @@ -2385,7 +1337,7 @@ def test_double_ended_fix_gamma_matching_sections_and_one_asym_att(): / (np.exp(gamma / temp_real_kelvin) - 1) ) - ds = DataStore( + ds = Dataset( { "TMPR": (["x", "time"], temp_real_celsius), "st": (["x", "time"], st), @@ -2406,7 +1358,7 @@ def 
test_double_ended_fix_gamma_matching_sections_and_one_asym_att(): "warm": [slice(x[nx_per_sec], x[2 * nx_per_sec - 1])], } - ds.calibration_double_ended( + out = ds.dts.calibrate_double_ended( sections=sections, st_var=1.5, ast_var=1.5, @@ -2425,9 +1377,9 @@ def test_double_ended_fix_gamma_matching_sections_and_one_asym_att(): ], ) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpf.values, decimal=7) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpb.values, decimal=7) - assert_almost_equal_verbose(temp_real_celsius, ds.tmpw.values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpf"].values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpb"].values, decimal=7) + assert_almost_equal_verbose(temp_real_celsius, out["tmpw"].values, decimal=7) pass @@ -2437,8 +1389,6 @@ def test_double_ended_fix_gamma_matching_sections_and_one_asym_att(): def test_double_ended_exponential_variance_estimate_synthetic(): import dask.array as da - from dtscalibration import DataStore - state = da.random.RandomState(0) stokes_m_var = 4.0 @@ -2499,7 +1449,7 @@ def test_double_ended_exponential_variance_estimate_synthetic(): print("C", np.log(C_p / C_m)) print("x0", x.max()) - ds = DataStore( + ds = Dataset( { # 'st': (['x', 'time'], st), # 'ast': (['x', 'time'], ast), @@ -2529,7 +1479,7 @@ def test_double_ended_exponential_variance_estimate_synthetic(): rast_label = "rast" # MC variance - ds.calibration_double_ended( + ds.dts.calibrate_double_ended( sections=sections, st_label=st_label, ast_label=ast_label, @@ -2604,8 +1554,6 @@ def test_double_ended_exponential_variance_estimate_synthetic(): def test_estimate_variance_of_temperature_estimate(): import dask.array as da - from dtscalibration import DataStore - state = da.random.RandomState(0) stokes_m_var = 0.1 @@ -2669,7 +1617,7 @@ def test_estimate_variance_of_temperature_estimate(): print("C", np.log(C_p / C_m)) print("x0", x.max()) - ds = DataStore( + ds = Dataset( { "st": (["x", "time"], st_m), "ast": (["x", "time"], ast_m), @@ -2689,7 +1637,7 @@ def test_estimate_variance_of_temperature_estimate(): "warm": [slice(0.5 * cable_len, 0.75 * cable_len)], } # MC variance - ds.calibration_double_ended( + out = ds.dts.calibrate_double_ended( sections=sections, st_var=mst_var, ast_var=mast_var, @@ -2701,15 +1649,13 @@ def test_estimate_variance_of_temperature_estimate(): solver="stats", ) - ds.conf_int_double_ended( - sections=sections, - p_val="p_val", - p_cov="p_cov", + out2 = ds.dts.monte_carlo_double_ended( + result=out, st_var=mst_var, ast_var=mast_var, rst_var=mrst_var, rast_var=mrast_var, - # conf_ints=[20., 80.], + conf_ints=[], mc_sample_size=nmc, da_random_state=state, mc_remove_set_flag=False, @@ -2717,41 +1663,41 @@ def test_estimate_variance_of_temperature_estimate(): ) assert_almost_equal_verbose( - (ds.r_st - ds.st).var(dim=["mc", "time"]), mst_var, decimal=2 + (out2["r_st"] - ds["st"]).var(dim=["mc", "time"]), mst_var, decimal=2 ) assert_almost_equal_verbose( - (ds.r_ast - ds.ast).var(dim=["mc", "time"]), mast_var, decimal=2 + (out2["r_ast"] - ds["ast"]).var(dim=["mc", "time"]), mast_var, decimal=2 ) assert_almost_equal_verbose( - (ds.r_rst - ds.rst).var(dim=["mc", "time"]), mrst_var, decimal=2 + (out2["r_rst"] - ds["rst"]).var(dim=["mc", "time"]), mrst_var, decimal=2 ) assert_almost_equal_verbose( - (ds.r_rast - ds.rast).var(dim=["mc", "time"]), mrast_var, decimal=3 + (out2["r_rast"] - ds["rast"]).var(dim=["mc", "time"]), mrast_var, decimal=3 ) - assert_almost_equal_verbose(ds.gamma_mc.var(dim="mc"), 
0.0, decimal=2) - assert_almost_equal_verbose(ds.alpha_mc.var(dim="mc"), 0.0, decimal=8) - assert_almost_equal_verbose(ds.df_mc.var(dim="mc"), ds.df_var, decimal=8) - assert_almost_equal_verbose(ds.db_mc.var(dim="mc"), ds.db_var, decimal=8) + assert_almost_equal_verbose(out2["gamma_mc"].var(dim="mc"), 0.0, decimal=2) + assert_almost_equal_verbose(out2["alpha_mc"].var(dim="mc"), 0.0, decimal=8) + assert_almost_equal_verbose(out2["df_mc"].var(dim="mc"), out["df_var"], decimal=8) + assert_almost_equal_verbose(out2["db_mc"].var(dim="mc"), out["db_var"], decimal=8) # tmpf temp_real2 = temp_real[:, 0] - 273.15 - actual = (ds.tmpf - temp_real2[:, None]).var(dim="time") - desire2 = ds.tmpf_var.mean(dim="time") + actual = (out["tmpf"] - temp_real2[:, None]).var(dim="time") + desire2 = out["tmpf_var"].mean(dim="time") # Validate on sections that were not used for calibration. assert_almost_equal_verbose(actual[16:32], desire2[16:32], decimal=2) # tmpb - actual = (ds.tmpb - temp_real2[:, None]).var(dim="time") - desire2 = ds.tmpb_var.mean(dim="time") + actual = (out["tmpb"] - temp_real2[:, None]).var(dim="time") + desire2 = out["tmpb_var"].mean(dim="time") # Validate on sections that were not used for calibration. assert_almost_equal_verbose(actual[16:32], desire2[16:32], decimal=2) # tmpw - actual = (ds.tmpw - temp_real2[:, None]).var(dim="time") - desire2 = ds.tmpw_var.mean(dim="time") + actual = (out["tmpw"] - temp_real2[:, None]).var(dim="time") + desire2 = out["tmpw_var"].mean(dim="time") assert_almost_equal_verbose(actual[16:32], desire2[16:32], decimal=3) pass @@ -2764,8 +1710,6 @@ def test_single_ended_wls_estimate_synthetic(): They should be the same as the parameters used to create the synthetic measurment set""" - from dtscalibration import DataStore - cable_len = 100.0 nt = 50 time = np.arange(nt) @@ -2804,7 +1748,7 @@ def test_single_ended_wls_estimate_synthetic(): print("C", np.log(C_p / C_m)) print("x0", x.max()) - ds = DataStore( + ds = Dataset( { "st": (["x", "time"], st), "ast": (["x", "time"], ast), @@ -2822,13 +1766,13 @@ def test_single_ended_wls_estimate_synthetic(): } # WLS - ds.calibration_single_ended( + out = ds.dts.calibrate_single_ended( sections=sections, st_var=1.0, ast_var=1.0, method="wls", solver="sparse" ) - assert_almost_equal_verbose(ds.gamma.values, gamma, decimal=6) - assert_almost_equal_verbose(ds.dalpha.values, dalpha_p - dalpha_m, decimal=8) - assert_almost_equal_verbose(ds.tmpf.values, temp_real - 273.15, decimal=4) + assert_almost_equal_verbose(out["gamma"].values, gamma, decimal=6) + assert_almost_equal_verbose(out["dalpha"].values, dalpha_p - dalpha_m, decimal=8) + assert_almost_equal_verbose(out["tmpf"].values, temp_real - 273.15, decimal=4) pass @@ -2840,8 +1784,6 @@ def test_single_ended_wls_fix_dalpha_synthetic(): They should be the same as the parameters used to create the synthetic measurment set""" - from dtscalibration import DataStore - cable_len = 100.0 nt = 50 time = np.arange(nt) @@ -2880,7 +1822,7 @@ def test_single_ended_wls_fix_dalpha_synthetic(): print("C", np.log(C_p / C_m)) print("x0", x.max()) - ds_ori = DataStore( + ds_ori = Dataset( { "st": (["x", "time"], st), "ast": (["x", "time"], ast), @@ -2899,7 +1841,7 @@ def test_single_ended_wls_fix_dalpha_synthetic(): # Test fix_dalpha ds_dalpha = ds_ori.copy() - ds_dalpha.calibration_single_ended( + out = ds_dalpha.dts.calibrate_single_ended( sections=sections, st_var=1.0, ast_var=1.0, @@ -2907,19 +1849,14 @@ def test_single_ended_wls_fix_dalpha_synthetic(): solver="sparse", 
fix_dalpha=(dalpha_p - dalpha_m, 0.0), ) - - assert_almost_equal_verbose(ds_dalpha.gamma.values, gamma, decimal=12) - assert_almost_equal_verbose( - ds_dalpha.dalpha.values, dalpha_p - dalpha_m, decimal=14 - ) - assert_almost_equal_verbose( - ds_dalpha.alpha.values, x * (dalpha_p - dalpha_m), decimal=14 - ) - assert_almost_equal_verbose(ds_dalpha.tmpf.values, temp_real - 273.15, decimal=10) + assert_almost_equal_verbose(out.gamma.values, gamma, decimal=12) + assert_almost_equal_verbose(out.dalpha.values, dalpha_p - dalpha_m, decimal=14) + assert_almost_equal_verbose(out.alpha.values, x * (dalpha_p - dalpha_m), decimal=14) + assert_almost_equal_verbose(out.tmpf.values, temp_real - 273.15, decimal=10) # Test fix_alpha ds_alpha = ds_ori.copy() - ds_alpha.calibration_single_ended( + out = ds_alpha.dts.calibrate_single_ended( sections=sections, st_var=1.0, ast_var=1.0, @@ -2928,11 +1865,9 @@ def test_single_ended_wls_fix_dalpha_synthetic(): fix_alpha=(x * (dalpha_p - dalpha_m), 0.0 * x), ) - assert_almost_equal_verbose(ds_alpha.gamma.values, gamma, decimal=12) - assert_almost_equal_verbose( - ds_dalpha.alpha.values, x * (dalpha_p - dalpha_m), decimal=14 - ) - assert_almost_equal_verbose(ds_alpha.tmpf.values, temp_real - 273.15, decimal=10) + assert_almost_equal_verbose(out.gamma.values, gamma, decimal=12) + assert_almost_equal_verbose(out.alpha.values, x * (dalpha_p - dalpha_m), decimal=14) + assert_almost_equal_verbose(out.tmpf.values, temp_real - 273.15, decimal=10) pass @@ -2944,8 +1879,6 @@ def test_single_ended_wls_fix_gamma_synthetic(): They should be the same as the parameters used to create the synthetic measurment set""" - from dtscalibration import DataStore - cable_len = 100.0 nt = 50 time = np.arange(nt) @@ -2984,7 +1917,7 @@ def test_single_ended_wls_fix_gamma_synthetic(): print("C", np.log(C_p / C_m)) print("x0", x.max()) - ds = DataStore( + ds = Dataset( { "st": (["x", "time"], st), "ast": (["x", "time"], ast), @@ -3002,7 +1935,7 @@ def test_single_ended_wls_fix_gamma_synthetic(): } # WLS - ds.calibration_single_ended( + out = ds.dts.calibrate_single_ended( sections=sections, st_var=1.0, ast_var=1.0, @@ -3011,9 +1944,9 @@ def test_single_ended_wls_fix_gamma_synthetic(): fix_gamma=(gamma, 0.0), ) - assert_almost_equal_verbose(ds.gamma.values, gamma, decimal=18) - assert_almost_equal_verbose(ds.dalpha.values, dalpha_p - dalpha_m, decimal=10) - assert_almost_equal_verbose(ds.tmpf.values, temp_real - 273.15, decimal=8) + assert_almost_equal_verbose(out.gamma.values, gamma, decimal=18) + assert_almost_equal_verbose(out.dalpha.values, dalpha_p - dalpha_m, decimal=10) + assert_almost_equal_verbose(out.tmpf.values, temp_real - 273.15, decimal=8) pass @@ -3025,8 +1958,6 @@ def test_single_ended_wls_fix_gamma_fix_dalpha_synthetic(): They should be the same as the parameters used to create the synthetic measurment set""" - from dtscalibration import DataStore - cable_len = 100.0 nt = 50 time = np.arange(nt) @@ -3065,7 +1996,7 @@ def test_single_ended_wls_fix_gamma_fix_dalpha_synthetic(): print("C", np.log(C_p / C_m)) print("x0", x.max()) - ds = DataStore( + ds = Dataset( { "st": (["x", "time"], st), "ast": (["x", "time"], ast), @@ -3083,7 +2014,7 @@ def test_single_ended_wls_fix_gamma_fix_dalpha_synthetic(): } # WLS - ds.calibration_single_ended( + out = ds.dts.calibrate_single_ended( sections=sections, st_var=1.0, ast_var=1.0, @@ -3093,9 +2024,9 @@ def test_single_ended_wls_fix_gamma_fix_dalpha_synthetic(): fix_dalpha=(dalpha_p - dalpha_m, 0.0), ) - 
assert_almost_equal_verbose(ds.gamma.values, gamma, decimal=18) - assert_almost_equal_verbose(ds.dalpha.values, dalpha_p - dalpha_m, decimal=18) - assert_almost_equal_verbose(ds.tmpf.values, temp_real - 273.15, decimal=8) + assert_almost_equal_verbose(out.gamma.values, gamma, decimal=18) + assert_almost_equal_verbose(out.dalpha.values, dalpha_p - dalpha_m, decimal=18) + assert_almost_equal_verbose(out.tmpf.values, temp_real - 273.15, decimal=8) pass @@ -3103,7 +2034,6 @@ def test_single_ended_wls_fix_gamma_fix_dalpha_synthetic(): def test_single_ended_trans_att_synthetic(): """Checks whether the transient attenuation routines perform as intended, and calibrate to the correct temperature""" - from dtscalibration import DataStore cable_len = 100.0 nt = 50 @@ -3151,7 +2081,7 @@ def test_single_ended_trans_att_synthetic(): tr_att2 = np.random.rand(nt) * 0.2 + 0.8 st[int(x.size * 0.6) :] *= tr_att2 - ds = DataStore( + ds = Dataset( { "st": (["x", "time"], st), "ast": (["x", "time"], ast), @@ -3176,81 +2106,50 @@ def test_single_ended_trans_att_synthetic(): ds_test = ds.copy(deep=True) # WLS - ds_test.calibration_single_ended( - sections=sections, - st_var=1.0, - ast_var=1.0, - method="wls", - trans_att=[40, 60], - solver="sparse", - ) - - assert_almost_equal_verbose(ds_test.gamma.values, gamma, decimal=8) - assert_almost_equal_verbose(ds_test.tmpf.values, temp_real - 273.15, decimal=8) - assert_almost_equal_verbose( - ds_test.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 - ) - assert_almost_equal_verbose( - ds_test.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 - ) - - # test `trans_att` related functions - # Clear out old results - ds_test.set_trans_att([]) - - assert ds_test.trans_att.size == 0, "clear out trans_att config" - - del_keys = [] - for k, v in ds_test.data_vars.items(): - if "trans_att" in v.dims: - del_keys.append(k) - - assert len(del_keys) == 0, "clear out trans_att config" - - ds_test.calibration_single_ended( + out = ds_test.dts.calibrate_single_ended( sections=sections, st_var=1.0, ast_var=1.0, method="wls", - trans_att=[40, 60], + trans_att=[40.0, 60.0], solver="sparse", ) - assert_almost_equal_verbose(ds_test.gamma.values, gamma, decimal=8) - assert_almost_equal_verbose(ds_test.tmpf.values, temp_real - 273.15, decimal=8) + assert_almost_equal_verbose(out.gamma.values, gamma, decimal=8) + assert_almost_equal_verbose(out.tmpf.values, temp_real - 273.15, decimal=8) assert_almost_equal_verbose( - ds_test.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 + out.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 ) assert_almost_equal_verbose( - ds_test.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 + out.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 ) ds_test = ds.copy(deep=True) # Test fixing gamma + transient att. 
- ds_test.calibration_single_ended( + out = ds_test.dts.calibrate_single_ended( sections=sections, st_var=1.0, ast_var=1.0, method="wls", fix_gamma=(482.6, 0), - trans_att=[40, 60], + trans_att=[40.0, 60.0], solver="sparse", ) - assert_almost_equal_verbose(ds_test.gamma.values, gamma, decimal=10) - assert_almost_equal_verbose(ds_test.tmpf.values, temp_real - 273.15, decimal=8) + assert_almost_equal_verbose(out.gamma.values, gamma, decimal=10) + assert_almost_equal_verbose(out.tmpf.values, temp_real - 273.15, decimal=8) assert_almost_equal_verbose( - ds_test.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 + out.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 ) assert_almost_equal_verbose( - ds_test.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 + out.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 ) ds_test = ds.copy(deep=True) # Test fixing alpha + transient att. - ds_test.calibration_single_ended( + out = ds_test.dts.calibrate_single_ended( sections=sections, st_var=1.0, ast_var=1.0, @@ -3260,20 +2159,19 @@ def test_single_ended_trans_att_synthetic(): solver="sparse", ) - assert_almost_equal_verbose(ds_test.gamma.values, gamma, decimal=8) - assert_almost_equal_verbose(ds_test.tmpf.values, temp_real - 273.15, decimal=8) + assert_almost_equal_verbose(out.gamma.values, gamma, decimal=8) + assert_almost_equal_verbose(out.tmpf.values, temp_real - 273.15, decimal=8) assert_almost_equal_verbose( - ds_test.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 + out.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 ) assert_almost_equal_verbose( - ds_test.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 + out.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 ) def test_single_ended_matching_sections_synthetic(): """Checks whether the matching sections routines perform as intended, and calibrate to the correct temperature""" - from dtscalibration import DataStore cable_len = 100.0 nt = 50 @@ -3321,7 +2219,7 @@ def test_single_ended_matching_sections_synthetic(): tr_att2 = np.random.rand(nt) * 0.2 + 0.8 st[int(x.size * 0.6) :] *= tr_att2 - ds = DataStore( + ds = Dataset( { "st": (["x", "time"], st), "ast": (["x", "time"], ast), @@ -3355,7 +2253,7 @@ def test_single_ended_matching_sections_synthetic(): ds_test = ds.copy(deep=True) # WLS - ds_test.calibration_single_ended( + out = ds_test.dts.calibrate_single_ended( sections=sections, st_var=1.0, ast_var=1.0, @@ -3365,19 +2263,19 @@ def test_single_ended_matching_sections_synthetic(): solver="sparse", ) - assert_almost_equal_verbose(ds_test.gamma.values, gamma, decimal=8) - assert_almost_equal_verbose(ds_test.tmpf.values, temp_real - 273.15, decimal=8) + assert_almost_equal_verbose(out.gamma.values, gamma, decimal=8) + assert_almost_equal_verbose(out.tmpf.values, temp_real - 273.15, decimal=8) assert_almost_equal_verbose( - ds_test.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 + out.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 ) assert_almost_equal_verbose( - ds_test.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 + out.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 ) ds_test = ds.copy(deep=True) # Test fixing gamma + transient att. 
- ds_test.calibration_single_ended( + out = ds_test.dts.calibrate_single_ended( sections=sections, st_var=1.0, ast_var=1.0, @@ -3388,19 +2286,19 @@ def test_single_ended_matching_sections_synthetic(): solver="sparse", ) - assert_almost_equal_verbose(ds_test.gamma.values, gamma, decimal=10) - assert_almost_equal_verbose(ds_test.tmpf.values, temp_real - 273.15, decimal=8) + assert_almost_equal_verbose(out.gamma.values, gamma, decimal=10) + assert_almost_equal_verbose(out.tmpf.values, temp_real - 273.15, decimal=8) assert_almost_equal_verbose( - ds_test.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 + out.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 ) assert_almost_equal_verbose( - ds_test.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 + out.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 ) ds_test = ds.copy(deep=True) # Test fixing dalpha + transient att. - ds_test.calibration_single_ended( + out = ds_test.dts.calibrate_single_ended( sections=sections, st_var=1.0, ast_var=1.0, @@ -3411,19 +2309,19 @@ def test_single_ended_matching_sections_synthetic(): solver="sparse", ) - assert_almost_equal_verbose(ds_test.gamma.values, gamma, decimal=10) - assert_almost_equal_verbose(ds_test.tmpf.values, temp_real - 273.15, decimal=8) + assert_almost_equal_verbose(out.gamma.values, gamma, decimal=10) + assert_almost_equal_verbose(out.tmpf.values, temp_real - 273.15, decimal=8) assert_almost_equal_verbose( - ds_test.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 + out.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 ) assert_almost_equal_verbose( - ds_test.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 + out.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 ) ds_test = ds.copy(deep=True) # Test fixing gamma & dalpha + transient att. - ds_test.calibration_single_ended( + out = ds_test.dts.calibrate_single_ended( sections=sections, st_var=1.0, ast_var=1.0, @@ -3435,36 +2333,35 @@ def test_single_ended_matching_sections_synthetic(): solver="sparse", ) - assert_almost_equal_verbose(ds_test.gamma.values, gamma, decimal=10) - assert_almost_equal_verbose(ds_test.tmpf.values, temp_real - 273.15, decimal=8) + assert_almost_equal_verbose(out.gamma.values, gamma, decimal=10) + assert_almost_equal_verbose(out.tmpf.values, temp_real - 273.15, decimal=8) assert_almost_equal_verbose( - ds_test.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 + out.isel(trans_att=0).talpha_fw, -np.log(tr_att), decimal=8 ) assert_almost_equal_verbose( - ds_test.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 + out.isel(trans_att=1).talpha_fw, -np.log(tr_att2), decimal=8 ) # Test conf. ints. for the combination of everything - ds_test.conf_int_single_ended( - p_val="p_val", - p_cov="p_cov", + out_conf = ds_test.dts.monte_carlo_single_ended( + result=out, st_var=1.0, ast_var=1.0, conf_ints=[2.5, 50.0, 97.5], mc_sample_size=50, ) - ds_test_1 = ds_test.isel(time=-1) + out_conf_1 = out_conf.isel(time=-1) # ds_test_1.tmpf # ds_test_1.tmpf_mc.isel(CI=0).values # ds_test_1.tmpf_mc.isel(CI=2).values assert np.all( - np.less(ds_test_1.tmpf_mc.isel(CI=0).values, ds_test_1.tmpf) + np.less(out_conf_1.tmpf_mc.isel(CI=0).values, out.isel(time=-1).tmpf) ), "Single-ended, trans. att.; 2.5% confidence interval is incorrect" assert np.all( - np.greater(ds_test_1.tmpf_mc.isel(CI=2).values, ds_test_1.tmpf) + np.greater(out_conf_1.tmpf_mc.isel(CI=2).values, out.isel(time=-1).tmpf) ), "Single-ended, trans. 
att.; 97.5% confidence interval is incorrect" @@ -3476,8 +2373,6 @@ def test_single_ended_exponential_variance_estimate_synthetic(): measurment set""" import dask.array as da - from dtscalibration import DataStore - state = da.random.RandomState(0) stokes_m_var = 40.0 @@ -3522,7 +2417,7 @@ def test_single_ended_exponential_variance_estimate_synthetic(): # print('C', np.log(C_p / C_m)) # print('x0', x.max()) - ds = DataStore( + ds = Dataset( { # 'st': (['x', 'time'], st), # 'ast': (['x', 'time'], ast), @@ -3541,33 +2436,36 @@ def test_single_ended_exponential_variance_estimate_synthetic(): "warm": [slice(0.5 * cable_len, cable_len)], } - st_label = "st" - ast_label = "ast" - - mst_var, _ = ds.variance_stokes_exponential(st_label=st_label, sections=sections) - mast_var, _ = ds.variance_stokes_exponential(st_label=ast_label, sections=sections) + mst_var, _ = variance_stokes_exponential( + ds["st"], + sections, + ds["userAcquisitionTimeFW"], + use_statsmodels=False, + suppress_info=False, + reshape_residuals=True, + ) + mast_var, _ = variance_stokes_exponential( + ds["ast"], + sections, + ds["userAcquisitionTimeFW"], + use_statsmodels=False, + suppress_info=False, + reshape_residuals=True, + ) # MC variqnce - ds.calibration_single_ended( + out = ds.dts.calibrate_single_ended( sections=sections, st_var=mst_var, ast_var=mast_var, method="wls", solver="sparse", ) - - ds.conf_int_single_ended( - p_val="p_val", - p_cov="p_cov", - st_var=mst_var, - ast_var=mast_var, - conf_ints=[2.5, 50.0, 97.5], - mc_sample_size=50, - da_random_state=state, - ) + ds2 = ds.copy() + ds2.update(out) # Calibrated variance - stdsf1 = ds.ufunc_per_section( + stdsf1 = ds2.dts.ufunc_per_section( sections=sections, label="tmpf", func=np.var, @@ -3575,12 +2473,21 @@ def test_single_ended_exponential_variance_estimate_synthetic(): calc_per="stretch", ddof=1, ) + out_ci = ds2.dts.monte_carlo_single_ended( + result=out, + st_var=mst_var, + ast_var=mast_var, + conf_ints=[2.5, 50.0, 97.5], + mc_sample_size=50, + da_random_state=state, + mc_remove_set_flag=False, + ) + ds2.update(out_ci) # Use a single timestep to better check if the parameter uncertainties # propagate - ds1 = ds.isel(time=1) # Estimated VAR - stdsf2 = ds1.ufunc_per_section( + stdsf2 = ds2.isel(time=1).dts.ufunc_per_section( sections=sections, label="tmpf_mc_var", func=np.mean, @@ -3598,7 +2505,7 @@ def test_single_ended_exponential_variance_estimate_synthetic(): print("hoi") -def test_calibrate_wls_procedures(): +def test_calibrate_wls_solver_procedures(): x = np.linspace(0, 10, 25 * 4) np.random.shuffle(x) @@ -3637,150 +2544,3 @@ def test_calibrate_wls_procedures(): np.testing.assert_array_almost_equal(p_cov, psp_cov, decimal=dec) pass - - -def test_average_measurements_single_ended(): - filepath = data_dir_single_ended - - ds_ = read_silixa_files(directory=filepath, timezone_netcdf="UTC", file_ext="*.xml") - - ds = ds_.sel(x=slice(0, 100)) # only calibrate parts of the fiber - sections = {"probe2Temperature": [slice(6.0, 14.0)]} # warm bath - - st_var, ast_var = 5.0, 5.0 - - ds.calibration_single_ended( - sections=sections, st_var=st_var, ast_var=ast_var, method="wls", solver="sparse" - ) - ds.average_single_ended( - p_val="p_val", - p_cov="p_cov", - st_var=st_var, - ast_var=ast_var, - conf_ints=[2.5, 97.5], - mc_sample_size=50, # <- choose a much larger sample size - ci_avg_x_flag1=True, - ci_avg_x_sel=slice(6.0, 14.0), - ) - ix = ds.get_section_indices(slice(6, 14)) - ds.average_single_ended( - p_val="p_val", - p_cov="p_cov", - st_var=st_var, - 
ast_var=ast_var, - conf_ints=[2.5, 97.5], - mc_sample_size=50, # <- choose a much larger sample size - ci_avg_x_flag2=True, - ci_avg_x_isel=ix, - ) - sl = slice( - np.datetime64("2018-05-04T12:22:17.710000000"), - np.datetime64("2018-05-04T12:22:47.702000000"), - ) - ds.average_single_ended( - p_val="p_val", - p_cov="p_cov", - st_var=st_var, - ast_var=ast_var, - conf_ints=[2.5, 97.5], - mc_sample_size=50, # <- choose a much larger sample size - ci_avg_time_flag1=True, - ci_avg_time_flag2=False, - ci_avg_time_sel=sl, - ) - ds.average_single_ended( - p_val="p_val", - p_cov="p_cov", - st_var=st_var, - ast_var=ast_var, - conf_ints=[2.5, 97.5], - mc_sample_size=50, # <- choose a much larger sample size - ci_avg_time_flag1=False, - ci_avg_time_flag2=True, - ci_avg_time_isel=range(3), - ) - pass - - -def test_average_measurements_double_ended(): - filepath = data_dir_double_ended2 - - ds_ = read_silixa_files(directory=filepath, timezone_netcdf="UTC", file_ext="*.xml") - - ds = ds_.sel(x=slice(0, 100)) # only calibrate parts of the fiber - sections = { - "probe1Temperature": [slice(7.5, 17.0), slice(70.0, 80.0)], # cold bath - "probe2Temperature": [slice(24.0, 34.0), slice(85.0, 95.0)], # warm bath - } - - st_var, ast_var, rst_var, rast_var = 5.0, 5.0, 5.0, 5.0 - - ds.calibration_double_ended( - sections=sections, - st_var=st_var, - ast_var=ast_var, - rst_var=rst_var, - rast_var=rast_var, - method="wls", - solver="sparse", - ) - ds.average_double_ended( - sections=sections, - p_val="p_val", - p_cov="p_cov", - st_var=st_var, - ast_var=ast_var, - rst_var=rst_var, - rast_var=rast_var, - conf_ints=[2.5, 97.5], - mc_sample_size=50, # <- choose a much larger sample size - ci_avg_x_flag1=True, - ci_avg_x_sel=slice(6, 10), - ) - ix = ds.get_section_indices(slice(6, 10)) - ds.average_double_ended( - sections=sections, - p_val="p_val", - p_cov="p_cov", - st_var=st_var, - ast_var=ast_var, - rst_var=rst_var, - rast_var=rast_var, - conf_ints=[2.5, 97.5], - mc_sample_size=50, # <- choose a much larger sample size - ci_avg_x_flag2=True, - ci_avg_x_isel=ix, - ) - sl = slice( - np.datetime64("2018-03-28T00:40:54.097000000"), - np.datetime64("2018-03-28T00:41:12.084000000"), - ) - ds.average_double_ended( - sections=sections, - p_val="p_val", - p_cov="p_cov", - st_var=st_var, - ast_var=ast_var, - rst_var=rst_var, - rast_var=rast_var, - conf_ints=[2.5, 97.5], - mc_sample_size=50, # <- choose a much larger sample size - ci_avg_time_flag1=True, - ci_avg_time_flag2=False, - ci_avg_time_sel=sl, - ) - ds.average_double_ended( - sections=sections, - p_val="p_val", - p_cov="p_cov", - st_var=st_var, - ast_var=ast_var, - rst_var=rst_var, - rast_var=rast_var, - conf_ints=[2.5, 97.5], - mc_sample_size=50, # <- choose a much larger sample size - ci_avg_time_flag1=False, - ci_avg_time_flag2=True, - ci_avg_time_isel=range(3), - ) - pass diff --git a/tests/test_examples.py b/tests/test_examples.py index 0695492e..e0efbf14 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -1,72 +1,70 @@ -import glob import os -import shutil -import subprocess -import tempfile +from collections import namedtuple +from pathlib import Path +from typing import Optional import nbformat +import pytest +from nbconvert.preprocessors import CellExecutionError +from nbconvert.preprocessors import ExecutePreprocessor +NBError = namedtuple("NBError", "title, errname, errvalue, exception") -def _notebook_run(path): - """Execute a notebook via nbconvert and collect output. 
- :returns (parsed nb object, execution errors) - """ - dirname, __ = os.path.split(path) - os.chdir(dirname) +wd = os.path.dirname(os.path.abspath(__file__)) +src_dir = os.path.join(wd, "..", "docs", "notebooks") - # Create a temporary file to write the notebook to. - # 'with' method is used so the file is closed by tempfile - # and free to be overwritten. - # with tempfile.NamedTemporaryFile('w', suffix=".ipynb") as fout: - with tempfile.NamedTemporaryFile( - "w", suffix=".nbconvert.ipynb", delete=False - ) as fout: - nbpath = fout.name - jupyter_exec = shutil.which("jupyter") +@pytest.mark.parametrize( + "src_path", Path(src_dir).glob("*.ipynb"), ids=lambda x: x.name +) +def test_docs_notebook(src_path): + print(_test_notebook(src_path, "python3")) - # recent version (~7.3.1) requires output without extension - out_path = os.path.join( - os.path.dirname(nbpath), os.path.basename(nbpath).split(".", 1)[0] - ) - args = [ - jupyter_exec, - "nbconvert", - path, - "--output", - out_path, - "--to", - "notebook", - "--execute", - "--ExecutePreprocessor.timeout=60", - ] - subprocess.check_call(args) - assert os.path.exists(nbpath), "nbconvert used different output filename" +@pytest.mark.xfail +def test_identify_not_working_docs_notebook(): + fp = Path(os.path.join(wd, "data", "docs_notebooks", "01Not_working.ipynb")) + _test_notebook(fp, "python3") - nb = nbformat.read(nbpath, nbformat.current_nbformat) - errors = [ - output - for cell in nb.cells - if "outputs" in cell - for output in cell["outputs"] - if output.output_type == "error" - ] +def _test_notebook(notebook_file, kernel) -> Optional[NBError]: + """ + Test single notebook. - # Remove the temp file once the test is done - if os.path.exists(nbpath): - os.remove(nbpath) + Parameters + ---------- + notebook_file : str + Source path for notebook + kernel : str, optional + Notebook kernel name, by default "python3" - return nb, errors + Returns + ------- + NBError + Error tuple + """ + print(f"\nTesting notebook {notebook_file.name}") + with open(notebook_file, "rb") as file_handle: + nb_bytes = file_handle.read() + nb_text = nb_bytes.decode("utf-8") + nb_content = nbformat.reads(nb_text, as_version=4) + exec_processor = ExecutePreprocessor(timeout=600, kernel_name=kernel) -def test_ipynb(): - file_ext = "*.ipynb" - wd = os.path.dirname(os.path.abspath(__file__)) - nb_dir = os.path.join(wd, "..", "examples", "notebooks", file_ext) - filepathlist = glob.glob(nb_dir) + try: + print(f"{notebook_file.name} - running notebook: ...") + exec_processor.preprocess( + nb_content, {"metadata": {"path": str(notebook_file.parent)}} + ) + + except CellExecutionError as cell_err: + msg = f"Error executing the notebook '{notebook_file.absolute()}" + print(msg) + return NBError( + f"Error while running notebook {notebook_file}", + cell_err.ename, + cell_err.evalue, + cell_err.args[0], + ) - for fp in filepathlist: - _, errors = _notebook_run(fp) - assert errors == [] + return None diff --git a/tests/test_variance_stokes.py b/tests/test_variance_stokes.py new file mode 100644 index 00000000..02f56568 --- /dev/null +++ b/tests/test_variance_stokes.py @@ -0,0 +1,1005 @@ +import os + +import numpy as np +import pytest +from scipy import stats +from xarray import Dataset + +from dtscalibration import read_silixa_files +from dtscalibration.dts_accessor import DtsAccessor # noqa: F401 +from dtscalibration.variance_stokes import variance_stokes_constant +from dtscalibration.variance_stokes import variance_stokes_exponential +from dtscalibration.variance_stokes 
import variance_stokes_linear + +state = np.random.seed(0) + +fn = [ + "channel 1_20170921112245510.xml", + "channel 1_20170921112746818.xml", + "channel 1_20170921112746818.xml", +] +fn_single = [ + "channel 2_20180504132202074.xml", + "channel 2_20180504132232903.xml", + "channel 2_20180504132303723.xml", +] + +if 1: + # working dir is tests + wd = os.path.dirname(os.path.abspath(__file__)) + data_dir_single_ended = os.path.join(wd, "data", "single_ended") + data_dir_double_ended = os.path.join(wd, "data", "double_ended") + data_dir_double_ended2 = os.path.join(wd, "data", "double_ended2") + +else: + # working dir is src + data_dir_single_ended = os.path.join("..", "..", "tests", "data", "single_ended") + data_dir_double_ended = os.path.join("..", "..", "tests", "data", "double_ended") + data_dir_double_ended2 = os.path.join("..", "..", "tests", "data", "double_ended2") + + +def assert_almost_equal_verbose(actual, desired, verbose=False, **kwargs): + """Print the actual precision decimals""" + err = np.abs(actual - desired).max() + dec = -np.ceil(np.log10(err)) + + if not (np.isfinite(dec)): + dec = 18.0 + + m = "\n>>>>>The actual precision is: " + str(float(dec)) + + if verbose: + print(m) + + desired2 = np.broadcast_to(desired, actual.shape) + np.testing.assert_almost_equal(actual, desired2, err_msg=m, **kwargs) + pass + + +@pytest.mark.slow # Execution time ~20 seconds +def test_variance_input_types_single(): + import dask.array as da + + state_da = da.random.RandomState(0) + state_np = np.random.RandomState(0) + + stokes_m_var = 40.0 + cable_len = 100.0 + nt = 500 + time = np.arange(nt) + x = np.linspace(0.0, cable_len, 100) + ts_cold = np.ones(nt) * 4.0 + ts_warm = np.ones(nt) * 20.0 + + C_p = 15246 + C_m = 2400.0 + dalpha_r = 0.005284 + dalpha_m = 0.004961 + dalpha_p = 0.005607 + gamma = 482.6 + cold_mask = x < 0.5 * cable_len + warm_mask = np.invert(cold_mask) # == False + temp_real = np.ones((len(x), nt)) + temp_real[cold_mask] *= ts_cold + 273.15 + temp_real[warm_mask] *= ts_warm + 273.15 + + st = ( + C_p + * np.exp(-dalpha_r * x[:, None]) + * np.exp(-dalpha_p * x[:, None]) + * np.exp(-gamma / temp_real) + / (1 - np.exp(-gamma / temp_real)) + ) + ast = ( + C_m + * np.exp(-dalpha_r * x[:, None]) + * np.exp(-dalpha_m * x[:, None]) + / (1 - np.exp(-gamma / temp_real)) + ) + + st_m = st + stats.norm.rvs( + size=st.shape, scale=stokes_m_var**0.5, random_state=state_np + ) + ast_m = ast + stats.norm.rvs( + size=ast.shape, scale=1.1 * stokes_m_var**0.5, random_state=state_np + ) + + print("alphaint", cable_len * (dalpha_p - dalpha_m)) + print("alpha", dalpha_p - dalpha_m) + print("C", np.log(C_p / C_m)) + print("x0", x.max()) + + ds = Dataset( + { + "st": (["x", "time"], st_m), + "ast": (["x", "time"], ast_m), + "userAcquisitionTimeFW": (["time"], np.ones(nt)), + "cold": (["time"], ts_cold), + "warm": (["time"], ts_warm), + }, + coords={"x": x, "time": time}, + attrs={"isDoubleEnded": "0"}, + ) + + sections = { + "cold": [slice(0.0, 0.4 * cable_len)], + "warm": [slice(0.6 * cable_len, cable_len)], + } + + # Test float input + st_var = 5.0 + + out = ds.dts.calibrate_single_ended( + sections=sections, st_var=st_var, ast_var=st_var, method="wls", solver="sparse" + ) + + out2 = ds.dts.monte_carlo_single_ended( + result=out, + st_var=st_var, + ast_var=st_var, + mc_sample_size=100, + da_random_state=state_da, + mc_remove_set_flag=False, + ) + + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(x=slice(0, 10)).mean(), 0.044361, decimal=2 + ) + assert_almost_equal_verbose( + 
out2["tmpf_mc_var"].sel(x=slice(90, 100)).mean(), 0.242028, decimal=2 + ) + + # Test callable input + def callable_st_var(stokes): + slope = 0.01 + offset = 0 + return slope * stokes + offset + + out = ds.dts.calibrate_single_ended( + sections=sections, + st_var=callable_st_var, + ast_var=callable_st_var, + method="wls", + solver="sparse", + ) + + out2 = ds.dts.monte_carlo_single_ended( + result=out, + st_var=callable_st_var, + ast_var=callable_st_var, + mc_sample_size=100, + da_random_state=state_da, + ) + + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(x=slice(0, 10)).mean(), 0.184753, decimal=2 + ) + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(x=slice(90, 100)).mean(), 0.545186, decimal=2 + ) + + # Test input with shape of (ntime, nx) + st_var = ds.st.values * 0 + 20.0 + out = ds.dts.calibrate_single_ended( + sections=sections, st_var=st_var, ast_var=st_var, method="wls", solver="sparse" + ) + + out2 = ds.dts.monte_carlo_single_ended( + result=out, + st_var=st_var, + ast_var=st_var, + mc_sample_size=100, + da_random_state=state, + ) + + assert_almost_equal_verbose(out2["tmpf_mc_var"].mean(), 0.418098, decimal=2) + + # Test input with shape (nx, 1) + st_var = np.vstack( + ds.st.mean(dim="time").values * 0 + np.linspace(10, 50, num=ds.st.x.size) + ) + + out = ds.dts.calibrate_single_ended( + sections=sections, st_var=st_var, ast_var=st_var, method="wls", solver="sparse" + ) + + out2 = ds.dts.monte_carlo_single_ended( + result=out, + st_var=st_var, + ast_var=st_var, + mc_sample_size=100, + da_random_state=state, + ) + + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(x=slice(0, 50)).mean().values, 0.2377, decimal=2 + ) + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(x=slice(50, 100)).mean().values, 1.3203, decimal=2 + ) + + # Test input with shape (ntime) + st_var = ds.st.mean(dim="x").values * 0 + np.linspace(5, 200, num=nt) + + out = ds.dts.calibrate_single_ended( + sections=sections, st_var=st_var, ast_var=st_var, method="wls", solver="sparse" + ) + + out2 = ds.dts.monte_carlo_single_ended( + result=out, + st_var=st_var, + ast_var=st_var, + mc_sample_size=100, + da_random_state=state, + ) + + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(time=slice(0, nt // 2)).mean().values, 1.0908, decimal=2 + ) + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(time=slice(nt // 2, None)).mean().values, + 3.0759, + decimal=2, + ) + + pass + + +@pytest.mark.slow # Execution time ~0.5 minute +def test_variance_input_types_double(): + state = np.random.seed(0) + + stokes_m_var = 40.0 + cable_len = 100.0 + nt = 500 + time = np.arange(nt) + x = np.linspace(0.0, cable_len, 100) + ts_cold = np.ones(nt) * 4.0 + ts_warm = np.ones(nt) * 20.0 + + C_p = 15246 + C_m = 2400.0 + dalpha_r = 0.005284 + dalpha_m = 0.004961 + dalpha_p = 0.005607 + gamma = 482.6 + cold_mask = x < 0.5 * cable_len + warm_mask = np.invert(cold_mask) # == False + temp_real = np.ones((len(x), nt)) + temp_real[cold_mask] *= ts_cold + 273.15 + temp_real[warm_mask] *= ts_warm + 273.15 + + st = ( + C_p + * np.exp(-dalpha_r * x[:, None]) + * np.exp(-dalpha_p * x[:, None]) + * np.exp(-gamma / temp_real) + / (1 - np.exp(-gamma / temp_real)) + ) + ast = ( + C_m + * np.exp(-dalpha_r * x[:, None]) + * np.exp(-dalpha_m * x[:, None]) + / (1 - np.exp(-gamma / temp_real)) + ) + rst = ( + C_p + * np.exp(-dalpha_r * (-x[:, None] + 100)) + * np.exp(-dalpha_p * (-x[:, None] + 100)) + * np.exp(-gamma / temp_real) + / (1 - np.exp(-gamma / temp_real)) + ) + rast = ( + C_m + * np.exp(-dalpha_r * (-x[:, None] + 
100)) + * np.exp(-dalpha_m * (-x[:, None] + 100)) + / (1 - np.exp(-gamma / temp_real)) + ) + + st_m = st + stats.norm.rvs( + size=st.shape, scale=stokes_m_var**0.5, random_state=state + ) + ast_m = ast + stats.norm.rvs( + size=ast.shape, scale=1.1 * stokes_m_var**0.5, random_state=state + ) + rst_m = rst + stats.norm.rvs( + size=rst.shape, scale=0.9 * stokes_m_var**0.5, random_state=state + ) + rast_m = rast + stats.norm.rvs( + size=rast.shape, scale=0.8 * stokes_m_var**0.5, random_state=state + ) + + print("alphaint", cable_len * (dalpha_p - dalpha_m)) + print("alpha", dalpha_p - dalpha_m) + print("C", np.log(C_p / C_m)) + print("x0", x.max()) + + ds = Dataset( + { + "st": (["x", "time"], st_m), + "ast": (["x", "time"], ast_m), + "rst": (["x", "time"], rst_m), + "rast": (["x", "time"], rast_m), + "userAcquisitionTimeFW": (["time"], np.ones(nt)), + "userAcquisitionTimeBW": (["time"], np.ones(nt)), + "cold": (["time"], ts_cold), + "warm": (["time"], ts_warm), + }, + coords={"x": x, "time": time}, + attrs={"isDoubleEnded": "1"}, + ) + + sections = { + "cold": [slice(0.0, 0.4 * cable_len)], + "warm": [slice(0.6 * cable_len, cable_len)], + } + + # Test float input + st_var = 5.0 + + out = ds.dts.calibrate_double_ended( + sections=sections, + st_var=st_var, + ast_var=st_var, + rst_var=st_var, + rast_var=st_var, + method="wls", + solver="sparse", + ) + + out2 = ds.dts.monte_carlo_double_ended( + result=out, + st_var=st_var, + ast_var=st_var, + rst_var=st_var, + rast_var=st_var, + conf_ints=[], + mc_sample_size=100, + da_random_state=state, + ) + + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(x=slice(0, 10)).mean(), 0.03584935, decimal=2 + ) + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(x=slice(90, 100)).mean(), 0.22982146, decimal=2 + ) + + # Test callable input + def st_var_callable(stokes): + slope = 0.01 + offset = 0 + return slope * stokes + offset + + out = ds.dts.calibrate_double_ended( + sections=sections, + st_var=st_var_callable, + ast_var=st_var_callable, + rst_var=st_var_callable, + rast_var=st_var_callable, + method="wls", + solver="sparse", + ) + + out2 = ds.dts.monte_carlo_double_ended( + result=out, + st_var=st_var_callable, + ast_var=st_var_callable, + rst_var=st_var_callable, + rast_var=st_var_callable, + conf_ints=[], + mc_sample_size=100, + da_random_state=state, + ) + + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(x=slice(0, 10)).mean(), 0.18058514, decimal=2 + ) + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(x=slice(90, 100)).mean(), 0.53862813, decimal=2 + ) + + # Test input with shape of (ntime, nx) + st_var = ds.st.values * 0 + 20.0 + + out = ds.dts.calibrate_double_ended( + sections=sections, + st_var=st_var, + ast_var=st_var, + rst_var=st_var, + rast_var=st_var, + method="wls", + solver="sparse", + ) + + out2 = ds.dts.monte_carlo_double_ended( + result=out, + st_var=st_var, + ast_var=st_var, + rst_var=st_var, + rast_var=st_var, + conf_ints=[], + mc_sample_size=100, + da_random_state=state, + ) + + assert_almost_equal_verbose(out2["tmpf_mc_var"].mean(), 0.40725674, decimal=2) + + # Test input with shape (nx, 1) + st_var = np.vstack( + ds.st.mean(dim="time").values * 0 + np.linspace(10, 50, num=ds.st.x.size) + ) + + out = ds.dts.calibrate_double_ended( + sections=sections, + st_var=st_var, + ast_var=st_var, + rst_var=st_var, + rast_var=st_var, + method="wls", + solver="sparse", + ) + + out2 = ds.dts.monte_carlo_double_ended( + result=out, + st_var=st_var, + ast_var=st_var, + rst_var=st_var, + rast_var=st_var, + conf_ints=[], + 
mc_sample_size=100, + da_random_state=state, + ) + + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(x=slice(0, 50)).mean().values, 0.21163704, decimal=2 + ) + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(x=slice(50, 100)).mean().values, 1.28247762, decimal=2 + ) + + # Test input with shape (ntime) + st_var = ds.st.mean(dim="x").values * 0 + np.linspace(5, 200, num=nt) + + out = ds.dts.calibrate_double_ended( + sections=sections, + st_var=st_var, + ast_var=st_var, + rst_var=st_var, + rast_var=st_var, + method="wls", + solver="sparse", + ) + + out2 = ds.dts.monte_carlo_double_ended( + result=out, + st_var=st_var, + ast_var=st_var, + rst_var=st_var, + rast_var=st_var, + conf_ints=[], + mc_sample_size=100, + da_random_state=state, + ) + + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(time=slice(0, nt // 2)).mean().values, 1.090, decimal=2 + ) + assert_almost_equal_verbose( + out2["tmpf_mc_var"].sel(time=slice(nt // 2, None)).mean().values, + 3.06, + decimal=2, + ) + + pass + + +@pytest.mark.slow # Execution time ~0.5 minute +def test_double_ended_variance_estimate_synthetic(): + import dask.array as da + + state = da.random.RandomState(0) + + stokes_m_var = 40.0 + cable_len = 100.0 + nt = 500 + time = np.arange(nt) + x = np.linspace(0.0, cable_len, 100) + ts_cold = np.ones(nt) * 4.0 + ts_warm = np.ones(nt) * 20.0 + + C_p = 15246 + C_m = 2400.0 + dalpha_r = 0.0005284 + dalpha_m = 0.0004961 + dalpha_p = 0.0005607 + gamma = 482.6 + cold_mask = x < 0.5 * cable_len + warm_mask = np.invert(cold_mask) # == False + temp_real = np.ones((len(x), nt)) + temp_real[cold_mask] *= ts_cold + 273.15 + temp_real[warm_mask] *= ts_warm + 273.15 + + st = ( + C_p + * np.exp(-dalpha_r * x[:, None]) + * np.exp(-dalpha_p * x[:, None]) + * np.exp(-gamma / temp_real) + / (1 - np.exp(-gamma / temp_real)) + ) + ast = ( + C_m + * np.exp(-dalpha_r * x[:, None]) + * np.exp(-dalpha_m * x[:, None]) + / (1 - np.exp(-gamma / temp_real)) + ) + rst = ( + C_p + * np.exp(-dalpha_r * (-x[:, None] + 100)) + * np.exp(-dalpha_p * (-x[:, None] + 100)) + * np.exp(-gamma / temp_real) + / (1 - np.exp(-gamma / temp_real)) + ) + rast = ( + C_m + * np.exp(-dalpha_r * (-x[:, None] + 100)) + * np.exp(-dalpha_m * (-x[:, None] + 100)) + / (1 - np.exp(-gamma / temp_real)) + ) + + st_m = st + stats.norm.rvs(size=st.shape, scale=stokes_m_var**0.5) + ast_m = ast + stats.norm.rvs(size=ast.shape, scale=1.1 * stokes_m_var**0.5) + rst_m = rst + stats.norm.rvs(size=rst.shape, scale=0.9 * stokes_m_var**0.5) + rast_m = rast + stats.norm.rvs(size=rast.shape, scale=0.8 * stokes_m_var**0.5) + + print("alphaint", cable_len * (dalpha_p - dalpha_m)) + print("alpha", dalpha_p - dalpha_m) + print("C", np.log(C_p / C_m)) + print("x0", x.max()) + + ds = Dataset( + { + "st": (["x", "time"], st_m), + "ast": (["x", "time"], ast_m), + "rst": (["x", "time"], rst_m), + "rast": (["x", "time"], rast_m), + "userAcquisitionTimeFW": (["time"], np.ones(nt)), + "userAcquisitionTimeBW": (["time"], np.ones(nt)), + "cold": (["time"], ts_cold), + "warm": (["time"], ts_warm), + }, + coords={"x": x, "time": time}, + attrs={"isDoubleEnded": "1"}, + ) + + sections = { + "cold": [slice(0.0, 0.5 * cable_len)], + "warm": [slice(0.5 * cable_len, cable_len)], + } + + mst_var, _ = variance_stokes_constant( + ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False + ) + mast_var, _ = variance_stokes_constant( + ds.dts.ast, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False + ) + mrst_var, _ = variance_stokes_constant( + ds.dts.rst, sections, 
ds.dts.acquisitiontime_bw, reshape_residuals=False + ) + mrast_var, _ = variance_stokes_constant( + ds.dts.rast, sections, ds.dts.acquisitiontime_bw, reshape_residuals=False + ) + + mst_var = float(mst_var) + mast_var = float(mast_var) + mrst_var = float(mrst_var) + mrast_var = float(mrast_var) + + # MC variance + out = ds.dts.calibrate_double_ended( + sections=sections, + st_var=mst_var, + ast_var=mast_var, + rst_var=mrst_var, + rast_var=mrast_var, + method="wls", + solver="sparse", + ) + out["cold"] = ds.cold + out["warm"] = ds.warm + + assert_almost_equal_verbose(out["tmpf"].mean(), 12.0, decimal=2) + assert_almost_equal_verbose(out["tmpb"].mean(), 12.0, decimal=3) + + # Calibrated variance + stdsf1 = out.dts.ufunc_per_section( + sections=sections, + label="tmpf", + func=np.std, + temp_err=True, + calc_per="stretch", + suppress_section_validation=True, + ) + stdsb1 = out.dts.ufunc_per_section( + sections=sections, + label="tmpb", + func=np.std, + temp_err=True, + calc_per="stretch", + suppress_section_validation=True, + ) + + out2 = ds.dts.monte_carlo_double_ended( + result=out, + st_var=mst_var, + ast_var=mast_var, + rst_var=mrst_var, + rast_var=mrast_var, + conf_ints=[2.5, 50.0, 97.5], + mc_sample_size=100, + da_random_state=state, + ) + + out2["cold"] = ds.cold + out2["warm"] = ds.warm + + # Use a single timestep to better check if the parameter uncertainties propagate + ds1 = out2.isel(time=1) + # Estimated VAR + stdsf2 = ds1.dts.ufunc_per_section( + sections=sections, + label="tmpf_mc_var", + func=np.mean, + temp_err=False, + calc_per="stretch", + suppress_section_validation=True, + ) + stdsb2 = ds1.dts.ufunc_per_section( + sections=sections, + label="tmpb_mc_var", + func=np.mean, + temp_err=False, + calc_per="stretch", + suppress_section_validation=True, + ) + + for (_, v1), (_, v2) in zip(stdsf1.items(), stdsf2.items()): + for v1i, v2i in zip(v1, v2): + print("Real VAR: ", v1i**2, "Estimated VAR: ", float(v2i)) + assert_almost_equal_verbose(v1i**2, v2i, decimal=2) + + for (_, v1), (_, v2) in zip(stdsb1.items(), stdsb2.items()): + for v1i, v2i in zip(v1, v2): + print("Real VAR: ", v1i**2, "Estimated VAR: ", float(v2i)) + assert_almost_equal_verbose(v1i**2, v2i, decimal=2) + + pass + + +def test_single_ended_variance_estimate_synthetic(): + import dask.array as da + + state = da.random.RandomState(0) + + stokes_m_var = 40.0 + astokes_m_var = 60.0 + cable_len = 100.0 + nt = 50 + time = np.arange(nt) + x = np.linspace(0.0, cable_len, 500) + ts_cold = np.ones(nt) * 4.0 + ts_warm = np.ones(nt) * 20.0 + + C_p = 15246 + C_m = 2400.0 + dalpha_r = 0.0005284 + dalpha_m = 0.0004961 + dalpha_p = 0.0005607 + gamma = 482.6 + cold_mask = x < 0.5 * cable_len + warm_mask = np.invert(cold_mask) # == False + temp_real = np.ones((len(x), nt)) + temp_real[cold_mask] *= ts_cold + 273.15 + temp_real[warm_mask] *= ts_warm + 273.15 + + st = ( + C_p + * np.exp(-dalpha_r * x[:, None]) + * np.exp(-dalpha_p * x[:, None]) + * np.exp(-gamma / temp_real) + / (1 - np.exp(-gamma / temp_real)) + ) + ast = ( + C_m + * np.exp(-dalpha_r * x[:, None]) + * np.exp(-dalpha_m * x[:, None]) + / (1 - np.exp(-gamma / temp_real)) + ) + st_m = st + stats.norm.rvs(size=st.shape, scale=stokes_m_var**0.5) + ast_m = ast + stats.norm.rvs(size=ast.shape, scale=astokes_m_var**0.5) + + print("alphaint", cable_len * (dalpha_p - dalpha_m)) + print("alpha", dalpha_p - dalpha_m) + print("C", np.log(C_p / C_m)) + print("x0", x.max()) + + ds = Dataset( + { + "st": (["x", "time"], st_m), + "ast": (["x", "time"], ast_m), + 
"userAcquisitionTimeFW": (["time"], np.ones(nt)), + "cold": (["time"], ts_cold), + "warm": (["time"], ts_warm), + }, + coords={"x": x, "time": time}, + attrs={"isDoubleEnded": "0"}, + ) + + sections = { + "cold": [slice(0.0, 0.5 * cable_len)], + "warm": [slice(0.5 * cable_len, cable_len)], + } + + mst_var, _ = variance_stokes_constant( + ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False + ) + mast_var, _ = variance_stokes_constant( + ds.dts.ast, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False + ) + # mrst_var, _ = variance_stokes_constant(ds.dts.rst, sections, ds.dts.acquisitiontime_bw, reshape_residuals=False) + # mrast_var, _ = variance_stokes_constant(ds.dts.rast, sections, ds.dts.acquisitiontime_bw, reshape_residuals=False) + mst_var = float(mst_var) + mast_var = float(mast_var) + + # MC variqnce + out = ds.dts.calibrate_single_ended( + sections=sections, + st_var=mst_var, + ast_var=mast_var, + method="wls", + solver="sparse", + ) + out["cold"] = ds.cold + out["warm"] = ds.warm + + out2 = ds.dts.monte_carlo_single_ended( + result=out, + st_var=mst_var, + ast_var=mast_var, + conf_ints=[2.5, 50.0, 97.5], + mc_sample_size=50, + da_random_state=state, + ) + out2["cold"] = ds.cold + out2["warm"] = ds.warm + + # Calibrated variance + stdsf1 = out.dts.ufunc_per_section( + sections=sections, + label="tmpf", + func=np.std, + temp_err=True, + calc_per="stretch", + ddof=1, + ) + + # Use a single timestep to better check if the parameter uncertainties propagate + ds1 = out2.isel(time=1) + # Estimated VAR + stdsf2 = ds1.dts.ufunc_per_section( + sections=sections, + label="tmpf_mc_var", + func=np.mean, + temp_err=False, + calc_per="stretch", + ) + + for (_, v1), (_, v2) in zip(stdsf1.items(), stdsf2.items()): + for v1i, v2i in zip(v1, v2): + print("Real VAR: ", v1i**2, "Estimated VAR: ", float(v2i)) + assert_almost_equal_verbose(v1i**2, v2i, decimal=2) + + pass + + +@pytest.mark.skip(reason="Not enough measurements in time. Use exponential instead.") +def test_variance_of_stokes(): + correct_var = 9.045 + filepath = data_dir_double_ended2 + ds = read_silixa_files(directory=filepath, timezone_netcdf="UTC", file_ext="*.xml") + sections = { + "probe1Temperature": [slice(7.5, 17.0), slice(70.0, 80.0)], # cold bath + "probe2Temperature": [slice(24.0, 34.0), slice(85.0, 95.0)], # warm bath + } + + I_var, _ = variance_stokes_constant(st=ds["st"], sections=sections) + assert_almost_equal_verbose(I_var, correct_var, decimal=1) + + ds_dask = ds.chunk(chunks={}) + I_var, _ = variance_stokes_constant(st=ds_dask["st"], sections=sections) + assert_almost_equal_verbose(I_var, correct_var, decimal=1) + + pass + + +def test_variance_of_stokes_synthetic(): + """ + Produces a synthetic Stokes measurement with a known noise distribution. Check if same + variance is obtained. 
+ + Returns + ------- + + """ + yvar = 5.0 + + nx = 500 + x = np.linspace(0.0, 20.0, nx) + + nt = 200 + G = np.linspace(3000, 4000, nt)[None] + + y = G * np.exp(-0.001 * x[:, None]) + + y += stats.norm.rvs(size=y.size, scale=yvar**0.5).reshape(y.shape) + + ds = Dataset( + { + "st": (["x", "time"], y), + "probe1Temperature": (["time"], range(nt)), + "userAcquisitionTimeFW": (["time"], np.ones(nt)), + }, + coords={"x": x, "time": range(nt)}, + attrs={"isDoubleEnded": "0"}, + ) + + sections = {"probe1Temperature": [slice(0.0, 20.0)]} + test_st_var, _ = variance_stokes_constant( + ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False + ) + + assert_almost_equal_verbose(test_st_var, yvar, decimal=1) + pass + + +@pytest.mark.slow # Execution time ~20 seconds +def test_variance_of_stokes_linear_synthetic(): + """ + Produces a synthetic Stokes measurement with a known noise distribution. + Check if same variance is obtained. + + Returns + ------- + + """ + var_slope = 0.01 + + nx = 500 + x = np.linspace(0.0, 20.0, nx) + + nt = 200 + G = np.linspace(500, 4000, nt)[None] + c_no_noise = G * np.exp(-0.001 * x[:, None]) + + c_lin_var_through_zero = stats.norm.rvs( + loc=c_no_noise, + # size=y.size, + scale=(var_slope * c_no_noise) ** 0.5, + ) + ds = Dataset( + { + "st": (["x", "time"], c_no_noise), + "c_lin_var_through_zero": (["x", "time"], c_lin_var_through_zero), + "probe1Temperature": (["time"], range(nt)), + "userAcquisitionTimeFW": (["time"], np.ones(nt)), + }, + coords={"x": x, "time": range(nt)}, + attrs={"isDoubleEnded": "0"}, + ) + + sections = {"probe1Temperature": [slice(0.0, 20.0)]} + test_st_var, _ = variance_stokes_constant( + ds.dts.st, sections, ds.dts.acquisitiontime_fw, reshape_residuals=False + ) + + # If fit is forced through zero. Only Poisson distributed noise + ( + slope, + offset, + st_sort_mean, + st_sort_var, + resid, + var_fun, + ) = variance_stokes_linear( + st=ds["c_lin_var_through_zero"], + sections=sections, + acquisitiontime=ds.dts.acquisitiontime_fw, + nbin=10, + through_zero=True, + plot_fit=False, + ) + assert_almost_equal_verbose(slope, var_slope, decimal=3) + + # Fit accounts for Poisson noise plus white noise + ( + slope, + offset, + st_sort_mean, + st_sort_var, + resid, + var_fun, + ) = variance_stokes_linear( + st=ds["c_lin_var_through_zero"], + sections=sections, + acquisitiontime=ds.dts.acquisitiontime_fw, + nbin=100, + through_zero=False, + ) + assert_almost_equal_verbose(slope, var_slope, decimal=3) + assert_almost_equal_verbose(offset, 0.0, decimal=0) + + pass + + +@pytest.mark.slow # Execution time ~20 seconds +def test_exponential_variance_of_stokes(): + correct_var = 11.86535 + filepath = data_dir_double_ended2 + ds = read_silixa_files(directory=filepath, timezone_netcdf="UTC", file_ext="*.xml") + sections = { + "probe1Temperature": [slice(7.5, 17.0), slice(70.0, 80.0)], # cold bath + "probe2Temperature": [slice(24.0, 34.0), slice(85.0, 95.0)], # warm bath + } + + I_var, _ = variance_stokes_exponential( + st=ds["st"], sections=sections, acquisitiontime=ds.dts.acquisitiontime_fw + ) + assert_almost_equal_verbose(I_var, correct_var, decimal=5) + + ds_dask = ds.chunk(chunks={}) + I_var, _ = variance_stokes_exponential( + st=ds_dask["st"], sections=sections, acquisitiontime=ds.dts.acquisitiontime_fw + ) + assert_almost_equal_verbose(I_var, correct_var, decimal=5) + + pass + + +def test_exponential_variance_of_stokes_synthetic(): + """ + Produces a synthetic Stokes measurement with a known noise distribution. 
Check if same + variance is obtained. + + Returns + ------- + + """ + yvar = 5.0 + + nx = 500 + x = np.linspace(0.0, 20.0, nx) + + nt = 200 + beta = np.linspace(3000, 4000, nt)[None] + + y = beta * np.exp(-0.001 * x[:, None]) + + y += stats.norm.rvs(size=y.size, scale=yvar**0.5).reshape(y.shape) + + ds = Dataset( + { + "st": (["x", "time"], y), + "probe1Temperature": (["time"], range(nt)), + "userAcquisitionTimeFW": (["time"], np.ones(nt)), + }, + coords={"x": x, "time": range(nt)}, + attrs={"isDoubleEnded": "0"}, + ) + + sections = {"probe1Temperature": [slice(0.0, 20.0)]} + test_st_var, _ = variance_stokes_exponential( + st=ds["st"], sections=sections, acquisitiontime=ds.dts.acquisitiontime_fw + ) + + assert_almost_equal_verbose(test_st_var, yvar, decimal=1) + pass
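Below is a minimal sketch (not part of the patch itself) of the accessor-based workflow that the rewritten tests above exercise: construct a plain xarray Dataset, calibrate through the ds.dts accessor, and propagate uncertainty with the Monte Carlo routine. The accessor and function names are taken from the diff; the random Stokes placeholders and the numeric settings are illustrative assumptions only, so the resulting temperatures are not physically meaningful.

import numpy as np
from xarray import Dataset

from dtscalibration.dts_accessor import DtsAccessor  # noqa: F401  (registers the ds.dts accessor)

nt, nx = 5, 100
x = np.linspace(0.0, 100.0, nx)

# Placeholder (positive) Stokes/anti-Stokes signals; the tests above build
# physically consistent synthetic signals instead.
st = np.random.rand(nx, nt) + 1.0
ast = np.random.rand(nx, nt) + 1.0

ds = Dataset(
    {
        "st": (["x", "time"], st),
        "ast": (["x", "time"], ast),
        "userAcquisitionTimeFW": (["time"], np.ones(nt)),
        "cold": (["time"], np.full(nt, 4.0)),
        "warm": (["time"], np.full(nt, 20.0)),
    },
    coords={"x": x, "time": np.arange(nt)},
    attrs={"isDoubleEnded": "0"},
)
sections = {"cold": [slice(0.0, 40.0)], "warm": [slice(60.0, 100.0)]}

# Calibration returns a result Dataset ("out") instead of mutating ds in place.
out = ds.dts.calibrate_single_ended(
    sections=sections, st_var=1.0, ast_var=1.0, method="wls", solver="sparse"
)

# Confidence intervals and variances are derived from the calibration result.
out_ci = ds.dts.monte_carlo_single_ended(
    result=out,
    st_var=1.0,
    ast_var=1.0,
    conf_ints=[2.5, 50.0, 97.5],
    mc_sample_size=50,
)
print(out["tmpf"].dims, out_ci["tmpf_mc_var"].dims)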