diff --git a/.github/workflows/python-publish-dry-run.yml b/.github/workflows/python-publish-dry-run.yml index 74f2b3a0..66c2d5de 100644 --- a/.github/workflows/python-publish-dry-run.yml +++ b/.github/workflows/python-publish-dry-run.yml @@ -34,5 +34,5 @@ jobs: pip install build - name: Build package run: python -m build - - name: Test build + - name: Check long description for PyPi run: twine check --strict dist/* diff --git a/.gitignore b/.gitignore index b8f681da..f06bd184 100644 --- a/.gitignore +++ b/.gitignore @@ -72,3 +72,4 @@ docs/_build .appveyor.token *.bak .vscode/settings.json +*.code-workspace diff --git a/docs/api/dtscalibration.DataStore.rst b/docs/api/dtscalibration.DataStore.rst new file mode 100644 index 00000000..8ed756d9 --- /dev/null +++ b/docs/api/dtscalibration.DataStore.rst @@ -0,0 +1,7 @@ +DataStore +========= + +.. currentmodule:: dtscalibration + +.. autoclass:: DataStore + :show-inheritance: diff --git a/docs/api/dtscalibration.check_dims.rst b/docs/api/dtscalibration.check_dims.rst new file mode 100644 index 00000000..1dc68e7e --- /dev/null +++ b/docs/api/dtscalibration.check_dims.rst @@ -0,0 +1,6 @@ +check_dims +========== + +.. currentmodule:: dtscalibration + +.. autofunction:: check_dims diff --git a/docs/api/dtscalibration.check_timestep_allclose.rst b/docs/api/dtscalibration.check_timestep_allclose.rst new file mode 100644 index 00000000..d655bfe4 --- /dev/null +++ b/docs/api/dtscalibration.check_timestep_allclose.rst @@ -0,0 +1,6 @@ +check_timestep_allclose +======================= + +.. currentmodule:: dtscalibration + +.. autofunction:: check_timestep_allclose diff --git a/docs/api/dtscalibration.get_netcdf_encoding.rst b/docs/api/dtscalibration.get_netcdf_encoding.rst new file mode 100644 index 00000000..820b0a0f --- /dev/null +++ b/docs/api/dtscalibration.get_netcdf_encoding.rst @@ -0,0 +1,6 @@ +get_netcdf_encoding +=================== + +.. currentmodule:: dtscalibration + +.. autofunction:: get_netcdf_encoding diff --git a/docs/api/dtscalibration.merge_double_ended.rst b/docs/api/dtscalibration.merge_double_ended.rst new file mode 100644 index 00000000..c8e512b5 --- /dev/null +++ b/docs/api/dtscalibration.merge_double_ended.rst @@ -0,0 +1,6 @@ +merge_double_ended +================== + +.. currentmodule:: dtscalibration + +.. autofunction:: merge_double_ended diff --git a/docs/api/dtscalibration.open_datastore.rst b/docs/api/dtscalibration.open_datastore.rst new file mode 100644 index 00000000..95a987bd --- /dev/null +++ b/docs/api/dtscalibration.open_datastore.rst @@ -0,0 +1,6 @@ +open_datastore +============== + +.. currentmodule:: dtscalibration + +.. autofunction:: open_datastore diff --git a/docs/api/dtscalibration.open_mf_datastore.rst b/docs/api/dtscalibration.open_mf_datastore.rst new file mode 100644 index 00000000..edab4ebe --- /dev/null +++ b/docs/api/dtscalibration.open_mf_datastore.rst @@ -0,0 +1,6 @@ +open_mf_datastore +================= + +.. currentmodule:: dtscalibration + +.. autofunction:: open_mf_datastore diff --git a/docs/api/dtscalibration.plot_accuracy.rst b/docs/api/dtscalibration.plot_accuracy.rst new file mode 100644 index 00000000..a2e41fc2 --- /dev/null +++ b/docs/api/dtscalibration.plot_accuracy.rst @@ -0,0 +1,6 @@ +plot_accuracy +============= + +.. currentmodule:: dtscalibration + +.. 
autofunction:: plot_accuracy diff --git a/docs/api/dtscalibration.plot_location_residuals_double_ended.rst b/docs/api/dtscalibration.plot_location_residuals_double_ended.rst new file mode 100644 index 00000000..ad0d16db --- /dev/null +++ b/docs/api/dtscalibration.plot_location_residuals_double_ended.rst @@ -0,0 +1,6 @@ +plot_location_residuals_double_ended +==================================== + +.. currentmodule:: dtscalibration + +.. autofunction:: plot_location_residuals_double_ended diff --git a/docs/api/dtscalibration.plot_residuals_reference_sections.rst b/docs/api/dtscalibration.plot_residuals_reference_sections.rst new file mode 100644 index 00000000..45f2529b --- /dev/null +++ b/docs/api/dtscalibration.plot_residuals_reference_sections.rst @@ -0,0 +1,6 @@ +plot_residuals_reference_sections +================================= + +.. currentmodule:: dtscalibration + +.. autofunction:: plot_residuals_reference_sections diff --git a/docs/api/dtscalibration.plot_residuals_reference_sections_single.rst b/docs/api/dtscalibration.plot_residuals_reference_sections_single.rst new file mode 100644 index 00000000..725d77a0 --- /dev/null +++ b/docs/api/dtscalibration.plot_residuals_reference_sections_single.rst @@ -0,0 +1,6 @@ +plot_residuals_reference_sections_single +======================================== + +.. currentmodule:: dtscalibration + +.. autofunction:: plot_residuals_reference_sections_single diff --git a/docs/api/dtscalibration.plot_sigma_report.rst b/docs/api/dtscalibration.plot_sigma_report.rst new file mode 100644 index 00000000..b047cdeb --- /dev/null +++ b/docs/api/dtscalibration.plot_sigma_report.rst @@ -0,0 +1,6 @@ +plot_sigma_report +================= + +.. currentmodule:: dtscalibration + +.. autofunction:: plot_sigma_report diff --git a/docs/api/dtscalibration.read_apsensing_files.rst b/docs/api/dtscalibration.read_apsensing_files.rst new file mode 100644 index 00000000..04bd67d6 --- /dev/null +++ b/docs/api/dtscalibration.read_apsensing_files.rst @@ -0,0 +1,6 @@ +read_apsensing_files +==================== + +.. currentmodule:: dtscalibration + +.. autofunction:: read_apsensing_files diff --git a/docs/api/dtscalibration.read_sensornet_files.rst b/docs/api/dtscalibration.read_sensornet_files.rst new file mode 100644 index 00000000..541f00cb --- /dev/null +++ b/docs/api/dtscalibration.read_sensornet_files.rst @@ -0,0 +1,6 @@ +read_sensornet_files +==================== + +.. currentmodule:: dtscalibration + +.. autofunction:: read_sensornet_files diff --git a/docs/api/dtscalibration.read_sensortran_files.rst b/docs/api/dtscalibration.read_sensortran_files.rst new file mode 100644 index 00000000..35d92c94 --- /dev/null +++ b/docs/api/dtscalibration.read_sensortran_files.rst @@ -0,0 +1,6 @@ +read_sensortran_files +===================== + +.. currentmodule:: dtscalibration + +.. autofunction:: read_sensortran_files diff --git a/docs/api/dtscalibration.read_silixa_files.rst b/docs/api/dtscalibration.read_silixa_files.rst new file mode 100644 index 00000000..d5adee72 --- /dev/null +++ b/docs/api/dtscalibration.read_silixa_files.rst @@ -0,0 +1,6 @@ +read_silixa_files +================= + +.. currentmodule:: dtscalibration + +.. autofunction:: read_silixa_files diff --git a/docs/api/dtscalibration.shift_double_ended.rst b/docs/api/dtscalibration.shift_double_ended.rst new file mode 100644 index 00000000..0a879ea1 --- /dev/null +++ b/docs/api/dtscalibration.shift_double_ended.rst @@ -0,0 +1,6 @@ +shift_double_ended +================== + +.. currentmodule:: dtscalibration + +.. 
autofunction:: shift_double_ended diff --git a/docs/api/dtscalibration.suggest_cable_shift_double_ended.rst b/docs/api/dtscalibration.suggest_cable_shift_double_ended.rst new file mode 100644 index 00000000..14a135c0 --- /dev/null +++ b/docs/api/dtscalibration.suggest_cable_shift_double_ended.rst @@ -0,0 +1,6 @@ +suggest_cable_shift_double_ended +================================ + +.. currentmodule:: dtscalibration + +.. autofunction:: suggest_cable_shift_double_ended diff --git a/docs/notebooks/03Define_sections.ipynb b/docs/notebooks/03Define_sections.ipynb index ef9d97b2..c342cd53 100644 --- a/docs/notebooks/03Define_sections.ipynb +++ b/docs/notebooks/03Define_sections.ipynb @@ -66,7 +66,8 @@ "outputs": [], "source": [ "print(ds.timeseries_keys) # list the available timeseeries\n", - "ds.probe1Temperature.plot(figsize=(12, 8)); # plot one of the timeseries" + "ds.probe1Temperature.plot(figsize=(12, 8))\n", + "# plot one of the timeseries" ] }, { @@ -115,24 +116,7 @@ "sections = {\n", " \"probe1Temperature\": [slice(7.5, 17.0), slice(70.0, 80.0)], # cold bath\n", " \"probe2Temperature\": [slice(24.0, 34.0), slice(85.0, 95.0)], # warm bath\n", - "}\n", - "ds.sections = sections" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-06T08:09:10.309314Z", - "iopub.status.busy": "2022-04-06T08:09:10.308985Z", - "iopub.status.idle": "2022-04-06T08:09:10.323444Z", - "shell.execute_reply": "2022-04-06T08:09:10.322874Z" - } - }, - "outputs": [], - "source": [ - "ds.sections" + "}" ] }, { @@ -141,13 +125,6 @@ "source": [ "NetCDF files do not support reading/writing python dictionaries. Internally the sections dictionary is stored in `ds._sections` as a string encoded with yaml, which can be saved to a netCDF file. Each time the sections dictionary is requested, yaml decodes the string and evaluates it to the Python dictionary. 
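A minimal sketch of that yaml round-trip, assuming plain PyYAML defaults (the Dumper/Loader options the package actually configures may differ; slices serialize as python/object tags, so the non-safe loader is needed to rebuild them):

import yaml

sections = {
    "probe1Temperature": [slice(7.5, 17.0), slice(70.0, 80.0)],  # cold bath
    "probe2Temperature": [slice(24.0, 34.0), slice(85.0, 95.0)],  # warm bath
}
encoded = yaml.dump(sections)  # a plain string, storable as a netCDF attribute
decoded = yaml.load(encoded, Loader=yaml.UnsafeLoader)  # reconstructs the slice objects
assert decoded == sections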
" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -166,7 +143,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.11" + "version": "3.10.10" } }, "nbformat": 4, diff --git a/docs/notebooks/04Calculate_variance_Stokes.ipynb b/docs/notebooks/04Calculate_variance_Stokes.ipynb index 2e0eba4b..a167e999 100644 --- a/docs/notebooks/04Calculate_variance_Stokes.ipynb +++ b/docs/notebooks/04Calculate_variance_Stokes.ipynb @@ -81,8 +81,7 @@ "sections = {\n", " \"probe1Temperature\": [slice(7.5, 17.0), slice(70.0, 80.0)], # cold bath\n", " \"probe2Temperature\": [slice(24.0, 34.0), slice(85.0, 95.0)], # warm bath\n", - "}\n", - "ds.sections = sections" + "}" ] }, { @@ -120,7 +119,7 @@ }, "outputs": [], "source": [ - "I_var, residuals = ds.variance_stokes_constant(st_label=\"st\")\n", + "I_var, residuals = ds.variance_stokes_constant(sections=sections, st_label=\"st\")\n", "print(\n", " \"The variance of the Stokes signal along the reference sections \"\n", " \"is approximately {:.2f} on a {:.1f} sec acquisition time\".format(\n", @@ -187,7 +186,7 @@ "x = np.linspace(mean - 3 * sigma, mean + 3 * sigma, 100)\n", "approximated_normal_fit = scipy.stats.norm.pdf(x, mean, sigma)\n", "residuals.plot.hist(bins=50, figsize=(12, 8), density=True)\n", - "plt.plot(x, approximated_normal_fit);" + "plt.plot(x, approximated_normal_fit)" ] }, { diff --git a/docs/notebooks/07Calibrate_single_ended.ipynb b/docs/notebooks/07Calibrate_single_ended.ipynb index 28616d9e..7edf0a8a 100644 --- a/docs/notebooks/07Calibrate_single_ended.ipynb +++ b/docs/notebooks/07Calibrate_single_ended.ipynb @@ -90,7 +90,7 @@ "outputs": [], "source": [ "ds = ds.sel(x=slice(-30, 101)) # dismiss parts of the fiber that are not interesting\n", - "ds.sections = {\n", + "sections = {\n", " \"probe1Temperature\": [slice(20, 25.5)], # warm bath\n", " \"probe2Temperature\": [slice(5.5, 15.5)], # cold bath\n", "}" @@ -119,8 +119,8 @@ }, "outputs": [], "source": [ - "st_var, resid = ds.variance_stokes_constant(st_label=\"st\")\n", - "ast_var, _ = ds.variance_stokes_constant(st_label=\"ast\")" + "st_var, resid = ds.variance_stokes_constant(sections=sections, st_label=\"st\")\n", + "ast_var, _ = ds.variance_stokes_constant(sections=sections, st_label=\"ast\")" ] }, { @@ -136,7 +136,7 @@ "metadata": {}, "outputs": [], "source": [ - "resid.plot(figsize=(12, 4));" + "resid.plot(figsize=(12, 4))" ] }, { @@ -160,7 +160,7 @@ }, "outputs": [], "source": [ - "ds.calibration_single_ended(st_var=st_var, ast_var=ast_var)" + "ds.calibration_single_ended(sections=sections, st_var=st_var, ast_var=ast_var)" ] }, { @@ -177,7 +177,7 @@ "metadata": {}, "outputs": [], "source": [ - "ds.tmpf.plot(figsize=(12, 4));" + "ds.tmpf.plot(figsize=(12, 4))" ] }, { @@ -189,7 +189,7 @@ "ds1 = ds.isel(time=0)\n", "ds1.tmpf.plot(figsize=(12, 4))\n", "(ds1.tmpf_var**0.5).plot(figsize=(12, 4))\n", - "plt.ylabel(\"$\\sigma$ ($^\\circ$C)\");" + "plt.ylabel(\"$\\sigma$ ($^\\circ$C)\")" ] }, { @@ -217,7 +217,7 @@ "outputs": [], "source": [ "ds1.st.plot(figsize=(12, 8))\n", - "ds1.ast.plot();" + "ds1.ast.plot()" ] }, { diff --git a/docs/notebooks/08Calibrate_double_ended.ipynb b/docs/notebooks/08Calibrate_double_ended.ipynb index 2ed0f35b..5f36c954 100644 --- a/docs/notebooks/08Calibrate_double_ended.ipynb +++ b/docs/notebooks/08Calibrate_double_ended.ipynb @@ -157,7 +157,7 @@ "metadata": {}, "outputs": [], "source": [ - "ds.sections = {\n", + "sections = 
{\n", " \"probe1Temperature\": [slice(7.5, 17.0), slice(70.0, 80.0)], # cold bath\n", " \"probe2Temperature\": [slice(24.0, 34.0), slice(85.0, 95.0)], # warm bath\n", "}" @@ -186,10 +186,10 @@ }, "outputs": [], "source": [ - "st_var, resid = ds.variance_stokes_constant(st_label=\"st\")\n", - "ast_var, _ = ds.variance_stokes_constant(st_label=\"ast\")\n", - "rst_var, _ = ds.variance_stokes_constant(st_label=\"rst\")\n", - "rast_var, _ = ds.variance_stokes_constant(st_label=\"rast\")" + "st_var, resid = ds.variance_stokes_constant(sections=sections, st_label=\"st\")\n", + "ast_var, _ = ds.variance_stokes_constant(sections=sections, st_label=\"ast\")\n", + "rst_var, _ = ds.variance_stokes_constant(sections=sections, st_label=\"rst\")\n", + "rast_var, _ = ds.variance_stokes_constant(sections=sections, st_label=\"rast\")" ] }, { @@ -212,7 +212,7 @@ }, "outputs": [], "source": [ - "resid.plot(figsize=(12, 4));" + "resid.plot(figsize=(12, 4))" ] }, { @@ -237,6 +237,7 @@ "outputs": [], "source": [ "ds.calibration_double_ended(\n", + " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", @@ -257,7 +258,7 @@ }, "outputs": [], "source": [ - "ds.tmpw.plot(figsize=(12, 4));" + "ds.tmpw.plot(figsize=(12, 4))" ] }, { @@ -295,7 +296,7 @@ "ds.tmpw_var.plot(figsize=(12, 4))\n", "ds1 = ds.isel(time=-1) # take only the first timestep\n", "(ds1.tmpw_var**0.5).plot(figsize=(12, 4))\n", - "plt.gca().set_ylabel(\"Standard error ($^\\circ$C)\");" + "plt.gca().set_ylabel(\"Standard error ($^\\circ$C)\")" ] }, { @@ -323,6 +324,7 @@ "outputs": [], "source": [ "ds.conf_int_double_ended(\n", + " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", @@ -331,7 +333,7 @@ " mc_sample_size=500,\n", ") # < increase sample size for better approximation\n", "\n", - "ds.tmpw_mc_var.plot(figsize=(12, 4));" + "ds.tmpw_mc_var.plot(figsize=(12, 4))" ] }, { @@ -380,7 +382,7 @@ "(ds1.tmpb_var**0.5).plot()\n", "(ds1.tmpw_var**0.5).plot()\n", "(ds1.tmpw_mc_var**0.5).plot()\n", - "plt.ylabel(\"$\\sigma$ ($^\\circ$C)\");" + "plt.ylabel(\"$\\sigma$ ($^\\circ$C)\")" ] }, { diff --git a/docs/notebooks/12Datastore_from_numpy_arrays.ipynb b/docs/notebooks/12Datastore_from_numpy_arrays.ipynb index 2ddb9a20..9cd233a5 100644 --- a/docs/notebooks/12Datastore_from_numpy_arrays.ipynb +++ b/docs/notebooks/12Datastore_from_numpy_arrays.ipynb @@ -197,13 +197,12 @@ " \"temp1\": [slice(20, 25.5)], # warm bath\n", " \"temp2\": [slice(5.5, 15.5)], # cold bath\n", "}\n", - "ds.sections = sections\n", "\n", - "st_var, resid = ds.variance_stokes_constant(st_label=\"st\")\n", - "ast_var, _ = ds.variance_stokes_constant(st_label=\"ast\")\n", - "ds.calibration_single_ended(st_var=st_var, ast_var=ast_var)\n", + "st_var, resid = ds.variance_stokes_constant(sections=sections, st_label=\"st\")\n", + "ast_var, _ = ds.variance_stokes_constant(sections=sections, st_label=\"ast\")\n", + "ds.calibration_single_ended(sections=sections, st_var=st_var, ast_var=ast_var)\n", "\n", - "ds.isel(time=0).tmpf.plot();" + "ds.isel(time=0).tmpf.plot()" ] }, { diff --git a/docs/notebooks/13Fixed_parameter_calibration.ipynb b/docs/notebooks/13Fixed_parameter_calibration.ipynb index c5b9fb47..fc2642ae 100644 --- a/docs/notebooks/13Fixed_parameter_calibration.ipynb +++ b/docs/notebooks/13Fixed_parameter_calibration.ipynb @@ -39,8 +39,7 @@ " \"probe1Temperature\": [\n", " slice(20, 25.5)\n", " ], # we only use the warm bath in this notebook\n", - "}\n", - "ds100.sections = sections" + "}" ] }, { @@ -69,10 +68,14 @@ 
"fix_gamma = (481.9, 0) # (gamma value, gamma variance)\n", "fix_dalpha = (-2.014e-5, 0) # (alpha value, alpha variance)\n", "\n", - "st_var, resid = ds100.variance_stokes_constant(st_label=\"st\")\n", - "ast_var, _ = ds100.variance_stokes_constant(st_label=\"ast\")\n", + "st_var, resid = ds100.variance_stokes_constant(sections=sections, st_label=\"st\")\n", + "ast_var, _ = ds100.variance_stokes_constant(sections=sections, st_label=\"ast\")\n", "ds100.calibration_single_ended(\n", - " st_var=st_var, ast_var=ast_var, fix_gamma=fix_gamma, fix_dalpha=fix_dalpha\n", + " sections=sections,\n", + " st_var=st_var,\n", + " ast_var=ast_var,\n", + " fix_gamma=fix_gamma,\n", + " fix_dalpha=fix_dalpha,\n", ")" ] }, @@ -129,7 +132,7 @@ " linewidth=1, label=\"Device calibrated\"\n", ") # plot the temperature calibrated by the device\n", "plt.title(\"Temperature at the first time step\")\n", - "plt.legend();" + "plt.legend()" ] }, { diff --git a/docs/notebooks/14Lossy_splices.ipynb b/docs/notebooks/14Lossy_splices.ipynb index c2c98e6e..977969f3 100644 --- a/docs/notebooks/14Lossy_splices.ipynb +++ b/docs/notebooks/14Lossy_splices.ipynb @@ -72,8 +72,7 @@ "sections = {\n", " \"probe1Temperature\": [slice(7.5, 17.0), slice(70.0, 80.0)], # cold bath\n", " \"probe2Temperature\": [slice(24.0, 34.0), slice(85.0, 95.0)], # warm bath\n", - "}\n", - "ds.sections = sections" + "}" ] }, { @@ -122,7 +121,7 @@ "ds.isel(time=0).ast.plot(label=\"ast\")\n", "ds.isel(time=0).rst.plot(label=\"rst\")\n", "ds.isel(time=0).rast.plot(label=\"rast\")\n", - "plt.legend();" + "plt.legend()" ] }, { @@ -147,19 +146,20 @@ "source": [ "ds_a = ds.copy(deep=True)\n", "\n", - "st_var, resid = ds_a.variance_stokes(st_label=\"st\")\n", - "ast_var, _ = ds_a.variance_stokes(st_label=\"ast\")\n", - "rst_var, _ = ds_a.variance_stokes(st_label=\"rst\")\n", - "rast_var, _ = ds_a.variance_stokes(st_label=\"rast\")\n", + "st_var, resid = ds_a.variance_stokes(sections=sections, st_label=\"st\")\n", + "ast_var, _ = ds_a.variance_stokes(sections=sections, st_label=\"ast\")\n", + "rst_var, _ = ds_a.variance_stokes(sections=sections, st_label=\"rst\")\n", + "rast_var, _ = ds_a.variance_stokes(sections=sections, st_label=\"rast\")\n", "\n", "ds_a.calibration_double_ended(\n", + " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", " rast_var=rast_var,\n", ")\n", "\n", - "ds_a.isel(time=0).tmpw.plot(label=\"calibrated\");" + "ds_a.isel(time=0).tmpw.plot(label=\"calibrated\")" ] }, { @@ -184,12 +184,13 @@ }, "outputs": [], "source": [ - "st_var, resid = ds.variance_stokes(st_label=\"st\")\n", - "ast_var, _ = ds.variance_stokes(st_label=\"ast\")\n", - "rst_var, _ = ds.variance_stokes(st_label=\"rst\")\n", - "rast_var, _ = ds.variance_stokes(st_label=\"rast\")\n", + "st_var, resid = ds.variance_stokes(sections=sections, st_label=\"st\")\n", + "ast_var, _ = ds.variance_stokes(sections=sections, st_label=\"ast\")\n", + "rst_var, _ = ds.variance_stokes(sections=sections, st_label=\"rst\")\n", + "rast_var, _ = ds.variance_stokes(sections=sections, st_label=\"rast\")\n", "\n", "ds.calibration_double_ended(\n", + " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", @@ -199,7 +200,7 @@ "\n", "ds_a.isel(time=0).tmpw.plot(label=\"no trans. att.\")\n", "ds.isel(time=0).tmpw.plot(label=\"with trans. 
att.\")\n", - "plt.legend();" + "plt.legend()" ] }, { diff --git a/docs/notebooks/15Matching_sections.ipynb b/docs/notebooks/15Matching_sections.ipynb index a83c3730..3017c04b 100644 --- a/docs/notebooks/15Matching_sections.ipynb +++ b/docs/notebooks/15Matching_sections.ipynb @@ -68,8 +68,7 @@ "sections = {\n", " \"probe1Temperature\": [slice(7.5, 17.0)], # cold bath\n", " \"probe2Temperature\": [slice(24.0, 34.0)], # warm bath\n", - "}\n", - "ds.sections = sections" + "}" ] }, { @@ -123,13 +122,17 @@ "source": [ "ds_a = ds.copy(deep=True)\n", "\n", - "st_var, resid = ds_a.variance_stokes(st_label=\"st\")\n", - "ast_var, _ = ds_a.variance_stokes(st_label=\"ast\")\n", - "rst_var, _ = ds_a.variance_stokes(st_label=\"rst\")\n", - "rast_var, _ = ds_a.variance_stokes(st_label=\"rast\")\n", + "st_var, resid = ds_a.variance_stokes(sections=sections, st_label=\"st\")\n", + "ast_var, _ = ds_a.variance_stokes(sections=sections, st_label=\"ast\")\n", + "rst_var, _ = ds_a.variance_stokes(sections=sections, st_label=\"rst\")\n", + "rast_var, _ = ds_a.variance_stokes(sections=sections, st_label=\"rast\")\n", "\n", "ds_a.calibration_double_ended(\n", - " st_var=st_var, ast_var=ast_var, rst_var=rst_var, rast_var=rast_var\n", + " sections=sections,\n", + " st_var=st_var,\n", + " ast_var=ast_var,\n", + " rst_var=rst_var,\n", + " rast_var=rast_var,\n", ")\n", "\n", "ds_a.isel(time=0).tmpw.plot(label=\"calibrated\")" @@ -165,12 +168,13 @@ "source": [ "matching_sections = [(slice(7.5, 17.6), slice(69, 79.1), False)]\n", "\n", - "st_var, resid = ds.variance_stokes(st_label=\"st\")\n", - "ast_var, _ = ds.variance_stokes(st_label=\"ast\")\n", - "rst_var, _ = ds.variance_stokes(st_label=\"rst\")\n", - "rast_var, _ = ds.variance_stokes(st_label=\"rast\")\n", + "st_var, resid = ds.variance_stokes(sections=sections, st_label=\"st\")\n", + "ast_var, _ = ds.variance_stokes(sections=sections, st_label=\"ast\")\n", + "rst_var, _ = ds.variance_stokes(sections=sections, st_label=\"rst\")\n", + "rast_var, _ = ds.variance_stokes(sections=sections, st_label=\"rast\")\n", "\n", "ds.calibration_double_ended(\n", + " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", diff --git a/docs/notebooks/16Averaging_temperatures.ipynb b/docs/notebooks/16Averaging_temperatures.ipynb index 6079bd9f..2d97bb85 100644 --- a/docs/notebooks/16Averaging_temperatures.ipynb +++ b/docs/notebooks/16Averaging_temperatures.ipynb @@ -63,8 +63,7 @@ "sections = {\n", " \"probe1Temperature\": [slice(7.5, 17.0), slice(70.0, 80.0)], # cold bath\n", " \"probe2Temperature\": [slice(24.0, 34.0), slice(85.0, 95.0)], # warm bath\n", - "}\n", - "ds.sections = sections" + "}" ] }, { @@ -80,10 +79,10 @@ }, "outputs": [], "source": [ - "st_var, resid = ds.variance_stokes(st_label=\"st\")\n", - "ast_var, _ = ds.variance_stokes(st_label=\"ast\")\n", - "rst_var, _ = ds.variance_stokes(st_label=\"rst\")\n", - "rast_var, _ = ds.variance_stokes(st_label=\"rast\")" + "st_var, resid = ds.variance_stokes(sections=sections, st_label=\"st\")\n", + "ast_var, _ = ds.variance_stokes(sections=sections, st_label=\"ast\")\n", + "rst_var, _ = ds.variance_stokes(sections=sections, st_label=\"rst\")\n", + "rast_var, _ = ds.variance_stokes(sections=sections, st_label=\"rast\")" ] }, { @@ -100,6 +99,7 @@ "outputs": [], "source": [ "ds.calibration_double_ended(\n", + " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", @@ -179,6 +179,7 @@ "outputs": [], "source": [ "ds.average_double_ended(\n", + " 
sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", @@ -190,7 +191,7 @@ " ci_avg_time_isel=[0, 1, 2, 3, 4, 5],\n", " ci_avg_time_sel=None,\n", ")\n", - "ds.tmpw_mc_avg1.plot(hue=\"CI\", linewidth=0.8);" + "ds.tmpw_mc_avg1.plot(hue=\"CI\", linewidth=0.8)" ] }, { @@ -239,6 +240,7 @@ "outputs": [], "source": [ "ds.average_double_ended(\n", + " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", @@ -250,7 +252,7 @@ " ci_avg_time_isel=[0, 1, 2, 3, 4, 5],\n", " ci_avg_time_sel=None,\n", ")\n", - "ds.tmpw_mc_avg2.plot(hue=\"CI\", linewidth=0.8);" + "ds.tmpw_mc_avg2.plot(hue=\"CI\", linewidth=0.8)" ] }, { @@ -299,6 +301,7 @@ "outputs": [], "source": [ "ds.average_double_ended(\n", + " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", @@ -310,7 +313,7 @@ " ci_avg_x_sel=slice(7.5, 17.0),\n", " ci_avg_x_isel=None,\n", ")\n", - "ds.tmpw_mc_avgx1.plot(hue=\"CI\", linewidth=0.8);" + "ds.tmpw_mc_avgx1.plot(hue=\"CI\", linewidth=0.8)" ] }, { @@ -359,6 +362,7 @@ "outputs": [], "source": [ "ds.average_double_ended(\n", + " sections=sections,\n", " st_var=st_var,\n", " ast_var=ast_var,\n", " rst_var=rst_var,\n", @@ -370,7 +374,7 @@ " ci_avg_x_sel=slice(7.5, 17.0),\n", " ci_avg_x_isel=None,\n", ")\n", - "ds.tmpw_mc_avgx2.plot(hue=\"CI\", linewidth=0.8);" + "ds.tmpw_mc_avgx2.plot(hue=\"CI\", linewidth=0.8)" ] }, { diff --git a/docs/notebooks/17Temperature_uncertainty_single_ended.ipynb b/docs/notebooks/17Temperature_uncertainty_single_ended.ipynb index f270404f..301e4963 100644 --- a/docs/notebooks/17Temperature_uncertainty_single_ended.ipynb +++ b/docs/notebooks/17Temperature_uncertainty_single_ended.ipynb @@ -32,15 +32,15 @@ "ds = read_silixa_files(directory=filepath, timezone_netcdf=\"UTC\", file_ext=\"*.xml\")\n", "\n", "ds = ds.sel(x=slice(-30, 101)) # dismiss parts of the fiber that are not interesting\n", - "ds.sections = {\n", + "sections = {\n", " \"probe1Temperature\": [slice(20, 25.5)], # warm bath\n", " \"probe2Temperature\": [slice(5.5, 15.5)], # cold bath\n", "}\n", "\n", - "st_var, resid = ds.variance_stokes_constant(st_label=\"st\")\n", - "ast_var, _ = ds.variance_stokes_constant(st_label=\"ast\")\n", + "st_var, resid = ds.variance_stokes_constant(sections=sections, st_label=\"st\")\n", + "ast_var, _ = ds.variance_stokes_constant(sections=sections, st_label=\"ast\")\n", "\n", - "ds.calibration_single_ended(st_var=st_var, ast_var=ast_var)" + "ds.calibration_single_ended(sections=sections, st_var=st_var, ast_var=ast_var)" ] }, { @@ -73,7 +73,7 @@ "plt.fill_between(ds1.x, y1=ds1.tmpf_var, y2=stast_var, label=\"Parameter estimation\")\n", "plt.suptitle(\"Variance of the estimated temperature\")\n", "plt.ylabel(\"$\\sigma^2$ ($^\\circ$C$^2$)\")\n", - "plt.legend(fontsize=\"small\");" + "plt.legend(fontsize=\"small\")" ] }, { @@ -84,7 +84,7 @@ "outputs": [], "source": [ "# The effects of the parameter uncertainty can be further inspected\n", - "ds1.var_fw_da.plot(hue=\"comp_fw\", figsize=(12, 4));" + "ds1.var_fw_da.plot(hue=\"comp_fw\", figsize=(12, 4))" ] }, { @@ -145,7 +145,7 @@ "(ds1.tmpf_mc_var**0.5).plot(figsize=(12, 4), label=\"Monte Carlo approx.\")\n", "(ds1.tmpf_var**0.5).plot(label=\"Linear error approx.\")\n", "plt.ylabel(\"$\\sigma$ ($^\\circ$C)\")\n", - "plt.legend(fontsize=\"small\");" + "plt.legend(fontsize=\"small\")" ] }, { @@ -166,7 +166,7 @@ "ds1.tmpf.plot(linewidth=0.7, figsize=(12, 4))\n", "ds1.tmpf_mc.sel(CI=2.5).plot(linewidth=0.7, label=\"CI: 2.5%\")\n", 
"ds1.tmpf_mc.sel(CI=97.5).plot(linewidth=0.7, label=\"CI: 97.5%\")\n", - "plt.legend(fontsize=\"small\");" + "plt.legend(fontsize=\"small\")" ] }, { diff --git a/pyproject.toml b/pyproject.toml index 315e0bb7..682d0c3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["hatchling>=1.8.0", "hatch-vcs", "hatch-fancy-pypi-readme"] +requires = ["hatchling>=1.8.0", "hatch-vcs"] build-backend = "hatchling.build" [tool.hatch.version] @@ -19,17 +19,15 @@ disable = true # Requires confirmation when publishing to pypi. [project] name = "dtscalibration" -description = "A Python package to load raw DTS files, perform a calibration, and plot the result." +description = "Load Distributed Temperature Sensing (DTS) files, calibrate the temperature and estimate its uncertainty." readme = "README.rst" license = "BSD-3-Clause" requires-python = ">=3.9, <3.12" authors = [ - {email = "bdestombe@gmail.com"}, - {name = "Bas des Tombe, Bart Schilperoort"} + {name = "Bas des Tombe, Bart Schilperoort"}, ] maintainers = [ - {name = "Bas des Tombe", email = "bdestombe@gmail.com"}, - {name = "Bart Schilperoort", email = "b.schilperoort@gmail.com"}, + {name = "Bas des Tombe, Bart Schilperoort"}, ] keywords = [ "DTS", @@ -52,17 +50,19 @@ classifiers = [ "Topic :: Utilities", ] dependencies = [ - "numpy>=1.22.4", # xarray: recommended to use >= 1.22 for full quantile method support + "numpy>=1.22.4", # >= 1.22 for full quantile method support in xarray "dask", "pandas", - "xarray[parallel,accel]", + "xarray[parallel]", # numbagg (llvmlite) is a pain to install with pip + "bottleneck", # speeds up Xarray + "flox", # speeds up Xarray "pyyaml>=6.0.1", "xmltodict", "scipy", "statsmodels", "matplotlib", "netCDF4>=1.6.4", - "nc-time-axis>=1.4.1" # optional plot dependency of xarray + "nc-time-axis>=1.4.1" # plot dependency of xarray ] dynamic = ["version"] @@ -72,15 +72,16 @@ dev = [ "bump2version", "ruff", "isort", - "black[jupyter]", - "mypy", - "types-PyYAML", # for pyyaml types + "black[jupyter]", # for formatting + "mypy", # for type checking + "types-PyYAML", # for pyyaml types "types-xmltodict", # for xmltodict types - "pandas-stubs", # for pandas types + "pandas-stubs", # for pandas types "pytest", - "pytest-cov", + "pytest-cov", # for coverage + "pytest-xdist", # for parallel testing "jupyter", - "nbformat", # Needed to run the tests + "nbformat", # Needed to run the tests ] docs = [ # Required for ReadTheDocs "IPython", @@ -110,8 +111,8 @@ lint = [ "mypy src/", ] format = ["black .", "isort .", "lint",] -test = ["pytest ./src/ ./tests/",] # --doctest-modules -fast-test = ["pytest ./tests/ -m \"not slow\"",] +test = ["pytest -n auto --dist worksteal ./src/ ./tests/",] +fast-test = ["pytest -n auto --dist worksteal ./tests/ -m \"not slow\"",] coverage = [ "pytest --cov --cov-report term --cov-report xml --junitxml=xunit-result.xml tests/", ] @@ -147,7 +148,7 @@ select = [ # It would be nice to have the commented out checks working. # "D", # pydocstyle # "C90", # mccabe complexity # "N", # PEP8-naming - # "UP", # pyupgrade (upgrade syntax to current syntax) + "UP", # pyupgrade (upgrade syntax to current syntax) "PLE", # Pylint error https://github.com/charliermarsh/ruff#error-ple # "PLR", # Pylint refactor (e.g. too-many-arguments) # "PLW", # Pylint warning (useless-else-on-loop) @@ -163,7 +164,7 @@ ignore = [ "E501", # Line too long (want to have fixed ] # Allow autofix for all enabled rules (when `--fix`) is provided. 
-fixable = ["E", "F"] +fixable = ["E", "F", "UP", "PLE"] unfixable = [] line-length = 88 exclude = ["docs", "build"] diff --git a/src/dtscalibration/__init__.py b/src/dtscalibration/__init__.py index 3836a779..4ecabb02 100644 --- a/src/dtscalibration/__init__.py +++ b/src/dtscalibration/__init__.py @@ -1,4 +1,3 @@ -# coding=utf-8 from dtscalibration.datastore import DataStore from dtscalibration.datastore_utils import check_dims from dtscalibration.datastore_utils import check_timestep_allclose diff --git a/src/dtscalibration/averaging_utils.py b/src/dtscalibration/averaging_utils.py new file mode 100644 index 00000000..d17b1f56 --- /dev/null +++ b/src/dtscalibration/averaging_utils.py @@ -0,0 +1,69 @@ +def inverse_variance_weighted_mean( + self, + tmp1="tmpf", + tmp2="tmpb", + tmp1_var="tmpf_mc_var", + tmp2_var="tmpb_mc_var", + tmpw_store="tmpw", + tmpw_var_store="tmpw_var", +): + """Compute inverse variance weighted average, and add result in-place. + + Parameters + ---------- + tmp1 : str + The label of the first temperature dataset that is averaged + tmp2 : str + The label of the second temperature dataset that is averaged + tmp1_var : str + The variance of tmp1 + tmp2_var : str + The variance of tmp2 + tmpw_store : str + The label of the averaged temperature dataset + tmpw_var_store : str + The label of the variance of the averaged temperature dataset + + Returns + ------- + + """ + + self[tmpw_var_store] = 1 / (1 / self[tmp1_var] + 1 / self[tmp2_var]) + + self[tmpw_store] = ( + self[tmp1] / self[tmp1_var] + self[tmp2] / self[tmp2_var] + ) * self[tmpw_var_store] + + pass + + +def inverse_variance_weighted_mean_array( + self, + tmp_label="tmpf", + tmp_var_label="tmpf_mc_var", + tmpw_store="tmpw", + tmpw_var_store="tmpw_var", + dim="time", +): + """ + Calculates the weighted average across a dimension. 
+ + Parameters + ---------- + + Returns + ------- + + See Also + -------- + - https://en.wikipedia.org/wiki/Inverse-variance_weighting + + """ + self[tmpw_var_store] = 1 / (1 / self[tmp_var_label]).sum(dim=dim) + + self[tmpw_store] = (self[tmp_label] / self[tmp_var_label]).sum(dim=dim) / ( + 1 / self[tmp_var_label] + ).sum(dim=dim) + + pass diff --git a/src/dtscalibration/calibrate_utils.py b/src/dtscalibration/calibrate_utils.py index 6e90e208..960fa220 100644 --- a/src/dtscalibration/calibrate_utils.py +++ b/src/dtscalibration/calibrate_utils.py @@ -1,4 +1,3 @@ -# coding=utf-8 import numpy as np import scipy.sparse as sp import statsmodels.api as sm @@ -48,6 +47,7 @@ def parse_st_var(ds, st_var, st_label="st"): def calibration_single_ended_helper( self, + sections, st_var, ast_var, fix_alpha, @@ -67,6 +67,7 @@ def calibration_single_ended_helper( calc_cov = True split = calibration_single_ended_solver( self, + sections, st_var, ast_var, calc_cov=calc_cov, @@ -189,6 +190,7 @@ def calibration_single_ended_helper( def calibration_single_ended_solver( # noqa: MC0001 ds, + sections=None, st_var=None, ast_var=None, calc_cov=True, @@ -235,7 +237,7 @@ def calibration_single_ended_solver( # noqa: MC0001 """ # get ix_sec argsort so the sections are in order of increasing x - ix_sec = ds.ufunc_per_section(x_indices=True, calc_per="all") + ix_sec = ds.ufunc_per_section(sections=sections, x_indices=True, calc_per="all") ds_sec = ds.isel(x=ix_sec) x_sec = ds_sec["x"].values @@ -255,7 +257,7 @@ def calibration_single_ended_solver( # noqa: MC0001 # X \gamma # Eq.34 cal_ref = ds.ufunc_per_section( - label="st", ref_temp_broadcasted=True, calc_per="all" + sections=sections, label="st", ref_temp_broadcasted=True, calc_per="all" ) # cal_ref = cal_ref # sort by increasing x data_gamma = 1 / (cal_ref.T.ravel() + 273.15) # gamma @@ -485,6 +487,7 @@ def calibration_single_ended_solver( # noqa: MC0001 def calibration_double_ended_helper( self, + sections, st_var, ast_var, rst_var, @@ -503,6 +506,7 @@ def calibration_double_ended_helper( if fix_alpha or fix_gamma: split = calibration_double_ended_solver( self, + sections, st_var, ast_var, rst_var, @@ -515,6 +519,7 @@ def calibration_double_ended_helper( else: out = calibration_double_ended_solver( self, + sections, st_var, ast_var, rst_var, @@ -1102,6 +1107,7 @@ def calibration_double_ended_helper( def calibration_double_ended_solver( # noqa: MC0001 ds, + sections=None, st_var=None, ast_var=None, rst_var=None, @@ -1167,7 +1173,7 @@ def calibration_double_ended_solver( # noqa: MC0001 ------- """ - ix_sec = ds.ufunc_per_section(x_indices=True, calc_per="all") + ix_sec = ds.ufunc_per_section(sections=sections, x_indices=True, calc_per="all") ds_sec = ds.isel(x=ix_sec) ix_alpha_is_zero = ix_sec[0] # per definition of E @@ -1187,7 +1193,7 @@ def calibration_double_ended_solver( # noqa: MC0001 rast_var=rast_var, ix_alpha_is_zero=ix_alpha_is_zero, ) - df_est, db_est = calc_df_db_double_est(ds, ix_alpha_is_zero, 485.0) + df_est, db_est = calc_df_db_double_est(ds, sections, ix_alpha_is_zero, 485.0) ( E, @@ -1196,7 +1202,7 @@ def calibration_double_ended_solver( # noqa: MC0001 Zero_d, Z_TA_fw, Z_TA_bw, - ) = construct_submatrices(nt, nx_sec, ds, ds.trans_att.values, x_sec) + ) = construct_submatrices(sections, nt, nx_sec, ds, ds.trans_att.values, x_sec) # y # Eq.41--45 y_F = np.log(ds_sec.st / ds_sec.ast).values.ravel() @@ -1822,7 +1828,7 @@ def construct_submatrices_matching_sections(x, ix_sec, hix, tix, nt, trans_att): ) -def construct_submatrices(nt, nx, ds, trans_att, 
x_sec): +def construct_submatrices(sections, nt, nx, ds, trans_att, x_sec): """Wrapped in a function to reduce memory usage. E is zero at the first index of the reference section (ds_sec) Constructing: @@ -1840,7 +1846,9 @@ def construct_submatrices(nt, nx, ds, trans_att, x_sec): # Z \gamma # Eq.47 cal_ref = np.array( - ds.ufunc_per_section(label="st", ref_temp_broadcasted=True, calc_per="all") + ds.ufunc_per_section( + sections=sections, label="st", ref_temp_broadcasted=True, calc_per="all" + ) ) data_gamma = 1 / (cal_ref.ravel() + 273.15) # gamma coord_gamma_row = np.arange(nt * nx, dtype=int) @@ -2213,7 +2221,7 @@ def calc_alpha_double( return E, E_var -def calc_df_db_double_est(ds, ix_alpha_is_zero, gamma_est): +def calc_df_db_double_est(ds, sections, ix_alpha_is_zero, gamma_est): Ifwx0 = np.log( ds.st.isel(x=ix_alpha_is_zero) / ds.ast.isel(x=ix_alpha_is_zero) ).values @@ -2221,9 +2229,9 @@ def calc_df_db_double_est(ds, ix_alpha_is_zero, gamma_est): ds.rst.isel(x=ix_alpha_is_zero) / ds.rast.isel(x=ix_alpha_is_zero) ).values ref_temps_refs = ds.ufunc_per_section( - label="st", ref_temp_broadcasted=True, calc_per="all" + sections=sections, label="st", ref_temp_broadcasted=True, calc_per="all" ) - ix_sec = ds.ufunc_per_section(x_indices=True, calc_per="all") + ix_sec = ds.ufunc_per_section(sections=sections, x_indices=True, calc_per="all") ref_temps_x0 = ( ref_temps_refs[ix_sec == ix_alpha_is_zero].flatten().compute() + 273.15 ) diff --git a/src/dtscalibration/calibration/section_utils.py b/src/dtscalibration/calibration/section_utils.py index 126d575b..2d1ef100 100644 --- a/src/dtscalibration/calibration/section_utils.py +++ b/src/dtscalibration/calibration/section_utils.py @@ -1,6 +1,3 @@ -from typing import Dict -from typing import List - import numpy as np import xarray as xr import yaml @@ -8,7 +5,7 @@ from dtscalibration.datastore_utils import ufunc_per_section_helper -def set_sections(ds: xr.Dataset, sections: Dict[str, List[slice]]) -> xr.Dataset: +def set_sections(ds: xr.Dataset, sections: dict[str, list[slice]]) -> xr.Dataset: sections_validated = None if sections is not None: @@ -18,7 +15,7 @@ def set_sections(ds: xr.Dataset, sections: Dict[str, List[slice]]) -> xr.Dataset return ds -def validate_sections(ds: xr.Dataset, sections: Dict[str, List[slice]]): +def validate_sections(ds: xr.Dataset, sections: dict[str, list[slice]]): assert isinstance(sections, dict) # be less restrictive for capitalized labels @@ -135,7 +132,7 @@ def ufunc_per_section( 1. Calculate the variance of the residuals in the along ALL the\ reference sections wrt the temperature of the water baths - >>> tmpf_var = d.ufunc_per_section( + >>> tmpf_var = d.ufunc_per_section(sections, >>> func='var', >>> calc_per='all', >>> label='tmpf', @@ -144,7 +141,7 @@ def ufunc_per_section( 2. Calculate the variance of the residuals in the along PER\ reference section wrt the temperature of the water baths - >>> tmpf_var = d.ufunc_per_section( + >>> tmpf_var = d.ufunc_per_section(sections, >>> func='var', >>> calc_per='stretch', >>> label='tmpf', @@ -153,7 +150,7 @@ def ufunc_per_section( 3. Calculate the variance of the residuals in the along PER\ water bath wrt the temperature of the water baths - >>> tmpf_var = d.ufunc_per_section( + >>> tmpf_var = d.ufunc_per_section(sections, >>> func='var', >>> calc_per='section', >>> label='tmpf', @@ -161,7 +158,7 @@ def ufunc_per_section( 4. 
Obtain the coordinates of the measurements per section - >>> locs = d.ufunc_per_section( + >>> locs = d.ufunc_per_section(sections, >>> func=None, >>> label='x', >>> temp_err=False, @@ -170,7 +167,7 @@ def ufunc_per_section( 5. Number of observations per stretch - >>> nlocs = d.ufunc_per_section( + >>> nlocs = d.ufunc_per_section(sections, >>> func=len, >>> label='x', >>> temp_err=False, @@ -182,14 +179,14 @@ def ufunc_per_section( temperature (a timeseries) is broadcasted to the shape of self[\ label]. The self[label] is not used for anything else. - >>> temp_ref = d.ufunc_per_section( + >>> temp_ref = d.ufunc_per_section(sections, >>> label='st', >>> ref_temp_broadcasted=True, >>> calc_per='all') 7. x-coordinate index - >>> ix_loc = d.ufunc_per_section(x_indices=True) + >>> ix_loc = d.ufunc_per_section(sections, x_indices=True) Note diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index 01faf0c6..d25b4ff6 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -4,12 +4,9 @@ import dask import dask.array as da import numpy as np -import scipy.sparse as sp import scipy.stats as sst import xarray as xr import yaml -from scipy.optimize import minimize -from scipy.sparse import linalg as ln from dtscalibration.calibrate_utils import calibration_double_ended_helper from dtscalibration.calibrate_utils import calibration_single_ended_helper @@ -21,8 +18,13 @@ from dtscalibration.datastore_utils import ParameterIndexSingleEnded from dtscalibration.datastore_utils import check_deprecated_kwargs from dtscalibration.datastore_utils import check_timestep_allclose +from dtscalibration.datastore_utils import get_params_from_pval_double_ended +from dtscalibration.datastore_utils import get_params_from_pval_single_ended from dtscalibration.datastore_utils import ufunc_per_section_helper from dtscalibration.io_utils import _dim_attrs +from dtscalibration.variance_helpers import variance_stokes_constant_helper +from dtscalibration.variance_helpers import variance_stokes_exponential_helper +from dtscalibration.variance_helpers import variance_stokes_linear_helper dtsattr_namelist = ["double_ended_flag"] dim_attrs = {k: v for kl, v in _dim_attrs.items() for k in kl} @@ -143,19 +145,16 @@ def __repr__(self): unit = "" for k, v in self.sections.items(): - preamble_new += " {0: <23}".format(k) + preamble_new += f" {k: <23}" # Compute statistics reference section timeseries - sec_stat = "({0:6.2f}".format(float(self[k].mean())) - sec_stat += " +/-{0:5.2f}".format(float(self[k].std())) + sec_stat = f"({float(self[k].mean()):6.2f}" + sec_stat += f" +/-{float(self[k].std()):5.2f}" sec_stat += "\N{DEGREE SIGN}C)\t" preamble_new += sec_stat # print sections - vl = [ - "{0:.2f}{2} - {1:.2f}{2}".format(vi.start, vi.stop, unit) - for vi in v - ] + vl = [f"{vi.start:.2f}{unit} - {vi.stop:.2f}{unit}" for vi in v] preamble_new += " and ".join(vl) + "\n" else: @@ -213,6 +212,14 @@ def sections(self): def sections(self): self.attrs["_sections"] = yaml.dump(None) + @sections.setter + def sections(self, value): + msg = ( + "Not possible anymore. Instead, pass the sections as an argument to \n" + "ds.dts.calibrate_single_ended() or ds.dts.calibrate_double_ended()." 
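The replacement pattern the deprecation message points at, exactly as the updated notebooks use it (filepath is a placeholder for your data directory):

from dtscalibration import read_silixa_files

ds = read_silixa_files(directory=filepath, timezone_netcdf="UTC", file_ext="*.xml")
sections = {
    "probe1Temperature": [slice(20, 25.5)],  # warm bath
    "probe2Temperature": [slice(5.5, 15.5)],  # cold bath
}
st_var, resid = ds.variance_stokes_constant(sections=sections, st_label="st")
ast_var, _ = ds.variance_stokes_constant(sections=sections, st_label="ast")
ds.calibration_single_ended(sections=sections, st_var=st_var, ast_var=ast_var)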
+ ) + raise DeprecationWarning(msg) + def check_reference_section_values(self): """ Checks if the values of the used sections are of the right datatype @@ -416,7 +423,7 @@ def to_netcdf( if value is None: self.attrs[attribute] = "" - return super(DataStore, self).to_netcdf( + return super().to_netcdf( path, mode, format=format, @@ -543,7 +550,7 @@ def to_mf_netcdf( paths = [ os.path.join( folder_path, - filename_preamble + "{:04d}".format(ix) + filename_extension, + filename_preamble + f"{ix:04d}" + filename_extension, ) for ix in range(len(x)) ] @@ -870,23 +877,12 @@ def variance_stokes_constant(self, st_label, sections=None, reshape_residuals=Tr dtscalibration/python-dts-calibration/blob/main/examples/notebooks/\ 04Calculate_variance_Stokes.ipynb>`_ """ - - # var_I, resid = variance_stokes_constant_util(st_label, sections=None, reshape_residuals=True) - # def variance_stokes_constant_util(st_label, sections=None, reshape_residuals=True): - def func_fit(p, xs): - return p[:xs, None] * p[None, xs:] - - def func_cost(p, data, xs): - fit = func_fit(p, xs) - return np.sum((fit - data) ** 2) - if sections is None: sections = self.sections else: sections = validate_sections(self, sections) - assert self[st_label].dims[0] == "x", "Stokes are transposed" - + assert self[st_label].dims[0] == "x", f"{st_label} are transposed" check_timestep_allclose(self, eps=0.01) # should maybe be per section. But then residuals @@ -897,25 +893,7 @@ def func_cost(p, data, xs): ) )[0] - resid_list = [] - - for k, v in data_dict.items(): - for vi in v: - nxs, nt = vi.shape - npar = nt + nxs - - p1 = np.ones(npar) * vi.mean() ** 0.5 - - res = minimize(func_cost, p1, args=(vi, nxs), method="Powell") - assert res.success, "Unable to fit. Try variance_stokes_exponential" - - fit = func_fit(res.x, nxs) - resid_list.append(fit - vi) - - resid = np.concatenate(resid_list) - - # unbiased estimater ddof=1, originally thought it was npar - var_I = resid.var(ddof=1) + var_I, resid = variance_stokes_constant_helper(data_dict) if not reshape_residuals: return var_I, resid @@ -1014,7 +992,12 @@ def variance_stokes_exponential( Parameters ---------- - reshape_residuals + suppress_info : bool, optional + Suppress print statements. + use_statsmodels : bool, optional + Use statsmodels to fit the exponential. If `False`, use scipy. + reshape_residuals : bool, optional + Reshape the residuals to the shape of the Stokes intensity st_label : str label of the Stokes, anti-Stokes measurement. E.g., st, ast, rst, rast @@ -1084,89 +1067,12 @@ def variance_stokes_exponential( x_list.append(da.tile(_x, nt)) len_stretch_list.append(_x.size) - n_sections = len(len_stretch_list) # number of sections - n_locs = sum(len_stretch_list) # total number of locations along cable used - # for reference. 
- x = np.concatenate(x_list) # coordinates are already in memory y = np.concatenate(y_list) - data1 = x - data2 = np.ones(sum(len_stretch_list) * nt) - data = np.concatenate([data1, data2]) - - # alpha is NOT the same for all -> one column per section - coords1row = np.arange(nt * n_locs) - coords1col = np.hstack( - [np.ones(in_locs * nt) * i for i, in_locs in enumerate(len_stretch_list)] - ) # C for - - # second calibration parameter is different per section and per timestep - coords2row = np.arange(nt * n_locs) - coords2col = np.hstack( - [ - np.repeat( - np.arange(i * nt + n_sections, (i + 1) * nt + n_sections), in_locs - ) - for i, in_locs in enumerate(len_stretch_list) - ] - ) # C for - coords = ( - np.concatenate([coords1row, coords2row]), - np.concatenate([coords1col, coords2col]), - ) - - lny = np.log(y) - w = y.copy() # 1/std. - - ddof = n_sections + nt * n_sections # see numpy documentation on ddof - - if use_statsmodels: - # returns the same answer with statsmodel - import statsmodels.api as sm - - X = sp.coo_matrix( - (data, coords), shape=(nt * n_locs, ddof), dtype=float, copy=False - ) - - mod_wls = sm.WLS(lny, X.toarray(), weights=w**2) - res_wls = mod_wls.fit() - # print(res_wls.summary()) - a = res_wls.params - - else: - wdata = data * np.hstack((w, w)) - wX = sp.coo_matrix( - (wdata, coords), - shape=(nt * n_locs, n_sections + nt * n_sections), - dtype=float, - copy=False, - ) - - wlny = lny * w - - p0_est = np.asarray(n_sections * [0.0] + nt * n_sections * [8]) - # noinspection PyTypeChecker - a = ln.lsqr(wX, wlny, x0=p0_est, show=not suppress_info, calc_var=False)[0] - - beta = a[:n_sections] - beta_expand_to_sec = np.hstack( - [ - np.repeat(float(beta[i]), leni * nt) - for i, leni in enumerate(len_stretch_list) - ] + var_I, resid = variance_stokes_exponential_helper( + nt, x, y, len_stretch_list, use_statsmodels, suppress_info ) - G = np.asarray(a[n_sections:]) - G_expand_to_sec = np.hstack( - [ - np.repeat(G[i * nt : (i + 1) * nt], leni) - for i, leni in enumerate(len_stretch_list) - ] - ) - - I_est = np.exp(G_expand_to_sec) * np.exp(x * beta_expand_to_sec) - resid = I_est - y - var_I = resid.var(ddof=1) if not reshape_residuals: return var_I, resid @@ -1324,60 +1230,25 @@ def variance_stokes_linear( sections = validate_sections(self, sections) assert self[st_label].dims[0] == "x", "Stokes are transposed" - _, resid = self.variance_stokes(sections=sections, st_label=st_label) + _, resid = self.variance_stokes( + sections=sections, st_label=st_label, reshape_residuals=False + ) ix_sec = self.ufunc_per_section( sections=sections, x_indices=True, calc_per="all" ) - st = self.isel(x=ix_sec)[st_label].values.ravel() - diff_st = resid.isel(x=ix_sec).values.ravel() - - # Adjust nbin silently to fit residuals in - # rectangular matrix and use numpy for computation - nbin_ = nbin - while st.size % nbin_: - nbin_ -= 1 - - if nbin_ != nbin: - print( - "Estimation of linear variance of", - st_label, - "Adjusting nbin to:", - nbin_, - ) - nbin = nbin_ - - isort = np.argsort(st) - st_sort_mean = st[isort].reshape((nbin, -1)).mean(axis=1) - st_sort_var = diff_st[isort].reshape((nbin, -1)).var(axis=1) - if through_zero: - # VAR(Stokes) = slope * Stokes - offset = 0.0 - slope = np.linalg.lstsq(st_sort_mean[:, None], st_sort_var, rcond=None)[0] + st = self[st_label].isel(x=ix_sec).values.ravel() + diff_st = resid.ravel() - else: - # VAR(Stokes) = slope * Stokes + offset - slope, offset = np.linalg.lstsq( - np.hstack((st_sort_mean[:, None], np.ones((nbin, 1)))), - st_sort_var, - 
rcond=None, - )[0] - - if offset < 0: - warnings.warn( - f"Warning! Offset of variance_stokes_linear() " - f"of {st_label} is negative. This is phisically " - f"not possible. Most likely, your {st_label} do " - f"not vary enough to fit a linear curve. Either " - f"use `through_zero` option or use " - f"`ds.variance_stokes_constant()`. Another reason " - f"could be that your sections are defined to be " - f"wider than they actually are." - ) - - def var_fun(stokes): - return slope * stokes + offset + ( + slope, + offset, + st_sort_mean, + st_sort_var, + resid, + var_fun, + ) = variance_stokes_linear_helper(st, diff_st, nbin, through_zero) if plot_fit: plt.figure() @@ -1458,80 +1329,6 @@ def i_var(self, st_var, ast_var, st_label="st", ast_label="ast"): return st**-2 * st_var + ast**-2 * ast_var - def inverse_variance_weighted_mean( - self, - tmp1="tmpf", - tmp2="tmpb", - tmp1_var="tmpf_mc_var", - tmp2_var="tmpb_mc_var", - tmpw_store="tmpw", - tmpw_var_store="tmpw_var", - ): - """ - Average two temperature datasets with the inverse of the variance as - weights. The two - temperature datasets `tmp1` and `tmp2` with their variances - `tmp1_var` and `tmp2_var`, - respectively. Are averaged and stored in the DataStore. - - Parameters - ---------- - tmp1 : str - The label of the first temperature dataset that is averaged - tmp2 : str - The label of the second temperature dataset that is averaged - tmp1_var : str - The variance of tmp1 - tmp2_var : str - The variance of tmp2 - tmpw_store : str - The label of the averaged temperature dataset - tmpw_var_store : str - The label of the variance of the averaged temperature dataset - - Returns - ------- - - """ - - self[tmpw_var_store] = 1 / (1 / self[tmp1_var] + 1 / self[tmp2_var]) - - self[tmpw_store] = ( - self[tmp1] / self[tmp1_var] + self[tmp2] / self[tmp2_var] - ) * self[tmpw_var_store] - - pass - - def inverse_variance_weighted_mean_array( - self, - tmp_label="tmpf", - tmp_var_label="tmpf_mc_var", - tmpw_store="tmpw", - tmpw_var_store="tmpw_var", - dim="time", - ): - """ - Calculates the weighted average across a dimension. - - Parameters - ---------- - - Returns - ------- - - See Also - -------- - - https://en.wikipedia.org/wiki/Inverse-variance_weighting - - """ - self[tmpw_var_store] = 1 / (1 / self[tmp_var_label]).sum(dim=dim) - - self[tmpw_store] = (self[tmp_label] / self[tmp_var_label]).sum(dim=dim) / ( - 1 / self[tmp_var_label] - ).sum(dim=dim) - - pass - def set_trans_att(self, trans_att=None): """Gracefully set the locations that introduce directional differential attenuation @@ -1700,6 +1497,9 @@ def calibration_single_ended( variance of the estimate of dalpha. Covariances between alpha and other parameters are not accounted for. + fix_alpha : Tuple[array-like, array-like], optional + A tuple containing two array-likes. The first array-like is the integrated + differential attenuation of length x, and the second item is its variance. Returns ------- @@ -1740,7 +1540,9 @@ def calibration_single_ended( else: matching_indices = None - ix_sec = self.ufunc_per_section(x_indices=True, calc_per="all") + ix_sec = self.ufunc_per_section( + sections=sections, x_indices=True, calc_per="all" + ) assert not np.any(self.st.isel(x=ix_sec) <= 0.0), ( "There is uncontrolled noise in the ST signal. Are your sections" "correctly defined?" 
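Stepping back to the variance_stokes_linear refactor above: a sketch of the binning-plus-least-squares fit that moved into variance_stokes_linear_helper, reconstructed from the removed inline code (synthetic data; the helper's exact internals are not shown in this diff):

import numpy as np

st = np.sort(np.random.default_rng(1).uniform(100.0, 4000.0, 3000))  # sorted Stokes intensities
resid = np.random.default_rng(2).normal(scale=np.sqrt(0.01 * st))  # noise that grows with intensity
nbin = 100
st_sort_mean = st.reshape((nbin, -1)).mean(axis=1)  # mean intensity per bin
st_sort_var = resid.reshape((nbin, -1)).var(axis=1)  # residual variance per bin
# fit VAR(Stokes) = slope * Stokes + offset
A = np.hstack((st_sort_mean[:, None], np.ones((nbin, 1))))
slope, offset = np.linalg.lstsq(A, st_sort_var, rcond=None)[0]  # slope close to 0.01, offset close to 0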
@@ -1753,6 +1555,7 @@ def calibration_single_ended( if method == "wls": p_cov, p_val, p_var = calibration_single_ended_helper( self, + sections, st_var, ast_var, fix_alpha, @@ -1790,127 +1593,76 @@ def calibration_single_ended( assert p_cov.shape == (ip.npar, ip.npar) # store calibration parameters in DataStore - self["gamma"] = (tuple(), p_val[ip.gamma].item()) - self["gamma_var"] = (tuple(), p_var[ip.gamma].item()) - - if nta > 0: - self["talpha_fw"] = ( - ("trans_att", "time"), - ip.get_taf_pars(p_val), - ) - self["talpha_fw_var"] = ( - ("trans_att", "time"), - ip.get_taf_pars(p_var), - ) - else: - self["talpha_fw"] = (("trans_att", "time"), np.zeros((0, nt))) - self["talpha_fw_var"] = (("trans_att", "time"), np.zeros((0, nt))) - - self["c"] = (("time",), p_val[ip.c]) - self["c_var"] = (("time",), p_var[ip.c]) - - if fix_alpha: - self["alpha"] = (("x",), fix_alpha[0]) - self["alpha_var"] = (("x",), fix_alpha[1]) - - else: - self["dalpha"] = (tuple(), p_val[ip.dalpha].item()) - self["dalpha_var"] = (tuple(), p_var[ip.dalpha].item()) - - self["alpha"] = self.dalpha * self.x - self["alpha_var"] = self["dalpha_var"] * self.x**2 - - self["talpha_fw_full"] = ( - ("x", "time"), - ip.get_taf_values( - pval=p_val, x=self.x.values, trans_att=self.trans_att.values, axis="" - ), - ) - self["talpha_fw_full_var"] = ( - ("x", "time"), - ip.get_taf_values( - pval=p_var, x=self.x.values, trans_att=self.trans_att.values, axis="" - ), + coords = {"x": self["x"], "time": self["time"], "trans_att": self["trans_att"]} + params, param_covs = get_params_from_pval_single_ended( + ip, coords, p_val=p_val, p_var=p_var, p_cov=p_cov, fix_alpha=fix_alpha ) - tmpf = self.gamma / ( - (np.log(self.st / self.ast) + (self.c + self["talpha_fw_full"])) - + self.alpha + tmpf = params["gamma"] / ( + (np.log(self.st / self.ast) + (params["c"] + params["talpha_fw_full"])) + + params["alpha"] ) - self["tmpf"] = tmpf - 273.15 + out = xr.Dataset({"tmpf": tmpf - 273.15}) + out["tmpf"].attrs.update(_dim_attrs[("tmpf",)]) # tmpf_var deriv_dict = dict( - T_gamma_fw=tmpf / self.gamma, - T_st_fw=-(tmpf**2) / (self.gamma * self.st), - T_ast_fw=tmpf**2 / (self.gamma * self.ast), - T_c_fw=-(tmpf**2) / self.gamma, - T_dalpha_fw=-self.x * (tmpf**2) / self.gamma, - T_alpha_fw=-(tmpf**2) / self.gamma, - T_ta_fw=-(tmpf**2) / self.gamma, + T_gamma_fw=tmpf / params["gamma"], + T_st_fw=-(tmpf**2) / (params["gamma"] * self.st), + T_ast_fw=tmpf**2 / (params["gamma"] * self.ast), + T_c_fw=-(tmpf**2) / params["gamma"], + T_dalpha_fw=-self.x * (tmpf**2) / params["gamma"], + T_alpha_fw=-(tmpf**2) / params["gamma"], + T_ta_fw=-(tmpf**2) / params["gamma"], ) deriv_ds = xr.Dataset(deriv_dict) - # self["deriv"] = deriv_ds.to_array(dim="com2") - - sigma2_gamma_c = p_cov[np.ix_(ip.gamma, ip.c)] - sigma2_tafw_gamma = ip.get_taf_values( - pval=p_cov[ip.gamma], - x=self.x.values, - trans_att=self.trans_att.values, - axis="", - ) - sigma2_tafw_c = ip.get_taf_values( - pval=p_cov[ip.c], - x=self.x.values, - trans_att=self.trans_att.values, - axis="time", - ) + var_fw_dict = dict( dT_dst=deriv_ds.T_st_fw**2 * parse_st_var(self, st_var, st_label="st"), dT_dast=deriv_ds.T_ast_fw**2 * parse_st_var(self, ast_var, st_label="ast"), - dT_gamma=deriv_ds.T_gamma_fw**2 * self["gamma_var"], - dT_dc=deriv_ds.T_c_fw**2 * self["c_var"], + dT_gamma=deriv_ds.T_gamma_fw**2 * param_covs["gamma"], + dT_dc=deriv_ds.T_c_fw**2 * param_covs["c"], dT_ddalpha=deriv_ds.T_alpha_fw**2 - * self["alpha_var"], # same as dT_dalpha - dT_dta=deriv_ds.T_ta_fw**2 * self["talpha_fw_full_var"], - 
dgamma_dc=(2 * deriv_ds.T_gamma_fw * deriv_ds.T_c_fw * sigma2_gamma_c), - dta_dgamma=(2 * deriv_ds.T_ta_fw * deriv_ds.T_gamma_fw * sigma2_tafw_gamma), - dta_dc=(2 * deriv_ds.T_ta_fw * deriv_ds.T_c_fw * sigma2_tafw_c), + * param_covs["alpha"], # same as dT_dalpha + dT_dta=deriv_ds.T_ta_fw**2 * param_covs["talpha_fw_full"], + dgamma_dc=( + 2 * deriv_ds.T_gamma_fw * deriv_ds.T_c_fw * param_covs["gamma_c"] + ), + dta_dgamma=( + 2 * deriv_ds.T_ta_fw * deriv_ds.T_gamma_fw * param_covs["tafw_gamma"] + ), + dta_dc=(2 * deriv_ds.T_ta_fw * deriv_ds.T_c_fw * param_covs["tafw_c"]), ) if not fix_alpha: # These correlations don't exist in case of fix_alpha. Including them reduces tmpf_var. - sigma2_gamma_dalpha = p_cov[np.ix_(ip.dalpha, ip.gamma)] - sigma2_dalpha_c = p_cov[np.ix_(ip.dalpha, ip.c)] - sigma2_tafw_dalpha = ip.get_taf_values( - pval=p_cov[ip.dalpha], - x=self.x.values, - trans_att=self.trans_att.values, - axis="", - ) var_fw_dict.update( dict( dgamma_ddalpha=( 2 * deriv_ds.T_gamma_fw * deriv_ds.T_dalpha_fw - * sigma2_gamma_dalpha + * param_covs["gamma_dalpha"] ), ddalpha_dc=( - 2 * deriv_ds.T_dalpha_fw * deriv_ds.T_c_fw * sigma2_dalpha_c + 2 + * deriv_ds.T_dalpha_fw + * deriv_ds.T_c_fw + * param_covs["dalpha_c"] ), dta_ddalpha=( - 2 * deriv_ds.T_ta_fw * deriv_ds.T_dalpha_fw * sigma2_tafw_dalpha + 2 + * deriv_ds.T_ta_fw + * deriv_ds.T_dalpha_fw + * param_covs["tafw_dalpha"] ), ) ) - self["var_fw_da"] = xr.Dataset(var_fw_dict).to_array(dim="comp_fw") - self["tmpf_var"] = self["var_fw_da"].sum(dim="comp_fw") - - self["tmpf"].attrs.update(_dim_attrs[("tmpf",)]) - self["tmpf_var"].attrs.update(_dim_attrs[("tmpf_var",)]) + out["var_fw_da"] = xr.Dataset(var_fw_dict).to_array(dim="comp_fw") + out["tmpf_var"] = out["var_fw_da"].sum(dim="comp_fw") + out["tmpf_var"].attrs.update(_dim_attrs[("tmpf_var",)]) drop_vars = [ k for k, v in self.items() if {"params1", "params2"}.intersection(v.dims) @@ -1919,9 +1671,14 @@ def calibration_single_ended( for k in drop_vars: del self[k] - self["p_val"] = (("params1",), p_val) - self["p_cov"] = (("params1", "params2"), p_cov) + out["p_val"] = (("params1",), p_val) + out["p_cov"] = (("params1", "params2"), p_cov) + out.update(params) + for key, dataarray in param_covs.data_vars.items(): + out[key + "_var"] = dataarray + + self.update(out) pass def calibration_double_ended( @@ -2158,7 +1915,9 @@ def calibration_double_ended( nx = self.x.size nt = self["time"].size nta = self.trans_att.size - ix_sec = self.ufunc_per_section(x_indices=True, calc_per="all") + ix_sec = self.ufunc_per_section( + sections=sections, x_indices=True, calc_per="all" + ) nx_sec = ix_sec.size assert self.st.dims[0] == "x", "Stokes are transposed" @@ -2204,6 +1963,7 @@ def calibration_double_ended( if method == "wls": p_cov, p_val, p_var = calibration_double_ended_helper( self, + sections, st_var, ast_var, rst_var, @@ -2238,215 +1998,126 @@ def calibration_double_ended( assert p_var.size == ip.npar assert p_cov.shape == (ip.npar, ip.npar) - # save estimates and variances to datastore, skip covariances - self["gamma"] = (tuple(), p_val[ip.gamma].item()) - self["alpha"] = (("x",), p_val[ip.alpha]) - self["df"] = (("time",), p_val[ip.df]) - self["db"] = (("time",), p_val[ip.db]) - - if nta: - self["talpha_fw"] = ( - ("time", "trans_att"), - p_val[ip.taf].reshape((nt, nta), order="C"), - ) - self["talpha_bw"] = ( - ("time", "trans_att"), - p_val[ip.tab].reshape((nt, nta), order="C"), - ) - self["talpha_fw_var"] = ( - ("time", "trans_att"), - p_var[ip.taf].reshape((nt, nta), order="C"), - ) - 
self["talpha_bw_var"] = ( - ("time", "trans_att"), - p_var[ip.tab].reshape((nt, nta), order="C"), - ) - else: - self["talpha_fw"] = (("time", "trans_att"), np.zeros((nt, 0))) - self["talpha_bw"] = (("time", "trans_att"), np.zeros((nt, 0))) - self["talpha_fw_var"] = (("time", "trans_att"), np.zeros((nt, 0))) - self["talpha_bw_var"] = (("time", "trans_att"), np.zeros((nt, 0))) - - self["talpha_fw_full"] = ( - ("x", "time"), - ip.get_taf_values( - pval=p_val, x=self.x.values, trans_att=self.trans_att.values, axis="" - ), - ) - self["talpha_bw_full"] = ( - ("x", "time"), - ip.get_tab_values( - pval=p_val, x=self.x.values, trans_att=self.trans_att.values, axis="" - ), - ) - - self["tmpf"] = ( - self.gamma - / ( - np.log(self.st / self.ast) - + self["df"] - + self.alpha - + self["talpha_fw_full"] - ) - - 273.15 - ) - - self["tmpb"] = ( - self.gamma - / ( - np.log(self.rst / self.rast) - + self["db"] - - self.alpha - + self["talpha_bw_full"] - ) - - 273.15 - ) - - self["gamma_var"] = (tuple(), p_var[ip.gamma].item()) - self["alpha_var"] = (("x",), p_var[ip.alpha]) - self["df_var"] = (("time",), p_var[ip.df]) - self["db_var"] = (("time",), p_var[ip.db]) - self["talpha_fw_full_var"] = ( - ("x", "time"), - ip.get_taf_values( - pval=p_var, x=self.x.values, trans_att=self.trans_att.values, axis="" - ), - ) - self["talpha_bw_full_var"] = ( - ("x", "time"), - ip.get_tab_values( - pval=p_var, x=self.x.values, trans_att=self.trans_att.values, axis="" - ), + coords = {"x": self["x"], "time": self["time"], "trans_att": self["trans_att"]} + params = get_params_from_pval_double_ended(ip, coords, p_val=p_val) + param_covs = get_params_from_pval_double_ended( + ip, coords, p_val=p_var, p_cov=p_cov ) - # extract covariances and ensure broadcastable to (nx, nt) - sigma2_gamma_df = p_cov[np.ix_(ip.gamma, ip.df)] - sigma2_gamma_db = p_cov[np.ix_(ip.gamma, ip.db)] - sigma2_gamma_alpha = p_cov[np.ix_(ip.alpha, ip.gamma)] - sigma2_df_db = p_cov[ip.df, ip.db][None] # Shape is [0, nt] ? 
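# The covariances gathered here (and, after this patch, inside
# get_params_from_pval_double_ended) all feed the same first-order
# (delta-method) error propagation. A sketch: for temperature T(p) with
# parameter covariance matrix S,
#     var(T) ≈ sum_i (dT/dp_i)**2 * S[i, i]
#            + sum_(i != j) (dT/dp_i) * (dT/dp_j) * S[i, j]
# The dT_* entries of var_fw_dict/var_bw_dict below are the diagonal terms;
# the d*_d* entries are the pairwise cross terms (hence their factor 2).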
- sigma2_alpha_df = p_cov[np.ix_(ip.alpha, ip.df)] - sigma2_alpha_db = p_cov[np.ix_(ip.alpha, ip.db)] - sigma2_tafw_gamma = ip.get_taf_values( - pval=p_cov[ip.gamma], - x=self.x.values, - trans_att=self.trans_att.values, - axis="", - ) - sigma2_tabw_gamma = ip.get_tab_values( - pval=p_cov[ip.gamma], - x=self.x.values, - trans_att=self.trans_att.values, - axis="", - ) - sigma2_tafw_alpha = ip.get_taf_values( - pval=p_cov[ip.alpha], - x=self.x.values, - trans_att=self.trans_att.values, - axis="x", - ) - sigma2_tabw_alpha = ip.get_tab_values( - pval=p_cov[ip.alpha], - x=self.x.values, - trans_att=self.trans_att.values, - axis="x", - ) - sigma2_tafw_df = ip.get_taf_values( - pval=p_cov[ip.df], - x=self.x.values, - trans_att=self.trans_att.values, - axis="time", - ) - sigma2_tafw_db = ip.get_taf_values( - pval=p_cov[ip.db], - x=self.x.values, - trans_att=self.trans_att.values, - axis="time", - ) - sigma2_tabw_db = ip.get_tab_values( - pval=p_cov[ip.db], - x=self.x.values, - trans_att=self.trans_att.values, - axis="time", - ) - sigma2_tabw_df = ip.get_tab_values( - pval=p_cov[ip.df], - x=self.x.values, - trans_att=self.trans_att.values, - axis="time", + out = xr.Dataset( + { + "tmpf": params["gamma"] + / ( + np.log(self.st / self.ast) + + params["df"] + + params["alpha"] + + params["talpha_fw_full"] + ) + - 273.15, + "tmpb": params["gamma"] + / ( + np.log(self.rst / self.rast) + + params["db"] + - params["alpha"] + + params["talpha_bw_full"] + ) + - 273.15, + } ) - # sigma2_tafw_tabw - tmpf = self["tmpf"] + 273.15 - tmpb = self["tmpb"] + 273.15 + tmpf = out["tmpf"] + 273.15 + tmpb = out["tmpb"] + 273.15 deriv_dict = dict( - T_gamma_fw=tmpf / self.gamma, - T_st_fw=-(tmpf**2) / (self.gamma * self.st), - T_ast_fw=tmpf**2 / (self.gamma * self.ast), - T_df_fw=-(tmpf**2) / self.gamma, - T_alpha_fw=-(tmpf**2) / self.gamma, - T_ta_fw=-(tmpf**2) / self.gamma, - T_gamma_bw=tmpb / self.gamma, - T_rst_bw=-(tmpb**2) / (self.gamma * self.rst), - T_rast_bw=tmpb**2 / (self.gamma * self.rast), - T_db_bw=-(tmpb**2) / self.gamma, - T_alpha_bw=tmpb**2 / self.gamma, - T_ta_bw=-(tmpb**2) / self.gamma, + T_gamma_fw=tmpf / params["gamma"], + T_st_fw=-(tmpf**2) / (params["gamma"] * self.st), + T_ast_fw=tmpf**2 / (params["gamma"] * self.ast), + T_df_fw=-(tmpf**2) / params["gamma"], + T_alpha_fw=-(tmpf**2) / params["gamma"], + T_ta_fw=-(tmpf**2) / params["gamma"], + T_gamma_bw=tmpb / params["gamma"], + T_rst_bw=-(tmpb**2) / (params["gamma"] * self.rst), + T_rast_bw=tmpb**2 / (params["gamma"] * self.rast), + T_db_bw=-(tmpb**2) / params["gamma"], + T_alpha_bw=tmpb**2 / params["gamma"], + T_ta_bw=-(tmpb**2) / params["gamma"], ) deriv_ds = xr.Dataset(deriv_dict) - self["deriv"] = deriv_ds.to_array(dim="com2") + out["deriv"] = deriv_ds.to_array(dim="com2") var_fw_dict = dict( dT_dst=deriv_ds.T_st_fw**2 * parse_st_var(self, st_var, st_label="st"), dT_dast=deriv_ds.T_ast_fw**2 * parse_st_var(self, ast_var, st_label="ast"), - dT_gamma=deriv_ds.T_gamma_fw**2 * self["gamma_var"], - dT_ddf=deriv_ds.T_df_fw**2 * self["df_var"], - dT_dalpha=deriv_ds.T_alpha_fw**2 * self["alpha_var"], - dT_dta=deriv_ds.T_ta_fw**2 * self["talpha_fw_full_var"], - dgamma_ddf=(2 * deriv_ds.T_gamma_fw * deriv_ds.T_df_fw * sigma2_gamma_df), + dT_gamma=deriv_ds.T_gamma_fw**2 * param_covs["gamma"], + dT_ddf=deriv_ds.T_df_fw**2 * param_covs["df"], + dT_dalpha=deriv_ds.T_alpha_fw**2 * param_covs["alpha"], + dT_dta=deriv_ds.T_ta_fw**2 * param_covs["talpha_fw_full"], + dgamma_ddf=( + 2 * deriv_ds.T_gamma_fw * deriv_ds.T_df_fw * param_covs["gamma_df"] + ), 
dgamma_dalpha=( - 2 * deriv_ds.T_gamma_fw * deriv_ds.T_alpha_fw * sigma2_gamma_alpha + 2 + * deriv_ds.T_gamma_fw + * deriv_ds.T_alpha_fw + * param_covs["gamma_alpha"] + ), + dalpha_ddf=( + 2 * deriv_ds.T_alpha_fw * deriv_ds.T_df_fw * param_covs["alpha_df"] + ), + dta_dgamma=( + 2 * deriv_ds.T_ta_fw * deriv_ds.T_gamma_fw * param_covs["tafw_gamma"] + ), + dta_ddf=(2 * deriv_ds.T_ta_fw * deriv_ds.T_df_fw * param_covs["tafw_df"]), + dta_dalpha=( + 2 * deriv_ds.T_ta_fw * deriv_ds.T_alpha_fw * param_covs["tafw_alpha"] ), - dalpha_ddf=(2 * deriv_ds.T_alpha_fw * deriv_ds.T_df_fw * sigma2_alpha_df), - dta_dgamma=(2 * deriv_ds.T_ta_fw * deriv_ds.T_gamma_fw * sigma2_tafw_gamma), - dta_ddf=(2 * deriv_ds.T_ta_fw * deriv_ds.T_df_fw * sigma2_tafw_df), - dta_dalpha=(2 * deriv_ds.T_ta_fw * deriv_ds.T_alpha_fw * sigma2_tafw_alpha), ) var_bw_dict = dict( dT_drst=deriv_ds.T_rst_bw**2 * parse_st_var(self, rst_var, st_label="rst"), dT_drast=deriv_ds.T_rast_bw**2 * parse_st_var(self, rast_var, st_label="rast"), - dT_gamma=deriv_ds.T_gamma_bw**2 * self["gamma_var"], - dT_ddb=deriv_ds.T_db_bw**2 * self["db_var"], - dT_dalpha=deriv_ds.T_alpha_bw**2 * self["alpha_var"], - dT_dta=deriv_ds.T_ta_bw**2 * self["talpha_bw_full_var"], - dgamma_ddb=(2 * deriv_ds.T_gamma_bw * deriv_ds.T_db_bw * sigma2_gamma_db), + dT_gamma=deriv_ds.T_gamma_bw**2 * param_covs["gamma"], + dT_ddb=deriv_ds.T_db_bw**2 * param_covs["db"], + dT_dalpha=deriv_ds.T_alpha_bw**2 * param_covs["alpha"], + dT_dta=deriv_ds.T_ta_bw**2 * param_covs["talpha_bw_full"], + dgamma_ddb=( + 2 * deriv_ds.T_gamma_bw * deriv_ds.T_db_bw * param_covs["gamma_db"] + ), dgamma_dalpha=( - 2 * deriv_ds.T_gamma_bw * deriv_ds.T_alpha_bw * sigma2_gamma_alpha + 2 + * deriv_ds.T_gamma_bw + * deriv_ds.T_alpha_bw + * param_covs["gamma_alpha"] + ), + dalpha_ddb=( + 2 * deriv_ds.T_alpha_bw * deriv_ds.T_db_bw * param_covs["alpha_db"] + ), + dta_dgamma=( + 2 * deriv_ds.T_ta_bw * deriv_ds.T_gamma_bw * param_covs["tabw_gamma"] + ), + dta_ddb=(2 * deriv_ds.T_ta_bw * deriv_ds.T_db_bw * param_covs["tabw_db"]), + dta_dalpha=( + 2 * deriv_ds.T_ta_bw * deriv_ds.T_alpha_bw * param_covs["tabw_alpha"] ), - dalpha_ddb=(2 * deriv_ds.T_alpha_bw * deriv_ds.T_db_bw * sigma2_alpha_db), - dta_dgamma=(2 * deriv_ds.T_ta_bw * deriv_ds.T_gamma_bw * sigma2_tabw_gamma), - dta_ddb=(2 * deriv_ds.T_ta_bw * deriv_ds.T_db_bw * sigma2_tabw_db), - dta_dalpha=(2 * deriv_ds.T_ta_bw * deriv_ds.T_alpha_bw * sigma2_tabw_alpha), ) - self["var_fw_da"] = xr.Dataset(var_fw_dict).to_array(dim="comp_fw") - self["var_bw_da"] = xr.Dataset(var_bw_dict).to_array(dim="comp_bw") + out["var_fw_da"] = xr.Dataset(var_fw_dict).to_array(dim="comp_fw") + out["var_bw_da"] = xr.Dataset(var_bw_dict).to_array(dim="comp_bw") - self["tmpf_var"] = self["var_fw_da"].sum(dim="comp_fw") - self["tmpb_var"] = self["var_bw_da"].sum(dim="comp_bw") + out["tmpf_var"] = out["var_fw_da"].sum(dim="comp_fw") + out["tmpb_var"] = out["var_bw_da"].sum(dim="comp_bw") # First estimate of tmpw_var - self["tmpw_var" + "_approx"] = 1 / (1 / self["tmpf_var"] + 1 / self["tmpb_var"]) - self["tmpw"] = ( - (tmpf / self["tmpf_var"] + tmpb / self["tmpb_var"]) - * self["tmpw_var" + "_approx"] + out["tmpw_var" + "_approx"] = 1 / (1 / out["tmpf_var"] + 1 / out["tmpb_var"]) + out["tmpw"] = ( + (tmpf / out["tmpf_var"] + tmpb / out["tmpb_var"]) + * out["tmpw_var" + "_approx"] ) - 273.15 - weightsf = self["tmpw_var" + "_approx"] / self["tmpf_var"] - weightsb = self["tmpw_var" + "_approx"] / self["tmpb_var"] + weightsf = out["tmpw_var" + "_approx"] / out["tmpf_var"] + weightsb = 
out["tmpw_var" + "_approx"] / out["tmpb_var"] deriv_dict2 = dict( T_gamma_w=weightsf * deriv_dict["T_gamma_fw"] @@ -2473,61 +2144,84 @@ def calibration_double_ended( * parse_st_var(self, rst_var, st_label="rst"), dT_drast=deriv_ds2.T_rast_w**2 * parse_st_var(self, rast_var, st_label="rast"), - dT_gamma=deriv_ds2.T_gamma_w**2 * self["gamma_var"], - dT_ddf=deriv_ds2.T_df_w**2 * self["df_var"], - dT_ddb=deriv_ds2.T_db_w**2 * self["db_var"], - dT_dalpha=deriv_ds2.T_alpha_w**2 * self["alpha_var"], - dT_dtaf=deriv_ds2.T_taf_w**2 * self["talpha_fw_full_var"], - dT_dtab=deriv_ds2.T_tab_w**2 * self["talpha_bw_full_var"], - dgamma_ddf=2 * deriv_ds2.T_gamma_w * deriv_ds2.T_df_w * sigma2_gamma_df, - dgamma_ddb=2 * deriv_ds2.T_gamma_w * deriv_ds2.T_db_w * sigma2_gamma_db, + dT_gamma=deriv_ds2.T_gamma_w**2 * param_covs["gamma"], + dT_ddf=deriv_ds2.T_df_w**2 * param_covs["df"], + dT_ddb=deriv_ds2.T_db_w**2 * param_covs["db"], + dT_dalpha=deriv_ds2.T_alpha_w**2 * param_covs["alpha"], + dT_dtaf=deriv_ds2.T_taf_w**2 * param_covs["talpha_fw_full"], + dT_dtab=deriv_ds2.T_tab_w**2 * param_covs["talpha_bw_full"], + dgamma_ddf=2 + * deriv_ds2.T_gamma_w + * deriv_ds2.T_df_w + * param_covs["gamma_df"], + dgamma_ddb=2 + * deriv_ds2.T_gamma_w + * deriv_ds2.T_db_w + * param_covs["gamma_db"], dgamma_dalpha=2 * deriv_ds2.T_gamma_w * deriv_ds2.T_alpha_w - * sigma2_gamma_alpha, - dgamma_dtaf=2 * deriv_ds2.T_gamma_w * deriv_ds2.T_taf_w * sigma2_tafw_gamma, - dgamma_dtab=2 * deriv_ds2.T_gamma_w * deriv_ds2.T_tab_w * sigma2_tabw_gamma, - ddf_ddb=2 * deriv_ds2.T_df_w * deriv_ds2.T_db_w * sigma2_df_db, - ddf_dalpha=2 * deriv_ds2.T_df_w * deriv_ds2.T_alpha_w * sigma2_alpha_df, - ddf_dtaf=2 * deriv_ds2.T_df_w * deriv_ds2.T_taf_w * sigma2_tafw_df, - ddf_dtab=2 * deriv_ds2.T_df_w * deriv_ds2.T_tab_w * sigma2_tabw_df, - ddb_dalpha=2 * deriv_ds2.T_db_w * deriv_ds2.T_alpha_w * sigma2_alpha_db, - ddb_dtaf=2 * deriv_ds2.T_db_w * deriv_ds2.T_taf_w * sigma2_tafw_db, - ddb_dtab=2 * deriv_ds2.T_db_w * deriv_ds2.T_tab_w * sigma2_tabw_db, - # dtaf_dtab=2 * deriv_ds2.T_tab_w * deriv_ds2.T_tab_w * sigma2_tafw_tabw, + * param_covs["gamma_alpha"], + dgamma_dtaf=2 + * deriv_ds2.T_gamma_w + * deriv_ds2.T_taf_w + * param_covs["tafw_gamma"], + dgamma_dtab=2 + * deriv_ds2.T_gamma_w + * deriv_ds2.T_tab_w + * param_covs["tabw_gamma"], + ddf_ddb=2 * deriv_ds2.T_df_w * deriv_ds2.T_db_w * param_covs["df_db"], + ddf_dalpha=2 + * deriv_ds2.T_df_w + * deriv_ds2.T_alpha_w + * param_covs["alpha_df"], + ddf_dtaf=2 * deriv_ds2.T_df_w * deriv_ds2.T_taf_w * param_covs["tafw_df"], + ddf_dtab=2 * deriv_ds2.T_df_w * deriv_ds2.T_tab_w * param_covs["tabw_df"], + ddb_dalpha=2 + * deriv_ds2.T_db_w + * deriv_ds2.T_alpha_w + * param_covs["alpha_db"], + ddb_dtaf=2 * deriv_ds2.T_db_w * deriv_ds2.T_taf_w * param_covs["tafw_db"], + ddb_dtab=2 * deriv_ds2.T_db_w * deriv_ds2.T_tab_w * param_covs["tabw_db"], + # dtaf_dtab=2 * deriv_ds2.T_tab_w * deriv_ds2.T_tab_w * param_covs["tafw_tabw"], ) - self["var_w_da"] = xr.Dataset(var_w_dict).to_array(dim="comp_w") - self["tmpw_var"] = self["var_w_da"].sum(dim="comp_w") + out["var_w_da"] = xr.Dataset(var_w_dict).to_array(dim="comp_w") + out["tmpw_var"] = out["var_w_da"].sum(dim="comp_w") + # Compute uncertainty solely due to noise in Stokes signal, neglecting parameter uncertainty tmpf_var_excl_par = ( - self["var_fw_da"].sel(comp_fw=["dT_dst", "dT_dast"]).sum(dim="comp_fw") + out["var_fw_da"].sel(comp_fw=["dT_dst", "dT_dast"]).sum(dim="comp_fw") ) tmpb_var_excl_par = ( - self["var_bw_da"].sel(comp_bw=["dT_drst",
"dT_drast"]).sum(dim="comp_bw") - ) - self["tmpw_var" + "_lower"] = 1 / ( - 1 / tmpf_var_excl_par + 1 / tmpb_var_excl_par + out["var_bw_da"].sel(comp_bw=["dT_drst", "dT_drast"]).sum(dim="comp_bw") ) + out["tmpw_var" + "_lower"] = 1 / (1 / tmpf_var_excl_par + 1 / tmpb_var_excl_par) - self["tmpf"].attrs.update(_dim_attrs[("tmpf",)]) - self["tmpb"].attrs.update(_dim_attrs[("tmpb",)]) - self["tmpw"].attrs.update(_dim_attrs[("tmpw",)]) - self["tmpf_var"].attrs.update(_dim_attrs[("tmpf_var",)]) - self["tmpb_var"].attrs.update(_dim_attrs[("tmpb_var",)]) - self["tmpw_var"].attrs.update(_dim_attrs[("tmpw_var",)]) - self["tmpw_var" + "_approx"].attrs.update(_dim_attrs[("tmpw_var_approx",)]) - self["tmpw_var" + "_lower"].attrs.update(_dim_attrs[("tmpw_var_lower",)]) + out["tmpf"].attrs.update(_dim_attrs[("tmpf",)]) + out["tmpb"].attrs.update(_dim_attrs[("tmpb",)]) + out["tmpw"].attrs.update(_dim_attrs[("tmpw",)]) + out["tmpf_var"].attrs.update(_dim_attrs[("tmpf_var",)]) + out["tmpb_var"].attrs.update(_dim_attrs[("tmpb_var",)]) + out["tmpw_var"].attrs.update(_dim_attrs[("tmpw_var",)]) + out["tmpw_var" + "_approx"].attrs.update(_dim_attrs[("tmpw_var_approx",)]) + out["tmpw_var" + "_lower"].attrs.update(_dim_attrs[("tmpw_var_lower",)]) drop_vars = [ k for k, v in self.items() if {"params1", "params2"}.intersection(v.dims) ] for k in drop_vars: + print(f"removing {k}") del self[k] - self["p_val"] = (("params1",), p_val) - self["p_cov"] = (("params1", "params2"), p_cov) + out["p_val"] = (("params1",), p_val) + out["p_cov"] = (("params1", "params2"), p_cov) + + out.update(params) + for key, dataarray in param_covs.data_vars.items(): + out[key + "_var"] = dataarray + self.update(out) pass def average_single_ended( @@ -2679,7 +2373,9 @@ def average_single_ended( if var_only_sections is not None: raise NotImplementedError() - self.conf_int_single_ended( + out = xr.Dataset() + + mcparams = self.conf_int_single_ended( p_val=p_val, p_cov=p_cov, st_var=st_var, @@ -2691,85 +2387,86 @@ def average_single_ended( reduce_memory_usage=reduce_memory_usage, **kwargs, ) + mcparams["tmpf"] = self["tmpf"] if ci_avg_time_sel is not None: time_dim2 = "time" + "_avg" x_dim2 = "x" - self.coords[time_dim2] = ( + mcparams.coords[time_dim2] = ( (time_dim2,), - self["time"].sel(**{"time": ci_avg_time_sel}).data, + mcparams["time"].sel(**{"time": ci_avg_time_sel}).data, ) - self["tmpf_avgsec"] = ( + mcparams["tmpf_avgsec"] = ( ("x", time_dim2), - self["tmpf"].sel(**{"time": ci_avg_time_sel}).data, + mcparams["tmpf"].sel(**{"time": ci_avg_time_sel}).data, ) - self["tmpf_mc_set"] = ( + mcparams["tmpf_mc_set"] = ( ("mc", "x", time_dim2), - self["tmpf" + "_mc_set"].sel(**{"time": ci_avg_time_sel}).data, + mcparams["tmpf" + "_mc_set"].sel(**{"time": ci_avg_time_sel}).data, ) elif ci_avg_time_isel is not None: time_dim2 = "time" + "_avg" x_dim2 = "x" - self.coords[time_dim2] = ( + mcparams.coords[time_dim2] = ( (time_dim2,), - self["time"].isel(**{"time": ci_avg_time_isel}).data, + mcparams["time"].isel(**{"time": ci_avg_time_isel}).data, ) - self["tmpf_avgsec"] = ( + mcparams["tmpf_avgsec"] = ( ("x", time_dim2), - self["tmpf"].isel(**{"time": ci_avg_time_isel}).data, + mcparams["tmpf"].isel(**{"time": ci_avg_time_isel}).data, ) - self["tmpf_mc_set"] = ( + mcparams["tmpf_mc_set"] = ( ("mc", "x", time_dim2), - self["tmpf" + "_mc_set"].isel(**{"time": ci_avg_time_isel}).data, + mcparams["tmpf" + "_mc_set"].isel(**{"time": ci_avg_time_isel}).data, ) elif ci_avg_x_sel is not None: time_dim2 = "time" x_dim2 = "x_avg" - self.coords[x_dim2] = 
((x_dim2,), self.x.sel(x=ci_avg_x_sel).data) - self["tmpf_avgsec"] = ( + mcparams.coords[x_dim2] = ((x_dim2,), mcparams.x.sel(x=ci_avg_x_sel).data) + mcparams["tmpf_avgsec"] = ( (x_dim2, "time"), - self["tmpf"].sel(x=ci_avg_x_sel).data, + mcparams["tmpf"].sel(x=ci_avg_x_sel).data, ) - self["tmpf_mc_set"] = ( + mcparams["tmpf_mc_set"] = ( ("mc", x_dim2, "time"), - self["tmpf_mc_set"].sel(x=ci_avg_x_sel).data, + mcparams["tmpf_mc_set"].sel(x=ci_avg_x_sel).data, ) elif ci_avg_x_isel is not None: time_dim2 = "time" x_dim2 = "x_avg" - self.coords[x_dim2] = ((x_dim2,), self.x.isel(x=ci_avg_x_isel).data) - self["tmpf_avgsec"] = ( + mcparams.coords[x_dim2] = ((x_dim2,), mcparams.x.isel(x=ci_avg_x_isel).data) + mcparams["tmpf_avgsec"] = ( (x_dim2, time_dim2), - self["tmpf"].isel(x=ci_avg_x_isel).data, + mcparams["tmpf"].isel(x=ci_avg_x_isel).data, ) - self["tmpf_mc_set"] = ( + mcparams["tmpf_mc_set"] = ( ("mc", x_dim2, time_dim2), - self["tmpf_mc_set"].isel(x=ci_avg_x_isel).data, + mcparams["tmpf_mc_set"].isel(x=ci_avg_x_isel).data, ) else: - self["tmpf_avgsec"] = self["tmpf"] + mcparams["tmpf_avgsec"] = mcparams["tmpf"] x_dim2 = "x" time_dim2 = "time" # subtract the mean temperature - q = self["tmpf_mc_set"] - self["tmpf_avgsec"] - self["tmpf_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) + q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] + out["tmpf_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) if ci_avg_x_flag1: # unweighted mean - self["tmpf_avgx1"] = self["tmpf" + "_avgsec"].mean(dim=x_dim2) + out["tmpf_avgx1"] = mcparams["tmpf" + "_avgsec"].mean(dim=x_dim2) - q = self["tmpf_mc_set"] - self["tmpf_avgsec"] + q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] qvar = q.var(dim=["mc", x_dim2], ddof=1) - self["tmpf_mc_avgx1_var"] = qvar + out["tmpf_mc_avgx1_var"] = qvar if conf_ints: - new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[2]) - avg_axis = self["tmpf_mc_set"].get_axis_num(["mc", x_dim2]) - q = self["tmpf_mc_set"].data.map_blocks( + new_chunks = (len(conf_ints), mcparams["tmpf_mc_set"].chunks[2]) + avg_axis = mcparams["tmpf_mc_set"].get_axis_num(["mc", x_dim2]) + q = mcparams["tmpf_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks, # drop_axis=avg_axis, @@ -2777,28 +2474,28 @@ def average_single_ended( new_axis=0, ) # The new CI dim is added as firsaxis - self["tmpf_mc_avgx1"] = (("CI", time_dim2), q) + out["tmpf_mc_avgx1"] = (("CI", time_dim2), q) if ci_avg_x_flag2: - q = self["tmpf_mc_set"] - self["tmpf_avgsec"] + q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] qvar = q.var(dim=["mc"], ddof=1) # Inverse-variance weighting avg_x_var = 1 / (1 / qvar).sum(dim=x_dim2) - self["tmpf_mc_avgx2_var"] = avg_x_var + out["tmpf_mc_avgx2_var"] = avg_x_var - self["tmpf" + "_mc_avgx2_set"] = (self["tmpf_mc_set"] / qvar).sum( + mcparams["tmpf" + "_mc_avgx2_set"] = (mcparams["tmpf_mc_set"] / qvar).sum( dim=x_dim2 ) * avg_x_var - self["tmpf" + "_avgx2"] = self["tmpf" + "_mc_avgx2_set"].mean(dim="mc") + out["tmpf" + "_avgx2"] = mcparams["tmpf" + "_mc_avgx2_set"].mean(dim="mc") if conf_ints: - new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[2]) - avg_axis_avgx = self["tmpf_mc_set"].get_axis_num("mc") + new_chunks = (len(conf_ints), mcparams["tmpf_mc_set"].chunks[2]) + avg_axis_avgx = mcparams["tmpf_mc_set"].get_axis_num("mc") - qq = self["tmpf_mc_avgx2_set"].data.map_blocks( + qq = mcparams["tmpf_mc_avgx2_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx), chunks=new_chunks, # 
drop_axis=avg_axis_avgx, @@ -2807,20 +2504,20 @@ def average_single_ended( dtype=float, ) # The new CI dimension is added as # firsaxis - self["tmpf_mc_avgx2"] = (("CI", time_dim2), qq) + out["tmpf_mc_avgx2"] = (("CI", time_dim2), qq) if ci_avg_time_flag1 is not None: # unweighted mean - self["tmpf_avg1"] = self["tmpf" + "_avgsec"].mean(dim=time_dim2) + out["tmpf_avg1"] = mcparams["tmpf_avgsec"].mean(dim=time_dim2) - q = self["tmpf_mc_set"] - self["tmpf_avgsec"] + q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] qvar = q.var(dim=["mc", time_dim2], ddof=1) - self["tmpf_mc_avg1_var"] = qvar + out["tmpf_mc_avg1_var"] = qvar if conf_ints: - new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[1]) - avg_axis = self["tmpf_mc_set"].get_axis_num(["mc", time_dim2]) - q = self["tmpf_mc_set"].data.map_blocks( + new_chunks = (len(conf_ints), mcparams["tmpf_mc_set"].chunks[1]) + avg_axis = mcparams["tmpf_mc_set"].get_axis_num(["mc", time_dim2]) + q = mcparams["tmpf_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks, # drop_axis=avg_axis, @@ -2828,28 +2525,28 @@ def average_single_ended( new_axis=0, ) # The new CI dim is added as firsaxis - self["tmpf_mc_avg1"] = (("CI", x_dim2), q) + out["tmpf_mc_avg1"] = (("CI", x_dim2), q) if ci_avg_time_flag2: - q = self["tmpf_mc_set"] - self["tmpf_avgsec"] + q = mcparams["tmpf_mc_set"] - mcparams["tmpf_avgsec"] qvar = q.var(dim=["mc"], ddof=1) # Inverse-variance weighting avg_time_var = 1 / (1 / qvar).sum(dim=time_dim2) - self["tmpf_mc_avg2_var"] = avg_time_var + out["tmpf_mc_avg2_var"] = avg_time_var - self["tmpf" + "_mc_avg2_set"] = (self["tmpf_mc_set"] / qvar).sum( + mcparams["tmpf" + "_mc_avg2_set"] = (mcparams["tmpf_mc_set"] / qvar).sum( dim=time_dim2 ) * avg_time_var - self["tmpf_avg2"] = self["tmpf" + "_mc_avg2_set"].mean(dim="mc") + out["tmpf_avg2"] = mcparams["tmpf" + "_mc_avg2_set"].mean(dim="mc") if conf_ints: - new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[1]) - avg_axis_avg2 = self["tmpf_mc_set"].get_axis_num("mc") + new_chunks = (len(conf_ints), mcparams["tmpf_mc_set"].chunks[1]) + avg_axis_avg2 = mcparams["tmpf_mc_set"].get_axis_num("mc") - qq = self["tmpf_mc_avg2_set"].data.map_blocks( + qq = mcparams["tmpf_mc_avg2_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), chunks=new_chunks, # drop_axis=avg_axis_avg2, @@ -2858,7 +2555,8 @@ def average_single_ended( dtype=float, ) # The new CI dimension is added as # firsaxis - self["tmpf_mc_avg2"] = (("CI", x_dim2), qq) + out["tmpf_mc_avg2"] = (("CI", x_dim2), qq) + # Clean up the garbage. All arrays with a Monte Carlo dimension. 
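# When mc_remove_set_flag is set, every helper variable that still carries
# the Monte Carlo realisation dimension "mc" (e.g. tmpf_mc_set) is dropped
# below, so only the aggregated statistics survive: the averages, their
# variances and, if requested, the confidence-interval quantiles. Passing
# mc_remove_set_flag=False keeps the full set of realisations for inspection.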
if mc_remove_set_flag: remove_mc_set = [ @@ -2879,12 +2577,15 @@ def average_single_ended( remove_mc_set.append("tmpf_mc_avgsec_var") for k in remove_mc_set: - if k in self: - del self[k] + if k in out: + del out[k] + + self.update(out) pass def average_double_ended( self, + sections=None, p_val="p_val", p_cov="p_cov", st_var=None, @@ -3028,9 +2729,7 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): if direction == "fw": arr = da.concatenate( ( - da.zeros( - (1, i_splice, 1), chunks=((1, i_splice, 1)), dtype=bool - ), + da.zeros((1, i_splice, 1), chunks=(1, i_splice, 1), dtype=bool), da.ones( (1, no - i_splice, 1), chunks=(1, no - i_splice, 1), @@ -3045,7 +2744,7 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): da.ones((1, i_splice, 1), chunks=(1, i_splice, 1), dtype=bool), da.zeros( (1, no - i_splice, 1), - chunks=((1, no - i_splice, 1)), + chunks=(1, no - i_splice, 1), dtype=bool, ), ), @@ -3070,7 +2769,10 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): else: pass - self.conf_int_double_ended( + out = xr.Dataset() + + mcparams = self.conf_int_double_ended( + sections=sections, p_val=p_val, p_cov=p_cov, st_var=st_var, @@ -3089,83 +2791,89 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): if ci_avg_time_sel is not None: time_dim2 = "time" + "_avg" x_dim2 = "x" - self.coords[time_dim2] = ( + mcparams.coords[time_dim2] = ( (time_dim2,), - self["time"].sel(**{"time": ci_avg_time_sel}).data, + mcparams["time"].sel(**{"time": ci_avg_time_sel}).data, ) - self[label + "_avgsec"] = ( + mcparams[label + "_avgsec"] = ( ("x", time_dim2), self[label].sel(**{"time": ci_avg_time_sel}).data, ) - self[label + "_mc_set"] = ( + mcparams[label + "_mc_set"] = ( ("mc", "x", time_dim2), - self[label + "_mc_set"].sel(**{"time": ci_avg_time_sel}).data, + mcparams[label + "_mc_set"].sel(**{"time": ci_avg_time_sel}).data, ) elif ci_avg_time_isel is not None: time_dim2 = "time" + "_avg" x_dim2 = "x" - self.coords[time_dim2] = ( + mcparams.coords[time_dim2] = ( (time_dim2,), - self["time"].isel(**{"time": ci_avg_time_isel}).data, + mcparams["time"].isel(**{"time": ci_avg_time_isel}).data, ) - self[label + "_avgsec"] = ( + mcparams[label + "_avgsec"] = ( ("x", time_dim2), self[label].isel(**{"time": ci_avg_time_isel}).data, ) - self[label + "_mc_set"] = ( + mcparams[label + "_mc_set"] = ( ("mc", "x", time_dim2), - self[label + "_mc_set"].isel(**{"time": ci_avg_time_isel}).data, + mcparams[label + "_mc_set"].isel(**{"time": ci_avg_time_isel}).data, ) elif ci_avg_x_sel is not None: time_dim2 = "time" x_dim2 = "x_avg" - self.coords[x_dim2] = ((x_dim2,), self.x.sel(x=ci_avg_x_sel).data) - self[label + "_avgsec"] = ( + mcparams.coords[x_dim2] = ( + (x_dim2,), + mcparams.x.sel(x=ci_avg_x_sel).data, + ) + mcparams[label + "_avgsec"] = ( (x_dim2, "time"), self[label].sel(x=ci_avg_x_sel).data, ) - self[label + "_mc_set"] = ( + mcparams[label + "_mc_set"] = ( ("mc", x_dim2, "time"), - self[label + "_mc_set"].sel(x=ci_avg_x_sel).data, + mcparams[label + "_mc_set"].sel(x=ci_avg_x_sel).data, ) elif ci_avg_x_isel is not None: time_dim2 = "time" x_dim2 = "x_avg" - self.coords[x_dim2] = ((x_dim2,), self.x.isel(x=ci_avg_x_isel).data) - self[label + "_avgsec"] = ( + mcparams.coords[x_dim2] = ( + (x_dim2,), + mcparams.x.isel(x=ci_avg_x_isel).data, + ) + mcparams[label + "_avgsec"] = ( (x_dim2, time_dim2), self[label].isel(x=ci_avg_x_isel).data, ) - self[label + "_mc_set"] = ( + mcparams[label + "_mc_set"] = ( ("mc", x_dim2, time_dim2), - self[label + 
"_mc_set"].isel(x=ci_avg_x_isel).data, + mcparams[label + "_mc_set"].isel(x=ci_avg_x_isel).data, ) else: - self[label + "_avgsec"] = self[label] + mcparams[label + "_avgsec"] = self[label] x_dim2 = "x" time_dim2 = "time" - memchunk = self[label + "_mc_set"].chunks + memchunk = mcparams[label + "_mc_set"].chunks # subtract the mean temperature - q = self[label + "_mc_set"] - self[label + "_avgsec"] - self[label + "_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) + q = mcparams[label + "_mc_set"] - mcparams[label + "_avgsec"] + out[label + "_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) if ci_avg_x_flag1: # unweighted mean - self[label + "_avgx1"] = self[label + "_avgsec"].mean(dim=x_dim2) + out[label + "_avgx1"] = mcparams[label + "_avgsec"].mean(dim=x_dim2) - q = self[label + "_mc_set"] - self[label + "_avgsec"] + q = mcparams[label + "_mc_set"] - mcparams[label + "_avgsec"] qvar = q.var(dim=["mc", x_dim2], ddof=1) - self[label + "_mc_avgx1_var"] = qvar + out[label + "_mc_avgx1_var"] = qvar if conf_ints: - new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[2]) - avg_axis = self[label + "_mc_set"].get_axis_num(["mc", x_dim2]) - q = self[label + "_mc_set"].data.map_blocks( + new_chunks = (len(conf_ints), mcparams[label + "_mc_set"].chunks[2]) + avg_axis = mcparams[label + "_mc_set"].get_axis_num(["mc", x_dim2]) + q = mcparams[label + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks, # drop_axis=avg_axis, @@ -3173,28 +2881,28 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): new_axis=0, ) # The new CI dim is added as firsaxis - self[label + "_mc_avgx1"] = (("CI", time_dim2), q) + out[label + "_mc_avgx1"] = (("CI", time_dim2), q) if ci_avg_x_flag2: - q = self[label + "_mc_set"] - self[label + "_avgsec"] + q = mcparams[label + "_mc_set"] - mcparams[label + "_avgsec"] qvar = q.var(dim=["mc"], ddof=1) # Inverse-variance weighting avg_x_var = 1 / (1 / qvar).sum(dim=x_dim2) - self[label + "_mc_avgx2_var"] = avg_x_var + out[label + "_mc_avgx2_var"] = avg_x_var - self[label + "_mc_avgx2_set"] = (self[label + "_mc_set"] / qvar).sum( - dim=x_dim2 - ) * avg_x_var - self[label + "_avgx2"] = self[label + "_mc_avgx2_set"].mean(dim="mc") + mcparams[label + "_mc_avgx2_set"] = ( + mcparams[label + "_mc_set"] / qvar + ).sum(dim=x_dim2) * avg_x_var + out[label + "_avgx2"] = mcparams[label + "_mc_avgx2_set"].mean(dim="mc") if conf_ints: - new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[2]) - avg_axis_avgx = self[label + "_mc_set"].get_axis_num("mc") + new_chunks = (len(conf_ints), mcparams[label + "_mc_set"].chunks[2]) + avg_axis_avgx = mcparams[label + "_mc_set"].get_axis_num("mc") - qq = self[label + "_mc_avgx2_set"].data.map_blocks( + qq = mcparams[label + "_mc_avgx2_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx), chunks=new_chunks, # drop_axis=avg_axis_avgx, @@ -3203,20 +2911,22 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): dtype=float, ) # The new CI dimension is added as # firsaxis - self[label + "_mc_avgx2"] = (("CI", time_dim2), qq) + out[label + "_mc_avgx2"] = (("CI", time_dim2), qq) if ci_avg_time_flag1 is not None: # unweighted mean - self[label + "_avg1"] = self[label + "_avgsec"].mean(dim=time_dim2) + out[label + "_avg1"] = mcparams[label + "_avgsec"].mean(dim=time_dim2) - q = self[label + "_mc_set"] - self[label + "_avgsec"] + q = mcparams[label + "_mc_set"] - mcparams[label + "_avgsec"] qvar = q.var(dim=["mc", time_dim2], ddof=1) - self[label + 
"_mc_avg1_var"] = qvar + out[label + "_mc_avg1_var"] = qvar if conf_ints: - new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[1]) - avg_axis = self[label + "_mc_set"].get_axis_num(["mc", time_dim2]) - q = self[label + "_mc_set"].data.map_blocks( + new_chunks = (len(conf_ints), mcparams[label + "_mc_set"].chunks[1]) + avg_axis = mcparams[label + "_mc_set"].get_axis_num( + ["mc", time_dim2] + ) + q = mcparams[label + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks, # drop_axis=avg_axis, @@ -3224,28 +2934,28 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): new_axis=0, ) # The new CI dim is added as firsaxis - self[label + "_mc_avg1"] = (("CI", x_dim2), q) + out[label + "_mc_avg1"] = (("CI", x_dim2), q) if ci_avg_time_flag2: - q = self[label + "_mc_set"] - self[label + "_avgsec"] + q = mcparams[label + "_mc_set"] - mcparams[label + "_avgsec"] qvar = q.var(dim=["mc"], ddof=1) # Inverse-variance weighting avg_time_var = 1 / (1 / qvar).sum(dim=time_dim2) - self[label + "_mc_avg2_var"] = avg_time_var + out[label + "_mc_avg2_var"] = avg_time_var - self[label + "_mc_avg2_set"] = (self[label + "_mc_set"] / qvar).sum( - dim=time_dim2 - ) * avg_time_var - self[label + "_avg2"] = self[label + "_mc_avg2_set"].mean(dim="mc") + mcparams[label + "_mc_avg2_set"] = ( + mcparams[label + "_mc_set"] / qvar + ).sum(dim=time_dim2) * avg_time_var + out[label + "_avg2"] = mcparams[label + "_mc_avg2_set"].mean(dim="mc") if conf_ints: - new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[1]) - avg_axis_avg2 = self[label + "_mc_set"].get_axis_num("mc") + new_chunks = (len(conf_ints), mcparams[label + "_mc_set"].chunks[1]) + avg_axis_avg2 = mcparams[label + "_mc_set"].get_axis_num("mc") - qq = self[label + "_mc_avg2_set"].data.map_blocks( + qq = mcparams[label + "_mc_avg2_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), chunks=new_chunks, # drop_axis=avg_axis_avg2, @@ -3254,40 +2964,40 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): dtype=float, ) # The new CI dimension is added as # firsaxis - self[label + "_mc_avg2"] = (("CI", x_dim2), qq) + out[label + "_mc_avg2"] = (("CI", x_dim2), qq) # Weighted mean of the forward and backward tmpw_var = 1 / ( - 1 / self["tmpf_mc" + "_avgsec_var"] + 1 / self["tmpb_mc" + "_avgsec_var"] + 1 / out["tmpf_mc" + "_avgsec_var"] + 1 / out["tmpb_mc" + "_avgsec_var"] ) q = ( - self["tmpf_mc_set"] / self["tmpf_mc" + "_avgsec_var"] - + self["tmpb_mc_set"] / self["tmpb_mc" + "_avgsec_var"] + mcparams["tmpf_mc_set"] / out["tmpf_mc" + "_avgsec_var"] + + mcparams["tmpb_mc_set"] / out["tmpb_mc" + "_avgsec_var"] ) * tmpw_var - self["tmpw" + "_mc_set"] = q # + mcparams["tmpw" + "_mc_set"] = q # - # self["tmpw"] = self["tmpw" + '_mc_set'].mean(dim='mc') - self["tmpw" + "_avgsec"] = ( - self["tmpf_avgsec"] / self["tmpf_mc" + "_avgsec_var"] - + self["tmpb_avgsec"] / self["tmpb_mc" + "_avgsec_var"] + # out["tmpw"] = out["tmpw" + '_mc_set'].mean(dim='mc') + out["tmpw" + "_avgsec"] = ( + mcparams["tmpf_avgsec"] / out["tmpf_mc" + "_avgsec_var"] + + mcparams["tmpb_avgsec"] / out["tmpb_mc" + "_avgsec_var"] ) * tmpw_var - q = self["tmpw" + "_mc_set"] - self["tmpw" + "_avgsec"] - self["tmpw" + "_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) + q = mcparams["tmpw" + "_mc_set"] - out["tmpw_avgsec"] + out["tmpw" + "_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) if ci_avg_time_flag1: - self["tmpw" + "_avg1"] = self["tmpw" + "_avgsec"].mean(dim=time_dim2) + out["tmpw" + 
"_avg1"] = out["tmpw" + "_avgsec"].mean(dim=time_dim2) - self["tmpw" + "_mc_avg1_var"] = self["tmpw" + "_mc_set"].var( + out["tmpw" + "_mc_avg1_var"] = mcparams["tmpw" + "_mc_set"].var( dim=["mc", time_dim2] ) if conf_ints: new_chunks_weighted = ((len(conf_ints),),) + (memchunk[1],) - avg_axis = self["tmpw" + "_mc_set"].get_axis_num(["mc", time_dim2]) - q2 = self["tmpw" + "_mc_set"].data.map_blocks( + avg_axis = mcparams["tmpw" + "_mc_set"].get_axis_num(["mc", time_dim2]) + q2 = mcparams["tmpw" + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks_weighted, # Explicitly define output chunks @@ -3296,32 +3006,32 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): dtype=float, ) # The new CI dimension is added as # first axis - self["tmpw" + "_mc_avg1"] = (("CI", x_dim2), q2) + out["tmpw" + "_mc_avg1"] = (("CI", x_dim2), q2) if ci_avg_time_flag2: tmpw_var_avg2 = 1 / ( - 1 / self["tmpf_mc_avg2_var"] + 1 / self["tmpb_mc_avg2_var"] + 1 / out["tmpf_mc_avg2_var"] + 1 / out["tmpb_mc_avg2_var"] ) q = ( - self["tmpf_mc_avg2_set"] / self["tmpf_mc_avg2_var"] - + self["tmpb_mc_avg2_set"] / self["tmpb_mc_avg2_var"] + mcparams["tmpf_mc_avg2_set"] / out["tmpf_mc_avg2_var"] + + mcparams["tmpb_mc_avg2_set"] / out["tmpb_mc_avg2_var"] ) * tmpw_var_avg2 - self["tmpw" + "_mc_avg2_set"] = q # + mcparams["tmpw" + "_mc_avg2_set"] = q # - self["tmpw" + "_avg2"] = ( - self["tmpf_avg2"] / self["tmpf_mc_avg2_var"] - + self["tmpb_avg2"] / self["tmpb_mc_avg2_var"] + out["tmpw" + "_avg2"] = ( + out["tmpf_avg2"] / out["tmpf_mc_avg2_var"] + + out["tmpb_avg2"] / out["tmpb_mc_avg2_var"] ) * tmpw_var_avg2 - self["tmpw" + "_mc_avg2_var"] = tmpw_var_avg2 + out["tmpw" + "_mc_avg2_var"] = tmpw_var_avg2 if conf_ints: # We first need to know the x-dim-chunk-size new_chunks_weighted = ((len(conf_ints),),) + (memchunk[1],) - avg_axis_avg2 = self["tmpw" + "_mc_avg2_set"].get_axis_num("mc") - q2 = self["tmpw" + "_mc_avg2_set"].data.map_blocks( + avg_axis_avg2 = mcparams["tmpw" + "_mc_avg2_set"].get_axis_num("mc") + q2 = mcparams["tmpw" + "_mc_avg2_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), chunks=new_chunks_weighted, # Explicitly define output chunks @@ -3329,17 +3039,17 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): new_axis=0, dtype=float, ) # The new CI dimension is added as firstax - self["tmpw" + "_mc_avg2"] = (("CI", x_dim2), q2) + out["tmpw" + "_mc_avg2"] = (("CI", x_dim2), q2) if ci_avg_x_flag1: - self["tmpw" + "_avgx1"] = self["tmpw" + "_avgsec"].mean(dim=x_dim2) + out["tmpw" + "_avgx1"] = out["tmpw" + "_avgsec"].mean(dim=x_dim2) - self["tmpw" + "_mc_avgx1_var"] = self["tmpw" + "_mc_set"].var(dim=x_dim2) + out["tmpw" + "_mc_avgx1_var"] = mcparams["tmpw" + "_mc_set"].var(dim=x_dim2) if conf_ints: new_chunks_weighted = ((len(conf_ints),),) + (memchunk[2],) - avg_axis = self["tmpw" + "_mc_set"].get_axis_num(["mc", x_dim2]) - q2 = self["tmpw" + "_mc_set"].data.map_blocks( + avg_axis = mcparams["tmpw" + "_mc_set"].get_axis_num(["mc", x_dim2]) + q2 = mcparams["tmpw" + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks_weighted, # Explicitly define output chunks @@ -3348,32 +3058,32 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): dtype=float, ) # The new CI dimension is added as # first axis - self["tmpw" + "_mc_avgx1"] = (("CI", time_dim2), q2) + out["tmpw" + "_mc_avgx1"] = (("CI", time_dim2), q2) if ci_avg_x_flag2: tmpw_var_avgx2 = 1 / ( - 1 / 
self["tmpf_mc_avgx2_var"] + 1 / self["tmpb_mc_avgx2_var"] + 1 / out["tmpf_mc_avgx2_var"] + 1 / out["tmpb_mc_avgx2_var"] ) q = ( - self["tmpf_mc_avgx2_set"] / self["tmpf_mc_avgx2_var"] - + self["tmpb_mc_avgx2_set"] / self["tmpb_mc_avgx2_var"] + mcparams["tmpf_mc_avgx2_set"] / out["tmpf_mc_avgx2_var"] + + mcparams["tmpb_mc_avgx2_set"] / out["tmpb_mc_avgx2_var"] ) * tmpw_var_avgx2 - self["tmpw" + "_mc_avgx2_set"] = q # + mcparams["tmpw" + "_mc_avgx2_set"] = q # - self["tmpw" + "_avgx2"] = ( - self["tmpf_avgx2"] / self["tmpf_mc_avgx2_var"] - + self["tmpb_avgx2"] / self["tmpb_mc_avgx2_var"] + out["tmpw" + "_avgx2"] = ( + out["tmpf_avgx2"] / out["tmpf_mc_avgx2_var"] + + out["tmpb_avgx2"] / out["tmpb_mc_avgx2_var"] ) * tmpw_var_avgx2 - self["tmpw" + "_mc_avgx2_var"] = tmpw_var_avgx2 + out["tmpw" + "_mc_avgx2_var"] = tmpw_var_avgx2 if conf_ints: # We first need to know the x-dim-chunk-size new_chunks_weighted = ((len(conf_ints),),) + (memchunk[2],) - avg_axis_avgx2 = self["tmpw" + "_mc_avgx2_set"].get_axis_num("mc") - q2 = self["tmpw" + "_mc_avgx2_set"].data.map_blocks( + avg_axis_avgx2 = mcparams["tmpw" + "_mc_avgx2_set"].get_axis_num("mc") + q2 = mcparams["tmpw" + "_mc_avgx2_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx2), chunks=new_chunks_weighted, # Explicitly define output chunks @@ -3381,7 +3091,7 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): new_axis=0, dtype=float, ) # The new CI dimension is added as firstax - self["tmpw" + "_mc_avgx2"] = (("CI", time_dim2), q2) + out["tmpw" + "_mc_avgx2"] = (("CI", time_dim2), q2) # Clean up the garbage. All arrays with a Monte Carlo dimension. if mc_remove_set_flag: @@ -3406,13 +3116,16 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): remove_mc_set.append(i + "_mc_avgx2_set") remove_mc_set.append(i + "_mc_avgsec_var") - if self.trans_att.size: + if "trans_att" in mcparams and mcparams.trans_att.size: remove_mc_set.append('talpha"_fw_mc') remove_mc_set.append('talpha"_bw_mc') for k in remove_mc_set: - if k in self: - del self[k] + if k in out: + print(f"Removed from results: {k}") + del out[k] + + self.update(out) pass def conf_int_single_ended( @@ -3496,6 +3209,9 @@ def conf_int_single_ended( """ check_deprecated_kwargs(kwargs) + out = xr.Dataset() + params = xr.Dataset() + if da_random_state: state = da_random_state else: @@ -3504,6 +3220,7 @@ def conf_int_single_ended( no, nt = self.st.data.shape if "trans_att" in self.keys(): nta = self.trans_att.size + else: nta = 0 @@ -3521,10 +3238,13 @@ def conf_int_single_ended( else: raise Exception("The size of `p_val` is not what I expected") - self.coords["mc"] = range(mc_sample_size) + params.coords["mc"] = range(mc_sample_size) + params.coords["x"] = self.x + params.coords["time"] = self.time if conf_ints: - self.coords["CI"] = conf_ints + out.coords["CI"] = conf_ints + params.coords["CI"] = conf_ints # WLS if isinstance(p_cov, str): @@ -3534,20 +3254,20 @@ def conf_int_single_ended( p_mc = sst.multivariate_normal.rvs(mean=p_val, cov=p_cov, size=mc_sample_size) if fixed_alpha: - self["alpha_mc"] = (("mc", "x"), p_mc[:, 1 : no + 1]) - self["c_mc"] = (("mc", "time"), p_mc[:, 1 + no : 1 + no + nt]) + params["alpha_mc"] = (("mc", "x"), p_mc[:, 1 : no + 1]) + params["c_mc"] = (("mc", "time"), p_mc[:, 1 + no : 1 + no + nt]) else: - self["dalpha_mc"] = (("mc",), p_mc[:, 1]) - self["c_mc"] = (("mc", "time"), p_mc[:, 2 : nt + 2]) + params["dalpha_mc"] = (("mc",), p_mc[:, 1]) + params["c_mc"] = (("mc", "time"), p_mc[:, 2 : nt + 2]) - 
self["gamma_mc"] = (("mc",), p_mc[:, 0]) + params["gamma_mc"] = (("mc",), p_mc[:, 0]) if nta: - self["ta_mc"] = ( + params["ta_mc"] = ( ("mc", "trans_att", "time"), np.reshape(p_mc[:, -nt * nta :], (mc_sample_size, nta, nt)), ) - rsize = (self.mc.size, self.x.size, self.time.size) + rsize = (params.mc.size, params.x.size, params.time.size) if reduce_memory_usage: memchunk = da.ones( @@ -3594,7 +3314,7 @@ def conf_int_single_ended( else: st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) - self[k] = ( + params[k] = ( ("mc", "x", "time"), state.normal( loc=loc, # has chunks=memchunk[1:] @@ -3607,52 +3327,50 @@ def conf_int_single_ended( ta_arr = np.zeros((mc_sample_size, no, nt)) if nta: - for ii, ta in enumerate(self["ta_mc"]): + for ii, ta in enumerate(params["ta_mc"]): for tai, taxi in zip(ta.values, self.trans_att.values): ta_arr[ii, self.x.values >= taxi] = ( ta_arr[ii, self.x.values >= taxi] + tai ) - self["ta_mc_arr"] = (("mc", "x", "time"), ta_arr) + params["ta_mc_arr"] = (("mc", "x", "time"), ta_arr) if fixed_alpha: - self["tmpf_mc_set"] = ( - self["gamma_mc"] + params["tmpf_mc_set"] = ( + params["gamma_mc"] / ( ( - np.log(self["r_st"]) - - np.log(self["r_ast"]) - + (self["c_mc"] + self["ta_mc_arr"]) + np.log(params["r_st"]) + - np.log(params["r_ast"]) + + (params["c_mc"] + params["ta_mc_arr"]) ) - + self["alpha_mc"] + + params["alpha_mc"] ) - 273.15 ) else: - self["tmpf_mc_set"] = ( - self["gamma_mc"] + params["tmpf_mc_set"] = ( + params["gamma_mc"] / ( ( - np.log(self["r_st"]) - - np.log(self["r_ast"]) - + (self["c_mc"] + self["ta_mc_arr"]) + np.log(params["r_st"]) + - np.log(params["r_ast"]) + + (params["c_mc"] + params["ta_mc_arr"]) ) - + (self["dalpha_mc"] * self.x) + + (params["dalpha_mc"] * params.x) ) - 273.15 ) avg_dims = ["mc"] - - avg_axis = self["tmpf_mc_set"].get_axis_num(avg_dims) - - self["tmpf_mc_var"] = (self["tmpf_mc_set"] - self["tmpf"]).var( + avg_axis = params["tmpf_mc_set"].get_axis_num(avg_dims) + out["tmpf_mc_var"] = (params["tmpf_mc_set"] - self["tmpf"]).var( dim=avg_dims, ddof=1 ) if conf_ints: - new_chunks = ((len(conf_ints),),) + self["tmpf_mc_set"].chunks[1:] + new_chunks = ((len(conf_ints),),) + params["tmpf_mc_set"].chunks[1:] - qq = self["tmpf_mc_set"] + qq = params["tmpf_mc_set"] q = qq.data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), @@ -3661,28 +3379,17 @@ def conf_int_single_ended( new_axis=0, ) # The new CI dimension is added as first axis - self["tmpf_mc"] = (("CI", "x", "time"), q) + out["tmpf_mc"] = (("CI", "x", "time"), q) - if mc_remove_set_flag: - drop_var = [ - "gamma_mc", - "dalpha_mc", - "alpha_mc", - "c_mc", - "mc", - "r_st", - "r_ast", - "tmpf_mc_set", - "ta_mc_arr", - ] - for k in drop_var: - if k in self: - del self[k] + if not mc_remove_set_flag: + out.update(params) - pass + self.update(out) + return out def conf_int_double_ended( self, + sections=None, p_val="p_val", p_cov="p_cov", st_var=None, @@ -3825,9 +3532,7 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): if direction == "fw": arr = da.concatenate( ( - da.zeros( - (1, i_splice, 1), chunks=((1, i_splice, 1)), dtype=bool - ), + da.zeros((1, i_splice, 1), chunks=(1, i_splice, 1), dtype=bool), da.ones( (1, no - i_splice, 1), chunks=(1, no - i_splice, 1), @@ -3842,7 +3547,7 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): da.ones((1, i_splice, 1), chunks=(1, i_splice, 1), dtype=bool), da.zeros( (1, no - i_splice, 1), - chunks=((1, no - i_splice, 1)), + chunks=(1, no - i_splice, 1), dtype=bool, ), ), @@ -3852,6 +3557,9 
@@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): check_deprecated_kwargs(kwargs) + out = xr.Dataset() + params = xr.Dataset() + if da_random_state: # In testing environments assert isinstance(da_random_state, da.random.RandomState) @@ -3881,9 +3589,13 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): (mc_sample_size, no, nt), chunks={0: -1, 1: "auto", 2: "auto"} ).chunks - self.coords["mc"] = range(mc_sample_size) + params.coords["mc"] = range(mc_sample_size) + params.coords["x"] = self.x + params.coords["time"] = self.time + if conf_ints: self.coords["CI"] = conf_ints + params.coords["CI"] = conf_ints assert isinstance(p_val, (str, np.ndarray, np.generic)) if isinstance(p_val, str): @@ -3903,10 +3615,10 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): d_bw = p_val[1 + nt : 2 * nt + 1] alpha = p_val[2 * nt + 1 : 2 * nt + 1 + no] - self["gamma_mc"] = (tuple(), gamma) - self["alpha_mc"] = (("x",), alpha) - self["df_mc"] = (("time",), d_fw) - self["db_mc"] = (("time",), d_bw) + params["gamma_mc"] = (tuple(), gamma) + params["alpha_mc"] = (("x",), alpha) + params["df_mc"] = (("time",), d_fw) + params["db_mc"] = (("time",), d_bw) if nta: ta = p_val[2 * nt + 1 + no :].reshape((nt, 2, nta), order="F") @@ -3914,19 +3626,19 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): ta_bw = ta[:, 1, :] ta_fw_arr = np.zeros((no, nt)) - for tai, taxi in zip(ta_fw.T, self.coords["trans_att"].values): - ta_fw_arr[self.x.values >= taxi] = ( - ta_fw_arr[self.x.values >= taxi] + tai + for tai, taxi in zip(ta_fw.T, params.coords["trans_att"].values): + ta_fw_arr[params.x.values >= taxi] = ( + ta_fw_arr[params.x.values >= taxi] + tai ) ta_bw_arr = np.zeros((no, nt)) - for tai, taxi in zip(ta_bw.T, self.coords["trans_att"].values): - ta_bw_arr[self.x.values < taxi] = ( - ta_bw_arr[self.x.values < taxi] + tai + for tai, taxi in zip(ta_bw.T, params.coords["trans_att"].values): + ta_bw_arr[params.x.values < taxi] = ( + ta_bw_arr[params.x.values < taxi] + tai ) - self["talpha_fw_mc"] = (("x", "time"), ta_fw_arr) - self["talpha_bw_mc"] = (("x", "time"), ta_bw_arr) + params["talpha_fw_mc"] = (("x", "time"), ta_fw_arr) + params["talpha_bw_mc"] = (("x", "time"), ta_bw_arr) elif isinstance(p_cov, bool) and p_cov: raise NotImplementedError("Not an implemented option. 
Check p_cov argument") @@ -3937,7 +3649,10 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): p_cov = self[p_cov].values assert p_cov.shape == (npar, npar) - ix_sec = self.ufunc_per_section(x_indices=True, calc_per="all") + assert sections is not None, "Define sections" + ix_sec = self.ufunc_per_section( + sections=sections, x_indices=True, calc_per="all" + ) nx_sec = ix_sec.size from_i = np.concatenate( ( @@ -3958,9 +3673,9 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): d_fw = po_mc[:, 1 : nt + 1] d_bw = po_mc[:, 1 + nt : 2 * nt + 1] - self["gamma_mc"] = (("mc",), gamma) - self["df_mc"] = (("mc", "time"), d_fw) - self["db_mc"] = (("mc", "time"), d_bw) + params["gamma_mc"] = (("mc",), gamma) + params["df_mc"] = (("mc", "time"), d_fw) + params["db_mc"] = (("mc", "time"), d_bw) # calculate alpha seperately alpha = np.zeros((mc_sample_size, no), dtype=float) @@ -3980,7 +3695,7 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): alpha[:, not_ix_sec] = not_alpha_mc - self["alpha_mc"] = (("mc", "x"), alpha) + params["alpha_mc"] = (("mc", "x"), alpha) if nta: ta = po_mc[:, 2 * nt + 1 + nx_sec :].reshape( @@ -3993,10 +3708,10 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): (mc_sample_size, no, nt), chunks=memchunk, dtype=float ) for tai, taxi in zip( - ta_fw.swapaxes(0, 2), self.coords["trans_att"].values + ta_fw.swapaxes(0, 2), params.coords["trans_att"].values ): # iterate over the splices - i_splice = sum(self.x.values < taxi) + i_splice = sum(params.x.values < taxi) mask = create_da_ta2(no, i_splice, direction="fw", chunks=memchunk) ta_fw_arr += mask * tai.T[:, None, :] @@ -4005,15 +3720,15 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): (mc_sample_size, no, nt), chunks=memchunk, dtype=float ) for tai, taxi in zip( - ta_bw.swapaxes(0, 2), self.coords["trans_att"].values + ta_bw.swapaxes(0, 2), params.coords["trans_att"].values ): - i_splice = sum(self.x.values < taxi) + i_splice = sum(params.x.values < taxi) mask = create_da_ta2(no, i_splice, direction="bw", chunks=memchunk) ta_bw_arr += mask * tai.T[:, None, :] - self["talpha_fw_mc"] = (("mc", "x", "time"), ta_fw_arr) - self["talpha_bw_mc"] = (("mc", "x", "time"), ta_bw_arr) + params["talpha_fw_mc"] = (("mc", "x", "time"), ta_fw_arr) + params["talpha_bw_mc"] = (("mc", "x", "time"), ta_bw_arr) # Draw from the normal distributions for the Stokes intensities for k, st_labeli, st_vari in zip( @@ -4053,7 +3768,7 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): else: st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) - self[k] = ( + params[k] = ( ("mc", "x", "time"), state.normal( loc=loc, # has chunks=memchunk[1:] @@ -4067,65 +3782,69 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): if "tmpw" or label: if label == "tmpf": if nta: - self["tmpf_mc_set"] = ( - self["gamma_mc"] + params["tmpf_mc_set"] = ( + params["gamma_mc"] / ( - np.log(self["r_st"] / self["r_ast"]) - + self["df_mc"] - + self["alpha_mc"] - + self["talpha_fw_mc"] + np.log(params["r_st"] / params["r_ast"]) + + params["df_mc"] + + params["alpha_mc"] + + params["talpha_fw_mc"] ) - 273.15 ) else: - self["tmpf_mc_set"] = ( - self["gamma_mc"] + params["tmpf_mc_set"] = ( + params["gamma_mc"] / ( - np.log(self["r_st"] / self["r_ast"]) - + self["df_mc"] - + self["alpha_mc"] + np.log(params["r_st"] / params["r_ast"]) + + params["df_mc"] + + params["alpha_mc"] ) - 273.15 ) else: if nta: - self["tmpb_mc_set"] = ( - self["gamma_mc"] + params["tmpb_mc_set"] = ( + params["gamma_mc"] 
/ ( - np.log(self["r_rst"] / self["r_rast"]) - + self["db_mc"] - - self["alpha_mc"] - + self["talpha_bw_mc"] + np.log(params["r_rst"] / params["r_rast"]) + + params["db_mc"] + - params["alpha_mc"] + + params["talpha_bw_mc"] ) - 273.15 ) else: - self["tmpb_mc_set"] = ( - self["gamma_mc"] + params["tmpb_mc_set"] = ( + params["gamma_mc"] / ( - np.log(self["r_rst"] / self["r_rast"]) - + self["db_mc"] - - self["alpha_mc"] + np.log(params["r_rst"] / params["r_rast"]) + + params["db_mc"] + - params["alpha_mc"] ) - 273.15 ) if var_only_sections: # sets the values outside the reference sections to NaN - xi = self.ufunc_per_section(x_indices=True, calc_per="all") - x_mask_ = [True if ix in xi else False for ix in range(self.x.size)] + xi = self.ufunc_per_section( + sections=sections, x_indices=True, calc_per="all" + ) + x_mask_ = [ + True if ix in xi else False for ix in range(params.x.size) + ] x_mask = np.reshape(x_mask_, (1, -1, 1)) - self[label + "_mc_set"] = self[label + "_mc_set"].where(x_mask) + params[label + "_mc_set"] = params[label + "_mc_set"].where(x_mask) # subtract the mean temperature - q = self[label + "_mc_set"] - self[label] - self[label + "_mc_var"] = q.var(dim="mc", ddof=1) + q = params[label + "_mc_set"] - self[label] + out[label + "_mc_var"] = q.var(dim="mc", ddof=1) if conf_ints: - new_chunks = list(self[label + "_mc_set"].chunks) + new_chunks = list(params[label + "_mc_set"].chunks) new_chunks[0] = (len(conf_ints),) - avg_axis = self[label + "_mc_set"].get_axis_num("mc") - q = self[label + "_mc_set"].data.map_blocks( + avg_axis = params[label + "_mc_set"].get_axis_num("mc") + q = params[label + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks, # drop_axis=avg_axis, @@ -4133,37 +3852,37 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): new_axis=0, ) # The new CI dimension is added as firsaxis - self[label + "_mc"] = (("CI", "x", "time"), q) + out[label + "_mc"] = (("CI", "x", "time"), q) # Weighted mean of the forward and backward - tmpw_var = 1 / (1 / self["tmpf_mc_var"] + 1 / self["tmpb_mc_var"]) + tmpw_var = 1 / (1 / out["tmpf_mc_var"] + 1 / out["tmpb_mc_var"]) q = ( - self["tmpf_mc_set"] / self["tmpf_mc_var"] - + self["tmpb_mc_set"] / self["tmpb_mc_var"] + params["tmpf_mc_set"] / out["tmpf_mc_var"] + + params["tmpb_mc_set"] / out["tmpb_mc_var"] ) * tmpw_var - self["tmpw" + "_mc_set"] = q # + params["tmpw" + "_mc_set"] = q # - self["tmpw"] = ( - self["tmpf"] / self["tmpf_mc_var"] + self["tmpb"] / self["tmpb_mc_var"] + out["tmpw"] = ( + self["tmpf"] / out["tmpf_mc_var"] + self["tmpb"] / out["tmpb_mc_var"] ) * tmpw_var - q = self["tmpw" + "_mc_set"] - self["tmpw"] - self["tmpw" + "_mc_var"] = q.var(dim="mc", ddof=1) + q = params["tmpw" + "_mc_set"] - self["tmpw"] + out["tmpw" + "_mc_var"] = q.var(dim="mc", ddof=1) # Calculate the CI of the weighted MC_set if conf_ints: new_chunks_weighted = ((len(conf_ints),),) + memchunk[1:] - avg_axis = self["tmpw" + "_mc_set"].get_axis_num("mc") - q2 = self["tmpw" + "_mc_set"].data.map_blocks( + avg_axis = params["tmpw" + "_mc_set"].get_axis_num("mc") + q2 = params["tmpw" + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks_weighted, # Explicitly define output chunks drop_axis=avg_axis, # avg dimensions are dropped new_axis=0, dtype=float, ) # The new CI dimension is added as first axis - self["tmpw" + "_mc"] = (("CI", "x", "time"), q2) + out["tmpw" + "_mc"] = (("CI", "x", "time"), q2) # Clean up the garbage. 
All arrays with a Monte Carlo dimension. if mc_remove_set_flag: @@ -4186,9 +3905,14 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): remove_mc_set.append('talpha"_bw_mc') for k in remove_mc_set: - if k in self: - del self[k] - pass + if k in out: + del out[k] + + if not mc_remove_set_flag: + out.update(params) + + self.update(out) + return out def in_confidence_interval(self, ci_label, conf_ints=None, sections=None): """ @@ -4344,6 +4068,7 @@ def ufunc_per_section( reference sections wrt the temperature of the water baths >>> tmpf_var = d.ufunc_per_section( + >>> sections=sections, >>> func='var', >>> calc_per='all', >>> label='tmpf', @@ -4353,6 +4078,7 @@ def ufunc_per_section( reference section wrt the temperature of the water baths >>> tmpf_var = d.ufunc_per_section( + >>> sections=sections, >>> func='var', >>> calc_per='stretch', >>> label='tmpf', @@ -4362,6 +4088,7 @@ def ufunc_per_section( water bath wrt the temperature of the water baths >>> tmpf_var = d.ufunc_per_section( + >>> sections=sections, >>> func='var', >>> calc_per='section', >>> label='tmpf', @@ -4370,6 +4097,7 @@ def ufunc_per_section( 4. Obtain the coordinates of the measurements per section >>> locs = d.ufunc_per_section( + >>> sections=sections, >>> func=None, >>> label='x', >>> temp_err=False, @@ -4379,6 +4107,7 @@ def ufunc_per_section( 5. Number of observations per stretch >>> nlocs = d.ufunc_per_section( + >>> sections=sections, >>> func=len, >>> label='x', >>> temp_err=False, @@ -4397,7 +4126,7 @@ def ufunc_per_section( 7. x-coordinate index - >>> ix_loc = d.ufunc_per_section(x_indices=True) + >>> ix_loc = d.ufunc_per_section(sections=sections, x_indices=True) Note @@ -4405,9 +4134,8 @@ def ufunc_per_section( If `self[label]` or `self[subtract_from_label]` is a Dask array, a Dask array is returned else a numpy array is returned """ - if sections is None: - sections = self.sections - + # if sections is None: + # sections = self.sections if label is None: dataarray = None else: @@ -4416,7 +4144,10 @@ def ufunc_per_section( if x_indices: x_coords = self.x reference_dataset = None + else: + sections = validate_sections(self, sections) + x_coords = None reference_dataset = {k: self[k] for k in sections} diff --git a/src/dtscalibration/datastore_utils.py b/src/dtscalibration/datastore_utils.py index dc2daad3..0d54dd31 100644 --- a/src/dtscalibration/datastore_utils.py +++ b/src/dtscalibration/datastore_utils.py @@ -1,4 +1,3 @@ -# coding=utf-8 from typing import TYPE_CHECKING from typing import Optional from typing import Union @@ -528,6 +527,264 @@ def get_netcdf_encoding( return encoding +def get_params_from_pval_double_ended(ip, coords, p_val=None, p_cov=None): + if p_val is not None: + assert len(p_val) == ip.npar, "Length of p_val is incorrect" + + params = xr.Dataset(coords=coords) + + # save estimates and variances to datastore, skip covariances + params["gamma"] = (tuple(), p_val[ip.gamma].item()) + params["alpha"] = (("x",), p_val[ip.alpha]) + params["df"] = (("time",), p_val[ip.df]) + params["db"] = (("time",), p_val[ip.db]) + + if ip.nta: + params["talpha_fw"] = ( + ("time", "trans_att"), + p_val[ip.taf].reshape((ip.nt, ip.nta), order="C"), + ) + params["talpha_bw"] = ( + ("time", "trans_att"), + p_val[ip.tab].reshape((ip.nt, ip.nta), order="C"), + ) + else: + params["talpha_fw"] = (("time", "trans_att"), np.zeros((ip.nt, 0))) + params["talpha_bw"] = (("time", "trans_att"), np.zeros((ip.nt, 0))) + + params["talpha_fw_full"] = ( + ("x", "time"), + ip.get_taf_values( + pval=p_val, + 
x=params.x.values, + trans_att=params.trans_att.values, + axis="", + ), + ) + params["talpha_bw_full"] = ( + ("x", "time"), + ip.get_tab_values( + pval=p_val, + x=params.x.values, + trans_att=params.trans_att.values, + axis="", + ), + ) + if p_cov is not None: + assert p_cov.shape == (ip.npar, ip.npar), "Shape of p_cov is incorrect" + + # extract covariances and ensure broadcastable to (nx, nt) + params["gamma_df"] = (("time",), p_cov[np.ix_(ip.gamma, ip.df)][0]) + params["gamma_db"] = (("time",), p_cov[np.ix_(ip.gamma, ip.db)][0]) + params["gamma_alpha"] = (("x",), p_cov[np.ix_(ip.alpha, ip.gamma)][:, 0]) + params["df_db"] = ( + ("time",), + p_cov[ip.df, ip.db], + ) + params["alpha_df"] = ( + ( + "x", + "time", + ), + p_cov[np.ix_(ip.alpha, ip.df)], + ) + params["alpha_db"] = ( + ( + "x", + "time", + ), + p_cov[np.ix_(ip.alpha, ip.db)], + ) + params["tafw_gamma"] = ( + ( + "x", + "time", + ), + ip.get_taf_values( + pval=p_cov[ip.gamma], + x=params["x"].values, + trans_att=params["trans_att"].values, + axis="", + ), + ) + params["tabw_gamma"] = ( + ( + "x", + "time", + ), + ip.get_tab_values( + pval=p_cov[ip.gamma], + x=params["x"].values, + trans_att=params["trans_att"].values, + axis="", + ), + ) + params["tafw_alpha"] = ( + ( + "x", + "time", + ), + ip.get_taf_values( + pval=p_cov[ip.alpha], + x=params["x"].values, + trans_att=params["trans_att"].values, + axis="x", + ), + ) + params["tabw_alpha"] = ( + ( + "x", + "time", + ), + ip.get_tab_values( + pval=p_cov[ip.alpha], + x=params["x"].values, + trans_att=params["trans_att"].values, + axis="x", + ), + ) + params["tafw_df"] = ( + ( + "x", + "time", + ), + ip.get_taf_values( + pval=p_cov[ip.df], + x=params["x"].values, + trans_att=params["trans_att"].values, + axis="time", + ), + ) + params["tafw_db"] = ( + ( + "x", + "time", + ), + ip.get_taf_values( + pval=p_cov[ip.db], + x=params["x"].values, + trans_att=params["trans_att"].values, + axis="time", + ), + ) + params["tabw_db"] = ( + ( + "x", + "time", + ), + ip.get_tab_values( + pval=p_cov[ip.db], + x=params["x"].values, + trans_att=params["trans_att"].values, + axis="time", + ), + ) + params["tabw_df"] = ( + ( + "x", + "time", + ), + ip.get_tab_values( + pval=p_cov[ip.df], + x=params["x"].values, + trans_att=params["trans_att"].values, + axis="time", + ), + ) + # sigma2_tafw_tabw + return params + + +def get_params_from_pval_single_ended( + ip, coords, p_val=None, p_var=None, p_cov=None, fix_alpha=None +): + assert len(p_val) == ip.npar, "Length of p_val is incorrect" + + params = xr.Dataset(coords=coords) + param_covs = xr.Dataset(coords=coords) + + params["gamma"] = (tuple(), p_val[ip.gamma].item()) + param_covs["gamma"] = (tuple(), p_var[ip.gamma].item()) + + if ip.nta > 0: + params["talpha_fw"] = ( + ("trans_att", "time"), + ip.get_taf_pars(p_val), + ) + param_covs["talpha_fw"] = ( + ("trans_att", "time"), + ip.get_taf_pars(p_var), + ) + else: + params["talpha_fw"] = (("trans_att", "time"), np.zeros((0, ip.nt))) + param_covs["talpha_fw"] = (("trans_att", "time"), np.zeros((0, ip.nt))) + + params["c"] = (("time",), p_val[ip.c]) + param_covs["c"] = (("time",), p_var[ip.c]) + + if fix_alpha is not None: + params["alpha"] = (("x",), fix_alpha[0]) + param_covs["alpha"] = (("x",), fix_alpha[1]) + + else: + params["dalpha"] = (tuple(), p_val[ip.dalpha].item()) + param_covs["dalpha"] = (tuple(), p_var[ip.dalpha].item()) + + params["alpha"] = params["dalpha"] * params["x"] + param_covs["alpha"] = param_covs["dalpha"] * params["x"] ** 2 + + param_covs["gamma_dalpha"] = (tuple(), 
+            p_cov[np.ix_(ip.dalpha, ip.gamma)][0, 0])
+        param_covs["dalpha_c"] = (("time",), p_cov[np.ix_(ip.dalpha, ip.c)][0, :])
+        param_covs["tafw_dalpha"] = (
+            ("x", "time"),
+            ip.get_taf_values(
+                pval=p_cov[ip.dalpha],
+                x=params["x"].values,
+                trans_att=params["trans_att"].values,
+                axis="",
+            ),
+        )
+
+    params["talpha_fw_full"] = (
+        ("x", "time"),
+        ip.get_taf_values(
+            pval=p_val,
+            x=params["x"].values,
+            trans_att=params["trans_att"].values,
+            axis="",
+        ),
+    )
+    param_covs["talpha_fw_full"] = (
+        ("x", "time"),
+        ip.get_taf_values(
+            pval=p_var,
+            x=params["x"].values,
+            trans_att=params["trans_att"].values,
+            axis="",
+        ),
+    )
+    param_covs["gamma_c"] = (("time",), p_cov[np.ix_(ip.gamma, ip.c)][0, :])
+    param_covs["tafw_gamma"] = (
+        ("x", "time"),
+        ip.get_taf_values(
+            pval=p_cov[ip.gamma],
+            x=params["x"].values,
+            trans_att=params["trans_att"].values,
+            axis="",
+        ),
+    )
+    param_covs["tafw_c"] = (
+        ("x", "time"),
+        ip.get_taf_values(
+            pval=p_cov[ip.c],
+            x=params["x"].values,
+            trans_att=params["trans_att"].values,
+            axis="time",
+        ),
+    )
+    return params, param_covs
+
+
 def merge_double_ended(
     ds_fw: "DataStore",
     ds_bw: "DataStore",
diff --git a/src/dtscalibration/io.py b/src/dtscalibration/io.py
index be7d0dd6..413851f5 100644
--- a/src/dtscalibration/io.py
+++ b/src/dtscalibration/io.py
@@ -282,7 +282,7 @@ def read_silixa_files(
     else:
         raise NotImplementedError(
-            "Silixa xml version " + "{0} not implemented".format(xml_version)
+            "Silixa xml version " + f"{xml_version} not implemented"
         )
     ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs)
@@ -332,7 +332,7 @@ def read_sensortran_files(
         # Check if corresponding temperature file exists
         if not os.path.isfile(filepathlist_temp[ii]):
             raise FileNotFoundError(
-                "Could not find BinaryTemp " + "file corresponding to {}".format(fname)
+                "Could not find BinaryTemp " + f"file corresponding to {fname}"
             )
     version = sensortran_binary_version_check(filepathlist_dts)
@@ -347,7 +347,7 @@ def read_sensortran_files(
         )
     else:
         raise NotImplementedError(
-            "Sensortran binary version " + "{0} not implemented".format(version)
+            "Sensortran binary version " + f"{version} not implemented"
         )
     ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs)
@@ -423,10 +423,9 @@ def read_apsensing_files(
     else:
         warnings.warn(
-            "AP sensing device "
-            '"{0}"'.format(device)
-            + " has not been tested.\nPlease open an issue on github"
-            + " and provide an example file"
+            f"AP sensing device {device}"
+            " has not been tested.\nPlease open an issue on github"
+            " and provide an example file"
         )
     data_vars, coords, attrs = read_apsensing_files_routine(
@@ -544,10 +543,9 @@ def read_sensornet_files(
     else:
         flip_reverse_measurements = False
         warnings.warn(
-            "Sensornet .dff version "
-            '"{0}"'.format(ddf_version)
-            + " has not been tested.\nPlease open an issue on github"
-            + " and provide an example file"
+            f"Sensornet .ddf version {ddf_version}"
+            " has not been tested.\nPlease open an issue on github"
+            " and provide an example file"
         )
     data_vars, coords, attrs = read_sensornet_files_routine_v3(
diff --git a/src/dtscalibration/io_utils.py b/src/dtscalibration/io_utils.py
index e880a6ca..ca5dbb3a 100644
--- a/src/dtscalibration/io_utils.py
+++ b/src/dtscalibration/io_utils.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 import os
 import struct
 from contextlib import contextmanager
@@ -1263,8 +1262,8 @@ def read_sensortran_files_routine(
     AST = np.zeros((nx, ntime), dtype=np.int32)
     TMP = np.zeros((nx, ntime))
-    ST_zero = np.zeros((ntime))
-    AST_zero = np.zeros((ntime))
+    ST_zero = np.zeros(ntime)
+    AST_zero = np.zeros(ntime)
     for ii in range(ntime):
         data_dts, meta_dts = read_sensortran_single(filepathlist_dts[ii])
@@ -1468,7 +1467,7 @@ def read_apsensing_files_routine(
             + "/{0}wellLogSet/{0}wellLog"
         ).format(namespace)
     )
-    logdata_tree = logtree.find("./{0}logData".format(namespace))
+    logdata_tree = logtree.find(f"./{namespace}logData")
     # Amount of datapoints is the size of the logdata tree
     nx = len(logdata_tree)
@@ -1490,7 +1489,7 @@ def read_apsensing_files_routine(
         attrs["backwardMeasurementChannel"] = "N/A"
     data_item_names = [
-        attrs["wellbore:wellLogSet:wellLog:logCurveInfo_{0}:mnemonic".format(x)]
+        attrs[f"wellbore:wellLogSet:wellLog:logCurveInfo_{x}:mnemonic"]
         for x in range(0, 4)
     ]
@@ -1590,9 +1589,7 @@ def grab_data_per_file(file_handle):
         elif name in dim_attrs_apsensing:
             data_vars[tld[name]] = (["x", "time"], data_arri, dim_attrs_apsensing[name])
         else:
-            raise ValueError(
-                "Dont know what to do with the" + " {} data column".format(name)
-            )
+            raise ValueError(f"Don't know what to do with the {name} data column")
     _time_dtype = [("filename_tstamp", np.int64), ("acquisitionTime", " one column per section
+    coords1row = np.arange(nt * n_locs)
+    coords1col = np.hstack(
+        [np.ones(in_locs * nt) * i for i, in_locs in enumerate(len_stretch_list)]
+    )  # C for
+
+    # second calibration parameter is different per section and per timestep
+    coords2row = np.arange(nt * n_locs)
+    coords2col = np.hstack(
+        [
+            np.repeat(
+                np.arange(i * nt + n_sections, (i + 1) * nt + n_sections), in_locs
+            )
+            for i, in_locs in enumerate(len_stretch_list)
+        ]
+    )  # C for
+    coords = (
+        np.concatenate([coords1row, coords2row]),
+        np.concatenate([coords1col, coords2col]),
+    )
+
+    lny = np.log(y)
+    w = y.copy()  # 1/std.
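+    # weights for the linearised model: std(ln y) is roughly std(y) / y, so for
+    # approximately constant std(y) the weight 1 / std(ln y) is proportional to y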
+
+    ddof = n_sections + nt * n_sections  # see numpy documentation on ddof
+
+    if use_statsmodels:
+        # returns the same answer with statsmodels
+        import statsmodels.api as sm
+
+        X = sp.coo_matrix(
+            (data, coords), shape=(nt * n_locs, ddof), dtype=float, copy=False
+        )
+
+        mod_wls = sm.WLS(lny, X.toarray(), weights=w**2)
+        res_wls = mod_wls.fit()
+        # print(res_wls.summary())
+        a = res_wls.params
+
+    else:
+        wdata = data * np.hstack((w, w))
+        wX = sp.coo_matrix(
+            (wdata, coords),
+            shape=(nt * n_locs, n_sections + nt * n_sections),
+            dtype=float,
+            copy=False,
+        )
+
+        wlny = lny * w
+
+        p0_est = np.asarray(n_sections * [0.0] + nt * n_sections * [8])
+        # noinspection PyTypeChecker
+        a = ln.lsqr(wX, wlny, x0=p0_est, show=not suppress_info, calc_var=False)[0]
+
+    beta = a[:n_sections]
+    beta_expand_to_sec = np.hstack(
+        [
+            np.repeat(float(beta[i]), leni * nt)
+            for i, leni in enumerate(len_stretch_list)
+        ]
+    )
+    G = np.asarray(a[n_sections:])
+    G_expand_to_sec = np.hstack(
+        [
+            np.repeat(G[i * nt : (i + 1) * nt], leni)
+            for i, leni in enumerate(len_stretch_list)
+        ]
+    )
+
+    I_est = np.exp(G_expand_to_sec) * np.exp(x * beta_expand_to_sec)
+    resid = I_est - y
+    var_I = resid.var(ddof=1)
+    return var_I, resid
+
+
+def variance_stokes_linear_helper(st_sec, resid_sec, nbin, through_zero):
+    # Adjust nbin silently to fit residuals in
+    # rectangular matrix and use numpy for computation
+    nbin_ = nbin
+    while st_sec.size % nbin_:
+        nbin_ -= 1
+
+    if nbin_ != nbin:
+        print(f"Adjusting nbin to: {nbin_} to fit residuals in a rectangular matrix")
+        nbin = nbin_
+
+    isort = np.argsort(st_sec)
+    st_sort_mean = st_sec[isort].reshape((nbin, -1)).mean(axis=1)
+    st_sort_var = resid_sec[isort].reshape((nbin, -1)).var(axis=1)
+
+    if through_zero:
+        # VAR(Stokes) = slope * Stokes
+        offset = 0.0
+        slope = np.linalg.lstsq(st_sort_mean[:, None], st_sort_var, rcond=None)[0]
+
+    else:
+        # VAR(Stokes) = slope * Stokes + offset
+        slope, offset = np.linalg.lstsq(
+            np.hstack((st_sort_mean[:, None], np.ones((nbin, 1)))),
+            st_sort_var,
+            rcond=None,
+        )[0]
+
+    if offset < 0:
+        warnings.warn(
+            "Warning! Offset of variance_stokes_linear() "
+            "is negative. This is physically "
+            "not possible. Most likely, your Stokes intensities do "
+            "not vary enough to fit a linear curve. Either "
+            "use `through_zero` option or use "
+            "`ds.variance_stokes_constant()`. Another reason "
+            "could be that your sections are defined to be "
+            "wider than they actually are."
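+            # (a negative offset implies a negative variance at zero intensity)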
+        )
+
+    def var_fun(stokes):
+        return slope * stokes + offset
+
+    return slope, offset, st_sort_mean, st_sort_var, resid_sec, var_fun
diff --git a/tests/test_datastore.py b/tests/test_datastore.py
index b6b40c77..15a6cc38 100644
--- a/tests/test_datastore.py
+++ b/tests/test_datastore.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 import hashlib
 import os
 import tempfile
diff --git a/tests/test_dtscalibration.py b/tests/test_dtscalibration.py
index 805a900e..b354c584 100644
--- a/tests/test_dtscalibration.py
+++ b/tests/test_dtscalibration.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 import os
 import numpy as np
@@ -131,7 +130,11 @@ def test_variance_input_types_single():
     )
     ds.conf_int_single_ended(
-        st_var=st_var, ast_var=st_var, mc_sample_size=100, da_random_state=state
+        st_var=st_var,
+        ast_var=st_var,
+        mc_sample_size=100,
+        da_random_state=state,
+        mc_remove_set_flag=False,
     )
     assert_almost_equal_verbose(
@@ -321,6 +324,7 @@ def test_variance_input_types_double():
     )
     ds.conf_int_double_ended(
+        sections=sections,
         st_var=st_var,
         ast_var=st_var,
         rst_var=st_var,
@@ -353,6 +357,7 @@ def st_var_callable(stokes):
     )
     ds.conf_int_double_ended(
+        sections=sections,
         st_var=st_var_callable,
         ast_var=st_var_callable,
         rst_var=st_var_callable,
@@ -382,6 +387,7 @@ def st_var_callable(stokes):
     )
     ds.conf_int_double_ended(
+        sections=sections,
         st_var=st_var,
         ast_var=st_var,
         rst_var=st_var,
@@ -408,6 +414,7 @@ def st_var_callable(stokes):
     )
     ds.conf_int_double_ended(
+        sections=sections,
         st_var=st_var,
         ast_var=st_var,
         rst_var=st_var,
@@ -437,6 +444,7 @@ def st_var_callable(stokes):
     )
     ds.conf_int_double_ended(
+        sections=sections,
         st_var=st_var,
         ast_var=st_var,
         rst_var=st_var,
@@ -565,6 +573,7 @@ def test_double_ended_variance_estimate_synthetic():
     assert_almost_equal_verbose(ds.tmpb.mean(), 12.0, decimal=3)
     ds.conf_int_double_ended(
+        sections=sections,
         p_val="p_val",
         p_cov="p_cov",
         st_var=mst_var,
@@ -578,20 +587,28 @@
     # Calibrated variance
     stdsf1 = ds.ufunc_per_section(
-        label="tmpf", func=np.std, temp_err=True, calc_per="stretch"
+        sections=sections, label="tmpf", func=np.std, temp_err=True, calc_per="stretch"
     )
     stdsb1 = ds.ufunc_per_section(
-        label="tmpb", func=np.std, temp_err=True, calc_per="stretch"
+        sections=sections, label="tmpb", func=np.std, temp_err=True, calc_per="stretch"
     )
     # Use a single timestep to better check if the parameter uncertainties propagate
     ds1 = ds.isel(time=1)
     # Estimated VAR
     stdsf2 = ds1.ufunc_per_section(
-        label="tmpf_mc_var", func=np.mean, temp_err=False, calc_per="stretch"
+        sections=sections,
+        label="tmpf_mc_var",
+        func=np.mean,
+        temp_err=False,
+        calc_per="stretch",
     )
     stdsb2 = ds1.ufunc_per_section(
-        label="tmpb_mc_var", func=np.mean, temp_err=False, calc_per="stretch"
+        sections=sections,
+        label="tmpb_mc_var",
+        func=np.mean,
+        temp_err=False,
+        calc_per="stretch",
     )
     for (_, v1), (_, v2) in zip(stdsf1.items(), stdsf2.items()):
@@ -702,14 +719,23 @@ def test_single_ended_variance_estimate_synthetic():
     # Calibrated variance
     stdsf1 = ds.ufunc_per_section(
-        label="tmpf", func=np.std, temp_err=True, calc_per="stretch", ddof=1
+        sections=sections,
+        label="tmpf",
+        func=np.std,
+        temp_err=True,
+        calc_per="stretch",
+        ddof=1,
     )
     # Use a single timestep to better check if the parameter uncertainties propagate
     ds1 = ds.isel(time=1)
     # Estimated VAR
     stdsf2 = ds1.ufunc_per_section(
-        label="tmpf_mc_var", func=np.mean, temp_err=False, calc_per="stretch"
+        sections=sections,
+        label="tmpf_mc_var",
+        func=np.mean,
+        temp_err=False,
calc_per="stretch", ) for (_, v1), (_, v2) in zip(stdsf1.items(), stdsf2.items()): @@ -2513,6 +2539,7 @@ def test_double_ended_exponential_variance_estimate_synthetic(): ) ds.conf_int_double_ended( + sections=sections, p_val="p_val", p_cov="p_cov", st_label=st_label, @@ -2530,10 +2557,10 @@ def test_double_ended_exponential_variance_estimate_synthetic(): # Calibrated variance stdsf1 = ds.ufunc_per_section( - label="tmpf", func=np.std, temp_err=True, calc_per="stretch" + sections=sections, label="tmpf", func=np.std, temp_err=True, calc_per="stretch" ) stdsb1 = ds.ufunc_per_section( - label="tmpb", func=np.std, temp_err=True, calc_per="stretch" + sections=sections, label="tmpb", func=np.std, temp_err=True, calc_per="stretch" ) # Use a single timestep to better check if the parameter uncertainties propagate @@ -2541,10 +2568,18 @@ def test_double_ended_exponential_variance_estimate_synthetic(): # Estimated VAR stdsf2 = ds1.ufunc_per_section( - label="tmpf_mc_var", func=np.mean, temp_err=False, calc_per="stretch" + sections=sections, + label="tmpf_mc_var", + func=np.mean, + temp_err=False, + calc_per="stretch", ) stdsb2 = ds1.ufunc_per_section( - label="tmpb_mc_var", func=np.mean, temp_err=False, calc_per="stretch" + sections=sections, + label="tmpb_mc_var", + func=np.mean, + temp_err=False, + calc_per="stretch", ) for (_, v1), (_, v2) in zip(stdsf1.items(), stdsf2.items()): @@ -2662,6 +2697,7 @@ def test_estimate_variance_of_temperature_estimate(): ) ds.conf_int_double_ended( + sections=sections, p_val="p_val", p_cov="p_cov", st_var=mst_var, @@ -3527,7 +3563,12 @@ def test_single_ended_exponential_variance_estimate_synthetic(): # Calibrated variance stdsf1 = ds.ufunc_per_section( - label="tmpf", func=np.var, temp_err=True, calc_per="stretch", ddof=1 + sections=sections, + label="tmpf", + func=np.var, + temp_err=True, + calc_per="stretch", + ddof=1, ) # Use a single timestep to better check if the parameter uncertainties @@ -3535,7 +3576,11 @@ def test_single_ended_exponential_variance_estimate_synthetic(): ds1 = ds.isel(time=1) # Estimated VAR stdsf2 = ds1.ufunc_per_section( - label="tmpf_mc_var", func=np.mean, temp_err=False, calc_per="stretch" + sections=sections, + label="tmpf_mc_var", + func=np.mean, + temp_err=False, + calc_per="stretch", ) for (_, v1), (_, v2) in zip(stdsf1.items(), stdsf2.items()): @@ -3675,6 +3720,7 @@ def test_average_measurements_double_ended(): solver="sparse", ) ds.average_double_ended( + sections=sections, p_val="p_val", p_cov="p_cov", st_var=st_var, @@ -3688,6 +3734,7 @@ def test_average_measurements_double_ended(): ) ix = ds.get_section_indices(slice(6, 10)) ds.average_double_ended( + sections=sections, p_val="p_val", p_cov="p_cov", st_var=st_var, @@ -3704,6 +3751,7 @@ def test_average_measurements_double_ended(): np.datetime64("2018-03-28T00:41:12.084000000"), ) ds.average_double_ended( + sections=sections, p_val="p_val", p_cov="p_cov", st_var=st_var, @@ -3717,6 +3765,7 @@ def test_average_measurements_double_ended(): ci_avg_time_sel=sl, ) ds.average_double_ended( + sections=sections, p_val="p_val", p_cov="p_cov", st_var=st_var,