From 2cff48222b9497dbe2659c7dbcfe5e7687a1058d Mon Sep 17 00:00:00 2001
From: Bas des Tombe
Date: Sat, 4 Nov 2023 14:12:29 +0100
Subject: [PATCH] Fix docs reference (#208)

* Fix docs reference

* typo

* Black formatted dts_accessor.py

* Skip errors in coverage of example notebooks

* Fix doc headers to end with colons

* Improved docs

Parallel tests are configured using xdist; worksteal resulted in issues, and
xdist was already set.

* Use isinstance() rather than type() for a typecheck.

* Formatting
---
 .gitignore                             |   3 +-
 README.rst                             |   2 +-
 docs/reference/index.rst               |  70 ++--
 pyproject.toml                         |  30 +-
 src/dtscalibration/averaging_utils.py  |  69 ----
 src/dtscalibration/calibrate_utils.py  | 228 ++++++++----
 .../calibration/section_utils.py       |  44 +--
 src/dtscalibration/datastore_utils.py  |  75 ++--
 src/dtscalibration/dts_accessor.py     | 348 +++++++++---------
 src/dtscalibration/io/apsensing.py     |  43 +--
 src/dtscalibration/io/sensornet.py     |  36 +-
 src/dtscalibration/io/sensortran.py    |  15 +-
 src/dtscalibration/io/silixa.py        |  96 ++---
 src/dtscalibration/io/utils.py         |  23 +-
 src/dtscalibration/plot.py             |  35 +-
 src/dtscalibration/variance_helpers.py |   5 +-
 src/dtscalibration/variance_stokes.py  |  34 +-
 17 files changed, 555 insertions(+), 601 deletions(-)
 delete mode 100644 src/dtscalibration/averaging_utils.py

diff --git a/.gitignore b/.gitignore
index dd531a85..affedd6d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -57,7 +57,7 @@ output/*/index.html
 
 # Sphinx
 docs/_build
-**/generated/**/*
+**/generated
 
 .DS_Store
 *~
@@ -74,3 +74,4 @@ docs/_build
 *.bak
 .vscode/settings.json
 *.code-workspace
+xunit-result.xml

diff --git a/README.rst b/README.rst
index 03f01ee2..2870cc8d 100644
--- a/README.rst
+++ b/README.rst
@@ -95,7 +95,7 @@ Documentation
 * A full calibration procedure for single-ended setups is presented in notebook `07Calibrate_single_ended.ipynb `_ and for double-ended setups in `08Calibrate_double_ended.ipynb `_.
 * Documentation at https://python-dts-calibration.readthedocs.io/ .
-* Example notebooks that work within the browser can be viewed `here `_.
+* Example notebooks (`./docs/notebooks`) that work within the browser can be viewed `here `_.
 
 How to cite
 ===========

diff --git a/docs/reference/index.rst b/docs/reference/index.rst
index 73c3961c..9c667877 100644
--- a/docs/reference/index.rst
+++ b/docs/reference/index.rst
@@ -4,48 +4,70 @@ Reference
 
 Load the data
 -------------
-See example notebooks 01, A2, A3, and A4.
+See example notebooks 01, A2, A3, and A4. Import directly from `dtscalibration`.
 
-.. automodule:: dtscalibration.io
-    :members: dtscalibration.read_apsensing_files
-    :nosignatures:
+.. currentmodule:: dtscalibration
+.. autosummary::
+    :toctree: ./generated
+    :nosignatures:
+
+    read_apsensing_files
+    read_sensornet_files
+    read_sensortran_files
+    read_silixa_files
 
 Compute the variance in the Stokes measurements
 -----------------------------------------------
-See example notebooks 04 and have a look at the docstring of the dtscalibration.variance_stokes funcitons.
+See example notebook 04. Import from `dtscalibration.variance_stokes`.
+
+.. currentmodule:: dtscalibration.variance_stokes
+.. autosummary::
+    :toctree: ./generated
+    :nosignatures:
+
+    variance_stokes_constant
+    variance_stokes_linear
+    variance_stokes_exponential
 
 The DTS Accessor
 ----------------
-See example natebooks 07, 08, and 17.
+These methods are available as an `xarray.Dataset` accessor. Add
+`from dtscalibration.dts_accessor import DtsAccessor` to your import
+statements. See example notebooks 07, 08, and 17.
 
-.. currentmodule:: xarray
+.. currentmodule:: xarray.Dataset
 .. autosummary::
     :toctree: generated/
    :template: autosummary/accessor_method.rst
     :nosignatures:
 
-    Dataset.dts.sections
-    Dataset.dts.calibrate_single_ended
-    Dataset.dts.calibrate_double_ended
-    Dataset.dts.monte_carlo_single_ended
-    Dataset.dts.monte_carlo_double_ended
-    Dataset.dts.average_monte_carlo_single_ended
-    Dataset.dts.average_monte_carlo_double_ended
-    Dataset.dts.get_default_encoding
-    Dataset.dts.get_timeseries_keys
-    Dataset.dts.matching_sections
-    Dataset.dts.ufunc_per_section
+    dts.sections
+    dts.calibrate_single_ended
+    dts.calibrate_double_ended
+    dts.monte_carlo_single_ended
+    dts.monte_carlo_double_ended
+    dts.average_monte_carlo_single_ended
+    dts.average_monte_carlo_double_ended
+    dts.get_default_encoding
+    dts.get_timeseries_keys
+    dts.matching_sections
+    dts.ufunc_per_section
 
 Plot the results
 ----------------
 
-.. automodule:: dtscalibration.plot
-    :members:
-    :nosignatures:
\ No newline at end of file
+Import from `dtscalibration.plot`.
+
+.. currentmodule:: dtscalibration.plot
+.. autosummary::
+    :toctree: ./generated
+    :nosignatures:
+
+    plot_residuals_reference_sections
+    plot_residuals_reference_sections_single
+    plot_accuracy
+    sigma_report
+    plot_location_residuals_double_ended
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 4fae6751..1ff141ca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -106,16 +106,16 @@ features = ["dev"]
 
 [tool.hatch.envs.default.scripts]
 lint = [
-    "ruff check .",
-    "black --check .",
-    "isort --check-only --diff .",
+    "ruff check src/ tests/ examples/",
+    "black --check src/ tests/ examples/",
+    "isort --check-only --diff src/ tests/ examples/",
     "mypy src/",
 ]
-format = ["black .", "isort .", "lint",]
-test = ["pytest -n auto --dist worksteal ./src/ ./tests/",]
-fast-test = ["pytest -n auto --dist worksteal ./tests/ -m \"not slow\"",]
+format = ["isort src/ tests/ examples/", "ruff check src/ tests/ examples/ --fix", "black src/ tests/ examples/", "lint",]
+test = ["pytest ./src/ ./tests/",]
+fast-test = ["pytest ./tests/ -m \"not slow\"",]
 coverage = [
-    "pytest --cov --cov-report term --cov-report xml --junitxml=xunit-result.xml tests/",
+    "pytest --cov --cov-report term --cov-report xml --cov-branch --junitxml=xunit-result.xml tests/",
 ]
 
 [tool.hatch.envs.docs]
@@ -152,12 +152,15 @@ select = [ # It would be nice to have the commented out checks working.
     "UP", # pyupgrade (upgrade syntax to current syntax)
     "PLE", # Pylint error https://github.com/charliermarsh/ruff#error-ple
     # "PLR", # Pylint refactor (e.g. too-many-arguments)
-    # "PLW", # Pylint warning (useless-else-on-loop)
+    "PLW", # Pylint warning (useless-else-on-loop)
+    # "I", # isort
+    "SIM", # flake8-simplify
+
 ]
 extend-select = [
-    # "D401", # First line should be in imperative mood
-    # "D400", # First line should end in a period.
-    # "D404", # First word of the docstring should not be "This"
+    "D401", # First line should be in imperative mood
+    "D400", # First line should end in a period.
+    "D404", # First word of the docstring should not be "This"
     "TID252", # No relative imports (not pep8 compliant)
 ]
 ignore = [
     "E501", # Line too long (want to have fixed
 ]
 # Allow autofix for all enabled rules (when `--fix`) is provided.
-fixable = ["E", "F", "UP", "PLE"] +fixable = ["E", "F", "UP", "PLE", "D"] unfixable = [] line-length = 88 exclude = ["docs", "build"] @@ -183,6 +186,9 @@ convention = "google" [tool.ruff.mccabe] max-complexity = 10 +[tool.ruff.lint.isort] +force-single-line = true + [tool.isort] py_version=39 force_single_line = true diff --git a/src/dtscalibration/averaging_utils.py b/src/dtscalibration/averaging_utils.py deleted file mode 100644 index 662510b5..00000000 --- a/src/dtscalibration/averaging_utils.py +++ /dev/null @@ -1,69 +0,0 @@ -def inverse_variance_weighted_mean( - self, - tmp1="tmpf", - tmp2="tmpb", - tmp1_var="tmpf_mc_var", - tmp2_var="tmpb_mc_var", - tmpw_store="tmpw", - tmpw_var_store="tmpw_var", -): - """Compute inverse variance weighted average, and add result in-place. - - Parameters - ---------- - tmp1 : str - The label of the first temperature dataset that is averaged - tmp2 : str - The label of the second temperature dataset that is averaged - tmp1_var : str - The variance of tmp1 - tmp2_var : str - The variance of tmp2 - tmpw_store : str - The label of the averaged temperature dataset - tmpw_var_store : str - The label of the variance of the averaged temperature dataset - - Returns - ------- - - """ - - self[tmpw_var_store] = 1 / (1 / self[tmp1_var] + 1 / self[tmp2_var]) - - self[tmpw_store] = ( - self[tmp1] / self[tmp1_var] + self[tmp2] / self[tmp2_var] - ) * self[tmpw_var_store] - - pass - - -def inverse_variance_weighted_mean_array( - self, - tmp_label="tmpf", - tmp_var_label="tmpf_mc_var", - tmpw_store="tmpw", - tmpw_var_store="tmpw_var", - dim="time", -): - """ - Calculates the weighted average across a dimension. - - Parameters - ---------- - - Returns - ------- - - See Also - -------- - - https://en.wikipedia.org/wiki/Inverse-variance_weighting - - """ - self[tmpw_var_store] = 1 / (1 / self[tmp_var_label]).sum(dim=dim) - - self[tmpw_store] = (self[tmp_label] / self[tmp_var_label]).sum(dim=dim) / ( - 1 / self[tmp_var_label] - ).sum(dim=dim) - - pass diff --git a/src/dtscalibration/calibrate_utils.py b/src/dtscalibration/calibrate_utils.py index e1156157..29779155 100644 --- a/src/dtscalibration/calibrate_utils.py +++ b/src/dtscalibration/calibrate_utils.py @@ -6,33 +6,25 @@ def parse_st_var(st, st_var): - """ - Utility function to check the st_var input and to return in DataArray - format. + """Utility function to check the st_var input and to return in DataArray format. Parameters ---------- st : DataArray Stokes/anti-stokes data variable for which the variance is being parsed. - st_var : float, callable, array-like - If `float` the variance of the noise from the Stokes detector is - described with a single value. - If `callable` the variance of the noise from the Stokes detector is - a function of the intensity, as defined in the callable function. - Or when the variance is a function of the intensity (Poisson - distributed) define a DataArray of the shape as ds.st, where the - variance can be a function of time and/or x. - - Returns + If `float` the variance of the noise from the Stokes detector is described with a single value. + If `callable` the variance of the noise from the Stokes detector is a function of the intensity, as defined in the callable function. + Or when the variance is a function of the intensity (Poisson distributed) define a DataArray of the shape as ds.st, + where the variance can be a function of time and/or x. + + Returns: ------- st_var_sec : DataArray The variance of the noise from the Stokes detector. 
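+
+    Examples:
+    --------
+    A minimal, hedged sketch; the constant ``5.0`` and the scaling factor
+    ``0.1`` are invented illustration values, not library defaults::
+
+        # constant noise variance for every observation
+        st_var_sec = parse_st_var(ds.st, 5.0)
+
+        # noise variance that scales linearly with the Stokes intensity
+        st_var_sec = parse_st_var(ds.st, lambda st: 0.1 * st)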
""" - if callable(st_var): - st_var_sec = st_var(st) - else: - st_var_sec = xr.ones_like(st) * st_var + st_var_sec = st_var(st) if callable(st_var) else xr.ones_like(st) * st_var + return st_var_sec assert np.all( np.isfinite(st_var_sec) @@ -57,7 +49,7 @@ def calibration_single_ended_helper( trans_att, solver, ): - """Only used in `calibration_single_ended()`""" + """Only used in `calibration_single_ended()`.""" nt = self.dts.nt nx = self.dts.nx nta = len(trans_att) @@ -203,9 +195,7 @@ def calibration_single_ended_solver( # noqa: MC0001 trans_att=[], verbose=False, ): - """ - The solver for single-ended setups. Assumes `ds` is pre-configured with - `sections` and `trans_att`. + """The solver for single-ended setups. Assumes `ds` is pre-configured with `sections` and `trans_att`. Parameters ---------- @@ -234,12 +224,16 @@ def calibration_single_ended_solver( # noqa: MC0001 matching_indices : array-like Is an array of size (np, 2), where np is the number of paired locations. This array is produced by `matching_sections()`. - verbose : bool - Returns + Returns: ------- - + p_cov : np.ndarray + The covariance matrix of the estimated parameters. + p_val : np.ndarray + The estimated parameters. + p_var : np.ndarray + The variance of the estimated parameters. """ # get ix_sec argsort so the sections are in order of increasing x ix_sec = ds.dts.ufunc_per_section(sections=sections, x_indices=True, calc_per="all") @@ -1131,9 +1125,7 @@ def calibrate_double_ended_solver( # noqa: MC0001 nta=None, verbose=False, ): - """ - The solver for double-ended setups. Assumes `ds` is pre-configured with - `sections` and `trans_att`. + """The solver for double-ended setups. Assumes `ds` is pre-configured with `sections` and `trans_att`. The construction of X differs a bit from what is presented in the article. The choice to divert from the article is made because @@ -1145,6 +1137,12 @@ def calibrate_double_ended_solver( # noqa: MC0001 Parameters ---------- ds : DataStore + sections : Dict[str, List[slice]] + Sections are defined in a dictionary with its keywords of the + names of the reference + temperature time series. Its values are lists of slice objects, + where each slice object + is a stretch. st_var : float, array-like, optional If `float` the variance of the noise from the Stokes detector is described with a single value. Or when the @@ -1183,8 +1181,14 @@ def calibrate_double_ended_solver( # noqa: MC0001 locations. This array is produced by `matching_sections()`. verbose : bool - Returns + Returns: ------- + p_cov : ndarray + Covariance matrix of the estimated parameters. + p_val : ndarray + Estimated parameters. + p_var : ndarray + Variance of the estimated parameters. """ ix_sec = ds.dts.ufunc_per_section(sections=sections, x_indices=True, calc_per="all") @@ -1573,6 +1577,32 @@ def calibrate_double_ended_solver( # noqa: MC0001 def matching_section_location_indices(ix_sec, hix, tix): + """Returns all indices of the entire fiber that either are used for calibrating to reference temperature or for matching sections. + + The indices are sorted. + + Parameters + ---------- + ix_sec : array_like + Indices in the section of interest. + + hix : array_like + Indices of the hot sections. + + tix : array_like + Indices of the cold sections. + + Returns: + ------- + ix_from_cal_match_to_glob : ndarray + Contains the global coordinate indices of the E. 
+ """ + ix_cal_match = np.unique(np.concatenate((ix_sec, hix, tix))) + nx_cal_match = ix_cal_match.size + ix_sec2 = np.searchsorted(ix_cal_match, ix_sec) + ix_E0_mask = np.array([ix for ix in range(nx_cal_match) if ix != ix_sec2[0]]) + ix_from_cal_match_to_glob = ix_cal_match[ix_E0_mask] + return ix_from_cal_match_to_glob # contains all indices of the entire fiber that either are used for # calibrating to reference temperature or for matching sections. Is sorted. ix_cal_match = np.unique(np.concatenate((ix_sec, hix, tix))) @@ -1592,11 +1622,10 @@ def matching_section_location_indices(ix_sec, hix, tix): def construct_submatrices_matching_sections(x, ix_sec, hix, tix, nt, trans_att): - """ - For all matching indices, where subscript 1 refers to the indices in - `hix` and subscript 2 refers to the indices in `tix`. + """For all matching indices, where subscript 1 refers to the indices in `hix` and subscript 2 refers to the indices in `tix`. + F1 - F2 = E2 - E1 + TAF2 - TAF1 # EQ1 - B1 - B2 = E1 - E2 + TAB2 - TAB1 # EQ2 + B1 - B2 = E1 - E2 + TAB2 - TAB1 # EQ2. For matching indices (`hix` and `tix`) that are outside of the reference sections an additional equation is needed for `E` per time step. @@ -1628,8 +1657,33 @@ def construct_submatrices_matching_sections(x, ix_sec, hix, tix, nt, trans_att): tix : array-like of int nt : int - Returns + Returns: ------- + E_match_F : sparse matrix + E in EQ1 + E_match_B : sparse matrix + E in EQ2 + E_match_no_cal : sparse matrix + E in EQ3 + Z_TA_eq1 : sparse matrix + TAF in EQ1 + Z_TA_eq2 : sparse matrix + TAB in EQ2 + Z_TA_eq3 : sparse matrix + TAF and TAB in EQ3 + d_no_cal : sparse matrix + df and db in EQ3 + ix_from_cal_match_to_glob : ndarray + Contains the global coordinate indices of the E. + ix_match_not_cal : ndarray + Indices in the section of interest. Excluding E0 + Zero_eq12_gamma : sparse matrix + Zero in EQ1 and EQ2 + Zero_eq3_gamma : sparse matrix + Zero in EQ3 + Zero_d_eq12 : sparse matrix + Zero in EQ1 and EQ2 + """ # contains all indices of the entire fiber that either are using for @@ -1827,6 +1881,7 @@ def construct_submatrices_matching_sections(x, ix_sec, hix, tix, nt, trans_att): def construct_submatrices(sections, nt, nx, ds, trans_att, x_sec): """Wrapped in a function to reduce memory usage. + E is zero at the first index of the reference section (ds_sec) Constructing: Z_gamma (nt * nx, 1). Data: positive 1/temp @@ -1835,12 +1890,17 @@ def construct_submatrices(sections, nt, nx, ds, trans_att, x_sec): Zero_gamma (nt * nx, 1) zero_d (nt * nx, nt) Z_TA_fw (nt * nx, nta * 2 * nt) minus ones - Z_TA_bw (nt * nx, nta * 2 * nt) minus ones + Z_TA_bw (nt * nx, nta * 2 * nt) minus ones. I_fw = 1/Tref*gamma - D_fw - E - TA_fw I_bw = 1/Tref*gamma - D_bw + E - TA_bw - """ + Parameters + ---------- + sections : + Sections to use for calibration. + + """ # Z \gamma # Eq.47 cal_ref = np.array( ds.dts.ufunc_per_section( @@ -1937,13 +1997,14 @@ def wls_sparse( return_werr=False, **solver_kwargs, ): - """ + """Weighted least squares solver. + If some initial estimate x0 is known and if damp == 0, one could proceed as follows: - Compute a residual vector r0 = b - A*x0. - Use LSQR to solve the system A*dx = r0. - Add the correction dx to obtain a final solution x = x0 + dx. from: https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse. - linalg.lsqr.html + linalg.lsqr.html. Parameters ---------- @@ -1954,9 +2015,14 @@ def wls_sparse( verbose kwargs - Returns + Returns: ------- - + p_sol : ndarray + The solution. 
+ p_var : ndarray + The variance of the solution. + p_cov : ndarray + The covariance of the solution. """ # The var returned by ln.lsqr is normalized by the variance of the error. # To obtain the correct variance, it needs to be scaled by the variance @@ -1993,10 +2059,7 @@ def wls_sparse( w_std = np.broadcast_to(np.atleast_2d(np.squeeze(w_std)).T, (X.shape[0], 1)) - if not sp.issparse(X): - wX = w_std * X - else: - wX = X.multiply(w_std) + wX = w_std * X if not sp.issparse(X) else X.multiply(w_std) if x0 is None: # noinspection PyTypeChecker @@ -2051,7 +2114,7 @@ def wls_sparse( def wls_stats(X, y, w=1.0, calc_cov=False, x0=None, return_werr=False, verbose=False): - """ + """Weighted least squares solver using statsmodels. Parameters ---------- @@ -2059,11 +2122,18 @@ def wls_stats(X, y, w=1.0, calc_cov=False, x0=None, return_werr=False, verbose=F y w calc_cov + x0 + return_werr verbose - Returns + Returns: ------- - + p_sol : ndarray + The solution. + p_var : ndarray + The variance of the solution. + p_cov : ndarray + The covariance of the solution. """ y = np.asarray(y) w = np.asarray(w) @@ -2113,28 +2183,14 @@ def calc_alpha_double( talpha_fw_var=None, talpha_bw_var=None, ): - """Eq.50 if weighted least squares. Assumes ds has `trans_att` - pre-configured.""" - + """Eq.50 if weighted least squares. Assumes ds has `trans_att` pre-configured.""" assert ix_alpha_is_zero >= 0, "Define ix_alpha_is_zero" + str(ix_alpha_is_zero) if st_var is not None: - if callable(st_var): - st_var_val = st_var(ds.st) - else: - st_var_val = np.asarray(st_var) - if callable(ast_var): - ast_var_val = ast_var(ds.ast) - else: - ast_var_val = np.asarray(ast_var) - if callable(rst_var): - rst_var_val = rst_var(ds.rst) - else: - rst_var_val = np.asarray(rst_var) - if callable(rast_var): - rast_var_val = rast_var(ds.rast) - else: - rast_var_val = np.asarray(rast_var) + st_var_val = st_var(ds.st) if callable(st_var) else np.asarray(st_var) + ast_var_val = ast_var(ds.ast) if callable(ast_var) else np.asarray(ast_var) + rst_var_val = rst_var(ds.rst) if callable(rst_var) else np.asarray(rst_var) + rast_var_val = rast_var(ds.rast) if callable(rast_var) else np.asarray(rast_var) i_var_fw = ds.st**-2 * st_var_val + ds.ast**-2 * ast_var_val i_var_bw = ds.rst**-2 * rst_var_val + ds.rast**-2 * rast_var_val @@ -2216,6 +2272,40 @@ def calc_alpha_double( def calc_df_db_double_est(ds, sections, ix_alpha_is_zero, gamma_est): + """Calculate forward and backward double-ended calibration coefficients. + + Parameters + ---------- + ds : xarray.Dataset + The dataset containing the temperature data. + sections : List[Tuple[slice, slice, bool]] + The sections to use for calibration. + ix_alpha_is_zero : int + The index of the reference section. + gamma_est : float + The estimated gamma value. + + Returns: + ------- + Tuple[xarray.DataArray, xarray.DataArray] + The forward and backward double-ended calibration coefficients. 
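+
+    A hedged usage sketch; the gamma value is invented (a few hundred kelvin
+    is merely a typical order of magnitude for Raman DTS)::
+
+        df_est, db_est = calc_df_db_double_est(
+            ds, sections, ix_alpha_is_zero=0, gamma_est=482.6
+        )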
+ """ + Ifwx0 = np.log( + ds.st.isel(x=ix_alpha_is_zero) / ds.ast.isel(x=ix_alpha_is_zero) + ).values + Ibwx0 = np.log( + ds.rst.isel(x=ix_alpha_is_zero) / ds.rast.isel(x=ix_alpha_is_zero) + ).values + ref_temps_refs = ds.dts.ufunc_per_section( + sections=sections, label="st", ref_temp_broadcasted=True, calc_per="all" + ) + ix_sec = ds.dts.ufunc_per_section(sections=sections, x_indices=True, calc_per="all") + ref_temps_x0 = ( + ref_temps_refs[ix_sec == ix_alpha_is_zero].flatten().compute() + 273.15 + ) + df_est = gamma_est / ref_temps_x0 - Ifwx0 + db_est = gamma_est / ref_temps_x0 - Ibwx0 + return df_est, db_est Ifwx0 = np.log( ds.st.isel(x=ix_alpha_is_zero) / ds.ast.isel(x=ix_alpha_is_zero) ).values @@ -2235,8 +2325,8 @@ def calc_df_db_double_est(ds, sections, ix_alpha_is_zero, gamma_est): def match_sections(ds, matching_sections): - """ - Matches location indices of two sections. + """Matches location indices of two sections. + Parameters ---------- ds @@ -2246,13 +2336,13 @@ def match_sections(ds, matching_sections): that are matched. The third item is a boolean and is True if the two sections have a reverse direction ("J-configuration"), most common. - Returns + Returns: ------- matching_indices : array-like Is an array of size (np, 2), where np is the number of paired locations. The array contains indices to locations along the fiber. """ - for hslice, tslice, reverse_flag in matching_sections: + for hslice, tslice, _ in matching_sections: hxs = ds.x.sel(x=hslice).size txs = ds.x.sel(x=tslice).size diff --git a/src/dtscalibration/calibration/section_utils.py b/src/dtscalibration/calibration/section_utils.py index df9edcb7..97af8b73 100644 --- a/src/dtscalibration/calibration/section_utils.py +++ b/src/dtscalibration/calibration/section_utils.py @@ -2,8 +2,6 @@ import xarray as xr import yaml -from dtscalibration.datastore_utils import ufunc_per_section_helper - def set_sections(ds: xr.Dataset, sections: dict[str, list[slice]]): ds.attrs["_sections"] = yaml.dump(sections) @@ -14,8 +12,7 @@ def set_matching_sections(ds: xr.Dataset, matching_sections: dict[str, list[slic def validate_no_overlapping_sections(sections: dict[str, list[slice]]): - """ - Check if the sections do not overlap. + """Check if the sections do not overlap. Parameters ---------- @@ -23,11 +20,11 @@ def validate_no_overlapping_sections(sections: dict[str, list[slice]]): The keys of the dictionary are the names of the sections. The values are lists of slice objects. - Returns + Returns: ------- None - Raises + Raises: ------ AssertionError If the sections overlap. @@ -50,11 +47,10 @@ def validate_no_overlapping_sections(sections: dict[str, list[slice]]): def validate_sections_definition(sections: dict[str, list[slice]]): - """ - Check if the sections are defined correctly. The sections are defined + """Check if the sections are defined correctly. The sections are defined correctly if: - The keys of the sections-dictionary are strings (assertion) - - The values of the sections-dictionary are lists (assertion) + - The values of the sections-dictionary are lists (assertion). Parameters ---------- @@ -62,11 +58,11 @@ def validate_sections_definition(sections: dict[str, list[slice]]): The keys of the dictionary are the names of the sections. The values are lists of slice objects. - Returns + Returns: ------- None - Raises + Raises: ------ AssertionError If the sections are not defined correctly. 
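+
+    Examples:
+    --------
+    A hedged sketch; the section name and coordinates are invented::
+
+        sections = {"probe1Temperature": [slice(7.5, 17.0), slice(70.0, 80.0)]}
+        validate_sections_definition(sections)      # str keys, list values
+        validate_no_overlapping_sections(sections)  # slices are disjoint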
@@ -84,14 +80,13 @@ def validate_sections_definition(sections: dict[str, list[slice]]): def validate_sections(ds: xr.Dataset, sections: dict[str, list[slice]]): - """ - Check if the sections are valid. The sections are valid if: + """Check if the sections are valid. The sections are valid if: - The keys of the sections-dictionary refer to a valid timeserie already stored in ds.data_vars (assertion) - The values of the sections-dictionary are lists of slice objects. (assertion) - The slices are within the x-dimension (assertion) - - The slices do not overlap (assertion) + - The slices do not overlap (assertion). Parameters ---------- @@ -102,11 +97,11 @@ def validate_sections(ds: xr.Dataset, sections: dict[str, list[slice]]): The keys of the dictionary are the names of the sections. The values are lists of slice objects. - Returns + Returns: ------- None - Raises + Raises: ------ AssertionError If the sections are not valid. @@ -143,8 +138,7 @@ def ufunc_per_section( calc_per="stretch", **func_kwargs, ): - """ - User function applied to parts of the cable. Super useful, + """User function applied to parts of the cable. Super useful, many options and slightly complicated. @@ -183,12 +177,11 @@ def ufunc_per_section( to a list and concatenating after. - Returns + Returns: ------- - Examples + Examples: -------- - 1. Calculate the variance of the residuals in the along ALL the\ reference sections wrt the temperature of the water baths @@ -249,15 +242,14 @@ def ufunc_per_section( >>> ix_loc = d.ufunc_per_section(sections, x_indices=True) - Note + Note: ---- If `self[label]` or `self[subtract_from_label]` is a Dask array, a Dask array is returned else a numpy array is returned """ - if label is None: - dataarray = None - else: - dataarray = ds[label] + from dtscalibration.datastore_utils import ufunc_per_section_helper + + dataarray = None if label is None else ds[label] if x_indices: x_coords = ds.x diff --git a/src/dtscalibration/datastore_utils.py b/src/dtscalibration/datastore_utils.py index 3fe3555a..7c0a74fa 100644 --- a/src/dtscalibration/datastore_utils.py +++ b/src/dtscalibration/datastore_utils.py @@ -14,8 +14,7 @@ def check_dims( labels: Union[list[str], tuple[str]], correct_dims: Optional[tuple[str]] = None, ) -> None: - """ - Compare the dimensions of different labels. For example: ['st', 'rst']. + """Compare the dimensions of different labels. For example: ['st', 'rst']. If a calculation is performed and the dimensions do not agree, the answers do not make sense and the matrices are broadcasted and the memory usage will explode. If no correct dims provided the dimensions of the different are compared. @@ -29,7 +28,7 @@ def check_dims( correct_dims : The correct dimensions. - Returns + Returns: ------- """ @@ -52,8 +51,7 @@ def check_dims( class ParameterIndexDoubleEnded: - """ - npar = 1 + 2 * nt + nx + 2 * nt * nta + """npar = 1 + 2 * nt + nx + 2 * nt * nta assert pval.size == npar assert p_var.size == npar if calc_cov: @@ -70,7 +68,7 @@ class ParameterIndexDoubleEnded: if nta > 0: ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') self['talpha_fw'] = (('time', 'trans_att'), ta[:, 0, :]) - self['talpha_bw'] = (('time', 'trans_att'), ta[:, 1, :]) + self['talpha_bw'] = (('time', 'trans_att'), ta[:, 1, :]). """ def __init__(self, nt, nx, nta, fix_gamma=False, fix_alpha=False): @@ -144,19 +142,17 @@ def ta(self): @property def taf(self): - """ - Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. 
+ """Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') - self['talpha_fw'] = (('time', 'trans_att'), ta[:, 0, :]) + self['talpha_fw'] = (('time', 'trans_att'), ta[:, 0, :]). """ return self.ta[:, 0, :].flatten(order="C") @property def tab(self): - """ - Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. + """Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') - self['talpha_bw'] = (('time', 'trans_att'), ta[:, 1, :]) + self['talpha_bw'] = (('time', 'trans_att'), ta[:, 1, :]). """ return self.ta[:, 1, :].flatten(order="C") @@ -174,15 +170,15 @@ def get_ta_pars(self, pval): return np.zeros(shape=(self.nt, 2, 0)) def get_taf_pars(self, pval): - """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" + """Returns taf parameters of shape (nt, nta) or (nt, nta, a).""" return self.get_ta_pars(pval=pval)[:, 0, :] def get_tab_pars(self, pval): - """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" + """Returns taf parameters of shape (nt, nta) or (nt, nta, a).""" return self.get_ta_pars(pval=pval)[:, 1, :] def get_taf_values(self, pval, x, trans_att, axis=""): - """returns taf parameters of shape (nx, nt)""" + """Returns taf parameters of shape (nx, nt).""" pval = np.atleast_2d(pval) assert pval.ndim == 2 and pval.shape[1] == self.npar @@ -222,7 +218,7 @@ def get_taf_values(self, pval, x, trans_att, axis=""): return arr_out def get_tab_values(self, pval, x, trans_att, axis=""): - """returns tab parameters of shape (nx, nt)""" + """Returns tab parameters of shape (nx, nt).""" assert pval.shape[-1] == self.npar arr_out = np.zeros((self.nx, self.nt)) @@ -261,9 +257,8 @@ def get_tab_values(self, pval, x, trans_att, axis=""): class ParameterIndexSingleEnded: - """ - if parameter fixed, they are not in - npar = 1 + 1 + nt + nta * nt + """if parameter fixed, they are not in + npar = 1 + 1 + nt + nta * nt. """ def __init__(self, nt, nx, nta, includes_alpha=False, includes_dalpha=True): @@ -320,7 +315,7 @@ def c(self): @property def taf(self): - """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" + """Returns taf parameters of shape (nt, nta) or (nt, nta, a).""" # ta = p_val[-nt * nta:].reshape((nt, nta), order='F') # self["talpha"] = (('time', 'trans_att'), ta[:, :]) if self.includes_alpha: @@ -352,7 +347,7 @@ def get_taf_pars(self, pval): return np.zeros(shape=(self.nt, 0)) def get_taf_values(self, pval, x, trans_att, axis=""): - """returns taf parameters of shape (nx, nt)""" + """Returns taf parameters of shape (nx, nt).""" # assert pval.ndim == 2 and pval.shape[1] == self.npar arr_out = np.zeros((self.nx, self.nt)) @@ -407,7 +402,7 @@ def get_netcdf_encoding( complevel ds : DataStore - Returns + Returns: ------- encoding: Encoding dictionary. @@ -691,8 +686,7 @@ def merge_double_ended( plot_result: bool = True, verbose: bool = True, ) -> xr.Dataset: - """ - Some measurements are not set up on the DTS-device as double-ended + """Some measurements are not set up on the DTS-device as double-ended meausurements. This means that the two channels have to be merged manually. 
This function can merge two single-ended DataStore objects into a single @@ -712,7 +706,7 @@ def merge_double_ended( Plot the aligned Stokes of the forward and backward channels verbose : bool - Returns + Returns: ------- ds : DataStore object With the two channels merged @@ -798,7 +792,7 @@ def merge_double_ended_times( verbose : bool Print additional information to screen - Returns + Returns: ------- ds_fw_sel : DataSore object DataStore object representing the forward measurement channel with @@ -896,8 +890,7 @@ def merge_double_ended_times( def shift_double_ended( ds: xr.Dataset, i_shift: int, verbose: bool = True ) -> xr.Dataset: - """ - The cable length was initially configured during the DTS measurement. For double ended + """The cable length was initially configured during the DTS measurement. For double ended measurements it is important to enter the correct length so that the forward channel and the backward channel are aligned. @@ -923,7 +916,7 @@ def shift_double_ended( If True, the function will inform the user which variables are dropped. If False, the function will silently drop the variables. - Returns + Returns: ------- ds2 : DataStore object With a shifted x-axis @@ -1013,7 +1006,7 @@ def suggest_cable_shift_double_ended( plot_result : bool Plot the summed error as a function of the shift. - Returns + Returns: ------- ishift1: int Suggested shift based on Err1 @@ -1121,8 +1114,7 @@ def ufunc_per_section_helper( calc_per="stretch", **func_kwargs, ): - """ - User function applied to parts of the cable. Super useful, + """User function applied to parts of the cable. Super useful, many options and slightly complicated. @@ -1166,12 +1158,11 @@ def ufunc_per_section_helper( func_kwargs : dict Dictionary with options that are passed to func - Returns + Returns: ------- - Examples + Examples: -------- - 1. Calculate the variance of the residuals in the along ALL the\ reference sections wrt the temperature of the water baths @@ -1232,7 +1223,7 @@ def ufunc_per_section_helper( >>> ix_loc = ufunc_per_section_helper(x_coords=d.x) - Note + Note: ---- If `dataarray` or `subtract_from_dataarray` is a Dask array, a Dask array is returned else a numpy array is returned @@ -1240,13 +1231,11 @@ def ufunc_per_section_helper( if not func: def func(a): - """ - - Parameters + """Parameters ---------- a - Returns + Returns: ------- """ @@ -1255,13 +1244,11 @@ def func(a): elif isinstance(func, str) and func == "var": def func(a): - """ - - Parameters + """Parameters ---------- a - Returns + Returns: ------- """ diff --git a/src/dtscalibration/dts_accessor.py b/src/dtscalibration/dts_accessor.py index b841b4c3..753e0a35 100644 --- a/src/dtscalibration/dts_accessor.py +++ b/src/dtscalibration/dts_accessor.py @@ -1,3 +1,5 @@ +"""This module provides an xarray accessor for DTS calibration.""" + import dask.array as da import numpy as np import scipy.stats as sst @@ -22,7 +24,16 @@ @xr.register_dataset_accessor("dts") class DtsAccessor: + """An xarray accessor for DTS calibration.""" + def __init__(self, xarray_obj): + """Initialize the DtsAccessor with an xarray object. + + Parameters + ---------- + xarray_obj : xarray.Dataset + The xarray object to be used for DTS calibration. + """ # cache xarray_obj self._obj = xarray_obj self.attrs = xarray_obj.attrs @@ -43,6 +54,7 @@ def __init__(self, xarray_obj): self.acquisitiontime_bw = xarray_obj.get("userAcquisitionTimeBW") def __repr__(self): + """Return a string representation of the DtsAccessor object.""" # __repr__ from xarray is used and edited. 
# 'xarray' is prepended. so we remove it and add 'dtscalibration' s = xr.core.formatting.dataset_repr(self._obj) @@ -93,26 +105,24 @@ def __repr__(self): # noinspection PyIncorrectDocstring @property def sections(self): - """ - Define calibration sections. Each section requires a reference - temperature time series, such as the temperature measured by an - external temperature sensor. They should already be part of the - DataStore object. + """Define calibration sections. - Please look at the example notebook on `sections` if you encounter - difficulties. + Each section requires a reference temperature time series, such as the temperature measured by an + external temperature sensor. They should already be part of the DataStore object. + + Please look at the example notebook on `sections` if you encounter difficulties. Parameters ---------- sections : Dict[str, List[slice]] - Sections are defined in a dictionary with its keywords of the - names of the reference - temperature time series. Its values are lists of slice objects, - where each slice object + Sections are defined in a dictionary with its keywords of the names of the reference + temperature time series. Its values are lists of slice objects, where each slice object is a stretch. - Returns - ------- + Returns: + ------- + dict + A dictionary with the names of the reference temperature time series as keys and lists of slice objects as values. """ if "_sections" not in self._obj.attrs: self._obj.attrs["_sections"] = yaml.dump(None) @@ -134,25 +144,24 @@ def sections(self, value): # noinspection PyIncorrectDocstring @property def matching_sections(self): - """ - Define calibration sections. Each matching_section requires a reference - temperature time series, such as the temperature measured by an - external temperature sensor. They should already be part of the - DataStore object. + """Define calibration sections. + + Each matching_section requires a reference temperature time series, such as the temperature measured by an + external temperature sensor. They should already be part of the DataStore object. - Please look at the example notebook on `matching_sections` if you encounter - difficulties. + Please look at the example notebook on `matching_sections` if you encounter difficulties. Parameters ---------- matching_sections : List[Tuple[slice, slice, bool]], optional - Provide a list of tuples. A tuple per matching section. Each tuple - has three items. The first two items are the slices of the sections - that are matched. The third item is a boolean and is True if the two - sections have a reverse direction ("J-configuration"). - Returns - ------- + Provide a list of tuples. A tuple per matching section. Each tuple has three items. The first two items are + the slices of the sections that are matched. The third item is a boolean and is True if the two sections have + a reverse direction ("J-configuration"). + Returns: + ------- + dict + A dictionary with the names of the reference temperature time series as keys and lists of slice objects as values. """ if "_matching_sections" not in self._obj.attrs: self._obj.attrs["_matching_sections"] = yaml.dump(None) @@ -174,12 +183,12 @@ def matching_sections(self, value): raise NotImplementedError(msg) def get_default_encoding(self, time_chunks_from_key=None): - """ - Returns a dictionary with sensible compression setting for writing - netCDF files. + """Returns a dictionary with sensible compression setting for writing netCDF files. 
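+
+        A hedged usage sketch; the output filename is invented::
+
+            encoding = ds.dts.get_default_encoding()
+            ds.to_netcdf("calibrated.nc", encoding=encoding)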
- Returns + Returns: ------- + dict + A dictionary with sensible compression setting for writing netCDF files. """ # The following variables are stored with a sufficiently large @@ -272,9 +281,7 @@ def get_default_encoding(self, time_chunks_from_key=None): return encoding def get_timeseries_keys(self): - """ - Returns a list of the keys of the time series variables. - """ + """Returns a list of the keys of the time series variables.""" return [k for k, v in self._obj.data_vars.items() if v.dims == ("time",)] def ufunc_per_section( @@ -290,17 +297,16 @@ def ufunc_per_section( suppress_section_validation=False, **func_kwargs, ): - """ - User function applied to parts of the cable. Super useful, - many options and slightly - complicated. + """Functions applied to parts of the cable. + + Super useful,many options and slightlycomplicated. The function `func` is taken over all the timesteps and calculated - per `calc_per`. This - is returned as a dictionary + per `calc_per`. This is returned as a dictionary Parameters ---------- + sections : Dict[str, List[slice]], optional If `None` is supplied, `ds.sections` is used. Define calibration sections. Each section requires a reference temperature time series, @@ -330,12 +336,14 @@ def ufunc_per_section( to a list and concatenating after. - Returns + Returns: ------- + dict + A dictionary with the keys of the sections and the values are the + result of the function applied to the data in the section. - Examples + Examples: -------- - 1. Calculate the variance of the residuals in the along ALL the\ reference sections wrt the temperature of the water baths @@ -401,7 +409,7 @@ def ufunc_per_section( >>> ix_loc = d.ufunc_per_section(sections=sections, x_indices=True) - Note + Notes: ---- If `self[label]` or `self[subtract_from_label]` is a Dask array, a Dask array is returned else a numpy array is returned @@ -416,10 +424,7 @@ def ufunc_per_section( k in self._obj ), f"{k} is not in the Dataset but is in `sections` and is required to compute temp_err" - if label is None: - dataarray = None - else: - dataarray = self._obj[label] + dataarray = None if label is None else self._obj[label] if x_indices: x_coords = self.x @@ -459,10 +464,13 @@ def calibrate_single_ended( fix_dalpha=None, fix_alpha=None, ): - r""" + r"""Calibrate the measured data of a single ended setup to temperature. + Calibrate the Stokes (`ds.st`) and anti-Stokes (`ds.ast`) data to temperature using fiber sections with a known temperature - (`ds.sections`) for single-ended setups. The calibrated temperature is + (`ds.sections`) for single-ended setups. + + The calibrated temperature is stored under `ds.tmpf` and its variance under `ds.tmpf_var`. In single-ended setups, Stokes and anti-Stokes intensity is measured @@ -472,31 +480,31 @@ def calibrate_single_ended( .. math:: - \int_0^x{\Delta\\alpha(x')\,\mathrm{d}x'} \\approx \Delta\\alpha x + \int_0^x{\Delta\alpha(x')\,\mathrm{d}x'} \approx \Delta \alpha x The temperature can now be written from Equation 10 [1]_ as: .. math:: - T(x,t) \\approx \\frac{\gamma}{I(x,t) + C(t) + \Delta\\alpha x} + T(x,t) \approx \frac{\gamma}{I(x,t) + C(t) + \Delta\alpha x} where .. math:: - I(x,t) = \ln{\left(\\frac{P_+(x,t)}{P_-(x,t)}\\right)} + I(x,t) = \ln{\left(\frac{P_+(x,t)}{P_-(x,t)}\right)} .. 
math:: - C(t) = \ln{\left(\\frac{\eta_-(t)K_-/\lambda_-^4}{\eta_+(t)K_+/\lambda_+^4}\\right)} + C(t) = \ln{\left(\frac{\eta_-(t)K_-/\lambda_-^4}{\eta_+(t)K_+/\lambda_+^4}\right)} where :math:`C` is the lumped effect of the difference in gain at :math:`x=0` between Stokes and anti-Stokes intensity measurements and the dependence of the scattering intensity on the wavelength. The parameters :math:`P_+` and :math:`P_-` are the Stokes and anti-Stokes intensity measurements, respectively. - The parameters :math:`\gamma`, :math:`C(t)`, and :math:`\Delta\\alpha` + The parameters :math:`\gamma`, :math:`C(t)`, and :math:`\Delta\alpha` must be estimated from calibration to reference sections, as discussed in Section 5 [1]_. The parameter :math:`C` must be estimated for each time and is constant along the fiber. :math:`T` in the listed @@ -504,15 +512,16 @@ def calibrate_single_ended( Parameters ---------- + p_val : array-like, optional Define `p_val`, `p_var`, `p_cov` if you used an external function for calibration. Has size 2 + `nt`. First value is :math:`\gamma`, - second is :math:`\Delta \\alpha`, others are :math:`C` for each + second is :math:`\Delta \alpha`, others are :math:`C` for each timestep. p_var : array-like, optional Define `p_val`, `p_var`, `p_cov` if you used an external function for calibration. Has size 2 + `nt`. First value is :math:`\gamma`, - second is :math:`\Delta \\alpha`, others are :math:`C` for each + second is :math:`\Delta \alpha`, others are :math:`C` for each timestep. p_cov : array-like, optional The covariances of `p_val`. @@ -565,7 +574,7 @@ def calibrate_single_ended( for. fix_dalpha : Tuple[float, float], optional A tuple containing two floats. The first float is the value of - dalpha (:math:`\Delta \\alpha` in [1]_), and the second item is the + dalpha (:math:`\Delta \alpha` in [1]_), and the second item is the variance of the estimate of dalpha. Covariances between alpha and other parameters are not accounted for. @@ -573,17 +582,22 @@ def calibrate_single_ended( A tuple containing two array-likes. The first array-like is the integrated differential attenuation of length x, and the second item is its variance. - Returns + Returns: ------- + out : xarray.Dataset + A Dataset with the calibrated temperature under `tmpf` and its + variance under `tmpf_var`. The Dataset also contains the + calibration parameters under `gamma`, `dalpha`, and `c`. The + covariance matrix of the calibration parameters is stored. - References + References: ---------- .. [1] des Tombe, B., Schilperoort, B., & Bakker, M. (2020). Estimation of Temperature and Associated Uncertainty from Fiber-Optic Raman- Spectrum Distributed Temperature Sensing. Sensors, 20(8), 2235. https://doi.org/10.3390/s20082235 - Examples + Examples: -------- - `Example notebook 7: Calibrate single ended `_ + - `Example notebook 8: Calibrate double ended ` """ # out contains the state out = xr.Dataset( @@ -1247,17 +1289,14 @@ def monte_carlo_single_ended( reduce_memory_usage=False, mc_remove_set_flag=True, ): - """The result object is what comes out of the single_ended_calibration routine) + """The result object is what comes out of the single_ended_calibration routine). 
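+
+        A hedged sketch; the variances, confidence intervals, and sample size
+        are invented, and `result` is the dataset returned by
+        `calibrate_single_ended`::
+
+            out = ds.dts.monte_carlo_single_ended(
+                result=result,
+                st_var=5.0,
+                ast_var=5.0,
+                conf_ints=[2.5, 97.5],
+                mc_sample_size=50,
+            )
+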
TODO: Use get_params_from_pval_single_ended() to extract parameter sets from mc """ assert self.st.dims[0] == "x", "Stokes are transposed" assert self.ast.dims[0] == "x", "Stokes are transposed" - if da_random_state: - state = da_random_state - else: - state = da.random.RandomState() + state = da_random_state if da_random_state else da.random.RandomState() # out contains the state out = xr.Dataset( @@ -1321,7 +1360,7 @@ def monte_carlo_single_ended( ["r_st", "r_ast"], [self.st, self.ast], [st_var, ast_var] ): # Load the mean as chunked Dask array, otherwise eats memory - if type(sti.data) == da.core.Array: + if isinstance(sti.data, da.core.Array): loc = da.asarray(sti.data, chunks=memchunk[1:]) else: loc = da.from_array(sti.data, chunks=memchunk[1:]) @@ -1329,24 +1368,30 @@ def monte_carlo_single_ended( # Make sure variance is of size (no, nt) if np.size(st_vari) > 1: if st_vari.shape == sti.shape: - pass + st_vari_broadcasted = st_vari else: - st_vari = np.broadcast_to(st_vari, (no, nt)) + st_vari_broadcasted = np.broadcast_to(st_vari, (no, nt)) else: - pass + st_vari_broadcasted = st_vari # Load variance as chunked Dask array, otherwise eats memory - if type(st_vari) == da.core.Array: - st_vari_da = da.asarray(st_vari, chunks=memchunk[1:]) + if isinstance(st_vari_broadcasted, da.core.Array): + st_vari_da = da.asarray(st_vari_broadcasted, chunks=memchunk[1:]) - elif callable(st_vari) and type(sti.data) == da.core.Array: - st_vari_da = da.asarray(st_vari(sti).data, chunks=memchunk[1:]) + elif callable(st_vari_broadcasted) and isinstance(sti.data, da.core.Array): + st_vari_da = da.asarray( + st_vari_broadcasted(sti).data, chunks=memchunk[1:] + ) - elif callable(st_vari) and type(sti.data) != da.core.Array: - st_vari_da = da.from_array(st_vari(sti).data, chunks=memchunk[1:]) + elif callable(st_vari_broadcasted) and not isinstance( + sti.data, da.core.Array + ): + st_vari_da = da.from_array( + st_vari_broadcasted(sti).data, chunks=memchunk[1:] + ) else: - st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) + st_vari_da = da.from_array(st_vari_broadcasted, chunks=memchunk[1:]) params[key_mc] = ( ("mc", "x", "time"), @@ -1435,9 +1480,8 @@ def monte_carlo_double_ended( mc_remove_set_flag=True, reduce_memory_usage=False, ): - r""" - Estimation of the confidence intervals for the temperatures measured - with a double-ended setup. + r"""Estimation of the confidence intervals for the temperatures measured with a double-ended setup. + Double-ended setups require four additional steps to estimate the confidence intervals for the temperature. First, the variances of the Stokes and anti-Stokes intensity measurements of the forward and @@ -1451,7 +1495,7 @@ def monte_carlo_double_ended( Normal distributions are assigned for :math:`A` (`ds.alpha`) for each location outside of the reference sections. These distributions are centered - around :math:`A_p` and have variance :math:`\sigma^2\left[A_p\\right]` + around :math:`A_p` and have variance :math:`\sigma^2\left[A_p\right]` given by Equations 44 and 45. Fourth, the distributions are sampled and :math:`T_{\mathrm{F},m,n}` and :math:`T_{\mathrm{B},m,n}` are computed with Equations 16 and 17, respectively. Fifth, step four is repeated to @@ -1459,8 +1503,8 @@ def monte_carlo_double_ended( :math:`T_{\mathrm{B},m,n}` to approximate their probability density functions. 
Sixth, the standard uncertainties of :math:`T_{\mathrm{F},m,n}` and :math:`T_{\mathrm{B},m,n}` - (:math:`\sigma\left[T_{\mathrm{F},m,n}\\right]` and - :math:`\sigma\left[T_{\mathrm{B},m,n}\\right]`) are estimated with the + (:math:`\sigma\left[T_{\mathrm{F},m,n}\right]` and + :math:`\sigma\left[T_{\mathrm{B},m,n}\right]`) are estimated with the standard deviation of their realizations. Seventh, for each realization :math:`i` the temperature :math:`T_{m,n,i}` is computed as the weighted average of :math:`T_{\mathrm{F},m,n,i}` and @@ -1469,18 +1513,18 @@ def monte_carlo_double_ended( .. math:: T_{m,n,i} =\ - \sigma^2\left[T_{m,n}\\right]\left({\\frac{T_{\mathrm{F},m,n,i}}{\ - \sigma^2\left[T_{\mathrm{F},m,n}\\right]} +\ - \\frac{T_{\mathrm{B},m,n,i}}{\ - \sigma^2\left[T_{\mathrm{B},m,n}\\right]}}\\right) + \sigma^2\left[T_{m,n}\right]\left({\frac{T_{\mathrm{F},m,n,i}}{\ + \sigma^2\left[T_{\mathrm{F},m,n}\right]} +\ + \frac{T_{\mathrm{B},m,n,i}}{\ + \sigma^2\left[T_{\mathrm{B},m,n}\right]}}\right) where .. math:: - \sigma^2\left[T_{m,n}\\right] = \\frac{1}{1 /\ - \sigma^2\left[T_{\mathrm{F},m,n}\\right] + 1 /\ - \sigma^2\left[T_{\mathrm{B},m,n}\\right]} + \sigma^2\left[T_{m,n}\right] = \frac{1}{1 /\ + \sigma^2\left[T_{\mathrm{F},m,n}\right] + 1 /\ + \sigma^2\left[T_{\mathrm{B},m,n}\right]} The best estimate of the temperature :math:`T_{m,n}` is computed directly from the best estimates of :math:`T_{\mathrm{F},m,n}` and @@ -1488,9 +1532,9 @@ def monte_carlo_double_ended( .. math:: T_{m,n} =\ - \sigma^2\left[T_{m,n}\\right]\left({\\frac{T_{\mathrm{F},m,n}}{\ - \sigma^2\left[T_{\mathrm{F},m,n}\\right]} + \\frac{T_{\mathrm{B},m,n}}{\ - \sigma^2\left[T_{\mathrm{B},m,n}\\right]}}\\right) + \sigma^2\left[T_{m,n}\right]\left({\frac{T_{\mathrm{F},m,n}}{\ + \sigma^2\left[T_{\mathrm{F},m,n}\right]} + \frac{T_{\mathrm{B},m,n}}{\ + \sigma^2\left[T_{\mathrm{B},m,n}\right]}}\right) Alternatively, the best estimate of :math:`T_{m,n}` can be approximated with the mean of the :math:`T_{m,n,i}` values. Finally, the 95\% @@ -1545,10 +1589,11 @@ def monte_carlo_double_ended( reduce_memory_usage : bool Use less memory but at the expense of longer computation time - Returns + Returns: ------- + dict - References + References: ---------- .. [1] des Tombe, B., Schilperoort, B., & Bakker, M. (2020). 
Estimation of Temperature and Associated Uncertainty from Fiber-Optic Raman- @@ -1559,8 +1604,7 @@ def monte_carlo_double_ended( """ def create_da_ta2(no, i_splice, direction="fw", chunks=None): - """create mask array mc, o, nt""" - + """Create mask array mc, o, nt.""" if direction == "fw": arr = da.concatenate( ( @@ -1749,7 +1793,7 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): [st_var, ast_var, rst_var, rast_var], ): # Load the mean as chunked Dask array, otherwise eats memory - if type(sti.data) == da.core.Array: + if isinstance(sti.data, da.core.Array): loc = da.asarray(sti.data, chunks=memchunk[1:]) else: loc = da.from_array(sti.data, chunks=memchunk[1:]) @@ -1757,24 +1801,30 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): # Make sure variance is of size (no, nt) if np.size(st_vari) > 1: if st_vari.shape == sti.shape: - pass + st_vari_broadcasted = st_vari else: - st_vari = np.broadcast_to(st_vari, (no, nt)) + st_vari_broadcasted = np.broadcast_to(st_vari, (no, nt)) else: - pass + st_vari_broadcasted = st_vari # Load variance as chunked Dask array, otherwise eats memory - if type(st_vari) == da.core.Array: - st_vari_da = da.asarray(st_vari, chunks=memchunk[1:]) + if isinstance(st_vari_broadcasted, da.core.Array): + st_vari_da = da.asarray(st_vari_broadcasted, chunks=memchunk[1:]) - elif callable(st_vari) and type(sti.data) == da.core.Array: - st_vari_da = da.asarray(st_vari(sti).data, chunks=memchunk[1:]) + elif callable(st_vari_broadcasted) and isinstance(sti.data, da.core.Array): + st_vari_da = da.asarray( + st_vari_broadcasted(sti).data, chunks=memchunk[1:] + ) - elif callable(st_vari) and type(sti.data) != da.core.Array: - st_vari_da = da.from_array(st_vari(sti).data, chunks=memchunk[1:]) + elif callable(st_vari_broadcasted) and not isinstance( + sti.data, da.core.Array + ): + st_vari_da = da.from_array( + st_vari_broadcasted(sti).data, chunks=memchunk[1:] + ) else: - st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) + st_vari_da = da.from_array(st_vari_broadcasted, chunks=memchunk[1:]) params[k] = ( ("mc", "x", "time"), @@ -1837,8 +1887,8 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): xi = self.ufunc_per_section( sections=sections, x_indices=True, calc_per="all" ) - x_mask_ = [True if ix in xi else False for ix in range(params.x.size)] - x_mask = np.reshape(x_mask_, (1, -1, 1)) + x_mask = [ix in xi for ix in range(params.x.size)] + x_mask = np.reshape(x_mask, (1, -1, 1)) params[label + "_mc_set"] = params[label + "_mc_set"].where(x_mask) # subtract the mean temperature @@ -1937,8 +1987,7 @@ def average_monte_carlo_single_ended( mc_remove_set_flag=True, reduce_memory_usage=False, ): - """ - Average temperatures from single-ended setups. + """Average temperatures from single-ended setups. Four types of averaging are implemented. Please see Example Notebook 16. @@ -2041,8 +2090,9 @@ def average_monte_carlo_single_ended( reduce_memory_usage : bool Use less memory but at the expense of longer computation time - Returns + Returns: ------- + dict """ # out contains the state @@ -2280,8 +2330,7 @@ def average_monte_carlo_double_ended( reduce_memory_usage=False, **kwargs, ): - """ - Average temperatures from double-ended setups. + """Average temperatures from double-ended setups. Four types of averaging are implemented. Please see Example Notebook 16. 
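+
+        A hedged sketch; the variances, confidence intervals, and sample size
+        are invented, and `result` is the dataset returned by
+        `calibrate_double_ended`::
+
+            out = ds.dts.average_monte_carlo_double_ended(
+                result=result,
+                st_var=5.0, ast_var=5.0, rst_var=5.0, rast_var=5.0,
+                conf_ints=[2.5, 97.5],
+                mc_sample_size=50,
+                ci_avg_time_flag1=True,  # select one of the averaging modes
+            )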
@@ -2383,40 +2432,11 @@ def average_monte_carlo_double_ended( reduce_memory_usage : bool Use less memory but at the expense of longer computation time - Returns + Returns: ------- + dict """ - - # def create_da_ta2(no, i_splice, direction="fw", chunks=None): - # """create mask array mc, o, nt""" - - # if direction == "fw": - # arr = da.concatenate( - # ( - # da.zeros((1, i_splice, 1), chunks=(1, i_splice, 1), dtype=bool), - # da.ones( - # (1, no - i_splice, 1), - # chunks=(1, no - i_splice, 1), - # dtype=bool, - # ), - # ), - # axis=1, - # ).rechunk((1, chunks[1], 1)) - # else: - # arr = da.concatenate( - # ( - # da.ones((1, i_splice, 1), chunks=(1, i_splice, 1), dtype=bool), - # da.zeros( - # (1, no - i_splice, 1), - # chunks=(1, no - i_splice, 1), - # dtype=bool, - # ), - # ), - # axis=1, - # ).rechunk((1, chunks[1], 1)) - # return arr - out = xr.Dataset( coords={"x": self.x, "time": self.time, "trans_att": result["trans_att"]} ).copy() diff --git a/src/dtscalibration/io/apsensing.py b/src/dtscalibration/io/apsensing.py index f2164d9f..ff4e4d0e 100644 --- a/src/dtscalibration/io/apsensing.py +++ b/src/dtscalibration/io/apsensing.py @@ -56,11 +56,11 @@ def read_apsensing_files( kwargs : dict-like, optional keyword-arguments are passed to DataStore initialization - Notes + Notes: ----- Only XML files are supported for now - Returns + Returns: ------- datastore : DataStore The newly created datastore. @@ -114,11 +114,10 @@ def apsensing_xml_version_check(filepathlist): ---------- filepathlist - Returns + Returns: ------- """ - sep = ":" attrs, _ = read_apsensing_attrs_singlefile(filepathlist[0], sep) @@ -132,8 +131,7 @@ def read_apsensing_files_routine( silent=False, load_in_memory="auto", ): - """ - Internal routine that reads AP Sensing files. + """Internal routine that reads AP Sensing files. Use dtscalibration.read_apsensing_files function instead. The AP sensing files are not timezone aware @@ -146,7 +144,7 @@ def read_apsensing_files_routine( silent load_in_memory - Returns + Returns: ------- """ @@ -224,17 +222,14 @@ def read_apsensing_files_routine( @dask.delayed def grab_data_per_file(file_handle): - """ - - Parameters + """Parameters ---------- file_handle - Returns + Returns: ------- """ - with open_file(file_handle, mode="r") as f_h: if skip_chars: f_h.read(3) @@ -262,11 +257,7 @@ def grab_data_per_file(file_handle): # Check whether to compute data_arr (if possible 25% faster) data_arr_cnk = data_arr.rechunk({0: -1, 1: -1, 2: "auto"}) - if load_in_memory == "auto" and data_arr_cnk.npartitions <= 5: - if not silent: - print("Reading the data from disk") - data_arr = data_arr_cnk.compute() - elif load_in_memory: + if load_in_memory == "auto" and data_arr_cnk.npartitions <= 5 or load_in_memory: if not silent: print("Reading the data from disk") data_arr = data_arr_cnk.compute() @@ -296,13 +287,11 @@ def grab_data_per_file(file_handle): @dask.delayed def grab_timeseries_per_file(file_handle): - """ - - Parameters + """Parameters ---------- file_handle - Returns + Returns: ------- """ @@ -352,14 +341,12 @@ def grab_timeseries_per_file(file_handle): def read_apsensing_attrs_singlefile(filename, sep): - """ - - Parameters + """Parameters ---------- filename sep - Returns + Returns: ------- """ @@ -368,8 +355,7 @@ def read_apsensing_attrs_singlefile(filename, sep): import xmltodict def metakey(meta, dict_to_parse, prefix): - """ - Fills the metadata dictionairy with data from dict_to_parse. + """Fills the metadata dictionairy with data from dict_to_parse. 
The dict_to_parse is the raw data from a silixa xml-file. dict_to_parse is a nested dictionary to represent the different levels of hierarchy. For example, @@ -384,11 +370,10 @@ def metakey(meta, dict_to_parse, prefix): dict_to_parse : dict prefix - Returns + Returns: ------- """ - for key in dict_to_parse: if prefix == "": prefix_parse = key.replace("@", "") diff --git a/src/dtscalibration/io/sensornet.py b/src/dtscalibration/io/sensornet.py index ccac51c9..2664b6c9 100644 --- a/src/dtscalibration/io/sensornet.py +++ b/src/dtscalibration/io/sensornet.py @@ -54,13 +54,13 @@ def read_sensornet_files( kwargs : dict-like, optional keyword-arguments are passed to DataStore initialization - Notes + Notes: ----- Compressed sensornet files can not be directly decoded because the files are encoded with encoding='windows-1252' instead of UTF-8. - Returns + Returns: ------- datastore : DataStore The newly created datastore. @@ -104,14 +104,11 @@ def read_sensornet_files( valid = any([fnmatch.fnmatch(ddf_version, v_) for v_ in valid_versions]) - if valid: - if fnmatch.fnmatch(ddf_version, "Halo DTS v1*"): - flip_reverse_measurements = True - elif fnmatch.fnmatch(ddf_version, "Sentinel DTS v5*"): - flip_reverse_measurements = True - else: - flip_reverse_measurements = False - + if valid and ( + fnmatch.fnmatch(ddf_version, "Halo DTS v1*") + or fnmatch.fnmatch(ddf_version, "Sentinel DTS v5*") + ): + flip_reverse_measurements = True else: flip_reverse_measurements = False warnings.warn( @@ -135,22 +132,21 @@ def read_sensornet_files( def sensornet_ddf_version_check(filepathlist): - """Function which checks and returns the .ddf file version + """Function which checks and returns the .ddf file version. Parameters ---------- filepathlist - Returns + Returns: ------- ddf_version """ - # Obtain metadata fro mthe first file _, meta = read_sensornet_single(filepathlist[0]) - if "Software version number" in meta.keys(): + if "Software version number" in meta: version_string = meta["Software version number"] else: raise ValueError( @@ -164,13 +160,11 @@ def sensornet_ddf_version_check(filepathlist): def read_sensornet_single(filename): - """ - - Parameters + """Parameters ---------- filename - Returns + Returns: ------- """ @@ -252,8 +246,7 @@ def read_sensornet_files_routine_v3( fiber_length=None, flip_reverse_measurements=False, ): - """ - Internal routine that reads Sensor files. + """Internal routine that reads Sensor files. Use dtscalibration.read_sensornet_files function instead. Parameters @@ -270,11 +263,10 @@ def read_sensornet_files_routine_v3( It is the fiber length between the two connector entering the DTS device. - Returns + Returns: ------- """ - # Obtain metadata from the first file data, meta = read_sensornet_single(filepathlist[0]) diff --git a/src/dtscalibration/io/sensortran.py b/src/dtscalibration/io/sensortran.py index b1077897..01111d60 100644 --- a/src/dtscalibration/io/sensortran.py +++ b/src/dtscalibration/io/sensortran.py @@ -38,12 +38,11 @@ def read_sensortran_files( kwargs : dict-like, optional keyword-arguments are passed to DataStore initialization - Returns + Returns: ------- DataStore The newly created datastore. 
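+
+    Examples:
+    --------
+    A hedged sketch; the directory path is invented::
+
+        ds = read_sensortran_files(directory="data/sensortran_binary")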
""" - filepathlist_dts = sorted(Path(directory).glob("*BinaryRawDTS.dat")) # Make sure that the list of files contains any files @@ -88,7 +87,7 @@ def sensortran_binary_version_check(filepathlist: list[Path]): ---------- filepathlist - Returns + Returns: ------- """ @@ -108,8 +107,7 @@ def read_sensortran_files_routine( timezone_netcdf: str = "UTC", silent: bool = False, ) -> tuple[dict[str, Any], dict[str, Any], dict]: - """ - Internal routine that reads sensortran files. + """Internal routine that reads sensortran files. Use dtscalibration.read_sensortran_files function instead. The sensortran files are in UTC time @@ -121,7 +119,7 @@ def read_sensortran_files_routine( timezone_netcdf silent - Returns + Returns: ------- """ @@ -261,15 +259,14 @@ def read_sensortran_files_routine( def read_sensortran_single(file: Path) -> tuple[dict, dict]: - """ - Internal routine that reads a single sensortran file. + """Internal routine that reads a single sensortran file. Use dtscalibration.read_sensortran_files function instead. Parameters ---------- file - Returns + Returns: ------- data, metadata """ diff --git a/src/dtscalibration/io/silixa.py b/src/dtscalibration/io/silixa.py index 025b7b1f..4fdbec76 100644 --- a/src/dtscalibration/io/silixa.py +++ b/src/dtscalibration/io/silixa.py @@ -48,12 +48,11 @@ def read_silixa_files( kwargs : dict-like, optional keyword-arguments are passed to DataStore initialization - Returns + Returns: ------- datastore : DataStore The newly created datastore. """ - assert "timezone_input_files" not in kwargs, ( "The silixa files are " "already timezone aware" ) @@ -109,11 +108,10 @@ def silixa_xml_version_check(filepathlist): ---------- filepathlist - Returns + Returns: ------- """ - sep = ":" attrs = read_silixa_attrs_singlefile(filepathlist[0], sep) @@ -126,22 +124,19 @@ def silixa_xml_version_check(filepathlist): def read_silixa_attrs_singlefile(filename, sep): - """ - - Parameters + """Parameters ---------- filename sep - Returns + Returns: ------- """ import xmltodict def metakey(meta, dict_to_parse, prefix): - """ - Fills the metadata dictionairy with data from dict_to_parse. + """Fills the metadata dictionairy with data from dict_to_parse. The dict_to_parse is the raw data from a silixa xml-file. dict_to_parse is a nested dictionary to represent the different levels of hierarchy. For example, @@ -156,11 +151,10 @@ def metakey(meta, dict_to_parse, prefix): dict_to_parse : dict prefix - Returns + Returns: ------- """ - for key in dict_to_parse: if prefix == "": prefix_parse = key.replace("@", "") @@ -188,10 +182,7 @@ def metakey(meta, dict_to_parse, prefix): with open_file(filename) as fh: doc_ = xmltodict.parse(fh.read()) - if "wellLogs" in doc_.keys(): - doc = doc_["wellLogs"]["wellLog"] - else: - doc = doc_["logs"]["log"] + doc = doc_["wellLogs"]["wellLog"] if "wellLogs" in doc_ else doc_["logs"]["log"] return metakey({}, doc, "") @@ -203,8 +194,7 @@ def read_silixa_files_routine_v6( # noqa: MC0001 silent=False, load_in_memory="auto", ): - """ - Internal routine that reads Silixa files. + """Internal routine that reads Silixa files. Use dtscalibration.read_silixa_files function instead. 
 
     The silixa files are already timezone aware
@@ -216,11 +206,10 @@ def read_silixa_files_routine_v6(  # noqa: MC0001
     timezone_netcdf
     silent
 
-    Returns
+    Returns:
     -------
 
     """
-
     # translate names
     tld = {"ST": "st", "AST": "ast", "REV-ST": "rst", "REV-AST": "rast", "TMP": "tmp"}
 
@@ -266,11 +255,9 @@ def read_silixa_files_routine_v6(  # noqa: MC0001
     double_ended_flag = bool(int(attrs["isDoubleEnded"]))
     chFW = int(attrs["forwardMeasurementChannel"]) - 1  # zero-based
-    if double_ended_flag:
-        chBW = int(attrs["backwardMeasurementChannel"]) - 1  # zero-based
-    else:
-        # no backward channel is negative value. writes better to netcdf
-        chBW = -1
+    chBW = (
+        int(attrs["backwardMeasurementChannel"]) - 1 if double_ended_flag else -1
+    )  # zero-based or -1 if single ended
 
     # print summary
     if not silent:
@@ -323,23 +310,18 @@ def read_silixa_files_routine_v6(  # noqa: MC0001
 
     # add units to timeseries (unit of measurement)
     for key, item in timeseries.items():
-        if f"customData:{key}:uom" in attrs:
-            item["uom"] = attrs[f"customData:{key}:uom"]
-        else:
-            item["uom"] = ""
+        item["uom"] = attrs.get(f"customData:{key}:uom", "")
 
     # Gather data
     arr_path = "s:" + "/s:".join(["log", "logData", "data"])
 
     @dask.delayed
     def grab_data_per_file(file_handle):
-        """
-
-        Parameters
+        """Read the data from a single file handle.
+
+        Parameters
         ----------
         file_handle
 
-        Returns
+        Returns:
         -------
 
         """
@@ -368,11 +350,7 @@ def grab_data_per_file(file_handle):
 
     # Check whether to compute data_arr (if possible 25% faster)
     data_arr_cnk = data_arr.rechunk({0: -1, 1: -1, 2: "auto"})
-    if load_in_memory == "auto" and data_arr_cnk.npartitions <= 5:
-        if not silent:
-            print("Reading the data from disk")
-        data_arr = data_arr_cnk.compute()
-    elif load_in_memory:
+    if (load_in_memory == "auto" and data_arr_cnk.npartitions <= 5) or load_in_memory:
         if not silent:
             print("Reading the data from disk")
         data_arr = data_arr_cnk.compute()
@@ -401,13 +379,11 @@ def grab_data_per_file(file_handle):
 
     @dask.delayed
     def grab_timeseries_per_file(file_handle, xml_version):
-        """
-
-        Parameters
+        """Read the time series from a single file handle.
+
+        Parameters
         ----------
         file_handle
+        xml_version
 
-        Returns
+        Returns:
         -------
 
         """
@@ -514,8 +490,7 @@ def grab_timeseries_per_file(file_handle, xml_version):
 
 
 def read_silixa_files_routine_v4(  # noqa: MC0001
     filepathlist, timezone_netcdf="UTC", silent=False, load_in_memory="auto"
 ):
-    """
-    Internal routine that reads Silixa files.
+    """Internal routine that reads Silixa files.
     Use dtscalibration.read_silixa_files function instead.
 
     The silixa files are already timezone aware
@@ -527,7 +502,7 @@ def read_silixa_files_routine_v4(  # noqa: MC0001
     timezone_netcdf
     silent
 
-    Returns
+    Returns:
     -------
 
     """
@@ -563,11 +538,9 @@ def read_silixa_files_routine_v4(  # noqa: MC0001
         attrs["backwardMeasurementChannel"] = "N/A"
 
     chFW = int(attrs["forwardMeasurementChannel"]) - 1  # zero-based
-    if double_ended_flag:
-        chBW = int(attrs["backwardMeasurementChannel"]) - 1  # zero-based
-    else:
-        # no backward channel is negative value. writes better to netcdf
-        chBW = -1
+    chBW = (
+        int(attrs["backwardMeasurementChannel"]) - 1 if double_ended_flag else -1
+    )  # zero-based or -1 if single ended
 
     # obtain basic data info
     if double_ended_flag:
@@ -630,23 +603,18 @@ def read_silixa_files_routine_v4(  # noqa: MC0001
 
     # add units to timeseries (unit of measurement)
     for key, item in timeseries.items():
-        if f"customData:{key}:uom" in attrs:
-            item["uom"] = attrs[f"customData:{key}:uom"]
-        else:
-            item["uom"] = ""
+        item["uom"] = attrs.get(f"customData:{key}:uom", "")
 
     # Gather data
     arr_path = "s:" + "/s:".join(["wellLog", "logData", "data"])
 
     @dask.delayed
     def grab_data_per_file(file_handle):
-        """
-
-        Parameters
+        """Read the data from a single file handle.
+
+        Parameters
         ----------
         file_handle
 
-        Returns
+        Returns:
         -------
 
         """
@@ -674,11 +642,7 @@ def grab_data_per_file(file_handle):
 
     # Check whether to compute data_arr (if possible 25% faster)
     data_arr_cnk = data_arr.rechunk({0: -1, 1: -1, 2: "auto"})
-    if load_in_memory == "auto" and data_arr_cnk.npartitions <= 5:
-        if not silent:
-            print("Reading the data from disk")
-        data_arr = data_arr_cnk.compute()
-    elif load_in_memory:
+    if (load_in_memory == "auto" and data_arr_cnk.npartitions <= 5) or load_in_memory:
         if not silent:
             print("Reading the data from disk")
         data_arr = data_arr_cnk.compute()
@@ -709,13 +673,11 @@ def grab_data_per_file(file_handle):
 
     @dask.delayed
     def grab_timeseries_per_file(file_handle):
-        """
-
-        Parameters
+        """Read the time series from a single file handle.
+
+        Parameters
         ----------
         file_handle
 
-        Returns
+        Returns:
         -------
 
         """
diff --git a/src/dtscalibration/io/utils.py b/src/dtscalibration/io/utils.py
index b9a08338..10b7fee4 100644
--- a/src/dtscalibration/io/utils.py
+++ b/src/dtscalibration/io/utils.py
@@ -118,23 +118,19 @@ def open_file(path, **kwargs):
     if isinstance(path, tuple):
         # print('\nabout to open zipfile', path[0], '. from', path[1])
         # assert isinstance(path[1], zip)
-        the_file = path[1].open(path[0], **kwargs)
-
+        with path[1].open(path[0], **kwargs) as the_file:
+            yield the_file
     else:
-        the_file = open(path, **kwargs)
-
-    yield the_file
-    the_file.close()
+        with open(path, **kwargs) as the_file:
+            yield the_file
 
 
 def get_xml_namespace(element):
-    """
-
-    Parameters
+    """Extract the namespace from an XML element.
+
+    Parameters
     ----------
     element
 
-    Returns
+    Returns:
     -------
 
     """
@@ -152,9 +148,8 @@ def coords_time(
     dtBW=None,
     double_ended_flag=False,
 ):
-    """
-    Prepares the time coordinates for the construction of DataStore
-    instances with metadata
+    """Prepare the time coordinates for the construction of DataStore
+    instances with metadata.
 
     Parameters
     ----------
@@ -175,7 +170,7 @@ def coords_time(
     double_ended_flag : bool
         A flag whether the measurement is double ended
 
-    Returns
+    Returns:
     -------
 
     """
diff --git a/src/dtscalibration/plot.py b/src/dtscalibration/plot.py
index d864cafe..94eee678 100644
--- a/src/dtscalibration/plot.py
+++ b/src/dtscalibration/plot.py
@@ -16,8 +16,7 @@ def plot_residuals_reference_sections(
     method="split",
     cmap="RdBu_r",
 ):
-    """
-    Analyze the residuals of the reference sections, between the Stokes
+    """Analyze the residuals of the reference sections between the Stokes
     signal and a best-fit decaying exponential.
 
 
@@ -45,7 +44,7 @@ def plot_residuals_reference_sections(
         Matplotlib colormap to use for the residual plot. By default it will
         use a diverging colormap.
 
-    Returns
+    Returns:
     -------
     fig : Figure handle
 
@@ -244,8 +243,7 @@ def plot_residuals_reference_sections_single(
     units="",
     fig_kwargs=None,
 ):
-    """
-    Analyze the residuals of the reference sections, between the Stokes
+    """Analyze the residuals of the reference sections between the Stokes
     signal and a best-fit decaying exponential.
 
 
@@ -269,7 +267,7 @@ def plot_residuals_reference_sections_single(
     "x" : str
         Name of the spatial dimension
 
-    Returns
+    Returns:
     -------
     fig : Figure handle
 
@@ -379,8 +377,7 @@ def plot_accuracy(
     plot_names=True,
     sections=None,
 ):
-    """
-    Analyze the residuals of the reference sections, between the Stokes
+    """Analyze the residuals of the reference sections between the Stokes
     signal and a best-fit decaying exponential.
 
 
@@ -402,7 +399,7 @@ def plot_accuracy(
     "x" : str
         Name of the spatial dimension
 
-    Returns
+    Returns:
     -------
     fig : Figure handle
 
@@ -559,19 +556,6 @@ def plot_sigma_report(
         temp.plot(ax=ax1, linewidth=0.8, c="black", label="DTS")
 
     if itimes:
-        # std_dts_proj = d.ufunc_per_section(
-        #     func=np.std,
-        #     label='tmpf_mc_set',
-        #     calc_per='stretch',
-        #     temp_err=False,
-        #     subtract_from_label='tmpf',
-        #     axis=[0, 1])
-        # std_dts_meas = d.ufunc_per_section(
-        #     func=np.std,
-        #     label='tmpf',
-        #     calc_per='stretch',
-        #     temp_err=True,
-        #     axis=0)
         sigma_est = ds.dts.ufunc_per_section(
             sections=sections,
             label=temp_label,
@@ -592,12 +576,9 @@ def plot_sigma_report(
     for (k, v), (k_se, v_se) in zip(ds.sections.items(), sigma_est.items()):
         for vi, v_sei in zip(v, v_se):
             if hasattr(v_sei, "compute"):
                 v_sei = v_sei.compute()
 
-            if itimes:
-                val = ds[k].mean(dim="time")
-            else:
-                val = ds[k].isel(time=itimes)
+            val = ds[k].mean(dim="time") if not itimes else ds[k].isel(time=itimes)
 
             ax1.plot(
                 [vi.start, vi.stop], [val, val], linewidth=0.8, c="blue", linestyle="--"
diff --git a/src/dtscalibration/variance_helpers.py b/src/dtscalibration/variance_helpers.py
index 9ea6c834..aa243a54 100644
--- a/src/dtscalibration/variance_helpers.py
+++ b/src/dtscalibration/variance_helpers.py
@@ -170,8 +170,7 @@ def var_fun(stokes):
 
 
 def check_allclose_acquisitiontime(acquisitiontime, eps: float = 0.05) -> None:
-    """
-    Check if all acquisition times are of equal duration. For now it is not possible to calibrate
+    """Check if all acquisition times are of equal duration. For now it is not possible to calibrate
     over timesteps if the acquisition time of timesteps varies, as the Stokes variance would change
     over time.
 
@@ -184,7 +183,7 @@ def check_allclose_acquisitiontime(acquisitiontime, eps: float = 0.05) -> None:
     eps : float
         Default accepts 5% of relative variation between min and max acquisition time.
 
-    Returns
+    Returns:
     -------
     """
     dtmin = acquisitiontime.min()
diff --git a/src/dtscalibration/variance_stokes.py b/src/dtscalibration/variance_stokes.py
index 59819b87..fe6e5c4d 100644
--- a/src/dtscalibration/variance_stokes.py
+++ b/src/dtscalibration/variance_stokes.py
@@ -12,8 +12,7 @@
 
 def variance_stokes_constant(st, sections, acquisitiontime, reshape_residuals=True):
-    """
-    Approximate the variance of the noise in Stokes intensity measurements
+    """Approximate the variance of the noise in Stokes intensity measurements
     with one value, suitable for small setups.
 
     * `variance_stokes_constant()` for small setups with small variations in\
@@ -82,16 +81,15 @@ def variance_stokes_constant(st, sections, acquisitiontime, reshape_residuals=Tr
     st : DataArray
     sections : Dict[str, List[slice]]
 
-    Returns
+    Returns:
     -------
     I_var : float
         Variance of the residuals between measured and best fit
     resid : array_like
         Residuals between measured and best fit
 
-    Notes
+    Notes:
     -----
-
     * Because there are a large number of unknowns, spend time on\
     calculating an initial estimate. Can be turned off by setting to False.
@@ -99,14 +97,14 @@ def variance_stokes_constant(st, sections, acquisitiontime, reshape_residuals=Tr
     your variance estimate does not change when including measurements of\
     additional time steps, you have included enough measurements.
 
-    References
+    References:
     ----------
     .. [1] des Tombe, B., Schilperoort, B., & Bakker, M. (2020). Estimation
         of Temperature and Associated Uncertainty from Fiber-Optic Raman-
         Spectrum Distributed Temperature Sensing. Sensors, 20(8), 2235.
         https://doi.org/10.3390/s20082235
 
-    Examples
+    Examples:
     --------
     - `Example notebook 4: Calculate variance Stokes intensity measurements\
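+
+    A minimal sketch of the call, assuming ``ds`` holds data loaded with one
+    of the read functions above; the section label and acquisition-time
+    variable are illustrative::
+
+        from dtscalibration.variance_stokes import variance_stokes_constant
+
+        sections = {"probe1Temperature": [slice(7.5, 17.0)]}
+        I_var, resid = variance_stokes_constant(
+            st=ds.st,
+            sections=sections,
+            acquisitiontime=ds["userAcquisitionTimeFW"],
+        )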