diff --git a/src/dtscalibration/__init__.py b/src/dtscalibration/__init__.py index 23139e44..2087d2b9 100644 --- a/src/dtscalibration/__init__.py +++ b/src/dtscalibration/__init__.py @@ -18,16 +18,27 @@ from dtscalibration.plot import plot_residuals_reference_sections_single from dtscalibration.plot import plot_sigma_report -__version__ = '2.0.0' +__version__ = "2.0.0" __all__ = [ - "DataStore", "open_datastore", "open_mf_datastore", "read_apsensing_files", - "read_sensornet_files", "read_sensortran_files", "read_silixa_files", - 'check_dims', 'check_timestep_allclose', 'get_netcdf_encoding', - 'merge_double_ended', 'shift_double_ended', - 'suggest_cable_shift_double_ended', 'plot_accuracy', - 'plot_location_residuals_double_ended', - 'plot_residuals_reference_sections', - 'plot_residuals_reference_sections_single', 'plot_sigma_report'] + "DataStore", + "open_datastore", + "open_mf_datastore", + "read_apsensing_files", + "read_sensornet_files", + "read_sensortran_files", + "read_silixa_files", + "check_dims", + "check_timestep_allclose", + "get_netcdf_encoding", + "merge_double_ended", + "shift_double_ended", + "suggest_cable_shift_double_ended", + "plot_accuracy", + "plot_location_residuals_double_ended", + "plot_residuals_reference_sections", + "plot_residuals_reference_sections_single", + "plot_sigma_report", +] # filenames = ['datastore.py', 'datastore_utils.py', 'calibrate_utils.py', # 'plot.py', 'io.py'] diff --git a/src/dtscalibration/calibrate_utils.py b/src/dtscalibration/calibrate_utils.py index 7d23d064..66047af1 100644 --- a/src/dtscalibration/calibrate_utils.py +++ b/src/dtscalibration/calibrate_utils.py @@ -6,7 +6,7 @@ from scipy.sparse import linalg as ln -def parse_st_var(ds, st_var, st_label='st'): +def parse_st_var(ds, st_var, st_label="st"): """ Utility function to check the st_var input and to return in DataArray format. @@ -35,23 +35,26 @@ def parse_st_var(ds, st_var, st_label='st'): else: st_var_sec = xr.ones_like(ds[st_label]) * st_var - assert np.all(np.isfinite(st_var_sec)), \ - 'NaN/inf values detected in ' + st_label + '_var. Please check input.' + assert np.all(np.isfinite(st_var_sec)), ( + "NaN/inf values detected in " + st_label + "_var. Please check input." + ) - assert np.all(st_var_sec > 0.), \ - 'Negative values detected in ' + st_label + '_var. Please check input.' + assert np.all(st_var_sec > 0.0), ( + "Negative values detected in " + st_label + "_var. Please check input." + ) return st_var_sec def calibration_single_ended_solver( # noqa: MC0001 - ds, - st_var=None, - ast_var=None, - calc_cov=True, - solver='sparse', - matching_indices=None, - verbose=False): + ds, + st_var=None, + ast_var=None, + calc_cov=True, + solver="sparse", + matching_indices=None, + verbose=False, +): """ The solver for single-ended setups. Assumes `ds` is pre-configured with `sections` and `trans_att`. 
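As context for the checks reformatted above: parse_st_var accepts the Stokes variance as a scalar, an array-like, or a callable evaluated on the Stokes signal itself, and always returns it broadcast against that signal. Below is a minimal standalone sketch of the same normalization; the toy dataset, its sizes, and its values are purely illustrative, not part of the library.

    import numpy as np
    import xarray as xr

    # Toy Stokes data: 4 positions, 3 time steps (illustrative values only).
    ds = xr.Dataset(
        {"st": (("x", "time"), np.full((4, 3), 100.0))},
        coords={"x": np.linspace(0.0, 3.0, 4), "time": np.arange(3)},
    )

    def parse_st_var_sketch(ds, st_var, st_label="st"):
        if callable(st_var):
            # Callable: variance as a function of the signal, e.g. shot noise.
            st_var_sec = st_var(ds[st_label])
        else:
            # Scalar or array-like: broadcast against the Stokes variable.
            st_var_sec = xr.ones_like(ds[st_label]) * st_var
        assert np.all(np.isfinite(st_var_sec)), "NaN/inf in " + st_label + "_var"
        assert np.all(st_var_sec > 0.0), "Negative values in " + st_label + "_var"
        return st_var_sec

    print(parse_st_var_sketch(ds, 5.0).shape)                  # constant variance
    print(parse_st_var_sketch(ds, lambda st: 0.1 * st).shape)  # signal-dependent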
@@ -91,11 +94,11 @@ def calibration_single_ended_solver( # noqa: MC0001 """ # get ix_sec argsort so the sections are in order of increasing x - ix_sec = ds.ufunc_per_section(x_indices=True, calc_per='all') + ix_sec = ds.ufunc_per_section(x_indices=True, calc_per="all") ds_sec = ds.isel(x=ix_sec) - x_sec = ds_sec['x'].values - x_all = ds['x'].values + x_sec = ds_sec["x"].values + x_all = ds["x"].values nx = x_sec.size nt = ds.time.size no = ds.x.size @@ -106,20 +109,20 @@ def calibration_single_ended_solver( # noqa: MC0001 ds_ms0 = ds.isel(x=matching_indices[:, 0]) ds_ms1 = ds.isel(x=matching_indices[:, 1]) - p0_est_dalpha = np.asarray([485., 0.1] + nt * [1.4] + nta * nt * [0.]) - p0_est_alpha = np.asarray([485.] + no * [0.] + nt * [1.4] + nta * nt * [0.]) + p0_est_dalpha = np.asarray([485.0, 0.1] + nt * [1.4] + nta * nt * [0.0]) + p0_est_alpha = np.asarray([485.0] + no * [0.0] + nt * [1.4] + nta * nt * [0.0]) # X \gamma # Eq.34 cal_ref = ds.ufunc_per_section( - label='st', ref_temp_broadcasted=True, calc_per='all') + label="st", ref_temp_broadcasted=True, calc_per="all" + ) # cal_ref = cal_ref # sort by increasing x data_gamma = 1 / (cal_ref.T.ravel() + 273.15) # gamma coord_gamma_row = np.arange(nt * nx, dtype=int) coord_gamma_col = np.zeros(nt * nx, dtype=int) X_gamma = sp.coo_matrix( - (data_gamma, (coord_gamma_row, coord_gamma_col)), - shape=(nt * nx, 1), - copy=False) + (data_gamma, (coord_gamma_row, coord_gamma_col)), shape=(nt * nx, 1), copy=False + ) # X \Delta\alpha # Eq.34 data_dalpha = np.tile(-x_sec, nt) # dalpha @@ -128,7 +131,8 @@ def calibration_single_ended_solver( # noqa: MC0001 X_dalpha = sp.coo_matrix( (data_dalpha, (coord_dalpha_row, coord_dalpha_col)), shape=(nt * nx, 1), - copy=False) + copy=False, + ) # X C # Eq.34 data_c = -np.ones(nt * nx, dtype=int) @@ -136,7 +140,8 @@ def calibration_single_ended_solver( # noqa: MC0001 coord_c_col = np.repeat(np.arange(nt, dtype=int), nx) X_c = sp.coo_matrix( - (data_c, (coord_c_row, coord_c_col)), shape=(nt * nx, nt), copy=False) + (data_c, (coord_c_row, coord_c_col)), shape=(nt * nx, nt), copy=False + ) # X ta #not documented if ds.trans_att.size > 0: @@ -158,19 +163,20 @@ def calibration_single_ended_solver( # noqa: MC0001 # skip ix_sec_ta_ix0 locations, because they are upstream of # the connector. 
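The COO bookkeeping above is easier to follow at toy scale. A sketch of the Eq. 34 block layout with made-up sizes (nt = 2 time steps, nx = 3 reference locations); the parameter ordering [gamma, dalpha, C(t)] mirrors the code above, while the positions and temperatures are arbitrary.

    import numpy as np
    import scipy.sparse as sp

    nt, nx = 2, 3                        # toy sizes
    x_sec = np.array([0.0, 1.0, 2.0])    # reference positions along the fiber
    T_ref = np.full(nt * nx, 20.0)       # reference temperatures in degC, flattened

    rows = np.arange(nt * nx)            # one row per (time, x) observation
    X_gamma = sp.coo_matrix(
        (1.0 / (T_ref + 273.15), (rows, np.zeros(nt * nx, dtype=int))),
        shape=(nt * nx, 1),
    )
    X_dalpha = sp.coo_matrix(
        (np.tile(-x_sec, nt), (rows, np.zeros(nt * nx, dtype=int))),
        shape=(nt * nx, 1),
    )
    X_c = sp.coo_matrix(
        (-np.ones(nt * nx), (rows, np.repeat(np.arange(nt), nx))),
        shape=(nt * nx, nt),
    )

    X = sp.hstack((X_gamma, X_dalpha, X_c))
    print(X.shape)  # (6, 4): columns are [gamma, dalpha, C(t0), C(t1)]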
- coord_ta_row = ( - np.tile(np.arange(ix_sec_ta_ix0, nx), nt) - + np.repeat(np.arange(nx * nt, step=nx), nx - ix_sec_ta_ix0)) + coord_ta_row = np.tile(np.arange(ix_sec_ta_ix0, nx), nt) + np.repeat( + np.arange(nx * nt, step=nx), nx - ix_sec_ta_ix0 + ) # nt parameters - coord_ta_col = np.repeat( - np.arange(nt, dtype=int), nx - ix_sec_ta_ix0) + coord_ta_col = np.repeat(np.arange(nt, dtype=int), nx - ix_sec_ta_ix0) TA_list.append( sp.coo_matrix( (data_ta, (coord_ta_row, coord_ta_col)), shape=(nt * nx, nt), - copy=False)) + copy=False, + ) + ) X_TA = sp.hstack(TA_list) @@ -179,16 +185,15 @@ def calibration_single_ended_solver( # noqa: MC0001 if np.any(matching_indices): # first make matrix without the TA part (only diff in attentuation) - data_ma = np.tile(ds_ms1['x'].values - ds_ms0['x'].values, nt) + data_ma = np.tile(ds_ms1["x"].values - ds_ms0["x"].values, nt) coord_ma_row = np.arange(nm * nt) coord_ma_col = np.ones(nt * nm) X_ma = sp.coo_matrix( - (data_ma, (coord_ma_row, coord_ma_col)), - shape=(nm * nt, 2 + nt), - copy=False) + (data_ma, (coord_ma_row, coord_ma_col)), shape=(nm * nt, 2 + nt), copy=False + ) # make TA matrix if ds.trans_att.size > 0: @@ -197,24 +202,25 @@ def calibration_single_ended_solver( # noqa: MC0001 for jj, transient_att_xi in enumerate(ds.trans_att.values): transient_m_data[ii, jj] = np.logical_and( transient_att_xi > x_all[row[0]], - transient_att_xi < x_all[row[1]]).astype(int) + transient_att_xi < x_all[row[1]], + ).astype(int) - data_mt = np.tile(transient_m_data, (nt, 1)).flatten('F') + data_mt = np.tile(transient_m_data, (nt, 1)).flatten("F") - coord_mt_row = (np.tile(np.arange(nm * nt), nta)) + coord_mt_row = np.tile(np.arange(nm * nt), nta) - coord_mt_col = ( - np.tile(np.repeat(np.arange(nt), nm), nta) - + np.repeat(np.arange(nta * nt, step=nt), nt * nm)) + coord_mt_col = np.tile(np.repeat(np.arange(nt), nm), nta) + np.repeat( + np.arange(nta * nt, step=nt), nt * nm + ) X_mt = sp.coo_matrix( (data_mt, (coord_mt_row, coord_mt_col)), shape=(nm * nt, nta * nt), - copy=False) + copy=False, + ) else: - X_mt = sp.coo_matrix( - ([], ([], [])), shape=(nm * nt, 0), copy=False) + X_mt = sp.coo_matrix(([], ([], [])), shape=(nm * nt, 0), copy=False) # merge the two X_m = sp.hstack((X_ma, X_mt)) @@ -232,58 +238,81 @@ def calibration_single_ended_solver( # noqa: MC0001 # y_m = I_1 - I_2 y_m = ( np.log(ds_ms0.st.values / ds_ms0.ast.values) - - np.log(ds_ms1.st.values / ds_ms1.ast.values)).T.ravel() + - np.log(ds_ms1.st.values / ds_ms1.ast.values) + ).T.ravel() y = np.hstack((y, y_m)) # w if st_var is not None: - st_var_sec = parse_st_var(ds, st_var, st_label='st').isel(x=ix_sec).values - ast_var_sec = parse_st_var(ds, ast_var, st_label='ast').isel(x=ix_sec).values + st_var_sec = parse_st_var(ds, st_var, st_label="st").isel(x=ix_sec).values + ast_var_sec = parse_st_var(ds, ast_var, st_label="ast").isel(x=ix_sec).values - w = 1 / (ds_sec.st**-2 * st_var_sec - + ds_sec.ast**-2 * ast_var_sec).values.ravel() + w = ( + 1 + / ( + ds_sec.st**-2 * st_var_sec + ds_sec.ast**-2 * ast_var_sec + ).values.ravel() + ) if np.any(matching_indices): - st_var_ms0 = parse_st_var( - ds, st_var, st_label='st').isel(x=matching_indices[:, 0]).values - st_var_ms1 = parse_st_var( - ds, st_var, st_label='st').isel(x=matching_indices[:, 1]).values - ast_var_ms0 = parse_st_var( - ds, ast_var, st_label='ast').isel(x=matching_indices[:, 0]).values - ast_var_ms1 = parse_st_var( - ds, ast_var, st_label='ast').isel(x=matching_indices[:, 1]).values - - w_ms = 1 / ( - (ds_ms0.st.values**-2 * st_var_ms0) 
+ - (ds_ms0.ast.values**-2 * ast_var_ms0) + - (ds_ms1.st.values**-2 * st_var_ms1) + - (ds_ms1.ast.values**-2 * ast_var_ms1)).ravel() + st_var_ms0 = ( + parse_st_var(ds, st_var, st_label="st") + .isel(x=matching_indices[:, 0]) + .values + ) + st_var_ms1 = ( + parse_st_var(ds, st_var, st_label="st") + .isel(x=matching_indices[:, 1]) + .values + ) + ast_var_ms0 = ( + parse_st_var(ds, ast_var, st_label="ast") + .isel(x=matching_indices[:, 0]) + .values + ) + ast_var_ms1 = ( + parse_st_var(ds, ast_var, st_label="ast") + .isel(x=matching_indices[:, 1]) + .values + ) + + w_ms = ( + 1 + / ( + (ds_ms0.st.values**-2 * st_var_ms0) + + (ds_ms0.ast.values**-2 * ast_var_ms0) + + (ds_ms1.st.values**-2 * st_var_ms1) + + (ds_ms1.ast.values**-2 * ast_var_ms1) + ).ravel() + ) w = np.hstack((w, w_ms)) else: - w = 1. # unweighted + w = 1.0 # unweighted - if solver == 'sparse': + if solver == "sparse": if calc_cov: p_sol, p_var, p_cov = wls_sparse( - X, y, w=w, x0=p0_est_dalpha, calc_cov=calc_cov, verbose=verbose) + X, y, w=w, x0=p0_est_dalpha, calc_cov=calc_cov, verbose=verbose + ) else: p_sol, p_var = wls_sparse( - X, y, w=w, x0=p0_est_dalpha, calc_cov=calc_cov, verbose=verbose) + X, y, w=w, x0=p0_est_dalpha, calc_cov=calc_cov, verbose=verbose + ) - elif solver == 'stats': + elif solver == "stats": if calc_cov: p_sol, p_var, p_cov = wls_stats( - X, y, w=w, calc_cov=calc_cov, verbose=verbose) + X, y, w=w, calc_cov=calc_cov, verbose=verbose + ) else: - p_sol, p_var = wls_stats( - X, y, w=w, calc_cov=calc_cov, verbose=verbose) + p_sol, p_var = wls_stats(X, y, w=w, calc_cov=calc_cov, verbose=verbose) - elif solver == 'external': + elif solver == "external": return X, y, w, p0_est_dalpha - elif solver == 'external_split': + elif solver == "external_split": # Only with external split, alpha can be estimated with double ended setup data_alpha = -np.ones(nt * nx, dtype=int) # np.tile(-x_sec, nt) # dalpha coord_alpha_row = np.arange(nt * nx, dtype=int) @@ -291,7 +320,8 @@ def calibration_single_ended_solver( # noqa: MC0001 X_alpha = sp.coo_matrix( (data_alpha, (coord_alpha_row, coord_alpha_col)), shape=(nt * nx, no), - copy=False) + copy=False, + ) return dict( y=y, @@ -303,7 +333,8 @@ def calibration_single_ended_solver( # noqa: MC0001 X_m=X_m, X_TA=X_TA, p0_est_dalpha=p0_est_dalpha, - p0_est_alpha=p0_est_alpha) + p0_est_alpha=p0_est_alpha, + ) else: raise ValueError("Choose a valid solver") @@ -312,15 +343,16 @@ def calibration_single_ended_solver( # noqa: MC0001 def calibration_double_ended_solver( # noqa: MC0001 - ds, - st_var=None, - ast_var=None, - rst_var=None, - rast_var=None, - calc_cov=True, - solver='sparse', - matching_indices=None, - verbose=False): + ds, + st_var=None, + ast_var=None, + rst_var=None, + rast_var=None, + calc_cov=True, + solver="sparse", + matching_indices=None, + verbose=False, +): """ The solver for double-ended setups. Assumes `ds` is pre-configured with `sections` and `trans_att`. @@ -377,11 +409,11 @@ def calibration_double_ended_solver( # noqa: MC0001 ------- """ - ix_sec = ds.ufunc_per_section(x_indices=True, calc_per='all') + ix_sec = ds.ufunc_per_section(x_indices=True, calc_per="all") ds_sec = ds.isel(x=ix_sec) ix_alpha_is_zero = ix_sec[0] # per definition of E - x_sec = ds_sec['x'].values + x_sec = ds_sec["x"].values nx_sec = x_sec.size nt = ds.time.size nta = ds.trans_att.size @@ -389,92 +421,132 @@ def calibration_double_ended_solver( # noqa: MC0001 # Calculate E as initial estimate for the E calibration. 
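The weights assembled above come from first-order error propagation of y = ln(st/ast): var(y) ≈ st**-2 * var(st) + ast**-2 * var(ast). A quick numeric sanity check of that formula with invented intensities, including a Monte Carlo comparison; none of these numbers come from the library.

    import numpy as np

    st, ast_ = 4000.0, 2500.0        # toy Stokes / anti-Stokes intensities
    st_var, ast_var = 25.0, 16.0     # toy noise variances

    # First-order propagation for y = ln(st / ast).
    y_var = st**-2 * st_var + ast_**-2 * ast_var
    w = 1.0 / y_var                  # the least-squares weight
    print(y_var, w)

    # Monte Carlo check: simulate noisy intensities and compare variances.
    rng = np.random.default_rng(0)
    st_mc = rng.normal(st, np.sqrt(st_var), 100_000)
    ast_mc = rng.normal(ast_, np.sqrt(ast_var), 100_000)
    print(np.var(np.log(st_mc / ast_mc)))  # close to y_var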
# Does not require ta to be passed on E_all_guess, E_all_var_guess = calc_alpha_double( - mode='guess', + mode="guess", ds=ds, st_var=st_var, ast_var=ast_var, rst_var=rst_var, rast_var=rast_var, - ix_alpha_is_zero=ix_alpha_is_zero) - df_est, db_est = calc_df_db_double_est(ds, ix_alpha_is_zero, 485.) - - E, Z_D, Z_gamma, Zero_d, Z_TA_fw, Z_TA_bw, = \ - construct_submatrices(nt, nx_sec, ds, ds.trans_att.values, x_sec) + ix_alpha_is_zero=ix_alpha_is_zero, + ) + df_est, db_est = calc_df_db_double_est(ds, ix_alpha_is_zero, 485.0) + + ( + E, + Z_D, + Z_gamma, + Zero_d, + Z_TA_fw, + Z_TA_bw, + ) = construct_submatrices(nt, nx_sec, ds, ds.trans_att.values, x_sec) # y # Eq.41--45 y_F = np.log(ds_sec.st / ds_sec.ast).values.ravel() y_B = np.log(ds_sec.rst / ds_sec.rast).values.ravel() # w - st_var_sec = parse_st_var(ds, st_var, st_label='st').isel(x=ix_sec).values - ast_var_sec = parse_st_var(ds, ast_var, st_label='ast').isel(x=ix_sec).values - rst_var_sec = parse_st_var(ds, rst_var, st_label='rst').isel(x=ix_sec).values - rast_var_sec = parse_st_var( - ds, rast_var, st_label='rast').isel(x=ix_sec).values - - w_F = 1 / (ds_sec.st**-2 * st_var_sec - + ds_sec.ast**-2 * ast_var_sec).values.ravel() - w_B = 1 / ( - ds_sec.rst**-2 * rst_var_sec - + ds_sec.rast**-2 * rast_var_sec).values.ravel() + st_var_sec = parse_st_var(ds, st_var, st_label="st").isel(x=ix_sec).values + ast_var_sec = parse_st_var(ds, ast_var, st_label="ast").isel(x=ix_sec).values + rst_var_sec = parse_st_var(ds, rst_var, st_label="rst").isel(x=ix_sec).values + rast_var_sec = parse_st_var(ds, rast_var, st_label="rast").isel(x=ix_sec).values + + w_F = ( + 1 + / (ds_sec.st**-2 * st_var_sec + ds_sec.ast**-2 * ast_var_sec).values.ravel() + ) + w_B = ( + 1 + / ( + ds_sec.rst**-2 * rst_var_sec + ds_sec.rast**-2 * rast_var_sec + ).values.ravel() + ) if not np.any(matching_indices): p0_est = np.concatenate( - ( - [485.], df_est, db_est, E_all_guess[ix_sec[1:]], - nta * nt * 2 * [0.])) + ([485.0], df_est, db_est, E_all_guess[ix_sec[1:]], nta * nt * 2 * [0.0]) + ) # Stack all X's X = sp.vstack( ( sp.hstack((Z_gamma, -Z_D, Zero_d, -E, Z_TA_fw)), - sp.hstack((Z_gamma, Zero_d, -Z_D, E, Z_TA_bw)))) + sp.hstack((Z_gamma, Zero_d, -Z_D, E, Z_TA_bw)), + ) + ) y = np.concatenate((y_F, y_B)) w = np.concatenate((w_F, w_B)) else: - E_match_F, E_match_B, E_match_no_cal, Z_TA_eq1, Z_TA_eq2, \ - Z_TA_eq3, d_no_cal, ix_from_cal_match_to_glob, ix_match_not_cal, \ - Zero_eq12_gamma, Zero_eq3_gamma, Zero_d_eq12 = \ - construct_submatrices_matching_sections( - ds.x.values, ix_sec, matching_indices[:, 0], - matching_indices[:, 1], nt, ds.trans_att.values) + ( + E_match_F, + E_match_B, + E_match_no_cal, + Z_TA_eq1, + Z_TA_eq2, + Z_TA_eq3, + d_no_cal, + ix_from_cal_match_to_glob, + ix_match_not_cal, + Zero_eq12_gamma, + Zero_eq3_gamma, + Zero_d_eq12, + ) = construct_submatrices_matching_sections( + ds.x.values, + ix_sec, + matching_indices[:, 0], + matching_indices[:, 1], + nt, + ds.trans_att.values, + ) p0_est = np.concatenate( ( - np.asarray([485.] + 2 * nt * [1.4]), - E_all_guess[ix_from_cal_match_to_glob], nta * nt * 2 * [0.])) + np.asarray([485.0] + 2 * nt * [1.4]), + E_all_guess[ix_from_cal_match_to_glob], + nta * nt * 2 * [0.0], + ) + ) # Stack all X's # X_sec contains a different number of columns than X. 
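A toy version of the block stacking performed just below, with the transient-attenuation columns omitted for brevity. The shapes follow the construction in this file (gamma: one column; D: nt columns per direction; E: nx - 1 columns, since E is pinned to zero at the first reference index); all values are placeholders.

    import numpy as np
    import scipy.sparse as sp

    nt, nx = 2, 3
    nobs = nt * nx

    Z_gamma = sp.coo_matrix((np.full(nobs, 1.0 / 293.15),
                             (np.arange(nobs), np.zeros(nobs, dtype=int))),
                            shape=(nobs, 1))
    Z_D = sp.coo_matrix((np.ones(nobs),
                         (np.arange(nobs), np.tile(np.arange(nt), nx))),
                        shape=(nobs, nt))
    Zero_d = sp.coo_matrix(([], ([], [])), shape=(nobs, nt))
    E = sp.coo_matrix((np.ones(nobs - nt),
                       (np.arange(nt, nobs), np.repeat(np.arange(nx - 1), nt))),
                      shape=(nobs, nx - 1))

    # Forward rows subtract D_fw and E; backward rows subtract D_bw and add E.
    X = sp.vstack((
        sp.hstack((Z_gamma, -Z_D, Zero_d, -E)),
        sp.hstack((Z_gamma, Zero_d, -Z_D, E)),
    ))
    print(X.shape)  # (2*nt*nx, 1 + 2*nt + nx - 1) = (12, 7)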
X_sec = sp.vstack( ( sp.hstack((Z_gamma, -Z_D, Zero_d, -E, Z_TA_fw)), - sp.hstack((Z_gamma, Zero_d, -Z_D, E, Z_TA_bw)))) + sp.hstack((Z_gamma, Zero_d, -Z_D, E, Z_TA_bw)), + ) + ) X_sec2 = sp.csr_matrix( ([], ([], [])), - shape=(2 * nt * nx_sec, 1 + 2 * nt + ds.x.size + 2 * nta * nt)) + shape=(2 * nt * nx_sec, 1 + 2 * nt + ds.x.size + 2 * nta * nt), + ) from_i = np.concatenate( ( - np.arange(1 + 2 * nt), 1 + 2 * nt + ix_sec[1:], + np.arange(1 + 2 * nt), + 1 + 2 * nt + ix_sec[1:], np.arange( - 1 + 2 * nt + ds.x.size, - 1 + 2 * nt + ds.x.size + 2 * nta * nt))) + 1 + 2 * nt + ds.x.size, 1 + 2 * nt + ds.x.size + 2 * nta * nt + ), + ) + ) X_sec2[:, from_i] = X_sec from_i2 = np.concatenate( ( - np.arange(1 + 2 * nt), 1 + 2 * nt + ix_from_cal_match_to_glob, + np.arange(1 + 2 * nt), + 1 + 2 * nt + ix_from_cal_match_to_glob, np.arange( - 1 + 2 * nt + ds.x.size, - 1 + 2 * nt + ds.x.size + 2 * nta * nt))) + 1 + 2 * nt + ds.x.size, 1 + 2 * nt + ds.x.size + 2 * nta * nt + ), + ) + ) X = sp.vstack( ( X_sec2[:, from_i2], sp.hstack((Zero_eq12_gamma, Zero_d_eq12, E_match_F, Z_TA_eq1)), sp.hstack((Zero_eq12_gamma, Zero_d_eq12, E_match_B, Z_TA_eq2)), - sp.hstack( - (Zero_eq3_gamma, d_no_cal, E_match_no_cal, Z_TA_eq3)))) + sp.hstack((Zero_eq3_gamma, d_no_cal, E_match_no_cal, Z_TA_eq3)), + ) + ) y_F = np.log(ds_sec.st / ds_sec.ast).values.ravel() y_B = np.log(ds_sec.rst / ds_sec.rast).values.ravel() @@ -485,62 +557,78 @@ def calibration_double_ended_solver( # noqa: MC0001 ds_tix = ds.isel(x=tix) y_eq1 = ( np.log(ds_hix.st / ds_hix.ast).values.ravel() - - np.log(ds_tix.st / ds_tix.ast).values.ravel()) + - np.log(ds_tix.st / ds_tix.ast).values.ravel() + ) y_eq2 = ( np.log(ds_hix.rst / ds_hix.rast).values.ravel() - - np.log(ds_tix.rst / ds_tix.rast).values.ravel()) + - np.log(ds_tix.rst / ds_tix.rast).values.ravel() + ) ds_mnc = ds.isel(x=ix_match_not_cal) y_eq3 = ( - ( - np.log(ds_mnc.rst / ds_mnc.rast) - - np.log(ds_mnc.st / ds_mnc.ast)) / 2).values.ravel() + (np.log(ds_mnc.rst / ds_mnc.rast) - np.log(ds_mnc.st / ds_mnc.ast)) / 2 + ).values.ravel() y = np.concatenate((y_F, y_B, y_eq1, y_eq2, y_eq3)) - st_var_hix = parse_st_var(ds, st_var, st_label='st').isel(x=hix).values - ast_var_hix = parse_st_var(ds, ast_var, st_label='ast').isel(x=hix).values - rst_var_hix = parse_st_var(ds, rst_var, st_label='rst').isel(x=hix).values - rast_var_hix = parse_st_var(ds, rast_var, st_label='rast').isel(x=hix).values - - st_var_tix = parse_st_var(ds, st_var, st_label='st').isel(x=tix).values - ast_var_tix = parse_st_var(ds, ast_var, st_label='ast').isel(x=tix).values - rst_var_tix = parse_st_var(ds, rst_var, st_label='rst').isel(x=tix).values - rast_var_tix = parse_st_var(ds, rast_var, st_label='rast').isel(x=tix).values - - st_var_mnc = parse_st_var( - ds, st_var, st_label='st').isel(x=ix_match_not_cal).values - ast_var_mnc = parse_st_var( - ds, ast_var, st_label='ast').isel(x=ix_match_not_cal).values - rst_var_mnc = parse_st_var( - ds, rst_var, st_label='rst').isel(x=ix_match_not_cal).values - rast_var_mnc = parse_st_var( - ds, rast_var, st_label='rast').isel(x=ix_match_not_cal).values + st_var_hix = parse_st_var(ds, st_var, st_label="st").isel(x=hix).values + ast_var_hix = parse_st_var(ds, ast_var, st_label="ast").isel(x=hix).values + rst_var_hix = parse_st_var(ds, rst_var, st_label="rst").isel(x=hix).values + rast_var_hix = parse_st_var(ds, rast_var, st_label="rast").isel(x=hix).values + + st_var_tix = parse_st_var(ds, st_var, st_label="st").isel(x=tix).values + ast_var_tix = parse_st_var(ds, ast_var, 
st_label="ast").isel(x=tix).values + rst_var_tix = parse_st_var(ds, rst_var, st_label="rst").isel(x=tix).values + rast_var_tix = parse_st_var(ds, rast_var, st_label="rast").isel(x=tix).values + + st_var_mnc = ( + parse_st_var(ds, st_var, st_label="st").isel(x=ix_match_not_cal).values + ) + ast_var_mnc = ( + parse_st_var(ds, ast_var, st_label="ast").isel(x=ix_match_not_cal).values + ) + rst_var_mnc = ( + parse_st_var(ds, rst_var, st_label="rst").isel(x=ix_match_not_cal).values + ) + rast_var_mnc = ( + parse_st_var(ds, rast_var, st_label="rast").isel(x=ix_match_not_cal).values + ) w_eq1 = 1 / ( - (ds_hix.st**-2 * st_var_hix - + ds_hix.ast**-2 * ast_var_hix).values.ravel() + - (ds_tix.st**-2 * st_var_tix - + ds_tix.ast**-2 * ast_var_tix).values.ravel()) + ( + ds_hix.st**-2 * st_var_hix + ds_hix.ast**-2 * ast_var_hix + ).values.ravel() + + ( + ds_tix.st**-2 * st_var_tix + ds_tix.ast**-2 * ast_var_tix + ).values.ravel() + ) w_eq2 = 1 / ( - (ds_hix.rst**-2 * rst_var_hix - + ds_hix.rast**-2 * rast_var_hix).values.ravel() + - (ds_tix.rst**-2 * rst_var_tix - + ds_tix.rast**-2 * rast_var_tix).values.ravel()) - w_eq3 = 1 / ( - ds_mnc.st**-2 * st_var_mnc + ds_mnc.ast**-2 * ast_var_mnc - + ds_mnc.rst**-2 * rst_var_mnc - + ds_mnc.rast**-2 * rast_var_mnc).values.ravel() + ( + ds_hix.rst**-2 * rst_var_hix + ds_hix.rast**-2 * rast_var_hix + ).values.ravel() + + ( + ds_tix.rst**-2 * rst_var_tix + ds_tix.rast**-2 * rast_var_tix + ).values.ravel() + ) + w_eq3 = ( + 1 + / ( + ds_mnc.st**-2 * st_var_mnc + + ds_mnc.ast**-2 * ast_var_mnc + + ds_mnc.rst**-2 * rst_var_mnc + + ds_mnc.rast**-2 * rast_var_mnc + ).values.ravel() + ) w = np.concatenate((w_F, w_B, w_eq1, w_eq2, w_eq3)) - if solver == 'sparse': + if solver == "sparse": solver_fun = wls_sparse - elif solver == 'stats': + elif solver == "stats": solver_fun = wls_stats - elif solver == 'external': + elif solver == "external": return X, y, w, p0_est - elif solver == 'external_split': + elif solver == "external_split": out = dict( y_F=y_F, y_B=y_B, @@ -554,7 +642,8 @@ def calibration_double_ended_solver( # noqa: MC0001 Z_TA_bw=Z_TA_bw, p0_est=p0_est, E_all_guess=E_all_guess, - E_all_var_guess=E_all_var_guess) + E_all_var_guess=E_all_var_guess, + ) if np.any(matching_indices): out.update( @@ -574,19 +663,15 @@ def calibration_double_ended_solver( # noqa: MC0001 y_eq3=y_eq3, w_eq1=w_eq1, w_eq2=w_eq2, - w_eq3=w_eq3) + w_eq3=w_eq3, + ) return out else: raise ValueError("Choose a valid solver") out = solver_fun( - X, - y, - w=w, - x0=p0_est, - calc_cov=calc_cov, - verbose=verbose, - return_werr=verbose) + X, y, w=w, x0=p0_est, calc_cov=calc_cov, verbose=verbose, return_werr=verbose + ) if calc_cov and verbose: p_sol, p_var, p_cov, _ = out elif not calc_cov and verbose: @@ -609,15 +694,16 @@ def calibration_double_ended_solver( # noqa: MC0001 # calculate talpha_fw and bw for attenuation if ds.trans_att.size > 0: if np.any(matching_indices): - ta = p_sol[1 + 2 * nt + ix_from_cal_match_to_glob.size:].reshape( - (nt, 2, nta), order='F') - ta_var = p_var[1 + 2 * nt - + ix_from_cal_match_to_glob.size:].reshape( - (nt, 2, nta), order='F') + ta = p_sol[1 + 2 * nt + ix_from_cal_match_to_glob.size :].reshape( + (nt, 2, nta), order="F" + ) + ta_var = p_var[1 + 2 * nt + ix_from_cal_match_to_glob.size :].reshape( + (nt, 2, nta), order="F" + ) else: - ta = p_sol[2 * nt + nx_sec:].reshape((nt, 2, nta), order='F') - ta_var = p_var[2 * nt + nx_sec:].reshape((nt, 2, nta), order='F') + ta = p_sol[2 * nt + nx_sec :].reshape((nt, 2, nta), order="F") + ta_var = p_var[2 * nt + nx_sec 
:].reshape((nt, 2, nta), order="F") talpha_fw = ta[:, 0, :] talpha_bw = ta[:, 1, :] @@ -631,13 +717,13 @@ def calibration_double_ended_solver( # noqa: MC0001 # put E outside of reference section in solution # concatenating makes a copy of the data instead of using a pointer - ds_sub = ds[['st', 'ast', 'rst', 'rast', 'trans_att']] - ds_sub['df'] = (('time',), p_sol[1:1 + nt]) - ds_sub['df_var'] = (('time',), p_var[1:1 + nt]) - ds_sub['db'] = (('time',), p_sol[1 + nt:1 + 2 * nt]) - ds_sub['db_var'] = (('time',), p_var[1 + nt:1 + 2 * nt]) + ds_sub = ds[["st", "ast", "rst", "rast", "trans_att"]] + ds_sub["df"] = (("time",), p_sol[1 : 1 + nt]) + ds_sub["df_var"] = (("time",), p_var[1 : 1 + nt]) + ds_sub["db"] = (("time",), p_sol[1 + nt : 1 + 2 * nt]) + ds_sub["db_var"] = (("time",), p_var[1 + nt : 1 + 2 * nt]) E_all_exact, E_all_var_exact = calc_alpha_double( - mode='exact', + mode="exact", ds=ds_sub, st_var=st_var, ast_var=ast_var, @@ -647,7 +733,8 @@ def calibration_double_ended_solver( # noqa: MC0001 talpha_fw=talpha_fw, talpha_bw=talpha_bw, talpha_fw_var=talpha_fw_var, - talpha_bw_var=talpha_bw_var) + talpha_bw_var=talpha_bw_var, + ) if np.any(matching_indices): p_sol_size = 1 + 2 * nt + ix_from_cal_match_to_glob.size + 2 * nt * nta @@ -659,29 +746,39 @@ def calibration_double_ended_solver( # noqa: MC0001 if np.any(matching_indices): po_sol = np.concatenate( ( - p_sol[:1 + 2 * nt], E_all_exact, - p_sol[1 + 2 * nt + ix_from_cal_match_to_glob.size:])) - po_sol[1 + 2 * nt + ix_from_cal_match_to_glob] = \ - p_sol[1 + 2 * nt:1 + 2 * nt + ix_from_cal_match_to_glob.size] + p_sol[: 1 + 2 * nt], + E_all_exact, + p_sol[1 + 2 * nt + ix_from_cal_match_to_glob.size :], + ) + ) + po_sol[1 + 2 * nt + ix_from_cal_match_to_glob] = p_sol[ + 1 + 2 * nt : 1 + 2 * nt + ix_from_cal_match_to_glob.size + ] else: po_sol = np.concatenate( - (p_sol[:1 + 2 * nt], E_all_exact, p_sol[2 * nt + nx_sec:])) - po_sol[1 + 2 * nt + ix_sec[1:]] = p_sol[1 + 2 * nt:2 * nt + nx_sec] + (p_sol[: 1 + 2 * nt], E_all_exact, p_sol[2 * nt + nx_sec :]) + ) + po_sol[1 + 2 * nt + ix_sec[1:]] = p_sol[1 + 2 * nt : 2 * nt + nx_sec] - po_sol[1 + 2 * nt + ix_sec[0]] = 0. # per definition + po_sol[1 + 2 * nt + ix_sec[0]] = 0.0 # per definition if np.any(matching_indices): po_var = np.concatenate( ( - p_var[:1 + 2 * nt], E_all_var_exact, - p_var[1 + 2 * nt + ix_from_cal_match_to_glob.size:])) - po_var[1 + 2 * nt + ix_from_cal_match_to_glob] = \ - p_var[1 + 2 * nt:1 + 2 * nt + ix_from_cal_match_to_glob.size] + p_var[: 1 + 2 * nt], + E_all_var_exact, + p_var[1 + 2 * nt + ix_from_cal_match_to_glob.size :], + ) + ) + po_var[1 + 2 * nt + ix_from_cal_match_to_glob] = p_var[ + 1 + 2 * nt : 1 + 2 * nt + ix_from_cal_match_to_glob.size + ] else: po_var = np.concatenate( - (p_var[:1 + 2 * nt], E_all_var_exact, p_var[2 * nt + nx_sec:])) - po_var[1 + 2 * nt + ix_sec[1:]] = p_var[1 + 2 * nt:2 * nt + nx_sec] - po_var[1 + 2 * nt + ix_sec[0]] = 0. 
# per definition + (p_var[: 1 + 2 * nt], E_all_var_exact, p_var[2 * nt + nx_sec :]) + ) + po_var[1 + 2 * nt + ix_sec[1:]] = p_var[1 + 2 * nt : 2 * nt + nx_sec] + po_var[1 + 2 * nt + ix_sec[0]] = 0.0 # per definition if calc_cov: # the COV can be expensive to compute (in the least squares routine) @@ -693,17 +790,21 @@ def calibration_double_ended_solver( # noqa: MC0001 np.arange(1 + 2 * nt), 1 + 2 * nt + ix_from_cal_match_to_glob, np.arange( - 1 + 2 * nt + ix_from_cal_match_to_glob.size, 1 + 2 * nt - + ix_from_cal_match_to_glob.size + nta * nt * 2))) + 1 + 2 * nt + ix_from_cal_match_to_glob.size, + 1 + 2 * nt + ix_from_cal_match_to_glob.size + nta * nt * 2, + ), + ) + ) else: from_i = np.concatenate( ( - np.arange(1 + 2 * nt), 1 + 2 * nt + ix_sec[1:], - np.arange( - 1 + 2 * nt + nx_sec, - 1 + 2 * nt + nx_sec + nta * nt * 2))) + np.arange(1 + 2 * nt), + 1 + 2 * nt + ix_sec[1:], + np.arange(1 + 2 * nt + nx_sec, 1 + 2 * nt + nx_sec + nta * nt * 2), + ) + ) - iox_sec1, iox_sec2 = np.meshgrid(from_i, from_i, indexing='ij') + iox_sec1, iox_sec2 = np.meshgrid(from_i, from_i, indexing="ij") po_cov[iox_sec1, iox_sec2] = p_cov return po_sol, po_var, po_cov @@ -723,15 +824,13 @@ def matching_section_location_indices(ix_sec, hix, tix): # indices in the section of interest. Excluding E0 # ix_E0 mask - to exclude E[ix_sec[0]] from the E matrices - ix_E0_mask = np.array( - [ix for ix in range(nx_cal_match) if ix != ix_sec2[0]]) + ix_E0_mask = np.array([ix for ix in range(nx_cal_match) if ix != ix_sec2[0]]) # contains the global coordinate indices of the E ix_from_cal_match_to_glob = ix_cal_match[ix_E0_mask] return ix_from_cal_match_to_glob -def construct_submatrices_matching_sections( - x, ix_sec, hix, tix, nt, trans_att): +def construct_submatrices_matching_sections(x, ix_sec, hix, tix, nt, trans_att): """ For all matching indices, where subscript 1 refers to the indices in `hix` and subscript 2 refers to the indices in `tix`. @@ -777,8 +876,7 @@ def construct_submatrices_matching_sections( ix_cal_match = np.unique(np.concatenate((ix_sec, hix, tix))) # subscript 3 in doc-eqns - ix_match_not_cal = np.array( - [ix for ix in ix_cal_match if ix not in ix_sec]) + ix_match_not_cal = np.array([ix for ix in ix_cal_match if ix not in ix_sec]) # number of locations of interest, width of the section of interest. nx_cal_match = ix_cal_match.size @@ -794,8 +892,7 @@ def construct_submatrices_matching_sections( # indices in the section of interest. 
Excluding E0 # ix_E0 mask - to exclude E[ix_sec[0]] from the E matrices - ix_E0_mask = np.array( - [ix for ix in range(nx_cal_match) if ix != ix_sec2[0]]) + ix_E0_mask = np.array([ix for ix in range(nx_cal_match) if ix != ix_sec2[0]]) # contains the global coordinate indices of the E ix_from_cal_match_to_glob = ix_cal_match[ix_E0_mask] @@ -804,12 +901,18 @@ def construct_submatrices_matching_sections( row = np.arange(nt * npair, dtype=int) col1 = np.repeat(hix_sec2, nt) col2 = np.repeat(tix_sec2, nt) - E_match_F = sp.coo_matrix( - ( - np.concatenate((-data, data)), - (np.concatenate((row, row)), np.concatenate((col1, col2)))), - shape=(nt * npair, nx_cal_match), - copy=False).tocsr(copy=False)[:, ix_E0_mask].tocoo() + E_match_F = ( + sp.coo_matrix( + ( + np.concatenate((-data, data)), + (np.concatenate((row, row)), np.concatenate((col1, col2))), + ), + shape=(nt * npair, nx_cal_match), + copy=False, + ) + .tocsr(copy=False)[:, ix_E0_mask] + .tocoo() + ) Zero_eq12_gamma = sp.coo_matrix(([], ([], [])), shape=(nt * npair, 1)) Zero_d_eq12 = sp.coo_matrix(([], ([], [])), shape=(nt * npair, 2 * nt)) @@ -818,21 +921,29 @@ def construct_submatrices_matching_sections( row = np.arange(nt * npair, dtype=int) col1 = np.repeat(hix_sec2, nt) col2 = np.repeat(tix_sec2, nt) - E_match_B = sp.coo_matrix( - ( - np.concatenate((data, -data)), - (np.concatenate((row, row)), np.concatenate((col1, col2)))), - shape=(nt * npair, nx_cal_match), - copy=False).tocsr(copy=False)[:, ix_E0_mask].tocoo() + E_match_B = ( + sp.coo_matrix( + ( + np.concatenate((data, -data)), + (np.concatenate((row, row)), np.concatenate((col1, col2))), + ), + shape=(nt * npair, nx_cal_match), + copy=False, + ) + .tocsr(copy=False)[:, ix_E0_mask] + .tocoo() + ) # E in EQ3 nx_nm = ix_match_not_cal_sec2.size data = np.ones(nt * nx_nm, dtype=float) row = np.arange(nt * nx_nm, dtype=int) col = np.repeat(ix_match_not_cal_sec2, nt) - E_match_no_cal = sp.coo_matrix( - (data, (row, col)), shape=(nt * nx_nm, nx_cal_match), - copy=False).tocsr(copy=False)[:, ix_E0_mask].tocoo() + E_match_no_cal = ( + sp.coo_matrix((data, (row, col)), shape=(nt * nx_nm, nx_cal_match), copy=False) + .tocsr(copy=False)[:, ix_E0_mask] + .tocoo() + ) # DF and DB in EQ3 data = np.ones(nt * nx_nm, dtype=float) / 2 @@ -842,9 +953,11 @@ def construct_submatrices_matching_sections( d_no_cal = sp.coo_matrix( ( np.concatenate((data, -data)), - (np.concatenate((row, row)), np.concatenate((colf, colb)))), + (np.concatenate((row, row)), np.concatenate((colf, colb))), + ), shape=(nt * nx_nm, 2 * nt), - copy=False) + copy=False, + ) Zero_eq3_gamma = sp.coo_matrix(([], ([], [])), shape=(nt * nx_nm, 1)) # TA @@ -870,57 +983,61 @@ def construct_submatrices_matching_sections( # TAF1 and TAF2 in EQ1 data_taf = np.repeat( -np.array(hix >= ix_ta_ix0, dtype=float) - + np.array(tix >= ix_ta_ix0, dtype=float), nt) + + np.array(tix >= ix_ta_ix0, dtype=float), + nt, + ) row_taf = np.arange(nt * npair) col_taf = np.tile(np.arange(nt, dtype=int), npair) - mask_taf = data_taf.astype( - bool) # only store non-zeros in sparse m + mask_taf = data_taf.astype(bool) # only store non-zeros in sparse m TA_eq1_list.append( sp.coo_matrix( - ( - data_taf[mask_taf], - (row_taf[mask_taf], col_taf[mask_taf])), + (data_taf[mask_taf], (row_taf[mask_taf], col_taf[mask_taf])), shape=(nt * npair, 2 * nt), - copy=False)) + copy=False, + ) + ) # TAB1 and TAB2 in EQ2 data_tab = np.repeat( -np.array(hix < ix_ta_ix0, dtype=float) - + np.array(tix < ix_ta_ix0, dtype=float), nt) + + np.array(tix < ix_ta_ix0, dtype=float), 
+ nt, + ) row_tab = np.arange(nt * npair) col_tab = np.tile(np.arange(nt, 2 * nt, dtype=int), npair) - mask_tab = data_tab.astype( - bool) # only store non-zeros in sparse m + mask_tab = data_tab.astype(bool) # only store non-zeros in sparse m TA_eq2_list.append( sp.coo_matrix( - ( - data_tab[mask_tab], - (row_tab[mask_tab], col_tab[mask_tab])), + (data_tab[mask_tab], (row_tab[mask_tab], col_tab[mask_tab])), shape=(nt * npair, 2 * nt), - copy=False)) + copy=False, + ) + ) data_taf = np.repeat( - np.array(ix_match_not_cal >= ix_ta_ix0, dtype=float) / 2, nt) + np.array(ix_match_not_cal >= ix_ta_ix0, dtype=float) / 2, nt + ) data_tab = np.repeat( - -np.array(ix_match_not_cal < ix_ta_ix0, dtype=float) / 2, nt) + -np.array(ix_match_not_cal < ix_ta_ix0, dtype=float) / 2, nt + ) row_ta = np.arange(nt * nx_nm) col_taf = np.tile(np.arange(nt, dtype=int), nx_nm) col_tab = np.tile(np.arange(nt, 2 * nt, dtype=int), nx_nm) - mask_taf = data_taf.astype( - bool) # only store non-zeros in sparse m - mask_tab = data_tab.astype( - bool) # only store non-zeros in sparse m + mask_taf = data_taf.astype(bool) # only store non-zeros in sparse m + mask_tab = data_tab.astype(bool) # only store non-zeros in sparse m TA_eq3_list.append( sp.coo_matrix( ( - np.concatenate( - (data_taf[mask_taf], data_tab[mask_tab])), ( - np.concatenate( - (row_ta[mask_taf], row_ta[mask_tab])), - np.concatenate( - (col_taf[mask_taf], col_tab[mask_tab])))), + np.concatenate((data_taf[mask_taf], data_tab[mask_tab])), + ( + np.concatenate((row_ta[mask_taf], row_ta[mask_tab])), + np.concatenate((col_taf[mask_taf], col_tab[mask_tab])), + ), + ), shape=(nt * nx_nm, 2 * nt), - copy=False)) + copy=False, + ) + ) Z_TA_eq1 = sp.hstack(TA_eq1_list) Z_TA_eq2 = sp.hstack(TA_eq2_list) @@ -932,9 +1049,19 @@ def construct_submatrices_matching_sections( Z_TA_eq3 = sp.coo_matrix(([], ([], [])), shape=(nt * nx_nm, 0)) return ( - E_match_F, E_match_B, E_match_no_cal, Z_TA_eq1, Z_TA_eq2, Z_TA_eq3, - d_no_cal, ix_from_cal_match_to_glob, ix_match_not_cal, Zero_eq12_gamma, - Zero_eq3_gamma, Zero_d_eq12) + E_match_F, + E_match_B, + E_match_no_cal, + Z_TA_eq1, + Z_TA_eq2, + Z_TA_eq3, + d_no_cal, + ix_from_cal_match_to_glob, + ix_match_not_cal, + Zero_eq12_gamma, + Zero_eq3_gamma, + Zero_d_eq12, + ) def construct_submatrices(nt, nx, ds, trans_att, x_sec): @@ -955,31 +1082,30 @@ def construct_submatrices(nt, nx, ds, trans_att, x_sec): # Z \gamma # Eq.47 cal_ref = np.array( - ds.ufunc_per_section( - label='st', ref_temp_broadcasted=True, calc_per='all')) + ds.ufunc_per_section(label="st", ref_temp_broadcasted=True, calc_per="all") + ) data_gamma = 1 / (cal_ref.ravel() + 273.15) # gamma coord_gamma_row = np.arange(nt * nx, dtype=int) coord_gamma_col = np.zeros(nt * nx, dtype=int) Z_gamma = sp.coo_matrix( - (data_gamma, (coord_gamma_row, coord_gamma_col)), - shape=(nt * nx, 1), - copy=False) + (data_gamma, (coord_gamma_row, coord_gamma_col)), shape=(nt * nx, 1), copy=False + ) # Z D # Eq.47 data_c = np.ones(nt * nx, dtype=float) coord_c_row = np.arange(nt * nx, dtype=int) coord_c_col = np.tile(np.arange(nt, dtype=int), nx) Z_D = sp.coo_matrix( - (data_c, (coord_c_row, coord_c_col)), shape=(nt * nx, nt), copy=False) + (data_c, (coord_c_row, coord_c_col)), shape=(nt * nx, nt), copy=False + ) # E # Eq.47 # E is 0 at ix=0 data_c = np.ones(nt * (nx - 1), dtype=float) coord_c_row = np.arange(nt, nt * nx, dtype=int) coord_c_col = np.repeat(np.arange(nx - 1, dtype=int), nt) E = sp.coo_matrix( - (data_c, (coord_c_row, coord_c_col)), - shape=(nt * nx, (nx - 1)), - copy=False) 
+ (data_c, (coord_c_row, coord_c_col)), shape=(nt * nx, (nx - 1)), copy=False + ) # Zero # Eq.45 Zero_d = sp.coo_matrix(([], ([], [])), shape=(nt * nx, nt)) # Zero_E = sp.coo_matrix(([], ([], [])), shape=(nt * nx, (nx - 1))) @@ -999,8 +1125,7 @@ def construct_submatrices(nt, nx, ds, trans_att, x_sec): elif trans_atti <= x_sec[0]: ix_sec_ta_ix0 = 0 else: - ix_sec_ta_ix0 = np.flatnonzero( - x_sec >= trans_atti)[0] + ix_sec_ta_ix0 = np.flatnonzero(x_sec >= trans_atti)[0] # Data is -1 for both forward and backward # I_fw = 1/Tref*gamma - D_fw - E - TA_fw. Eq40 @@ -1009,24 +1134,26 @@ def construct_submatrices(nt, nx, ds, trans_att, x_sec): # the connector. coord_ta_fw_row = np.arange(nt * ix_sec_ta_ix0, nt * nx, dtype=int) # nt parameters - coord_ta_fw_col = np.tile( - np.arange(nt, dtype=int), nx - ix_sec_ta_ix0) + coord_ta_fw_col = np.tile(np.arange(nt, dtype=int), nx - ix_sec_ta_ix0) TA_fw_list.append( sp.coo_matrix( (data_ta_fw, (coord_ta_fw_row, coord_ta_fw_col)), shape=(nt * nx, 2 * nt), - copy=False)) # TA_fw + copy=False, + ) + ) # TA_fw # I_bw = 1/Tref*gamma - D_bw + E - TA_bw. Eq41 data_ta_bw = -np.ones(nt * ix_sec_ta_ix0, dtype=float) coord_ta_bw_row = np.arange(nt * ix_sec_ta_ix0, dtype=int) - coord_ta_bw_col = np.tile( - np.arange(nt, 2 * nt, dtype=int), ix_sec_ta_ix0) + coord_ta_bw_col = np.tile(np.arange(nt, 2 * nt, dtype=int), ix_sec_ta_ix0) TA_bw_list.append( sp.coo_matrix( (data_ta_bw, (coord_ta_bw_row, coord_ta_bw_col)), shape=(nt * nx, 2 * nt), - copy=False)) # TA_bw + copy=False, + ) + ) # TA_bw Z_TA_fw = sp.hstack(TA_fw_list) Z_TA_bw = sp.hstack(TA_bw_list) @@ -1038,14 +1165,15 @@ def construct_submatrices(nt, nx, ds, trans_att, x_sec): def wls_sparse( - X, - y, - w=1., - calc_cov=False, - verbose=False, - x0=None, - return_werr=False, - **solver_kwargs): + X, + y, + w=1.0, + calc_cov=False, + verbose=False, + x0=None, + return_werr=False, + **solver_kwargs, +): """ If some initial estimate x0 is known and if damp == 0, one could proceed as follows: - Compute a residual vector r0 = b - A*x0. @@ -1072,34 +1200,35 @@ def wls_sparse( # of the error. if x0 is not None: - assert np.all(np.isfinite(x0)), 'Nan/inf in p0 initial estimate' + assert np.all(np.isfinite(x0)), "Nan/inf in p0 initial estimate" else: raise NotImplementedError if sp.issparse(X): - assert np.all(np.isfinite(X.data)), 'Nan/inf in X: check ' +\ - 'reference temperatures?' + assert np.all(np.isfinite(X.data)), ( + "Nan/inf in X: check " + "reference temperatures?" + ) else: - assert np.all(np.isfinite(X)), 'Nan/inf in X: check ' +\ - 'reference temperatures?' - assert np.all(np.isfinite(w)), 'Nan/inf in weights' - assert np.all(np.isfinite(y)), 'Nan/inf in observations' + assert np.all(np.isfinite(X)), ( + "Nan/inf in X: check " + "reference temperatures?" + ) + assert np.all(np.isfinite(w)), "Nan/inf in weights" + assert np.all(np.isfinite(y)), "Nan/inf in observations" # precision up to 10th decimal. So that the temperature is approximately # estimated with 8 decimal precision. - if 'atol' not in solver_kwargs: - solver_kwargs['atol'] = 1e-16 - if 'btol' not in solver_kwargs: - solver_kwargs['btol'] = 1e-16 + if "atol" not in solver_kwargs: + solver_kwargs["atol"] = 1e-16 + if "btol" not in solver_kwargs: + solver_kwargs["btol"] = 1e-16 if w is None: # gracefully default to unweighted - w = 1. 
+ w = 1.0 w_std = np.asarray(np.sqrt(w)) wy = np.asarray(w_std * y) - w_std = np.broadcast_to( - np.atleast_2d(np.squeeze(w_std)).T, (X.shape[0], 1)) + w_std = np.broadcast_to(np.atleast_2d(np.squeeze(w_std)).T, (X.shape[0], 1)) if not sp.issparse(X): wX = w_std * X @@ -1115,8 +1244,7 @@ def wls_sparse( wr0 = wy - wX.dot(x0) # noinspection PyTypeChecker - out_sol = ln.lsqr( - wX, wr0, show=verbose, calc_var=True, **solver_kwargs) + out_sol = ln.lsqr(wX, wr0, show=verbose, calc_var=True, **solver_kwargs) p_sol = x0 + out_sol[0] @@ -1133,8 +1261,7 @@ def wls_sparse( if sp.issparse(arg): # arg_inv = np.linalg.inv(arg.toarray()) - arg_inv = np.linalg.lstsq( - arg.todense(), np.eye(npar), rcond=None)[0] + arg_inv = np.linalg.lstsq(arg.todense(), np.eye(npar), rcond=None)[0] else: # arg_inv = np.linalg.inv(arg) arg_inv = np.linalg.lstsq(arg, np.eye(npar), rcond=None)[0] @@ -1143,8 +1270,10 @@ def wls_sparse( p_var = np.diagonal(p_cov) if np.any(p_var < 0): - m = 'Unable to invert the matrix. The following parameters are ' \ - 'difficult to determine:' + str(np.where(p_var < 0)) + m = ( + "Unable to invert the matrix. The following parameters are " + "difficult to determine:" + str(np.where(p_var < 0)) + ) assert np.all(p_var >= 0), m if return_werr: @@ -1158,8 +1287,7 @@ def wls_sparse( return p_sol, p_var -def wls_stats( - X, y, w=1., calc_cov=False, x0=None, return_werr=False, verbose=False): +def wls_stats(X, y, w=1.0, calc_cov=False, x0=None, return_werr=False, verbose=False): """ Parameters @@ -1209,22 +1337,22 @@ def wls_stats( def calc_alpha_double( - mode, - ds, - st_var=None, - ast_var=None, - rst_var=None, - rast_var=None, - ix_alpha_is_zero=-1, - talpha_fw=None, - talpha_bw=None, - talpha_fw_var=None, - talpha_bw_var=None): + mode, + ds, + st_var=None, + ast_var=None, + rst_var=None, + rast_var=None, + ix_alpha_is_zero=-1, + talpha_fw=None, + talpha_bw=None, + talpha_fw_var=None, + talpha_bw_var=None, +): """Eq.50 if weighted least squares. Assumes ds has `trans_att` pre-configured.""" - assert ix_alpha_is_zero >= 0, 'Define ix_alpha_is_zero' + \ - str(ix_alpha_is_zero) + assert ix_alpha_is_zero >= 0, "Define ix_alpha_is_zero" + str(ix_alpha_is_zero) if st_var is not None: if callable(st_var): @@ -1244,19 +1372,17 @@ def calc_alpha_double( else: rast_var_val = np.asarray(rast_var) - i_var_fw = ds.i_var( - st_var_val, ast_var_val, st_label='st', ast_label='ast') - i_var_bw = ds.i_var( - rst_var_val, rast_var_val, st_label='rst', ast_label='rast') + i_var_fw = ds.i_var(st_var_val, ast_var_val, st_label="st", ast_label="ast") + i_var_bw = ds.i_var(rst_var_val, rast_var_val, st_label="rst", ast_label="rast") i_fw = np.log(ds.st / ds.ast) i_bw = np.log(ds.rst / ds.rast) - if mode == 'guess': + if mode == "guess": A_var = (i_var_fw + i_var_bw) / 2 A = (i_bw - i_fw) / 2 - elif mode == 'exact': + elif mode == "exact": D_F = ds["df"] D_B = ds["db"] D_F_var = ds["df_var"] @@ -1266,53 +1392,61 @@ def calc_alpha_double( # Can be improved by including covariances. That reduces the # uncert. 
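The time reduction applied near the end of this routine is a plain inverse-variance weighted mean: E_var = 1 / sum_t(1 / A_var) and E = E_var * sum_t(A / A_var). A small numpy sketch with random placeholder estimates:

    import numpy as np

    rng = np.random.default_rng(1)
    A = rng.normal(0.5, 0.1, size=(4, 10))   # toy estimates, shape (x, time)
    A_var = np.full_like(A, 0.01)            # toy per-estimate variances

    E_var = 1.0 / (1.0 / A_var).sum(axis=1)  # variance of the weighted mean
    E = (A / A_var).sum(axis=1) * E_var      # the weighted mean itself
    print(E.shape, E_var.shape)              # (4,) (4,)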
- ta_arr_fw = np.zeros((ds.x.size, ds['time'].size)) - ta_arr_fw_var = np.zeros((ds.x.size, ds['time'].size)) + ta_arr_fw = np.zeros((ds.x.size, ds["time"].size)) + ta_arr_fw_var = np.zeros((ds.x.size, ds["time"].size)) for tai, taxi, tai_var in zip( - talpha_fw.T, ds.trans_att.values, talpha_fw_var.T): - ta_arr_fw[ds.x.values >= taxi] = \ + talpha_fw.T, ds.trans_att.values, talpha_fw_var.T + ): + ta_arr_fw[ds.x.values >= taxi] = ( ta_arr_fw[ds.x.values >= taxi] + tai - ta_arr_fw_var[ds.x.values >= taxi] = \ + ) + ta_arr_fw_var[ds.x.values >= taxi] = ( ta_arr_fw_var[ds.x.values >= taxi] + tai_var + ) - ta_arr_bw = np.zeros((ds.x.size, ds['time'].size)) - ta_arr_bw_var = np.zeros((ds.x.size, ds['time'].size)) + ta_arr_bw = np.zeros((ds.x.size, ds["time"].size)) + ta_arr_bw_var = np.zeros((ds.x.size, ds["time"].size)) for tai, taxi, tai_var in zip( - talpha_bw.T, ds.trans_att.values, talpha_bw_var.T): - ta_arr_bw[ds.x.values < taxi] = \ - ta_arr_bw[ds.x.values < taxi] + tai - ta_arr_bw_var[ds.x.values < taxi] = \ + talpha_bw.T, ds.trans_att.values, talpha_bw_var.T + ): + ta_arr_bw[ds.x.values < taxi] = ta_arr_bw[ds.x.values < taxi] + tai + ta_arr_bw_var[ds.x.values < taxi] = ( ta_arr_bw_var[ds.x.values < taxi] + tai_var + ) A_var = ( - i_var_fw + i_var_bw + D_B_var + D_F_var + ta_arr_fw_var - + ta_arr_bw_var) / 2 - A = (i_bw - i_fw) / 2 + (D_B - D_F) / 2 + ( - ta_arr_bw - ta_arr_fw) / 2 + i_var_fw + + i_var_bw + + D_B_var + + D_F_var + + ta_arr_fw_var + + ta_arr_bw_var + ) / 2 + A = (i_bw - i_fw) / 2 + (D_B - D_F) / 2 + (ta_arr_bw - ta_arr_fw) / 2 else: A_var = (i_var_fw + i_var_bw + D_B_var + D_F_var) / 2 A = (i_bw - i_fw) / 2 + (D_B - D_F) / 2 - E_var = 1 / (1 / A_var).sum(dim='time') - E = (A / A_var).sum(dim='time') * E_var + E_var = 1 / (1 / A_var).sum(dim="time") + E = (A / A_var).sum(dim="time") * E_var else: i_fw = np.log(ds.st / ds.ast) i_bw = np.log(ds.rst / ds.rast) - if mode == 'guess': + if mode == "guess": A = (i_bw - i_fw) / 2 - elif mode == 'exact': + elif mode == "exact": D_F = ds["df"] D_B = ds["db"] A = (i_bw - i_fw) / 2 + (D_B - D_F) / 2 - E_var = A.var(dim='time') - E = A.mean(dim='time') + E_var = A.var(dim="time") + E = A.mean(dim="time") # E is defined zero at the first index of the reference sections - if mode == 'guess': + if mode == "guess": E -= E.isel(x=ix_alpha_is_zero) # assert np.abs(E.isel(x=ix_alpha_is_zero)) < 1e-8, \ @@ -1323,16 +1457,18 @@ def calc_alpha_double( def calc_df_db_double_est(ds, ix_alpha_is_zero, gamma_est): Ifwx0 = np.log( - ds.st.isel(x=ix_alpha_is_zero) - / ds.ast.isel(x=ix_alpha_is_zero)).values + ds.st.isel(x=ix_alpha_is_zero) / ds.ast.isel(x=ix_alpha_is_zero) + ).values Ibwx0 = np.log( - ds.rst.isel(x=ix_alpha_is_zero) - / ds.rast.isel(x=ix_alpha_is_zero)).values + ds.rst.isel(x=ix_alpha_is_zero) / ds.rast.isel(x=ix_alpha_is_zero) + ).values ref_temps_refs = ds.ufunc_per_section( - label='st', ref_temp_broadcasted=True, calc_per='all') - ix_sec = ds.ufunc_per_section(x_indices=True, calc_per='all') - ref_temps_x0 = ref_temps_refs[ - ix_sec == ix_alpha_is_zero].flatten().compute() + 273.15 + label="st", ref_temp_broadcasted=True, calc_per="all" + ) + ix_sec = ds.ufunc_per_section(x_indices=True, calc_per="all") + ref_temps_x0 = ( + ref_temps_refs[ix_sec == ix_alpha_is_zero].flatten().compute() + 273.15 + ) df_est = gamma_est / ref_temps_x0 - Ifwx0 db_est = gamma_est / ref_temps_x0 - Ibwx0 return df_est, db_est @@ -1360,19 +1496,27 @@ def match_sections(ds, matching_sections): hxs = ds.x.sel(x=hslice).size txs = ds.x.sel(x=tslice).size - 
assert hxs == txs, 'the two sections do not have matching ' \ - 'number of items: ' + str(hslice) + 'size: ' + \ - str(hxs) + str(tslice) + 'size: ' + str(txs) + assert ( + hxs == txs + ), "the two sections do not have matching " "number of items: " + str( + hslice + ) + "size: " + str( + hxs + ) + str( + tslice + ) + "size: " + str( + txs + ) hix = ds.ufunc_per_section( - sections={0: [i[0] for i in matching_sections]}, - x_indices=True, - calc_per='all') + sections={0: [i[0] for i in matching_sections]}, x_indices=True, calc_per="all" + ) tixl = [] for _, tslice, reverse_flag in matching_sections: ixi = ds.ufunc_per_section( - sections={0: [tslice]}, x_indices=True, calc_per='all') + sections={0: [tslice]}, x_indices=True, calc_per="all" + ) if reverse_flag: tixl.append(ixi[::-1]) diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index 35540e34..ae5c92b0 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -36,11 +36,11 @@ from dtscalibration.io import silixa_xml_version_check from dtscalibration.io import ziphandle_to_filepathlist -dtsattr_namelist = ['double_ended_flag'] +dtsattr_namelist = ["double_ended_flag"] dim_attrs = {k: v for kl, v in _dim_attrs.items() for k in kl} warnings.filterwarnings( - 'ignore', - message='xarray subclass DataStore should explicitly define __slots__') + "ignore", message="xarray subclass DataStore should explicitly define __slots__" +) class DataStore(xr.Dataset): @@ -89,6 +89,7 @@ class DataStore(xr.Dataset): dtscalibration.open_datastore : Load (calibrated) measurements from netCDF-like file """ + def __init__(self, *args, autofill_dim_attrs=True, **kwargs): super().__init__(*args, **kwargs) @@ -98,23 +99,24 @@ def __init__(self, *args, autofill_dim_attrs=True, **kwargs): all_dim = list(self.dims) if all_dim: - if 'x' in all_dim: - ideal_dim.append('x') - all_dim.pop(all_dim.index('x')) + if "x" in all_dim: + ideal_dim.append("x") + all_dim.pop(all_dim.index("x")) - if 'time': - if 'time' in all_dim: - ideal_dim.append('time') - all_dim.pop(all_dim.index('time')) + if "time": + if "time" in all_dim: + ideal_dim.append("time") + all_dim.pop(all_dim.index("time")) ideal_dim += all_dim for name, var in self._variables.items(): var_dims = tuple( - dim for dim in ideal_dim if dim in (var.dims + (...,))) + dim for dim in ideal_dim if dim in (var.dims + (...,)) + ) self._variables[name] = var.transpose(*var_dims) - if 'trans_att' not in self.coords: + if "trans_att" not in self.coords: self.set_trans_att(trans_att=[]) # Get attributes from dataset @@ -128,11 +130,11 @@ def __init__(self, *args, autofill_dim_attrs=True, **kwargs): if name in dim_attrs and not self.coords[name].attrs: self.coords[name].attrs = dim_attrs[name] - if '_sections' not in self.attrs: - self.attrs['_sections'] = yaml.dump(None) + if "_sections" not in self.attrs: + self.attrs["_sections"] = yaml.dump(None) - if 'sections' in kwargs: - self.sections = kwargs['sections'] + if "sections" in kwargs: + self.sections = kwargs["sections"] pass @@ -141,53 +143,52 @@ def __repr__(self): # 'xarray' is prepended. 
so we remove it and add 'dtscalibration' s = xr.core.formatting.dataset_repr(self) name_module = type(self).__name__ - preamble_new = u'<dtscalibration.%s>' % name_module + preamble_new = "<dtscalibration.%s>" % name_module # Add sections to new preamble - preamble_new += '\nSections:' - if hasattr(self, '_sections') and self.sections: - preamble_new += '\n' + preamble_new += "\nSections:" + if hasattr(self, "_sections") and self.sections: + preamble_new += "\n" - if 'units' in self.x: + if "units" in self.x: unit = self.x.units else: - unit = '' + unit = "" for k, v in self.sections.items(): - preamble_new += ' {0: <23}'.format(k) + preamble_new += " {0: <23}".format(k) # Compute statistics reference section timeseries - sec_stat = '({0:6.2f}'.format(float(self[k].mean())) - sec_stat += ' +/-{0:5.2f}'.format(float(self[k].std())) - sec_stat += u'\N{DEGREE SIGN}C)\t' + sec_stat = "({0:6.2f}".format(float(self[k].mean())) + sec_stat += " +/-{0:5.2f}".format(float(self[k].std())) + sec_stat += "\N{DEGREE SIGN}C)\t" preamble_new += sec_stat # print sections vl = [ - '{0:.2f}{2} - {1:.2f}{2}'.format(vi.start, vi.stop, unit) - for vi in v] - preamble_new += ' and '.join(vl) + '\n' + "{0:.2f}{2} - {1:.2f}{2}".format(vi.start, vi.stop, unit) + for vi in v + ] + preamble_new += " and ".join(vl) + "\n" else: - preamble_new += 18 * ' ' + '()\n' + preamble_new += 18 * " " + "()\n" # add new preamble to the remainder of the former __repr__ len_preamble_old = 8 + len(name_module) + 2 # untill the attribute listing - attr_index = s.find('Attributes:') + attr_index = s.find("Attributes:") # abbreviate attribute listing - attr_list_all = s[attr_index:].split(sep='\n') + attr_list_all = s[attr_index:].split(sep="\n") if len(attr_list_all) > 10: - s_too_many = ['\n.. and many more attributes. See: ds.attrs'] + s_too_many = ["\n.. and many more attributes. 
See: ds.attrs"] attr_list = attr_list_all[:10] + s_too_many else: attr_list = attr_list_all - s_out = ( - preamble_new + s[len_preamble_old:attr_index] - + '\n'.join(attr_list)) + s_out = preamble_new + s[len_preamble_old:attr_index] + "\n".join(attr_list) # return new __repr__ return s_out @@ -216,9 +217,9 @@ def sections(self): ------- """ - if '_sections' not in self.attrs: - self.attrs['_sections'] = yaml.dump(None) - return yaml.load(self.attrs['_sections'], Loader=yaml.UnsafeLoader) + if "_sections" not in self.attrs: + self.attrs["_sections"] = yaml.dump(None) + return yaml.load(self.attrs["_sections"], Loader=yaml.UnsafeLoader) @sections.setter def sections(self, sections: Dict[str, List[slice]]): @@ -230,8 +231,8 @@ def sections(self, sections: Dict[str, List[slice]]): # be less restrictive for capitalized labels # find lower cases label labels = np.reshape( - [[s.lower(), s] for s in self.data_vars.keys()], - (-1,)).tolist() + [[s.lower(), s] for s in self.data_vars.keys()], (-1,) + ).tolist() sections_fix = dict() for k, v in sections.items(): @@ -241,45 +242,48 @@ def sections(self, sections: Dict[str, List[slice]]): k_normal_case = labels[i_normal_case] sections_fix[k_normal_case] = v else: - assert k in self.data_vars, 'The keys of the ' \ - 'sections-dictionary should ' \ - 'refer to a valid timeserie ' \ - 'already stored in ' \ - 'ds.data_vars ' + assert k in self.data_vars, ( + "The keys of the " + "sections-dictionary should " + "refer to a valid timeserie " + "already stored in " + "ds.data_vars " + ) sections_fix_slice_fixed = dict() for k, v in sections_fix.items(): - assert isinstance(v, (list, tuple)), \ - 'The values of the sections-dictionary ' \ - 'should be lists of slice objects.' + assert isinstance(v, (list, tuple)), ( + "The values of the sections-dictionary " + "should be lists of slice objects." + ) for vi in v: - assert isinstance(vi, slice), \ - 'The values of the sections-dictionary should ' \ - 'be lists of slice objects.' + assert isinstance(vi, slice), ( + "The values of the sections-dictionary should " + "be lists of slice objects." + ) - assert self.x.sel(x=vi).size > 0, \ - f'Better define the {k} section. You tried {vi}, ' \ - 'which is out of reach' + assert self.x.sel(x=vi).size > 0, ( + f"Better define the {k} section. 
You tried {vi}, " + "which is out of reach" + ) # sorted stretches - stretch_unsort = [ - slice(float(vi.start), float(vi.stop)) for vi in v] + stretch_unsort = [slice(float(vi.start), float(vi.stop)) for vi in v] stretch_start = [i.start for i in stretch_unsort] stretch_i_sorted = np.argsort(stretch_start) sections_fix_slice_fixed[k] = [ - stretch_unsort[i] for i in stretch_i_sorted] + stretch_unsort[i] for i in stretch_i_sorted + ] # Prevent overlapping slices ix_sec = self.ufunc_per_section( - sections=sections_fix_slice_fixed, - x_indices=True, - calc_per='all') - assert np.unique(ix_sec).size == ix_sec.size, \ - "The sections are overlapping" + sections=sections_fix_slice_fixed, x_indices=True, calc_per="all" + ) + assert np.unique(ix_sec).size == ix_sec.size, "The sections are overlapping" - self.attrs['_sections'] = yaml.dump(sections_fix_slice_fixed) + self.attrs["_sections"] = yaml.dump(sections_fix_slice_fixed) pass @sections.deleter @@ -296,17 +300,19 @@ def is_double_ended(self) -> float: ------- """ - if 'isDoubleEnded' in self.attrs: - return bool(int(self.attrs['isDoubleEnded'])) - elif 'customData:isDoubleEnded' in self.attrs: + if "isDoubleEnded" in self.attrs: + return bool(int(self.attrs["isDoubleEnded"])) + elif "customData:isDoubleEnded" in self.attrs: # backward compatible to when only silixa files were supported - return bool(int(self.attrs['customData:isDoubleEnded'])) + return bool(int(self.attrs["customData:isDoubleEnded"])) else: - raise ValueError("Could not determine if the data was from a double-ended setup.") + raise ValueError( + "Could not determine if the data was from a double-ended setup." + ) @is_double_ended.setter def is_double_ended(self, flag: bool): - self.attrs['isDoubleEnded'] = flag + self.attrs["isDoubleEnded"] = flag pass @property @@ -318,7 +324,7 @@ def chfw(self) -> float: ------- """ - return int(self.attrs['forwardMeasurementChannel']) - 1 # zero-based + return int(self.attrs["forwardMeasurementChannel"]) - 1 # zero-based @property def chbw(self): @@ -330,8 +336,7 @@ def chbw(self): """ if self.is_double_ended: - return int( - self.attrs['reverseMeasurementChannel']) - 1 # zero-based + return int(self.attrs["reverseMeasurementChannel"]) - 1 # zero-based else: return None @@ -345,22 +350,23 @@ def channel_configuration(self): """ d = { - 'chfw': - { - 'st_label': 'st', - 'ast_label': 'ast', - 'acquisitiontime_label': 'userAcquisitionTimeFW', - 'time_start_label': 'timeFWstart', - 'time_label': 'timeFW', - 'time_end_label': 'timeFWend'}, - 'chbw': - { - 'st_label': 'rst', - 'ast_label': 'rast', - 'acquisitiontime_label': 'userAcquisitionTimeBW', - 'time_start_label': 'timeBWstart', - 'time_label': 'timeBW', - 'time_end_label': 'timeBWend'}} + "chfw": { + "st_label": "st", + "ast_label": "ast", + "acquisitiontime_label": "userAcquisitionTimeFW", + "time_start_label": "timeFWstart", + "time_label": "timeFW", + "time_end_label": "timeFWend", + }, + "chbw": { + "st_label": "rst", + "ast_label": "rast", + "acquisitiontime_label": "userAcquisitionTimeBW", + "time_start_label": "timeBWstart", + "time_label": "timeBW", + "time_end_label": "timeBWend", + }, + } return d @property @@ -368,18 +374,19 @@ def timeseries_keys(self): """ Returns the keys of all timeseires that can be used for calibration. 
""" - return [k for k, v in self.data_vars.items() if v.dims == ('time',)] + return [k for k, v in self.data_vars.items() if v.dims == ("time",)] def to_netcdf( - self, - path=None, - mode='w', - format=None, - group=None, - engine=None, - encoding=None, - unlimited_dims=None, - compute=True): + self, + path=None, + mode="w", + format=None, + group=None, + engine=None, + encoding=None, + unlimited_dims=None, + compute=True, + ): """Write datastore contents to a netCDF file. Parameters @@ -444,12 +451,12 @@ def to_netcdf( encoding = self.get_default_encoding() if engine is None: - engine = 'netcdf4' + engine = "netcdf4" # Fix Bart Schilperoort: netCDF doesn't like None's for attribute, value in self.attrs.items(): if value is None: - self.attrs[attribute] = '' + self.attrs[attribute] = "" return super(DataStore, self).to_netcdf( path, @@ -459,19 +466,21 @@ def to_netcdf( engine=engine, encoding=encoding, unlimited_dims=unlimited_dims, - compute=compute) + compute=compute, + ) def to_mf_netcdf( - self, - folder_path=None, - filename_preamble='file_', - filename_extension='.nc', - format='netCDF4', - engine='netcdf4', - encoding=None, - mode='w', - compute=True, - time_chunks_from_key='st'): + self, + folder_path=None, + filename_preamble="file_", + filename_extension=".nc", + format="netCDF4", + engine="netcdf4", + encoding=None, + mode="w", + compute=True, + time_chunks_from_key="st", + ): """Write DataStore to multiple to multiple netCDF files. Splits the DataStore along the time dimension using the chunks. It @@ -550,22 +559,24 @@ def to_mf_netcdf( try: # This fails if not all chunks of the data_vars are time aligned. # In case we let Dask estimate an optimal chunk size. - t_chunks = self.chunks['time'] + t_chunks = self.chunks["time"] except: # noqa: E722 - if self[time_chunks_from_key].dims == ('x', 'time'): + if self[time_chunks_from_key].dims == ("x", "time"): _, t_chunks = da.ones( self[time_chunks_from_key].shape, - chunks=(-1, 'auto'), - dtype='float64').chunks + chunks=(-1, "auto"), + dtype="float64", + ).chunks - elif self[time_chunks_from_key].dims == ('time', 'x'): + elif self[time_chunks_from_key].dims == ("time", "x"): _, t_chunks = da.ones( self[time_chunks_from_key].shape, - chunks=('auto', -1), - dtype='float64').chunks + chunks=("auto", -1), + dtype="float64", + ).chunks else: - assert 0, 'something went wrong with your Stokes dimensions' + assert 0, "something went wrong with your Stokes dimensions" bnds = np.cumsum((0,) + t_chunks) x = [range(bu, bd) for bu, bd in zip(bnds[:-1], bnds[1:])] @@ -574,15 +585,17 @@ def to_mf_netcdf( paths = [ os.path.join( folder_path, - filename_preamble + "{:04d}".format(ix) + filename_extension) - for ix in range(len(x))] + filename_preamble + "{:04d}".format(ix) + filename_extension, + ) + for ix in range(len(x)) + ] encodings = [] for ids, ds in enumerate(datasets): if encoding is None: encodings.append( - ds.get_default_encoding( - time_chunks_from_key=time_chunks_from_key)) + ds.get_default_encoding(time_chunks_from_key=time_chunks_from_key) + ) else: encodings.append(encoding[ids]) @@ -598,8 +611,11 @@ def to_mf_netcdf( engine, compute=compute, multifile=True, - encoding=enc) - for ds, path, enc in zip(datasets, paths, encodings)]) + encoding=enc, + ) + for ds, path, enc in zip(datasets, paths, encodings) + ] + ) try: writes = [w.sync(compute=compute) for w in writers] @@ -611,15 +627,14 @@ def to_mf_netcdf( if not compute: def _finalize_store(write, store): - """ Finalize this store by explicitly syncing and closing""" + 
"""Finalize this store by explicitly syncing and closing""" del write # ensure writing is done first store.close() pass return dask.delayed( - [ - dask.delayed(_finalize_store)(w, s) - for w, s in zip(writes, stores)]) + [dask.delayed(_finalize_store)(w, s) for w, s in zip(writes, stores)] + ) pass @@ -635,16 +650,29 @@ def get_default_encoding(self, time_chunks_from_key=None): # The following variables are stored with a sufficiently large # precision in 32 bit float32l = [ - 'st', 'ast', 'rst', 'rast', 'time', 'timestart', 'tmp', 'timeend', - 'acquisitionTime', 'x'] + "st", + "ast", + "rst", + "rast", + "time", + "timestart", + "tmp", + "timeend", + "acquisitionTime", + "x", + ] int32l = [ - 'filename_tstamp', 'acquisitiontimeFW', 'acquisitiontimeBW', - 'userAcquisitionTimeFW', 'userAcquisitionTimeBW'] + "filename_tstamp", + "acquisitiontimeFW", + "acquisitiontimeBW", + "userAcquisitionTimeFW", + "userAcquisitionTimeBW", + ] # default variable compression compdata = dict( - zlib=True, complevel=6, - shuffle=False) # , least_significant_digit=None + zlib=True, complevel=6, shuffle=False + ) # , least_significant_digit=None # default coordinate compression compcoords = dict(zlib=True, complevel=4) @@ -655,52 +683,56 @@ def get_default_encoding(self, time_chunks_from_key=None): for k, v in encoding.items(): if k in float32l: - v['dtype'] = 'float32' + v["dtype"] = "float32" if k in int32l: - v['dtype'] = 'int32' + v["dtype"] = "int32" # v['_FillValue'] = -9999 # Int does not support NaN - if np.issubdtype(self[k].dtype, str) or np.issubdtype(self[k].dtype, object): + if np.issubdtype(self[k].dtype, str) or np.issubdtype( + self[k].dtype, object + ): # Compression not supported for variable length strings # https://github.com/Unidata/netcdf4-python/issues/1205 v["zlib"] = False if time_chunks_from_key is not None: # obtain optimal chunk sizes in time and x dim - if self[time_chunks_from_key].dims == ('x', 'time'): + if self[time_chunks_from_key].dims == ("x", "time"): x_chunk, t_chunk = da.ones( self[time_chunks_from_key].shape, - chunks=(-1, 'auto'), - dtype='float64').chunks + chunks=(-1, "auto"), + dtype="float64", + ).chunks - elif self[time_chunks_from_key].dims == ('time', 'x'): + elif self[time_chunks_from_key].dims == ("time", "x"): x_chunk, t_chunk = da.ones( self[time_chunks_from_key].shape, - chunks=('auto', -1), - dtype='float64').chunks + chunks=("auto", -1), + dtype="float64", + ).chunks else: - assert 0, 'something went wrong with your Stokes dimensions' + assert 0, "something went wrong with your Stokes dimensions" for k, v in encoding.items(): # By writing and compressing the data in chunks, some sort of # parallism is possible. 
- if self[k].dims == ('x', 'time'): + if self[k].dims == ("x", "time"): chunks = (x_chunk[0], t_chunk[0]) - elif self[k].dims == ('time', 'x'): + elif self[k].dims == ("time", "x"): chunks = (t_chunk[0], x_chunk[0]) - elif self[k].dims == ('x',): + elif self[k].dims == ("x",): chunks = (x_chunk[0],) - elif self[k].dims == ('time',): + elif self[k].dims == ("time",): chunks = (t_chunk[0],) else: continue - v['chunksizes'] = chunks + v["chunksizes"] = chunks return encoding @@ -751,13 +783,10 @@ def check_deprecated_kwargs(self, kwargs): ds.tmpw.plot() """ - list_of_depr = ['st_label', 'ast_label', 'rst_label', 'rast_label'] - list_of_pending_depr = ['transient_asym_att_x', 'transient_att_x'] + list_of_depr = ["st_label", "ast_label", "rst_label", "rast_label"] + list_of_pending_depr = ["transient_asym_att_x", "transient_att_x"] - kwargs = { - k: v - for k, v in kwargs.items() - if k not in list_of_pending_depr} + kwargs = {k: v for k, v in kwargs.items() if k not in list_of_pending_depr} for k in kwargs: if k in list_of_depr: @@ -765,8 +794,10 @@ def check_deprecated_kwargs(self, kwargs): if len(kwargs) != 0: raise NotImplementedError( - 'The following keywords are not ' + 'supported: ' - + ', '.join(kwargs.keys())) + "The following keywords are not " + + "supported: " + + ", ".join(kwargs.keys()) + ) pass @@ -787,46 +818,48 @@ def rename_labels(self, assertion=True): """ re_dict = { - 'ST': 'st', - 'AST': 'ast', - 'REV-ST': 'rst', - 'REV-AST': 'rast', - 'TMP': 'tmp', - 'TMPF': 'tmpf', - 'TMPB': 'tmpb', - 'TMPW': 'tmpw'} + "ST": "st", + "AST": "ast", + "REV-ST": "rst", + "REV-AST": "rast", + "TMP": "tmp", + "TMPF": "tmpf", + "TMPB": "tmpb", + "TMPW": "tmpw", + } re_dict_err = { k: v for k, v in re_dict.items() - if k in self.data_vars and v in self.data_vars} + if k in self.data_vars and v in self.data_vars + } msg = ( - 'Unable to rename the st_labels automagically. \n' - 'Please manually rename ST->st and REV-ST->rst. The \n' - f'parameters {re_dict_err.values()} were already present') + "Unable to rename the st_labels automagically. \n" + "Please manually rename ST->st and REV-ST->rst. The \n" + f"parameters {re_dict_err.values()} were already present" + ) if assertion: assert len(re_dict_err) == 0, msg elif len(re_dict_err) != 0: print(msg) for v in re_dict_err.values(): - print(f'Variable {v} was not renamed') + print(f"Variable {v} was not renamed") re_dict2 = { k: v for k, v in re_dict.items() - if k in self.data_vars and v not in self.data_vars} + if k in self.data_vars and v not in self.data_vars + } return self.rename(re_dict2) def variance_stokes(self, *args, **kwargs): - """Backwards compatibility. See `ds.variance_stokes_constant()` - """ + """Backwards compatibility. See `ds.variance_stokes_constant()`""" return self.variance_stokes_constant(*args, **kwargs) - def variance_stokes_constant( - self, st_label, sections=None, reshape_residuals=True): + def variance_stokes_constant(self, st_label, sections=None, reshape_residuals=True): """ Approximate the variance of the noise in Stokes intensity measurements with one value, suitable for small setups. 
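# Aside, not part of this diff: a hedged usage sketch of this method. The
# directory and the reference-sensor names are hypothetical; `sections` maps
# a temperature timeseries in the datastore to the stretches of fibre
# (in metres) held at that temperature.
import dtscalibration as dts

ds = dts.read_silixa_files(directory="data/single_ended/")
ds.sections = {
    "probe1Temperature": [slice(7.5, 17.0)],   # warm bath, assumed label
    "probe2Temperature": [slice(24.0, 34.0)],  # cold bath, assumed label
}
st_var, resid = ds.variance_stokes_constant(st_label="st")
# If the noise model holds, `resid` should look like white noise within the
# reference sections; otherwise variance_stokes_exponential() is the fallback
# suggested by the assertion below.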
@@ -942,15 +975,17 @@ def variance_stokes_constant( if sections: self.sections = sections else: - assert self.sections, 'sections are not defined' + assert self.sections, "sections are not defined" - assert self[st_label].dims[0] == 'x', 'Stokes are transposed' + assert self[st_label].dims[0] == "x", "Stokes are transposed" check_timestep_allclose(self, eps=0.01) data_dict = da.compute( - self.ufunc_per_section(label=st_label, calc_per='stretch'))[ - 0] # should maybe be per section. But then residuals + self.ufunc_per_section(label=st_label, calc_per="stretch") + )[ + 0 + ] # should maybe be per section. But then residuals # seem to be correlated between stretches. I don't know why.. BdT. resid_list = [] @@ -959,10 +994,10 @@ def variance_stokes_constant( nxs, nt = vi.shape npar = nt + nxs - p1 = np.ones(npar) * vi.mean()**0.5 + p1 = np.ones(npar) * vi.mean() ** 0.5 - res = minimize(func_cost, p1, args=(vi, nxs), method='Powell') - assert res.success, 'Unable to fit. Try variance_stokes_exponential' + res = minimize(func_cost, p1, args=(vi, nxs), method="Powell") + assert res.success, "Unable to fit. Try variance_stokes_exponential" fit = func_fit(res.x, nxs) resid_list.append(fit - vi) @@ -976,23 +1011,22 @@ def variance_stokes_constant( return var_I, resid else: - ix_resid = self.ufunc_per_section(x_indices=True, calc_per='all') + ix_resid = self.ufunc_per_section(x_indices=True, calc_per="all") - resid_sorted = np.full( - shape=self[st_label].shape, fill_value=np.nan) + resid_sorted = np.full(shape=self[st_label].shape, fill_value=np.nan) resid_sorted[ix_resid, :] = resid - resid_da = xr.DataArray( - data=resid_sorted, coords=self[st_label].coords) + resid_da = xr.DataArray(data=resid_sorted, coords=self[st_label].coords) return var_I, resid_da def variance_stokes_exponential( - self, - st_label, - sections=None, - use_statsmodels=False, - suppress_info=True, - reshape_residuals=True): + self, + st_label, + sections=None, + use_statsmodels=False, + suppress_info=True, + reshape_residuals=True, + ): """ Approximate the variance of the noise in Stokes intensity measurements with one value, suitable for small setups with measurements from only @@ -1117,9 +1151,9 @@ def variance_stokes_exponential( if sections: self.sections = sections else: - assert self.sections, 'sections are not defined' + assert self.sections, "sections are not defined" - assert self[st_label].dims[0] == 'x', 'Stokes are transposed' + assert self[st_label].dims[0] == "x", "Stokes are transposed" check_timestep_allclose(self, eps=0.01) @@ -1139,8 +1173,7 @@ def variance_stokes_exponential( len_stretch_list.append(_x.size) n_sections = len(len_stretch_list) # number of sections - n_locs = sum( - len_stretch_list) # total number of locations along cable used + n_locs = sum(len_stretch_list) # total number of locations along cable used # for reference. 
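# Aside, not part of this diff: the model fitted below assumes the Stokes
# intensity decays exponentially along the fibre, I(x) = exp(G) * exp(b * x),
# with one decay rate b per section and one G per section per time step, so
# ln(I) is linear in x. A self-contained single-section, single-time sketch
# with synthetic data (all values hypothetical):
import numpy as np

rng = np.random.default_rng(0)
x_demo = np.linspace(0.0, 30.0, 200)                   # positions [m]
I_demo = np.exp(8.0 - 0.01 * x_demo)                   # true decay
I_demo = I_demo + rng.normal(scale=5.0, size=x_demo.size)

A = np.stack([x_demo, np.ones_like(x_demo)], axis=1)   # columns: b, G
w_demo = I_demo.copy()                                 # weight ~ 1/std, as below
b_fit, G_fit = np.linalg.lstsq(
    A * w_demo[:, None], np.log(I_demo) * w_demo, rcond=None
)[0]
resid_demo = np.exp(G_fit + b_fit * x_demo) - I_demo   # cf. I_est - y below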
x = np.concatenate(x_list) # coordinates are already in memory @@ -1153,21 +1186,23 @@ def variance_stokes_exponential( # alpha is NOT the same for all -> one column per section coords1row = np.arange(nt * n_locs) coords1col = np.hstack( - [ - np.ones(in_locs * nt) * i - for i, in_locs in enumerate(len_stretch_list)]) # C for + [np.ones(in_locs * nt) * i for i, in_locs in enumerate(len_stretch_list)] + ) # C for # second calibration parameter is different per section and per timestep coords2row = np.arange(nt * n_locs) coords2col = np.hstack( [ np.repeat( - np.arange(i * nt + n_sections, (i + 1) * nt + n_sections), - in_locs) - for i, in_locs in enumerate(len_stretch_list)]) # C for + np.arange(i * nt + n_sections, (i + 1) * nt + n_sections), in_locs + ) + for i, in_locs in enumerate(len_stretch_list) + ] + ) # C for coords = ( np.concatenate([coords1row, coords2row]), - np.concatenate([coords1col, coords2col])) + np.concatenate([coords1col, coords2col]), + ) lny = np.log(y) w = y.copy() # 1/std. @@ -1179,10 +1214,8 @@ def variance_stokes_exponential( import statsmodels.api as sm X = sp.coo_matrix( - (data, coords), - shape=(nt * n_locs, ddof), - dtype=float, - copy=False) + (data, coords), shape=(nt * n_locs, ddof), dtype=float, copy=False + ) mod_wls = sm.WLS(lny, X.toarray(), weights=w**2) res_wls = mod_wls.fit() @@ -1195,25 +1228,29 @@ def variance_stokes_exponential( (wdata, coords), shape=(nt * n_locs, n_sections + nt * n_sections), dtype=float, - copy=False) + copy=False, + ) - wlny = (lny * w) + wlny = lny * w - p0_est = np.asarray(n_sections * [0.] + nt * n_sections * [8]) + p0_est = np.asarray(n_sections * [0.0] + nt * n_sections * [8]) # noinspection PyTypeChecker - a = ln.lsqr( - wX, wlny, x0=p0_est, show=not suppress_info, calc_var=False)[0] + a = ln.lsqr(wX, wlny, x0=p0_est, show=not suppress_info, calc_var=False)[0] beta = a[:n_sections] beta_expand_to_sec = np.hstack( [ np.repeat(float(beta[i]), leni * nt) - for i, leni in enumerate(len_stretch_list)]) + for i, leni in enumerate(len_stretch_list) + ] + ) G = np.asarray(a[n_sections:]) G_expand_to_sec = np.hstack( [ - np.repeat(G[i * nt:(i + 1) * nt], leni) - for i, leni in enumerate(len_stretch_list)]) + np.repeat(G[i * nt : (i + 1) * nt], leni) + for i, leni in enumerate(len_stretch_list) + ] + ) I_est = np.exp(G_expand_to_sec) * np.exp(x * beta_expand_to_sec) resid = I_est - y @@ -1226,41 +1263,33 @@ def variance_stokes_exponential( # added to ds resid_res = [] for leni, lenis, lenie in zip( - len_stretch_list, - nt * np.cumsum([0] + len_stretch_list[:-1]), - nt * np.cumsum(len_stretch_list)): + len_stretch_list, + nt * np.cumsum([0] + len_stretch_list[:-1]), + nt * np.cumsum(len_stretch_list), + ): try: - resid_res.append( - resid[lenis:lenie].reshape((leni, nt), order='F')) + resid_res.append(resid[lenis:lenie].reshape((leni, nt), order="F")) except: # noqa: E722 # Dask array does not support order - resid_res.append( - resid[lenis:lenie].T.reshape((nt, leni)).T) + resid_res.append(resid[lenis:lenie].T.reshape((nt, leni)).T) _resid = np.concatenate(resid_res) - _resid_x = self.ufunc_per_section(label='x', calc_per='all') + _resid_x = self.ufunc_per_section(label="x", calc_per="all") isort = np.argsort(_resid_x) resid_x = _resid_x[isort] # get indices from ufunc directly resid = _resid[isort, :] - ix_resid = np.array( - [np.argmin(np.abs(ai - self.x.data)) for ai in resid_x]) + ix_resid = np.array([np.argmin(np.abs(ai - self.x.data)) for ai in resid_x]) - resid_sorted = np.full( - shape=self[st_label].shape, 
fill_value=np.nan) + resid_sorted = np.full(shape=self[st_label].shape, fill_value=np.nan) resid_sorted[ix_resid, :] = resid - resid_da = xr.DataArray( - data=resid_sorted, coords=self[st_label].coords) + resid_da = xr.DataArray(data=resid_sorted, coords=self[st_label].coords) return var_I, resid_da def variance_stokes_linear( - self, - st_label, - sections=None, - nbin=50, - through_zero=True, - plot_fit=False): + self, st_label, sections=None, nbin=50, through_zero=True, plot_fit=False + ): """ Approximate the variance of the noise in Stokes intensity measurements with a linear function of the intensity, suitable for large setups. @@ -1377,12 +1406,12 @@ def variance_stokes_linear( if sections: self.sections = sections else: - assert self.sections, 'sections are not defined' + assert self.sections, "sections are not defined" - assert self[st_label].dims[0] == 'x', 'Stokes are transposed' + assert self[st_label].dims[0] == "x", "Stokes are transposed" _, resid = self.variance_stokes(st_label=st_label) - ix_sec = self.ufunc_per_section(x_indices=True, calc_per='all') + ix_sec = self.ufunc_per_section(x_indices=True, calc_per="all") st = self.isel(x=ix_sec)[st_label].values.ravel() diff_st = resid.isel(x=ix_sec).values.ravel() @@ -1394,8 +1423,11 @@ def variance_stokes_linear( if nbin_ != nbin: print( - 'Estimation of linear variance of', st_label, - 'Adjusting nbin to:', nbin_) + "Estimation of linear variance of", + st_label, + "Adjusting nbin to:", + nbin_, + ) nbin = nbin_ isort = np.argsort(st) @@ -1404,15 +1436,15 @@ def variance_stokes_linear( if through_zero: # VAR(Stokes) = slope * Stokes - offset = 0. - slope = np.linalg.lstsq( - st_sort_mean[:, None], st_sort_var, rcond=None)[0] + offset = 0.0 + slope = np.linalg.lstsq(st_sort_mean[:, None], st_sort_var, rcond=None)[0] else: # VAR(Stokes) = slope * Stokes + offset slope, offset = np.linalg.lstsq( np.hstack((st_sort_mean[:, None], np.ones((nbin, 1)))), st_sort_var, - rcond=None)[0] + rcond=None, + )[0] if offset < 0: warnings.warn( @@ -1421,30 +1453,33 @@ def variance_stokes_linear( f"not possible. Most likely, your {st_label} do " f"not vary enough to fit a linear curve. Either " f"use `through_zero` option or use " - f"`ds.variance_stokes_constant()`") + f"`ds.variance_stokes_constant()`" + ) def var_fun(stokes): return slope * stokes + offset if plot_fit: plt.figure() - plt.scatter(st_sort_mean, st_sort_var, marker='.', c='black') + plt.scatter(st_sort_mean, st_sort_var, marker=".", c="black") plt.plot( - [0., st_sort_mean[-1]], - [var_fun(0.), var_fun(st_sort_mean[-1])], - c='white', - lw=1.3) + [0.0, st_sort_mean[-1]], + [var_fun(0.0), var_fun(st_sort_mean[-1])], + c="white", + lw=1.3, + ) plt.plot( - [0., st_sort_mean[-1]], - [var_fun(0.), var_fun(st_sort_mean[-1])], - c='black', - lw=0.8) - plt.xlabel(st_label + ' intensity') - plt.ylabel(st_label + ' intensity variance') + [0.0, st_sort_mean[-1]], + [var_fun(0.0), var_fun(st_sort_mean[-1])], + c="black", + lw=0.8, + ) + plt.xlabel(st_label + " intensity") + plt.ylabel(st_label + " intensity variance") return slope, offset, st_sort_mean, st_sort_var, resid, var_fun - def i_var(self, st_var, ast_var, st_label='st', ast_label='ast'): + def i_var(self, st_var, ast_var, st_label="st", ast_label="ast"): """ Compute the variance of an observation given the stokes and anti-Stokes intensities and their variance. 
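# Aside, not part of this diff: the return line in the hunk below is the
# first-order (delta-method) variance of i = ln(st) - ln(ast),
#   var(i) ~= st**-2 * var(st) + ast**-2 * var(ast).
# A quick Monte Carlo check of that approximation with synthetic numbers:
import numpy as np

rng = np.random.default_rng(1)
st0, ast0, st_var0, ast_var0 = 4000.0, 2500.0, 9.0, 9.0
st_s = rng.normal(st0, st_var0**0.5, size=200_000)
ast_s = rng.normal(ast0, ast_var0**0.5, size=200_000)
mc_var = np.var(np.log(st_s) - np.log(ast_s), ddof=1)
lin_var = st0**-2 * st_var0 + ast0**-2 * ast_var0
assert abs(mc_var - lin_var) / lin_var < 0.05  # agree to within MC noise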
@@ -1504,13 +1539,14 @@ def i_var(self, st_var, ast_var, st_label='st', ast_label='ast'): return st**-2 * st_var + ast**-2 * ast_var def inverse_variance_weighted_mean( - self, - tmp1='tmpf', - tmp2='tmpb', - tmp1_var='tmpf_mc_var', - tmp2_var='tmpb_mc_var', - tmpw_store='tmpw', - tmpw_var_store='tmpw_var'): + self, + tmp1="tmpf", + tmp2="tmpb", + tmp1_var="tmpf_mc_var", + tmp2_var="tmpb_mc_var", + tmpw_store="tmpw", + tmpw_var_store="tmpw_var", + ): """ Average two temperature datasets with the inverse of the variance as weights. The two @@ -1541,18 +1577,19 @@ def inverse_variance_weighted_mean( self[tmpw_var_store] = 1 / (1 / self[tmp1_var] + 1 / self[tmp2_var]) self[tmpw_store] = ( - self[tmp1] / self[tmp1_var] - + self[tmp2] / self[tmp2_var]) * self[tmpw_var_store] + self[tmp1] / self[tmp1_var] + self[tmp2] / self[tmp2_var] + ) * self[tmpw_var_store] pass def inverse_variance_weighted_mean_array( - self, - tmp_label='tmpf', - tmp_var_label='tmpf_mc_var', - tmpw_store='tmpw', - tmpw_var_store='tmpw_var', - dim='time'): + self, + tmp_label="tmpf", + tmp_var_label="tmpf_mc_var", + tmpw_store="tmpw", + tmpw_var_store="tmpw_var", + dim="time", + ): """ Calculates the weighted average across a dimension. @@ -1569,8 +1606,9 @@ def inverse_variance_weighted_mean_array( """ self[tmpw_var_store] = 1 / (1 / self[tmp_var_label]).sum(dim=dim) - self[tmpw_store] = (self[tmp_label] / self[tmp_var_label]).sum( - dim=dim) / (1 / self[tmp_var_label]).sum(dim=dim) + self[tmpw_store] = (self[tmp_label] / self[tmp_var_label]).sum(dim=dim) / ( + 1 / self[tmp_var_label] + ).sum(dim=dim) pass @@ -1598,18 +1636,17 @@ def in_confidence_interval(self, ci_label, conf_ints=None, sections=None): if conf_ints is None: conf_ints = self[ci_label].values - assert len(conf_ints) == 2, 'Please define conf_ints' + assert len(conf_ints) == 2, "Please define conf_ints" - tmp_dn = self[ci_label].sel(CI=conf_ints[0], method='nearest') - tmp_up = self[ci_label].sel(CI=conf_ints[1], method='nearest') + tmp_dn = self[ci_label].sel(CI=conf_ints[0], method="nearest") + tmp_up = self[ci_label].sel(CI=conf_ints[1], method="nearest") ref = self.ufunc_per_section( - sections=sections, - label='st', - ref_temp_broadcasted=True, - calc_per='all') + sections=sections, label="st", ref_temp_broadcasted=True, calc_per="all" + ) ix_resid = self.ufunc_per_section( - sections=sections, x_indices=True, calc_per='all') + sections=sections, x_indices=True, calc_per="all" + ) ref_sorted = np.full(shape=tmp_dn.shape, fill_value=np.nan) ref_sorted[ix_resid, :] = ref ref_da = xr.DataArray(data=ref_sorted, coords=tmp_dn.coords) @@ -1635,40 +1672,45 @@ def set_trans_att(self, trans_att=None, **kwargs): """ - if 'transient_att_x' in kwargs: + if "transient_att_x" in kwargs: warnings.warn( "transient_att_x argument will be deprecated in version 2, " - "use trans_att", DeprecationWarning) - trans_att = kwargs['transient_att_x'] + "use trans_att", + DeprecationWarning, + ) + trans_att = kwargs["transient_att_x"] - if 'transient_asym_att_x' in kwargs: + if "transient_asym_att_x" in kwargs: warnings.warn( "transient_asym_att_x arg will be deprecated in version 2, " - "use trans_att", DeprecationWarning) - trans_att = kwargs['transient_asym_att_x'] + "use trans_att", + DeprecationWarning, + ) + trans_att = kwargs["transient_asym_att_x"] - if 'trans_att' in self.coords and self.trans_att.size > 0: + if "trans_att" in self.coords and self.trans_att.size > 0: raise_warning = 0 del_keys = [] for k, v in self.data_vars.items(): - if 'trans_att' in v.dims: + if 
"trans_att" in v.dims: del_keys.append(k) for del_key in del_keys: del self[del_key] if raise_warning: - m = 'trans_att was set before. All `data_vars` that make use ' \ - 'of the `trans_att` coordinates were deleted: ' + \ - str(del_keys) + m = ( + "trans_att was set before. All `data_vars` that make use " + "of the `trans_att` coordinates were deleted: " + str(del_keys) + ) warnings.warn(m) if trans_att is None: trans_att = [] - self['trans_att'] = trans_att - self.trans_att.attrs = dim_attrs['trans_att'] + self["trans_att"] = trans_att + self.trans_att.attrs = dim_attrs["trans_att"] pass def check_reference_section_values(self): @@ -1687,21 +1729,25 @@ def check_reference_section_values(self): for key in self.sections.keys(): if not np.issubdtype(self[key].dtype, np.floating): raise ValueError( - 'Data of reference temperature "' + key + 'Data of reference temperature "' + + key + '" does not have a float data type. Please ensure that ' - 'the data is of a valid type (e.g. np.float32)') + "the data is of a valid type (e.g. np.float32)" + ) if np.any(~np.isfinite(self[key].values)): raise ValueError( - 'NaN/inf value(s) found in reference temperature "' + key - + '"') + 'NaN/inf value(s) found in reference temperature "' + key + '"' + ) - if self[key].dims != ('time',): + if self[key].dims != ("time",): raise ValueError( - 'Time dimension of the reference temperature timeseries ' - + key + 'is not the same as the time dimension' - + ' of the Stokes measurement. See examples/notebooks/09' - + 'Import_timeseries.ipynb for more info') + "Time dimension of the reference temperature timeseries " + + key + + "is not the same as the time dimension" + + " of the Stokes measurement. See examples/notebooks/09" + + "Import_timeseries.ipynb for more info" + ) def calibration_single_ended( self, @@ -1718,7 +1764,7 @@ def calibration_single_ended( fix_gamma=None, fix_dalpha=None, fix_alpha=None, - **kwargs + **kwargs, ): """ Calibrate the Stokes (`ds.st`) and anti-Stokes (`ds.ast`) data to @@ -1865,7 +1911,7 @@ def calibration_single_ended( self.check_reference_section_values() nx = self.x.size - nt = self['time'].size + nt = self["time"].size nta = self.trans_att.size assert self.st.dims[0] == "x", "Stokes are transposed" @@ -1910,10 +1956,12 @@ def calibration_single_ended( if fix_alpha: assert not fix_dalpha, "Use either `fix_dalpha` or `fix_alpha`" assert fix_alpha[0].size == self.x.size, ( - "fix_alpha also needs to be defined outside the reference " "sections" + "fix_alpha also needs to be defined outside the reference " + "sections" ) assert fix_alpha[1].size == self.x.size, ( - "fix_alpha also needs to be defined outside the reference " "sections" + "fix_alpha also needs to be defined outside the reference " + "sections" ) p_val = split["p0_est_alpha"].copy() @@ -2002,14 +2050,19 @@ def calibration_single_ended( if solver == "sparse": out = wls_sparse( - X[:, ip_use], y, w=w, x0=p_val[ip_use], calc_cov=calc_cov, verbose=False + X[:, ip_use], + y, + w=w, + x0=p_val[ip_use], + calc_cov=calc_cov, + verbose=False, ) elif solver == "stats": out = wls_stats(X[:, ip_use], y, w=w, calc_cov=calc_cov, verbose=False) else: - assert 0, 'Unknown solver' + assert 0, "Unknown solver" p_val[ip_use] = out[0] p_var[ip_use] = out[1] @@ -2030,9 +2083,13 @@ def calibration_single_ended( # all below require the following solution sizes if fix_alpha: - ip = ParameterIndexSingleEnded(nt, nx, nta, includes_alpha=True, includes_dalpha=False) + ip = ParameterIndexSingleEnded( + nt, nx, nta, includes_alpha=True, 
includes_dalpha=False + ) else: - ip = ParameterIndexSingleEnded(nt, nx, nta, includes_alpha=False, includes_dalpha=True) + ip = ParameterIndexSingleEnded( + nt, nx, nta, includes_alpha=False, includes_dalpha=True + ) # npar = 1 + 1 + nt + nta * nt assert p_val.size == ip.npar @@ -2045,19 +2102,19 @@ def calibration_single_ended( if nta > 0: self["talpha_fw"] = ( - ("trans_att", 'time'), + ("trans_att", "time"), ip.get_taf_pars(p_val), ) self["talpha_fw_var"] = ( - ("trans_att", 'time'), + ("trans_att", "time"), ip.get_taf_pars(p_var), ) else: - self["talpha_fw"] = (("trans_att", 'time'), np.zeros((0, nt))) - self["talpha_fw_var"] = (("trans_att", 'time'), np.zeros((0, nt))) + self["talpha_fw"] = (("trans_att", "time"), np.zeros((0, nt))) + self["talpha_fw_var"] = (("trans_att", "time"), np.zeros((0, nt))) - self["c"] = (('time',), p_val[ip.c]) - self["c_var"] = (('time',), p_var[ip.c]) + self["c"] = (("time",), p_val[ip.c]) + self["c_var"] = (("time",), p_var[ip.c]) if fix_alpha: self["alpha"] = (("x",), fix_alpha[0]) @@ -2068,18 +2125,16 @@ def calibration_single_ended( self["dalpha_var"] = (tuple(), p_var[ip.dalpha].item()) self["alpha"] = self.dalpha * self.x - self["alpha_var"] = ( - self["dalpha_var"] * self.x ** 2 - ) + self["alpha_var"] = self["dalpha_var"] * self.x**2 self["talpha_fw_full"] = ( - ("x", 'time'), + ("x", "time"), ip.get_taf_values( pval=p_val, x=self.x.values, trans_att=self.trans_att.values, axis="" ), ) self["talpha_fw_full_var"] = ( - ("x", 'time'), + ("x", "time"), ip.get_taf_values( pval=p_var, x=self.x.values, trans_att=self.trans_att.values, axis="" ), @@ -2094,12 +2149,12 @@ def calibration_single_ended( # tmpf_var deriv_dict = dict( T_gamma_fw=tmpf / self.gamma, - T_st_fw=-(tmpf ** 2) / (self.gamma * self.st), - T_ast_fw=tmpf ** 2 / (self.gamma * self.ast), - T_c_fw=-(tmpf ** 2) / self.gamma, - T_dalpha_fw=-self.x * (tmpf ** 2) / self.gamma, - T_alpha_fw=-(tmpf ** 2) / self.gamma, - T_ta_fw=-(tmpf ** 2) / self.gamma, + T_st_fw=-(tmpf**2) / (self.gamma * self.st), + T_ast_fw=tmpf**2 / (self.gamma * self.ast), + T_c_fw=-(tmpf**2) / self.gamma, + T_dalpha_fw=-self.x * (tmpf**2) / self.gamma, + T_alpha_fw=-(tmpf**2) / self.gamma, + T_ta_fw=-(tmpf**2) / self.gamma, ) deriv_ds = xr.Dataset(deriv_dict) # self["deriv"] = deriv_ds.to_array(dim="com2") @@ -2118,12 +2173,14 @@ def calibration_single_ended( axis="time", ) var_fw_dict = dict( - dT_dst=deriv_ds.T_st_fw ** 2 * parse_st_var(self, st_var, st_label="st"), - dT_dast=deriv_ds.T_ast_fw ** 2 * parse_st_var(self, ast_var, st_label="ast"), - dT_gamma=deriv_ds.T_gamma_fw ** 2 * self["gamma_var"], - dT_dc=deriv_ds.T_c_fw ** 2 * self["c_var"], - dT_ddalpha=deriv_ds.T_alpha_fw ** 2 * self["alpha_var"], # same as dT_dalpha - dT_dta=deriv_ds.T_ta_fw ** 2 * self["talpha_fw_full_var"], + dT_dst=deriv_ds.T_st_fw**2 * parse_st_var(self, st_var, st_label="st"), + dT_dast=deriv_ds.T_ast_fw**2 + * parse_st_var(self, ast_var, st_label="ast"), + dT_gamma=deriv_ds.T_gamma_fw**2 * self["gamma_var"], + dT_dc=deriv_ds.T_c_fw**2 * self["c_var"], + dT_ddalpha=deriv_ds.T_alpha_fw**2 + * self["alpha_var"], # same as dT_dalpha + dT_dta=deriv_ds.T_ta_fw**2 * self["talpha_fw_full_var"], dgamma_dc=(2 * deriv_ds.T_gamma_fw * deriv_ds.T_c_fw * sigma2_gamma_c), dta_dgamma=(2 * deriv_ds.T_ta_fw * deriv_ds.T_gamma_fw * sigma2_tafw_gamma), dta_dc=(2 * deriv_ds.T_ta_fw * deriv_ds.T_c_fw * sigma2_tafw_c), @@ -2142,7 +2199,10 @@ def calibration_single_ended( var_fw_dict.update( dict( dgamma_ddalpha=( - 2 * deriv_ds.T_gamma_fw * deriv_ds.T_dalpha_fw 
* sigma2_gamma_dalpha + 2 + * deriv_ds.T_gamma_fw + * deriv_ds.T_dalpha_fw + * sigma2_gamma_dalpha ), ddalpha_dc=( 2 * deriv_ds.T_dalpha_fw * deriv_ds.T_c_fw * sigma2_dalpha_c @@ -2406,7 +2466,7 @@ def calibration_double_ended( self.check_reference_section_values() nx = self.x.size - nt = self['time'].size + nt = self["time"].size nta = self.trans_att.size ix_sec = self.ufunc_per_section(x_indices=True, calc_per="all") nx_sec = ix_sec.size @@ -2512,11 +2572,13 @@ def calibration_double_ended( n_E_in_cal = split["ix_from_cal_match_to_glob"].size p0_est = np.concatenate( ( - split["p0_est"][1: 1 + 2 * nt], - split["p0_est"][1 + 2 * nt + n_E_in_cal:], + split["p0_est"][1 : 1 + 2 * nt], + split["p0_est"][1 + 2 * nt + n_E_in_cal :], ) ) - X_E1 = sp.csr_matrix(([], ([], [])), shape=(nt * nx_sec, self.x.size)) + X_E1 = sp.csr_matrix( + ([], ([], [])), shape=(nt * nx_sec, self.x.size) + ) X_E1[:, ix_sec[1:]] = split["E"] X_E2 = X_E1[:, split["ix_from_cal_match_to_glob"]] X_E = sp.vstack( @@ -2545,8 +2607,12 @@ def calibration_double_ended( X = sp.vstack( ( - sp.hstack((-split["Z_D"], split["Zero_d"], split["Z_TA_fw"])), - sp.hstack((split["Zero_d"], -split["Z_D"], split["Z_TA_bw"])), + sp.hstack( + (-split["Z_D"], split["Zero_d"], split["Z_TA_fw"]) + ), + sp.hstack( + (split["Zero_d"], -split["Z_D"], split["Z_TA_bw"]) + ), sp.hstack((split["Zero_d_eq12"], split["Z_TA_eq1"])), sp.hstack((split["Zero_d_eq12"], split["Z_TA_eq2"])), sp.hstack((split["d_no_cal"], split["Z_TA_eq3"])), @@ -2586,14 +2652,20 @@ def calibration_double_ended( # X_gamma X_E = sp.vstack((-split["E"], split["E"])) X_gamma = ( - sp.vstack((split["Z_gamma"], split["Z_gamma"])).toarray().flatten() + sp.vstack((split["Z_gamma"], split["Z_gamma"])) + .toarray() + .flatten() ) # Use only the remaining coefficients # Stack all X's X = sp.vstack( ( - sp.hstack((-split["Z_D"], split["Zero_d"], split["Z_TA_fw"])), - sp.hstack((split["Zero_d"], -split["Z_D"], split["Z_TA_bw"])), + sp.hstack( + (-split["Z_D"], split["Zero_d"], split["Z_TA_fw"]) + ), + sp.hstack( + (split["Zero_d"], -split["Z_D"], split["Z_TA_bw"]) + ), ) ) @@ -2606,7 +2678,9 @@ def calibration_double_ended( # of the observations w_ = np.concatenate((split["w_F"], split["w_B"])) w = 1 / ( - 1 / w_ + X_E.dot(fix_alpha[1][ix_sec[1:]]) + fix_gamma[1] * X_gamma + 1 / w_ + + X_E.dot(fix_alpha[1][ix_sec[1:]]) + + fix_gamma[1] * X_gamma ) # [C_1, C_2, .., C_nt, TA_fw_a_1, TA_fw_a_2, TA_fw_a_nt, @@ -2614,8 +2688,8 @@ def calibration_double_ended( # TA for connector b. 
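# Aside, not part of this diff: a hedged sketch of how a solution vector with
# the layout sketched in the comment above would be unpacked. nt_demo and
# nta_demo are hypothetical, and the E/alpha block that the real solver also
# carries is omitted here for brevity:
import numpy as np

nt_demo, nta_demo = 3, 1
out_demo = np.arange(2 * nt_demo + 2 * nta_demo * nt_demo, dtype=float)
d_fw = out_demo[:nt_demo]             # forward calibration params, per time
d_bw = out_demo[nt_demo:2 * nt_demo]  # backward calibration params, per time
ta_demo = out_demo[2 * nt_demo:].reshape((nt_demo, 2, nta_demo), order="F")
# ta_demo[:, 0, :] would then hold the forward TA terms and ta_demo[:, 1, :]
# the backward ones, mirroring the reshape calls further down this function.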
p0_est = np.concatenate( ( - split["p0_est"][1: 1 + 2 * nt], - split["p0_est"][1 + 2 * nt + nx_sec - 1:], + split["p0_est"][1 : 1 + 2 * nt], + split["p0_est"][1 + 2 * nt + nx_sec - 1 :], ) ) @@ -2627,10 +2701,10 @@ def calibration_double_ended( # Added fixed gamma and its variance to the solution p_val = np.concatenate( - ([fix_gamma[0]], out[0][: 2 * nt], fix_alpha[0], out[0][2 * nt:]) + ([fix_gamma[0]], out[0][: 2 * nt], fix_alpha[0], out[0][2 * nt :]) ) p_var = np.concatenate( - ([fix_gamma[1]], out[1][: 2 * nt], fix_alpha[1], out[1][2 * nt:]) + ([fix_gamma[1]], out[1][: 2 * nt], fix_alpha[1], out[1][2 * nt :]) ) # whether it returns a copy or a view depends on what @@ -2649,7 +2723,9 @@ def calibration_double_ended( if np.any(matching_indices): # n_E_in_cal = split['ix_from_cal_match_to_glob'].size p0_est = split["p0_est"][1:] - X_E1 = sp.csr_matrix(([], ([], [])), shape=(nt * nx_sec, self.x.size)) + X_E1 = sp.csr_matrix( + ([], ([], [])), shape=(nt * nx_sec, self.x.size) + ) from_i = ix_sec[1:] X_E1[:, from_i] = split["E"] X_E2 = X_E1[:, split["ix_from_cal_match_to_glob"]] @@ -2729,7 +2805,9 @@ def calibration_double_ended( else: X_gamma = ( - sp.vstack((split["Z_gamma"], split["Z_gamma"])).toarray().flatten() + sp.vstack((split["Z_gamma"], split["Z_gamma"])) + .toarray() + .flatten() ) # Use only the remaining coefficients X = sp.vstack( @@ -2775,20 +2853,24 @@ def calibration_double_ended( ds_sub = self[["st", "ast", "rst", "rast", "trans_att"]] ds_sub["df"] = (("time",), out[0][:nt]) ds_sub["df_var"] = (("time",), out[1][:nt]) - ds_sub["db"] = (("time",), out[0][nt: 2 * nt]) - ds_sub["db_var"] = (("time",), out[1][nt: 2 * nt]) + ds_sub["db"] = (("time",), out[0][nt : 2 * nt]) + ds_sub["db_var"] = (("time",), out[1][nt : 2 * nt]) if nta > 0: if np.any(matching_indices): n_E_in_cal = split["ix_from_cal_match_to_glob"].size - ta = out[0][2 * nt + n_E_in_cal:].reshape((nt, 2, nta), order="F") - ta_var = out[1][2 * nt + n_E_in_cal:].reshape( + ta = out[0][2 * nt + n_E_in_cal :].reshape( + (nt, 2, nta), order="F" + ) + ta_var = out[1][2 * nt + n_E_in_cal :].reshape( (nt, 2, nta), order="F" ) else: - ta = out[0][2 * nt + nx_sec - 1:].reshape((nt, 2, nta), order="F") - ta_var = out[1][2 * nt + nx_sec - 1:].reshape( + ta = out[0][2 * nt + nx_sec - 1 :].reshape( + (nt, 2, nta), order="F" + ) + ta_var = out[1][2 * nt + nx_sec - 1 :].reshape( (nt, 2, nta), order="F" ) @@ -2824,20 +2906,24 @@ def calibration_double_ended( [fix_gamma[0]], out[0][: 2 * nt], E_all_exact, - out[0][2 * nt + nx_sec - 1:], + out[0][2 * nt + nx_sec - 1 :], ) ) - p_val[1 + 2 * nt + ix_sec[1:]] = out[0][2 * nt: 2 * nt + nx_sec - 1] + p_val[1 + 2 * nt + ix_sec[1:]] = out[0][ + 2 * nt : 2 * nt + nx_sec - 1 + ] p_val[1 + 2 * nt + ix_sec[0]] = 0.0 p_var = np.concatenate( ( [fix_gamma[1]], out[1][: 2 * nt], E_all_var_exact, - out[1][2 * nt + nx_sec - 1:], + out[1][2 * nt + nx_sec - 1 :], ) ) - p_var[1 + 2 * nt + ix_sec[1:]] = out[1][2 * nt: 2 * nt + nx_sec - 1] + p_var[1 + 2 * nt + ix_sec[1:]] = out[1][ + 2 * nt : 2 * nt + nx_sec - 1 + ] else: n_E_in_cal = split["ix_from_cal_match_to_glob"].size @@ -2848,22 +2934,24 @@ def calibration_double_ended( [fix_gamma[0]], out[0][: 2 * nt], E_all_exact, - out[0][2 * nt + n_E_in_cal:], + out[0][2 * nt + n_E_in_cal :], ) ) - p_val[1 + 2 * nt + split["ix_from_cal_match_to_glob"]] = \ - out[0][2 * nt: 2 * nt + n_E_in_cal] + p_val[1 + 2 * nt + split["ix_from_cal_match_to_glob"]] = out[0][ + 2 * nt : 2 * nt + n_E_in_cal + ] p_val[1 + 2 * nt + ix_sec[0]] = 0.0 p_var = np.concatenate( ( 
[fix_gamma[1]], out[1][: 2 * nt], E_all_var_exact, - out[1][2 * nt + n_E_in_cal:], + out[1][2 * nt + n_E_in_cal :], ) ) - p_var[1 + 2 * nt + split["ix_from_cal_match_to_glob"]] = \ - out[1][2 * nt: 2 * nt + n_E_in_cal] + p_var[1 + 2 * nt + split["ix_from_cal_match_to_glob"]] = out[1][ + 2 * nt : 2 * nt + n_E_in_cal + ] p_cov = np.diag(p_var).copy() @@ -2939,7 +3027,7 @@ def calibration_double_ended( p0_est = np.concatenate( ( split["p0_est"][: 1 + 2 * nt], - split["p0_est"][1 + 2 * nt + nx_sec - 1:], + split["p0_est"][1 + 2 * nt + nx_sec - 1 :], ) ) @@ -2948,10 +3036,12 @@ def calibration_double_ended( p0_est = np.concatenate( ( split["p0_est"][: 1 + 2 * nt], - split["p0_est"][1 + 2 * nt + n_E_in_cal:], + split["p0_est"][1 + 2 * nt + n_E_in_cal :], ) ) - X_E1 = sp.csr_matrix(([], ([], [])), shape=(nt * nx_sec, self.x.size)) + X_E1 = sp.csr_matrix( + ([], ([], [])), shape=(nt * nx_sec, self.x.size) + ) X_E1[:, ix_sec[1:]] = split["E"] X_E2 = X_E1[:, split["ix_from_cal_match_to_glob"]] X_E = sp.vstack( @@ -3029,7 +3119,8 @@ def calibration_double_ended( ) ) w = 1 / ( - 1 / w_ + X_E.dot(fix_alpha[1][split["ix_from_cal_match_to_glob"]]) + 1 / w_ + + X_E.dot(fix_alpha[1][split["ix_from_cal_match_to_glob"]]) ) if solver == "sparse": @@ -3040,10 +3131,10 @@ def calibration_double_ended( # Added fixed gamma and its variance to the solution p_val = np.concatenate( - (out[0][: 1 + 2 * nt], fix_alpha[0], out[0][1 + 2 * nt:]) + (out[0][: 1 + 2 * nt], fix_alpha[0], out[0][1 + 2 * nt :]) ) p_var = np.concatenate( - (out[1][: 1 + 2 * nt], fix_alpha[1], out[1][1 + 2 * nt:]) + (out[1][: 1 + 2 * nt], fix_alpha[1], out[1][1 + 2 * nt :]) ) p_cov = np.diag(p_var).copy() @@ -3082,40 +3173,40 @@ def calibration_double_ended( # save estimates and variances to datastore, skip covariances self["gamma"] = (tuple(), p_val[ip.gamma].item()) self["alpha"] = (("x",), p_val[ip.alpha]) - self["df"] = (('time',), p_val[ip.df]) - self["db"] = (('time',), p_val[ip.db]) + self["df"] = (("time",), p_val[ip.df]) + self["db"] = (("time",), p_val[ip.db]) if nta: self["talpha_fw"] = ( - ('time', "trans_att"), + ("time", "trans_att"), p_val[ip.taf].reshape((nt, nta), order="C"), ) self["talpha_bw"] = ( - ('time', "trans_att"), + ("time", "trans_att"), p_val[ip.tab].reshape((nt, nta), order="C"), ) self["talpha_fw_var"] = ( - ('time', "trans_att"), + ("time", "trans_att"), p_var[ip.taf].reshape((nt, nta), order="C"), ) self["talpha_bw_var"] = ( - ('time', "trans_att"), + ("time", "trans_att"), p_var[ip.tab].reshape((nt, nta), order="C"), ) else: - self["talpha_fw"] = (('time', "trans_att"), np.zeros((nt, 0))) - self["talpha_bw"] = (('time', "trans_att"), np.zeros((nt, 0))) - self["talpha_fw_var"] = (('time', "trans_att"), np.zeros((nt, 0))) - self["talpha_bw_var"] = (('time', "trans_att"), np.zeros((nt, 0))) + self["talpha_fw"] = (("time", "trans_att"), np.zeros((nt, 0))) + self["talpha_bw"] = (("time", "trans_att"), np.zeros((nt, 0))) + self["talpha_fw_var"] = (("time", "trans_att"), np.zeros((nt, 0))) + self["talpha_bw_var"] = (("time", "trans_att"), np.zeros((nt, 0))) self["talpha_fw_full"] = ( - ("x", 'time'), + ("x", "time"), ip.get_taf_values( pval=p_val, x=self.x.values, trans_att=self.trans_att.values, axis="" ), ) self["talpha_bw_full"] = ( - ("x", 'time'), + ("x", "time"), ip.get_tab_values( pval=p_val, x=self.x.values, trans_att=self.trans_att.values, axis="" ), @@ -3145,16 +3236,16 @@ def calibration_double_ended( self["gamma_var"] = (tuple(), p_var[ip.gamma].item()) self["alpha_var"] = (("x",), p_var[ip.alpha]) - 
self["df_var"] = (('time',), p_var[ip.df]) - self["db_var"] = (('time',), p_var[ip.db]) + self["df_var"] = (("time",), p_var[ip.df]) + self["db_var"] = (("time",), p_var[ip.db]) self["talpha_fw_full_var"] = ( - ("x", 'time'), + ("x", "time"), ip.get_taf_values( pval=p_var, x=self.x.values, trans_att=self.trans_att.values, axis="" ), ) self["talpha_bw_full_var"] = ( - ("x", 'time'), + ("x", "time"), ip.get_tab_values( pval=p_var, x=self.x.values, trans_att=self.trans_att.values, axis="" ), @@ -3222,46 +3313,47 @@ def calibration_double_ended( deriv_dict = dict( T_gamma_fw=tmpf / self.gamma, - T_st_fw=-(tmpf ** 2) / (self.gamma * self.st), - T_ast_fw=tmpf ** 2 / (self.gamma * self.ast), - T_df_fw=-(tmpf ** 2) / self.gamma, - T_alpha_fw=-(tmpf ** 2) / self.gamma, - T_ta_fw=-(tmpf ** 2) / self.gamma, + T_st_fw=-(tmpf**2) / (self.gamma * self.st), + T_ast_fw=tmpf**2 / (self.gamma * self.ast), + T_df_fw=-(tmpf**2) / self.gamma, + T_alpha_fw=-(tmpf**2) / self.gamma, + T_ta_fw=-(tmpf**2) / self.gamma, T_gamma_bw=tmpb / self.gamma, - T_rst_bw=-(tmpb ** 2) / (self.gamma * self.rst), - T_rast_bw=tmpb ** 2 / (self.gamma * self.rast), - T_db_bw=-(tmpb ** 2) / self.gamma, - T_alpha_bw=tmpb ** 2 / self.gamma, - T_ta_bw=-(tmpb ** 2) / self.gamma, + T_rst_bw=-(tmpb**2) / (self.gamma * self.rst), + T_rast_bw=tmpb**2 / (self.gamma * self.rast), + T_db_bw=-(tmpb**2) / self.gamma, + T_alpha_bw=tmpb**2 / self.gamma, + T_ta_bw=-(tmpb**2) / self.gamma, ) deriv_ds = xr.Dataset(deriv_dict) self["deriv"] = deriv_ds.to_array(dim="com2") var_fw_dict = dict( - dT_dst=deriv_ds.T_st_fw ** 2 * parse_st_var(self, st_var, st_label="st"), - dT_dast=deriv_ds.T_ast_fw ** 2 * parse_st_var(self, ast_var, st_label="ast"), - dT_gamma=deriv_ds.T_gamma_fw ** 2 * self["gamma_var"], - dT_ddf=deriv_ds.T_df_fw ** 2 * self["df_var"], - dT_dalpha=deriv_ds.T_alpha_fw ** 2 * self["alpha_var"], - dT_dta=deriv_ds.T_ta_fw ** 2 * self["talpha_fw_full_var"], + dT_dst=deriv_ds.T_st_fw**2 * parse_st_var(self, st_var, st_label="st"), + dT_dast=deriv_ds.T_ast_fw**2 + * parse_st_var(self, ast_var, st_label="ast"), + dT_gamma=deriv_ds.T_gamma_fw**2 * self["gamma_var"], + dT_ddf=deriv_ds.T_df_fw**2 * self["df_var"], + dT_dalpha=deriv_ds.T_alpha_fw**2 * self["alpha_var"], + dT_dta=deriv_ds.T_ta_fw**2 * self["talpha_fw_full_var"], dgamma_ddf=(2 * deriv_ds.T_gamma_fw * deriv_ds.T_df_fw * sigma2_gamma_df), dgamma_dalpha=( 2 * deriv_ds.T_gamma_fw * deriv_ds.T_alpha_fw * sigma2_gamma_alpha ), - dalpha_ddf=( - 2 * deriv_ds.T_alpha_fw * deriv_ds.T_df_fw * sigma2_alpha_df - ), + dalpha_ddf=(2 * deriv_ds.T_alpha_fw * deriv_ds.T_df_fw * sigma2_alpha_df), dta_dgamma=(2 * deriv_ds.T_ta_fw * deriv_ds.T_gamma_fw * sigma2_tafw_gamma), dta_ddf=(2 * deriv_ds.T_ta_fw * deriv_ds.T_df_fw * sigma2_tafw_df), dta_dalpha=(2 * deriv_ds.T_ta_fw * deriv_ds.T_alpha_fw * sigma2_tafw_alpha), ) var_bw_dict = dict( - dT_drst=deriv_ds.T_rst_bw ** 2 * parse_st_var(self, rst_var, st_label="rst"), - dT_drast=deriv_ds.T_rast_bw ** 2 * parse_st_var(self, rast_var, st_label="rast"), - dT_gamma=deriv_ds.T_gamma_bw ** 2 * self["gamma_var"], - dT_ddb=deriv_ds.T_db_bw ** 2 * self["db_var"], - dT_dalpha=deriv_ds.T_alpha_bw ** 2 * self["alpha_var"], - dT_dta=deriv_ds.T_ta_bw ** 2 * self["talpha_bw_full_var"], + dT_drst=deriv_ds.T_rst_bw**2 + * parse_st_var(self, rst_var, st_label="rst"), + dT_drast=deriv_ds.T_rast_bw**2 + * parse_st_var(self, rast_var, st_label="rast"), + dT_gamma=deriv_ds.T_gamma_bw**2 * self["gamma_var"], + dT_ddb=deriv_ds.T_db_bw**2 * self["db_var"], + 
dT_dalpha=deriv_ds.T_alpha_bw**2 * self["alpha_var"], + dT_dta=deriv_ds.T_ta_bw**2 * self["talpha_bw_full_var"], dgamma_ddb=(2 * deriv_ds.T_gamma_bw * deriv_ds.T_db_bw * sigma2_gamma_db), dgamma_dalpha=( 2 * deriv_ds.T_gamma_bw * deriv_ds.T_alpha_bw * sigma2_gamma_alpha @@ -3279,45 +3371,52 @@ def calibration_double_ended( self["tmpb_var"] = self["var_bw_da"].sum(dim="comp_bw") # First estimate of tmpw_var - self["tmpw_var" + "_approx"] = 1 / ( - 1 / self["tmpf_var"] + 1 / self["tmpb_var"] - ) - self["tmpw"] = ((tmpf / self["tmpf_var"] + - tmpb / self["tmpb_var"] - ) * self["tmpw_var" + "_approx"]) - 273.15 + self["tmpw_var" + "_approx"] = 1 / (1 / self["tmpf_var"] + 1 / self["tmpb_var"]) + self["tmpw"] = ( + (tmpf / self["tmpf_var"] + tmpb / self["tmpb_var"]) + * self["tmpw_var" + "_approx"] + ) - 273.15 weightsf = self["tmpw_var" + "_approx"] / self["tmpf_var"] weightsb = self["tmpw_var" + "_approx"] / self["tmpb_var"] deriv_dict2 = dict( - T_gamma_w=weightsf * deriv_dict['T_gamma_fw'] + weightsb * deriv_dict['T_gamma_bw'], - T_st_w=weightsf * deriv_dict['T_st_fw'], - T_ast_w=weightsf * deriv_dict['T_ast_fw'], - T_rst_w=weightsb * deriv_dict['T_rst_bw'], - T_rast_w=weightsb * deriv_dict['T_rast_bw'], - T_df_w=weightsf * deriv_dict['T_df_fw'], - T_db_w=weightsb * deriv_dict['T_db_bw'], - T_alpha_w=weightsf * deriv_dict['T_alpha_fw'] + weightsb * deriv_dict['T_alpha_bw'], - T_taf_w=weightsf * deriv_dict['T_ta_fw'], - T_tab_w=weightsb * deriv_dict['T_ta_bw'], + T_gamma_w=weightsf * deriv_dict["T_gamma_fw"] + + weightsb * deriv_dict["T_gamma_bw"], + T_st_w=weightsf * deriv_dict["T_st_fw"], + T_ast_w=weightsf * deriv_dict["T_ast_fw"], + T_rst_w=weightsb * deriv_dict["T_rst_bw"], + T_rast_w=weightsb * deriv_dict["T_rast_bw"], + T_df_w=weightsf * deriv_dict["T_df_fw"], + T_db_w=weightsb * deriv_dict["T_db_bw"], + T_alpha_w=weightsf * deriv_dict["T_alpha_fw"] + + weightsb * deriv_dict["T_alpha_bw"], + T_taf_w=weightsf * deriv_dict["T_ta_fw"], + T_tab_w=weightsb * deriv_dict["T_ta_bw"], ) deriv_ds2 = xr.Dataset(deriv_dict2) # TODO: sigma2_tafw_tabw var_w_dict = dict( - dT_dst=deriv_ds2.T_st_w ** 2 * parse_st_var(self, st_var, st_label="st"), - dT_dast=deriv_ds2.T_ast_w ** 2 * parse_st_var(self, ast_var, st_label="ast"), - dT_drst=deriv_ds2.T_rst_w ** 2 * parse_st_var(self, rst_var, st_label="rst"), - dT_drast=deriv_ds2.T_rast_w ** 2 * parse_st_var(self, rast_var, st_label="rast"), - dT_gamma=deriv_ds2.T_gamma_w ** 2 * self["gamma_var"], - dT_ddf=deriv_ds2.T_df_w ** 2 * self["df_var"], - dT_ddb=deriv_ds2.T_db_w ** 2 * self["db_var"], - dT_dalpha=deriv_ds2.T_alpha_w ** 2 * self["alpha_var"], - dT_dtaf=deriv_ds2.T_taf_w ** 2 * self["talpha_fw_full_var"], - dT_dtab=deriv_ds2.T_tab_w ** 2 * self["talpha_bw_full_var"], + dT_dst=deriv_ds2.T_st_w**2 * parse_st_var(self, st_var, st_label="st"), + dT_dast=deriv_ds2.T_ast_w**2 + * parse_st_var(self, ast_var, st_label="ast"), + dT_drst=deriv_ds2.T_rst_w**2 + * parse_st_var(self, rst_var, st_label="rst"), + dT_drast=deriv_ds2.T_rast_w**2 + * parse_st_var(self, rast_var, st_label="rast"), + dT_gamma=deriv_ds2.T_gamma_w**2 * self["gamma_var"], + dT_ddf=deriv_ds2.T_df_w**2 * self["df_var"], + dT_ddb=deriv_ds2.T_db_w**2 * self["db_var"], + dT_dalpha=deriv_ds2.T_alpha_w**2 * self["alpha_var"], + dT_dtaf=deriv_ds2.T_taf_w**2 * self["talpha_fw_full_var"], + dT_dtab=deriv_ds2.T_tab_w**2 * self["talpha_bw_full_var"], dgamma_ddf=2 * deriv_ds2.T_gamma_w * deriv_ds2.T_df_w * sigma2_gamma_df, dgamma_ddb=2 * deriv_ds2.T_gamma_w * deriv_ds2.T_db_w * sigma2_gamma_db, - 
dgamma_dalpha=2 * deriv_ds2.T_gamma_w * deriv_ds2.T_alpha_w * sigma2_gamma_alpha, + dgamma_dalpha=2 + * deriv_ds2.T_gamma_w + * deriv_ds2.T_alpha_w + * sigma2_gamma_alpha, dgamma_dtaf=2 * deriv_ds2.T_gamma_w * deriv_ds2.T_taf_w * sigma2_tafw_gamma, dgamma_dtab=2 * deriv_ds2.T_gamma_w * deriv_ds2.T_tab_w * sigma2_tabw_gamma, ddf_ddb=2 * deriv_ds2.T_df_w * deriv_ds2.T_db_w * sigma2_df_db, @@ -3347,15 +3446,9 @@ def calibration_double_ended( self["tmpw"].attrs.update(_dim_attrs[("tmpw",)]) self["tmpf_var"].attrs.update(_dim_attrs[("tmpf_var",)]) self["tmpb_var"].attrs.update(_dim_attrs[("tmpb_var",)]) - self["tmpw_var"].attrs.update( - _dim_attrs[("tmpw_var",)] - ) - self["tmpw_var" + "_approx"].attrs.update( - _dim_attrs[("tmpw_var_approx",)] - ) - self["tmpw_var" + "_lower"].attrs.update( - _dim_attrs[("tmpw_var_lower",)] - ) + self["tmpw_var"].attrs.update(_dim_attrs[("tmpw_var",)]) + self["tmpw_var" + "_approx"].attrs.update(_dim_attrs[("tmpw_var_approx",)]) + self["tmpw_var" + "_lower"].attrs.update(_dim_attrs[("tmpw_var_lower",)]) drop_vars = [ k for k, v in self.items() if {"params1", "params2"}.intersection(v.dims) @@ -3370,17 +3463,18 @@ def calibration_double_ended( pass def conf_int_single_ended( - self, - p_val='p_val', - p_cov='p_cov', - st_var=None, - ast_var=None, - conf_ints=None, - mc_sample_size=100, - da_random_state=None, - mc_remove_set_flag=True, - reduce_memory_usage=False, - **kwargs): + self, + p_val="p_val", + p_cov="p_cov", + st_var=None, + ast_var=None, + conf_ints=None, + mc_sample_size=100, + da_random_state=None, + mc_remove_set_flag=True, + reduce_memory_usage=False, + **kwargs, + ): """ Estimation of the confidence intervals for the temperatures measured with a single-ended setup. It consists of five steps. First, the variances @@ -3455,7 +3549,7 @@ def conf_int_single_ended( state = da.random.RandomState() no, nt = self.st.data.shape - if 'trans_att' in self.keys(): + if "trans_att" in self.keys(): nta = self.trans_att.size else: nta = 0 @@ -3472,53 +3566,49 @@ def conf_int_single_ended( elif npar == 1 + no + nt + nt * nta: fixed_alpha = True else: - raise Exception('The size of `p_val` is not what I expected') + raise Exception("The size of `p_val` is not what I expected") - self.coords['mc'] = range(mc_sample_size) + self.coords["mc"] = range(mc_sample_size) if conf_ints: - self.coords['CI'] = conf_ints + self.coords["CI"] = conf_ints # WLS if isinstance(p_cov, str): p_cov = self[p_cov].data assert p_cov.shape == (npar, npar) - p_mc = sst.multivariate_normal.rvs( - mean=p_val, cov=p_cov, size=mc_sample_size) + p_mc = sst.multivariate_normal.rvs(mean=p_val, cov=p_cov, size=mc_sample_size) if fixed_alpha: - self['alpha_mc'] = (('mc', 'x'), p_mc[:, 1:no + 1]) - self['c_mc'] = (('mc', 'time'), p_mc[:, 1 + no:1 + no + nt]) + self["alpha_mc"] = (("mc", "x"), p_mc[:, 1 : no + 1]) + self["c_mc"] = (("mc", "time"), p_mc[:, 1 + no : 1 + no + nt]) else: - self['dalpha_mc'] = (('mc',), p_mc[:, 1]) - self['c_mc'] = (('mc', 'time'), p_mc[:, 2:nt + 2]) + self["dalpha_mc"] = (("mc",), p_mc[:, 1]) + self["c_mc"] = (("mc", "time"), p_mc[:, 2 : nt + 2]) - self['gamma_mc'] = (('mc',), p_mc[:, 0]) + self["gamma_mc"] = (("mc",), p_mc[:, 0]) if nta: - self['ta_mc'] = ( - ('mc', 'trans_att', 'time'), - np.reshape(p_mc[:, -nt * nta:], (mc_sample_size, nta, nt))) + self["ta_mc"] = ( + ("mc", "trans_att", "time"), + np.reshape(p_mc[:, -nt * nta :], (mc_sample_size, nta, nt)), + ) rsize = (self.mc.size, self.x.size, self.time.size) if reduce_memory_usage: memchunk = da.ones( - 
(mc_sample_size, no, nt), chunks={ - 0: -1, - 1: 1, - 2: 'auto'}).chunks + (mc_sample_size, no, nt), chunks={0: -1, 1: 1, 2: "auto"} + ).chunks else: memchunk = da.ones( - (mc_sample_size, no, nt), chunks={ - 0: -1, - 1: 'auto', - 2: 'auto'}).chunks + (mc_sample_size, no, nt), chunks={0: -1, 1: "auto", 2: "auto"} + ).chunks # Draw from the normal distributions for the Stokes intensities - for k, st_labeli, st_vari in zip(['r_st', 'r_ast'], ['st', 'ast'], - [st_var, ast_var]): - + for k, st_labeli, st_vari in zip( + ["r_st", "r_ast"], ["st", "ast"], [st_var, ast_var] + ): # Load the mean as chunked Dask array, otherwise eats memory if type(self[st_labeli].data) == da.core.Array: loc = da.asarray(self[st_labeli].data, chunks=memchunk[1:]) @@ -3538,75 +3628,100 @@ def conf_int_single_ended( if type(st_vari) == da.core.Array: st_vari_da = da.asarray(st_vari, chunks=memchunk[1:]) - elif (callable(st_vari) and - type(self[st_labeli].data) == da.core.Array): + elif callable(st_vari) and type(self[st_labeli].data) == da.core.Array: st_vari_da = da.asarray( - st_vari(self[st_labeli]).data, chunks=memchunk[1:]) + st_vari(self[st_labeli]).data, chunks=memchunk[1:] + ) - elif (callable(st_vari) and - type(self[st_labeli].data) != da.core.Array): + elif callable(st_vari) and type(self[st_labeli].data) != da.core.Array: st_vari_da = da.from_array( - st_vari(self[st_labeli]).data, chunks=memchunk[1:]) + st_vari(self[st_labeli]).data, chunks=memchunk[1:] + ) else: st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) self[k] = ( - ('mc', 'x', 'time'), + ("mc", "x", "time"), state.normal( loc=loc, # has chunks=memchunk[1:] scale=st_vari_da**0.5, size=rsize, - chunks=memchunk)) + chunks=memchunk, + ), + ) ta_arr = np.zeros((mc_sample_size, no, nt)) if nta: - for ii, ta in enumerate(self['ta_mc']): + for ii, ta in enumerate(self["ta_mc"]): for tai, taxi in zip(ta.values, self.trans_att.values): - ta_arr[ii, self.x.values >= taxi] = \ + ta_arr[ii, self.x.values >= taxi] = ( ta_arr[ii, self.x.values >= taxi] + tai - self['ta_mc_arr'] = (('mc', 'x', 'time'), ta_arr) + ) + self["ta_mc_arr"] = (("mc", "x", "time"), ta_arr) if fixed_alpha: - self['tmpf_mc_set'] = self['gamma_mc'] / ( - ( - np.log(self['r_st']) - np.log(self['r_ast']) + - (self['c_mc'] + self['ta_mc_arr'])) - + self['alpha_mc']) - 273.15 + self["tmpf_mc_set"] = ( + self["gamma_mc"] + / ( + ( + np.log(self["r_st"]) + - np.log(self["r_ast"]) + + (self["c_mc"] + self["ta_mc_arr"]) + ) + + self["alpha_mc"] + ) + - 273.15 + ) else: - self['tmpf_mc_set'] = self['gamma_mc'] / ( - ( - np.log(self['r_st']) - np.log(self['r_ast']) + - (self['c_mc'] + self['ta_mc_arr'])) + - (self['dalpha_mc'] * self.x)) - 273.15 + self["tmpf_mc_set"] = ( + self["gamma_mc"] + / ( + ( + np.log(self["r_st"]) + - np.log(self["r_ast"]) + + (self["c_mc"] + self["ta_mc_arr"]) + ) + + (self["dalpha_mc"] * self.x) + ) + - 273.15 + ) - avg_dims = ['mc'] + avg_dims = ["mc"] - avg_axis = self['tmpf_mc_set'].get_axis_num(avg_dims) + avg_axis = self["tmpf_mc_set"].get_axis_num(avg_dims) - self['tmpf_mc_var'] = ( - self['tmpf_mc_set'] - self["tmpf"]).var( - dim=avg_dims, ddof=1) + self["tmpf_mc_var"] = (self["tmpf_mc_set"] - self["tmpf"]).var( + dim=avg_dims, ddof=1 + ) if conf_ints: - new_chunks = ( - (len(conf_ints),),) + self['tmpf_mc_set'].chunks[1:] + new_chunks = ((len(conf_ints),),) + self["tmpf_mc_set"].chunks[1:] - qq = self['tmpf_mc_set'] + qq = self["tmpf_mc_set"] q = qq.data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks, # 
drop_axis=avg_axis, # avg dimesnions are dropped from input arr - new_axis=0) # The new CI dimension is added as first axis + new_axis=0, + ) # The new CI dimension is added as first axis - self['tmpf_mc'] = (('CI', 'x', 'time'), q) + self["tmpf_mc"] = (("CI", "x", "time"), q) if mc_remove_set_flag: drop_var = [ - 'gamma_mc', 'dalpha_mc', 'alpha_mc', 'c_mc', 'mc', 'r_st', - 'r_ast', 'tmpf_mc_set', 'ta_mc_arr'] + "gamma_mc", + "dalpha_mc", + "alpha_mc", + "c_mc", + "mc", + "r_st", + "r_ast", + "tmpf_mc_set", + "ta_mc_arr", + ] for k in drop_var: if k in self: del self[k] @@ -3614,26 +3729,27 @@ def conf_int_single_ended( pass def average_single_ended( - self, - p_val='p_val', - p_cov='p_cov', - st_var=None, - ast_var=None, - conf_ints=None, - mc_sample_size=100, - ci_avg_time_flag1=False, - ci_avg_time_flag2=False, - ci_avg_time_sel=None, - ci_avg_time_isel=None, - ci_avg_x_flag1=False, - ci_avg_x_flag2=False, - ci_avg_x_sel=None, - ci_avg_x_isel=None, - var_only_sections=None, - da_random_state=None, - mc_remove_set_flag=True, - reduce_memory_usage=False, - **kwargs): + self, + p_val="p_val", + p_cov="p_cov", + st_var=None, + ast_var=None, + conf_ints=None, + mc_sample_size=100, + ci_avg_time_flag1=False, + ci_avg_time_flag2=False, + ci_avg_time_sel=None, + ci_avg_time_isel=None, + ci_avg_x_flag1=False, + ci_avg_x_flag2=False, + ci_avg_x_sel=None, + ci_avg_x_isel=None, + var_only_sections=None, + da_random_state=None, + mc_remove_set_flag=True, + reduce_memory_usage=False, + **kwargs, + ): """ Average temperatures from single-ended setups. @@ -3771,193 +3887,194 @@ def average_single_ended( da_random_state=da_random_state, mc_remove_set_flag=False, reduce_memory_usage=reduce_memory_usage, - **kwargs) + **kwargs, + ) if ci_avg_time_sel is not None: - time_dim2 = 'time' + '_avg' - x_dim2 = 'x' + time_dim2 = "time" + "_avg" + x_dim2 = "x" self.coords[time_dim2] = ( (time_dim2,), - self['time'].sel(**{ - 'time': ci_avg_time_sel}).data) - self['tmpf_avgsec'] = ( - ('x', time_dim2), - self["tmpf"].sel(**{ - 'time': ci_avg_time_sel}).data) - self['tmpf_mc_set'] = ( - ('mc', 'x', time_dim2), - self["tmpf" - + '_mc_set'].sel(**{ - 'time': ci_avg_time_sel}).data) + self["time"].sel(**{"time": ci_avg_time_sel}).data, + ) + self["tmpf_avgsec"] = ( + ("x", time_dim2), + self["tmpf"].sel(**{"time": ci_avg_time_sel}).data, + ) + self["tmpf_mc_set"] = ( + ("mc", "x", time_dim2), + self["tmpf" + "_mc_set"].sel(**{"time": ci_avg_time_sel}).data, + ) elif ci_avg_time_isel is not None: - time_dim2 = 'time' + '_avg' - x_dim2 = 'x' + time_dim2 = "time" + "_avg" + x_dim2 = "x" self.coords[time_dim2] = ( (time_dim2,), - self['time'].isel(**{ - 'time': ci_avg_time_isel}).data) - self['tmpf_avgsec'] = ( - ('x', time_dim2), - self["tmpf"].isel(**{ - 'time': ci_avg_time_isel}).data) - self['tmpf_mc_set'] = ( - ('mc', 'x', time_dim2), - self["tmpf" - + '_mc_set'].isel(**{ - 'time': ci_avg_time_isel}).data) + self["time"].isel(**{"time": ci_avg_time_isel}).data, + ) + self["tmpf_avgsec"] = ( + ("x", time_dim2), + self["tmpf"].isel(**{"time": ci_avg_time_isel}).data, + ) + self["tmpf_mc_set"] = ( + ("mc", "x", time_dim2), + self["tmpf" + "_mc_set"].isel(**{"time": ci_avg_time_isel}).data, + ) elif ci_avg_x_sel is not None: - time_dim2 = 'time' - x_dim2 = 'x_avg' + time_dim2 = "time" + x_dim2 = "x_avg" self.coords[x_dim2] = ((x_dim2,), self.x.sel(x=ci_avg_x_sel).data) - self['tmpf_avgsec'] = ( - (x_dim2, 'time'), self["tmpf"].sel(x=ci_avg_x_sel).data) - self['tmpf_mc_set'] = ( - ('mc', x_dim2, 'time'), - 
self['tmpf_mc_set'].sel(x=ci_avg_x_sel).data) + self["tmpf_avgsec"] = ( + (x_dim2, "time"), + self["tmpf"].sel(x=ci_avg_x_sel).data, + ) + self["tmpf_mc_set"] = ( + ("mc", x_dim2, "time"), + self["tmpf_mc_set"].sel(x=ci_avg_x_sel).data, + ) elif ci_avg_x_isel is not None: - time_dim2 = 'time' - x_dim2 = 'x_avg' - self.coords[x_dim2] = ( - (x_dim2,), self.x.isel(x=ci_avg_x_isel).data) - self['tmpf_avgsec'] = ( + time_dim2 = "time" + x_dim2 = "x_avg" + self.coords[x_dim2] = ((x_dim2,), self.x.isel(x=ci_avg_x_isel).data) + self["tmpf_avgsec"] = ( (x_dim2, time_dim2), - self["tmpf"].isel(x=ci_avg_x_isel).data) - self['tmpf_mc_set'] = ( - ('mc', x_dim2, time_dim2), - self['tmpf_mc_set'].isel(x=ci_avg_x_isel).data) + self["tmpf"].isel(x=ci_avg_x_isel).data, + ) + self["tmpf_mc_set"] = ( + ("mc", x_dim2, time_dim2), + self["tmpf_mc_set"].isel(x=ci_avg_x_isel).data, + ) else: - self['tmpf_avgsec'] = self["tmpf"] - x_dim2 = 'x' - time_dim2 = 'time' + self["tmpf_avgsec"] = self["tmpf"] + x_dim2 = "x" + time_dim2 = "time" # subtract the mean temperature - q = self['tmpf_mc_set'] - self['tmpf_avgsec'] - self['tmpf_mc' + '_avgsec_var'] = ( - q.var(dim='mc', ddof=1)) + q = self["tmpf_mc_set"] - self["tmpf_avgsec"] + self["tmpf_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) if ci_avg_x_flag1: # unweighted mean - self['tmpf_avgx1'] = self["tmpf" - + '_avgsec'].mean(dim=x_dim2) + self["tmpf_avgx1"] = self["tmpf" + "_avgsec"].mean(dim=x_dim2) - q = self['tmpf_mc_set'] - self['tmpf_avgsec'] - qvar = q.var(dim=['mc', x_dim2], ddof=1) - self['tmpf_mc_avgx1_var'] = qvar + q = self["tmpf_mc_set"] - self["tmpf_avgsec"] + qvar = q.var(dim=["mc", x_dim2], ddof=1) + self["tmpf_mc_avgx1_var"] = qvar if conf_ints: - new_chunks = ( - len(conf_ints), self['tmpf_mc_set'].chunks[2]) - avg_axis = self['tmpf_mc_set'].get_axis_num( - ['mc', x_dim2]) - q = self['tmpf_mc_set'].data.map_blocks( + new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[2]) + avg_axis = self["tmpf_mc_set"].get_axis_num(["mc", x_dim2]) + q = self["tmpf_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks, # drop_axis=avg_axis, # avg dimensions are dropped from input arr - new_axis=0) # The new CI dim is added as firsaxis + new_axis=0, + ) # The new CI dim is added as firsaxis - self['tmpf_mc_avgx1'] = (('CI', time_dim2), q) + self["tmpf_mc_avgx1"] = (("CI", time_dim2), q) if ci_avg_x_flag2: - q = self['tmpf_mc_set'] - self['tmpf_avgsec'] + q = self["tmpf_mc_set"] - self["tmpf_avgsec"] - qvar = q.var(dim=['mc'], ddof=1) + qvar = q.var(dim=["mc"], ddof=1) # Inverse-variance weighting avg_x_var = 1 / (1 / qvar).sum(dim=x_dim2) - self['tmpf_mc_avgx2_var'] = avg_x_var + self["tmpf_mc_avgx2_var"] = avg_x_var - self["tmpf" - + '_mc_avgx2_set'] = (self['tmpf_mc_set'] - / qvar).sum(dim=x_dim2) * avg_x_var - self["tmpf" - + '_avgx2'] = self["tmpf" - + '_mc_avgx2_set'].mean(dim='mc') + self["tmpf" + "_mc_avgx2_set"] = (self["tmpf_mc_set"] / qvar).sum( + dim=x_dim2 + ) * avg_x_var + self["tmpf" + "_avgx2"] = self["tmpf" + "_mc_avgx2_set"].mean(dim="mc") if conf_ints: - new_chunks = ( - len(conf_ints), self['tmpf_mc_set'].chunks[2]) - avg_axis_avgx = self['tmpf_mc_set'].get_axis_num('mc') + new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[2]) + avg_axis_avgx = self["tmpf_mc_set"].get_axis_num("mc") - qq = self['tmpf_mc_avgx2_set'].data.map_blocks( - lambda x: np.percentile( - x, q=conf_ints, axis=avg_axis_avgx), + qq = self["tmpf_mc_avgx2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, 
axis=avg_axis_avgx),
chunks=new_chunks,
# drop_axis=avg_axis_avgx, # avg dimensions are dropped from input arr
new_axis=0,
- dtype=float) # The new CI dimension is added as
+ dtype=float,
+ ) # The new CI dimension is added as
# first axis

- self['tmpf_mc_avgx2'] = (('CI', time_dim2), qq)
+ self["tmpf_mc_avgx2"] = (("CI", time_dim2), qq)

if ci_avg_time_flag1 is not None:
# unweighted mean
- self['tmpf_avg1'] = self["tmpf"
- + '_avgsec'].mean(dim=time_dim2)
+ self["tmpf_avg1"] = self["tmpf" + "_avgsec"].mean(dim=time_dim2)

- q = self['tmpf_mc_set'] - self['tmpf_avgsec']
- qvar = q.var(dim=['mc', time_dim2], ddof=1)
- self['tmpf_mc_avg1_var'] = qvar
+ q = self["tmpf_mc_set"] - self["tmpf_avgsec"]
+ qvar = q.var(dim=["mc", time_dim2], ddof=1)
+ self["tmpf_mc_avg1_var"] = qvar

if conf_ints:
- new_chunks = (
- len(conf_ints), self['tmpf_mc_set'].chunks[1])
- avg_axis = self['tmpf_mc_set'].get_axis_num(
- ['mc', time_dim2])
- q = self['tmpf_mc_set'].data.map_blocks(
+ new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[1])
+ avg_axis = self["tmpf_mc_set"].get_axis_num(["mc", time_dim2])
+ q = self["tmpf_mc_set"].data.map_blocks(
lambda x: np.percentile(x, q=conf_ints, axis=avg_axis),
chunks=new_chunks,
# drop_axis=avg_axis, # avg dimensions are dropped from input arr
- new_axis=0) # The new CI dim is added as firsaxis
+ new_axis=0,
+ ) # The new CI dim is added as first axis

- self['tmpf_mc_avg1'] = (('CI', x_dim2), q)
+ self["tmpf_mc_avg1"] = (("CI", x_dim2), q)

if ci_avg_time_flag2:
- q = self['tmpf_mc_set'] - self['tmpf_avgsec']
+ q = self["tmpf_mc_set"] - self["tmpf_avgsec"]

- qvar = q.var(dim=['mc'], ddof=1)
+ qvar = q.var(dim=["mc"], ddof=1)

# Inverse-variance weighting
avg_time_var = 1 / (1 / qvar).sum(dim=time_dim2)

- self['tmpf_mc_avg2_var'] = avg_time_var
+ self["tmpf_mc_avg2_var"] = avg_time_var

- self["tmpf"
- + '_mc_avg2_set'] = (self['tmpf_mc_set'] / qvar).sum(
- dim=time_dim2) * avg_time_var
- self['tmpf_avg2'] = self["tmpf"
- + '_mc_avg2_set'].mean(dim='mc')
+ self["tmpf" + "_mc_avg2_set"] = (self["tmpf_mc_set"] / qvar).sum(
+ dim=time_dim2
+ ) * avg_time_var
+ self["tmpf_avg2"] = self["tmpf" + "_mc_avg2_set"].mean(dim="mc")

if conf_ints:
- new_chunks = (
- len(conf_ints), self['tmpf_mc_set'].chunks[1])
- avg_axis_avg2 = self['tmpf_mc_set'].get_axis_num('mc')
+ new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[1])
+ avg_axis_avg2 = self["tmpf_mc_set"].get_axis_num("mc")

- qq = self['tmpf_mc_avg2_set'].data.map_blocks(
- lambda x: np.percentile(
- x, q=conf_ints, axis=avg_axis_avg2),
+ qq = self["tmpf_mc_avg2_set"].data.map_blocks(
+ lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2),
chunks=new_chunks,
# drop_axis=avg_axis_avg2, # avg dimensions are dropped from input arr
new_axis=0,
- dtype=float) # The new CI dimension is added as
+ dtype=float,
+ ) # The new CI dimension is added as
# first axis
- self['tmpf_mc_avg2'] = (('CI', x_dim2), qq)
+ self["tmpf_mc_avg2"] = (("CI", x_dim2), qq)

# Clean up the garbage. All arrays with a Monte Carlo dimension.
if mc_remove_set_flag: remove_mc_set = [ - 'r_st', 'r_ast', 'gamma_mc', 'dalpha_mc', 'c_mc', 'x_avg', - 'time_avg', 'mc', 'ta_mc_arr'] - remove_mc_set.append('tmpf_avgsec') - remove_mc_set.append('tmpf_mc_set') - remove_mc_set.append('tmpf_mc_avg2_set') - remove_mc_set.append('tmpf_mc_avgx2_set') - remove_mc_set.append('tmpf_mc_avgsec_var') + "r_st", + "r_ast", + "gamma_mc", + "dalpha_mc", + "c_mc", + "x_avg", + "time_avg", + "mc", + "ta_mc_arr", + ] + remove_mc_set.append("tmpf_avgsec") + remove_mc_set.append("tmpf_mc_set") + remove_mc_set.append("tmpf_mc_avg2_set") + remove_mc_set.append("tmpf_mc_avgx2_set") + remove_mc_set.append("tmpf_mc_avgsec_var") for k in remove_mc_set: if k in self: @@ -3965,20 +4082,21 @@ def average_single_ended( pass def conf_int_double_ended( - self, - p_val='p_val', - p_cov='p_cov', - st_var=None, - ast_var=None, - rst_var=None, - rast_var=None, - conf_ints=None, - mc_sample_size=100, - var_only_sections=False, - da_random_state=None, - mc_remove_set_flag=True, - reduce_memory_usage=False, - **kwargs): + self, + p_val="p_val", + p_cov="p_cov", + st_var=None, + ast_var=None, + rst_var=None, + rast_var=None, + conf_ints=None, + mc_sample_size=100, + var_only_sections=False, + da_random_state=None, + mc_remove_set_flag=True, + reduce_memory_usage=False, + **kwargs, + ): """ Estimation of the confidence intervals for the temperatures measured with a double-ended setup. @@ -4098,33 +4216,36 @@ def conf_int_double_ended( https://doi.org/10.3390/s20082235 """ - def create_da_ta2(no, i_splice, direction='fw', chunks=None): + + def create_da_ta2(no, i_splice, direction="fw", chunks=None): """create mask array mc, o, nt""" - if direction == 'fw': + if direction == "fw": arr = da.concatenate( ( da.zeros( - (1, i_splice, 1), - chunks=((1, i_splice, 1)), - dtype=bool), + (1, i_splice, 1), chunks=((1, i_splice, 1)), dtype=bool + ), da.ones( (1, no - i_splice, 1), chunks=(1, no - i_splice, 1), - dtype=bool)), - axis=1).rechunk((1, chunks[1], 1)) + dtype=bool, + ), + ), + axis=1, + ).rechunk((1, chunks[1], 1)) else: arr = da.concatenate( ( - da.ones( - (1, i_splice, 1), - chunks=(1, i_splice, 1), - dtype=bool), + da.ones((1, i_splice, 1), chunks=(1, i_splice, 1), dtype=bool), da.zeros( (1, no - i_splice, 1), chunks=((1, no - i_splice, 1)), - dtype=bool)), - axis=1).rechunk((1, chunks[1], 1)) + dtype=bool, + ), + ), + axis=1, + ).rechunk((1, chunks[1], 1)) return arr self.check_deprecated_kwargs(kwargs) @@ -4137,9 +4258,11 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): state = da.random.RandomState() if conf_ints: - assert "tmpw", 'Current implementation requires you to ' \ - 'define "tmpw" when estimating confidence ' \ - 'intervals' + assert "tmpw", ( + "Current implementation requires you to " + 'define "tmpw" when estimating confidence ' + "intervals" + ) no, nt = self.st.shape nta = self.trans_att.size @@ -4149,63 +4272,62 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): if reduce_memory_usage: memchunk = da.ones( - (mc_sample_size, no, nt), chunks={ - 0: -1, - 1: 1, - 2: 'auto'}).chunks + (mc_sample_size, no, nt), chunks={0: -1, 1: 1, 2: "auto"} + ).chunks else: memchunk = da.ones( - (mc_sample_size, no, nt), chunks={ - 0: -1, - 1: 'auto', - 2: 'auto'}).chunks + (mc_sample_size, no, nt), chunks={0: -1, 1: "auto", 2: "auto"} + ).chunks - self.coords['mc'] = range(mc_sample_size) + self.coords["mc"] = range(mc_sample_size) if conf_ints: - self.coords['CI'] = conf_ints + self.coords["CI"] = conf_ints assert isinstance(p_val, (str, 
np.ndarray, np.generic)) if isinstance(p_val, str): p_val = self[p_val].values - assert p_val.shape == (npar,), "Did you set 'talpha' as " \ - "keyword argument of the " \ - "conf_int_double_ended() function?" + assert p_val.shape == (npar,), ( + "Did you set 'talpha' as " + "keyword argument of the " + "conf_int_double_ended() function?" + ) assert isinstance(p_cov, (str, np.ndarray, np.generic, bool)) if isinstance(p_cov, bool) and not p_cov: # Exclude parameter uncertainty if p_cov == False gamma = p_val[0] - d_fw = p_val[1:nt + 1] - d_bw = p_val[1 + nt:2 * nt + 1] - alpha = p_val[2 * nt + 1:2 * nt + 1 + no] + d_fw = p_val[1 : nt + 1] + d_bw = p_val[1 + nt : 2 * nt + 1] + alpha = p_val[2 * nt + 1 : 2 * nt + 1 + no] - self['gamma_mc'] = (tuple(), gamma) - self['alpha_mc'] = (('x',), alpha) - self['df_mc'] = (('time',), d_fw) - self['db_mc'] = (('time',), d_bw) + self["gamma_mc"] = (tuple(), gamma) + self["alpha_mc"] = (("x",), alpha) + self["df_mc"] = (("time",), d_fw) + self["db_mc"] = (("time",), d_bw) if nta: - ta = p_val[2 * nt + 1 + no:].reshape((nt, 2, nta), order='F') + ta = p_val[2 * nt + 1 + no :].reshape((nt, 2, nta), order="F") ta_fw = ta[:, 0, :] ta_bw = ta[:, 1, :] ta_fw_arr = np.zeros((no, nt)) for tai, taxi in zip(ta_fw.T, self.coords["trans_att"].values): - ta_fw_arr[self.x.values >= taxi] = \ + ta_fw_arr[self.x.values >= taxi] = ( ta_fw_arr[self.x.values >= taxi] + tai + ) ta_bw_arr = np.zeros((no, nt)) for tai, taxi in zip(ta_bw.T, self.coords["trans_att"].values): - ta_bw_arr[self.x.values < taxi] = \ + ta_bw_arr[self.x.values < taxi] = ( ta_bw_arr[self.x.values < taxi] + tai + ) - self['talpha_fw_mc'] = (('x', 'time'), ta_fw_arr) - self['talpha_bw_mc'] = (('x', 'time'), ta_bw_arr) + self["talpha_fw_mc"] = (("x", "time"), ta_fw_arr) + self["talpha_bw_mc"] = (("x", "time"), ta_bw_arr) elif isinstance(p_cov, bool) and p_cov: - raise NotImplementedError( - 'Not an implemented option. Check p_cov argument') + raise NotImplementedError("Not an implemented option. 
Check p_cov argument") else: # WLS @@ -4213,83 +4335,90 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): p_cov = self[p_cov].values assert p_cov.shape == (npar, npar) - ix_sec = self.ufunc_per_section(x_indices=True, calc_per='all') + ix_sec = self.ufunc_per_section(x_indices=True, calc_per="all") nx_sec = ix_sec.size from_i = np.concatenate( ( - np.arange(1 + 2 * nt), 1 + 2 * nt + ix_sec, - np.arange(1 + 2 * nt + no, - 1 + 2 * nt + no + nt * 2 * nta))) - iox_sec1, iox_sec2 = np.meshgrid(from_i, from_i, indexing='ij') + np.arange(1 + 2 * nt), + 1 + 2 * nt + ix_sec, + np.arange(1 + 2 * nt + no, 1 + 2 * nt + no + nt * 2 * nta), + ) + ) + iox_sec1, iox_sec2 = np.meshgrid(from_i, from_i, indexing="ij") po_val = p_val[from_i] po_cov = p_cov[iox_sec1, iox_sec2] po_mc = sst.multivariate_normal.rvs( - mean=po_val, cov=po_cov, size=mc_sample_size) + mean=po_val, cov=po_cov, size=mc_sample_size + ) gamma = po_mc[:, 0] - d_fw = po_mc[:, 1:nt + 1] - d_bw = po_mc[:, 1 + nt:2 * nt + 1] + d_fw = po_mc[:, 1 : nt + 1] + d_bw = po_mc[:, 1 + nt : 2 * nt + 1] - self['gamma_mc'] = (('mc',), gamma) - self['df_mc'] = (('mc', 'time'), d_fw) - self['db_mc'] = (('mc', 'time'), d_bw) + self["gamma_mc"] = (("mc",), gamma) + self["df_mc"] = (("mc", "time"), d_fw) + self["db_mc"] = (("mc", "time"), d_bw) # calculate alpha seperately alpha = np.zeros((mc_sample_size, no), dtype=float) - alpha[:, ix_sec] = po_mc[:, 1 + 2 * nt:1 + 2 * nt + nx_sec] + alpha[:, ix_sec] = po_mc[:, 1 + 2 * nt : 1 + 2 * nt + nx_sec] not_ix_sec = np.array([i for i in range(no) if i not in ix_sec]) if np.any(not_ix_sec): not_alpha_val = p_val[2 * nt + 1 + not_ix_sec] - not_alpha_var = p_cov[2 * nt + 1 + not_ix_sec, - 2 * nt + 1 + not_ix_sec] + not_alpha_var = p_cov[2 * nt + 1 + not_ix_sec, 2 * nt + 1 + not_ix_sec] not_alpha_mc = np.random.normal( loc=not_alpha_val, scale=not_alpha_var**0.5, - size=(mc_sample_size, not_alpha_val.size)) + size=(mc_sample_size, not_alpha_val.size), + ) alpha[:, not_ix_sec] = not_alpha_mc - self['alpha_mc'] = (('mc', 'x'), alpha) + self["alpha_mc"] = (("mc", "x"), alpha) if nta: - ta = po_mc[:, 2 * nt + 1 + nx_sec:].reshape( - (mc_sample_size, nt, 2, nta), order='F') + ta = po_mc[:, 2 * nt + 1 + nx_sec :].reshape( + (mc_sample_size, nt, 2, nta), order="F" + ) ta_fw = ta[:, :, 0, :] ta_bw = ta[:, :, 1, :] ta_fw_arr = da.zeros( - (mc_sample_size, no, nt), chunks=memchunk, dtype=float) - for tai, taxi in zip(ta_fw.swapaxes(0, 2), - self.coords["trans_att"].values): + (mc_sample_size, no, nt), chunks=memchunk, dtype=float + ) + for tai, taxi in zip( + ta_fw.swapaxes(0, 2), self.coords["trans_att"].values + ): # iterate over the splices i_splice = sum(self.x.values < taxi) - mask = create_da_ta2( - no, i_splice, direction='fw', chunks=memchunk) + mask = create_da_ta2(no, i_splice, direction="fw", chunks=memchunk) ta_fw_arr += mask * tai.T[:, None, :] ta_bw_arr = da.zeros( - (mc_sample_size, no, nt), chunks=memchunk, dtype=float) - for tai, taxi in zip(ta_bw.swapaxes(0, 2), - self.coords["trans_att"].values): + (mc_sample_size, no, nt), chunks=memchunk, dtype=float + ) + for tai, taxi in zip( + ta_bw.swapaxes(0, 2), self.coords["trans_att"].values + ): i_splice = sum(self.x.values < taxi) - mask = create_da_ta2( - no, i_splice, direction='bw', chunks=memchunk) + mask = create_da_ta2(no, i_splice, direction="bw", chunks=memchunk) ta_bw_arr += mask * tai.T[:, None, :] - self['talpha_fw_mc'] = (('mc', 'x', 'time'), ta_fw_arr) - self['talpha_bw_mc'] = (('mc', 'x', 'time'), ta_bw_arr) + self["talpha_fw_mc"] = 
(("mc", "x", "time"), ta_fw_arr) + self["talpha_bw_mc"] = (("mc", "x", "time"), ta_bw_arr) # Draw from the normal distributions for the Stokes intensities - for k, st_labeli, st_vari in zip(['r_st', 'r_ast', 'r_rst', 'r_rast'], - ['st', 'ast', 'rst', 'rast'], - [st_var, ast_var, rst_var, rast_var]): - + for k, st_labeli, st_vari in zip( + ["r_st", "r_ast", "r_rst", "r_rast"], + ["st", "ast", "rst", "rast"], + [st_var, ast_var, rst_var, rast_var], + ): # Load the mean as chunked Dask array, otherwise eats memory if type(self[st_labeli].data) == da.core.Array: loc = da.asarray(self[st_labeli].data, chunks=memchunk[1:]) @@ -4309,120 +4438,146 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): if type(st_vari) == da.core.Array: st_vari_da = da.asarray(st_vari, chunks=memchunk[1:]) - elif (callable(st_vari) and - type(self[st_labeli].data) == da.core.Array): + elif callable(st_vari) and type(self[st_labeli].data) == da.core.Array: st_vari_da = da.asarray( - st_vari(self[st_labeli]).data, chunks=memchunk[1:]) + st_vari(self[st_labeli]).data, chunks=memchunk[1:] + ) - elif (callable(st_vari) and - type(self[st_labeli].data) != da.core.Array): + elif callable(st_vari) and type(self[st_labeli].data) != da.core.Array: st_vari_da = da.from_array( - st_vari(self[st_labeli]).data, chunks=memchunk[1:]) + st_vari(self[st_labeli]).data, chunks=memchunk[1:] + ) else: st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) self[k] = ( - ('mc', 'x', 'time'), + ("mc", "x", "time"), state.normal( loc=loc, # has chunks=memchunk[1:] scale=st_vari_da**0.5, size=rsize, - chunks=memchunk)) + chunks=memchunk, + ), + ) for label in ["tmpf", "tmpb"]: if "tmpw" or label: if label == "tmpf": if nta: - self['tmpf_mc_set'] = self['gamma_mc'] / ( - np.log(self['r_st'] / self['r_ast']) - + self['df_mc'] + self['alpha_mc'] - + self['talpha_fw_mc']) - 273.15 + self["tmpf_mc_set"] = ( + self["gamma_mc"] + / ( + np.log(self["r_st"] / self["r_ast"]) + + self["df_mc"] + + self["alpha_mc"] + + self["talpha_fw_mc"] + ) + - 273.15 + ) else: - self['tmpf_mc_set'] = self['gamma_mc'] / ( - np.log(self['r_st'] / self['r_ast']) - + self['df_mc'] + self['alpha_mc']) - 273.15 + self["tmpf_mc_set"] = ( + self["gamma_mc"] + / ( + np.log(self["r_st"] / self["r_ast"]) + + self["df_mc"] + + self["alpha_mc"] + ) + - 273.15 + ) else: if nta: - self['tmpb_mc_set'] = self['gamma_mc'] / ( - np.log(self['r_rst'] / self['r_rast']) - + self['db_mc'] - self['alpha_mc'] - + self['talpha_bw_mc']) - 273.15 + self["tmpb_mc_set"] = ( + self["gamma_mc"] + / ( + np.log(self["r_rst"] / self["r_rast"]) + + self["db_mc"] + - self["alpha_mc"] + + self["talpha_bw_mc"] + ) + - 273.15 + ) else: - self['tmpb_mc_set'] = self['gamma_mc'] / ( - np.log(self['r_rst'] / self['r_rast']) - + self['db_mc'] - self['alpha_mc']) - 273.15 + self["tmpb_mc_set"] = ( + self["gamma_mc"] + / ( + np.log(self["r_rst"] / self["r_rast"]) + + self["db_mc"] + - self["alpha_mc"] + ) + - 273.15 + ) if var_only_sections: # sets the values outside the reference sections to NaN - xi = self.ufunc_per_section(x_indices=True, calc_per='all') - x_mask_ = [ - True if ix in xi else False - for ix in range(self.x.size)] + xi = self.ufunc_per_section(x_indices=True, calc_per="all") + x_mask_ = [True if ix in xi else False for ix in range(self.x.size)] x_mask = np.reshape(x_mask_, (1, -1, 1)) - self[label + '_mc_set'] = self[label - + '_mc_set'].where(x_mask) + self[label + "_mc_set"] = self[label + "_mc_set"].where(x_mask) # subtract the mean temperature - q = self[label + '_mc_set'] - 
self[label] - self[label + '_mc_var'] = (q.var(dim='mc', ddof=1)) + q = self[label + "_mc_set"] - self[label] + self[label + "_mc_var"] = q.var(dim="mc", ddof=1) if conf_ints: - new_chunks = list(self[label + '_mc_set'].chunks) + new_chunks = list(self[label + "_mc_set"].chunks) new_chunks[0] = (len(conf_ints),) - avg_axis = self[label + '_mc_set'].get_axis_num('mc') - q = self[label + '_mc_set'].data.map_blocks( + avg_axis = self[label + "_mc_set"].get_axis_num("mc") + q = self[label + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks, # drop_axis=avg_axis, # avg dimensions are dropped from input arr - new_axis=0) # The new CI dimension is added as firsaxis + new_axis=0, + ) # The new CI dimension is added as firsaxis - self[label + '_mc'] = (('CI', 'x', 'time'), q) + self[label + "_mc"] = (("CI", "x", "time"), q) # Weighted mean of the forward and backward - tmpw_var = 1 / ( - 1 / self['tmpf_mc_var'] - + 1 / self['tmpb_mc_var']) + tmpw_var = 1 / (1 / self["tmpf_mc_var"] + 1 / self["tmpb_mc_var"]) q = ( - self['tmpf_mc_set'] - / self['tmpf_mc_var'] - + self['tmpb_mc_set'] - / self['tmpb_mc_var']) * tmpw_var + self["tmpf_mc_set"] / self["tmpf_mc_var"] + + self["tmpb_mc_set"] / self["tmpb_mc_var"] + ) * tmpw_var - self["tmpw" + '_mc_set'] = q # + self["tmpw" + "_mc_set"] = q # - self["tmpw"] = \ - (self["tmpf"] / - self['tmpf_mc_var'] + - self["tmpb"] / - self['tmpb_mc_var'] - ) * tmpw_var + self["tmpw"] = ( + self["tmpf"] / self["tmpf_mc_var"] + self["tmpb"] / self["tmpb_mc_var"] + ) * tmpw_var - q = self["tmpw" + '_mc_set'] - self["tmpw"] - self["tmpw" + '_mc_var'] = q.var(dim='mc', ddof=1) + q = self["tmpw" + "_mc_set"] - self["tmpw"] + self["tmpw" + "_mc_var"] = q.var(dim="mc", ddof=1) # Calculate the CI of the weighted MC_set if conf_ints: new_chunks_weighted = ((len(conf_ints),),) + memchunk[1:] - avg_axis = self["tmpw" + '_mc_set'].get_axis_num('mc') - q2 = self["tmpw" + '_mc_set'].data.map_blocks( + avg_axis = self["tmpw" + "_mc_set"].get_axis_num("mc") + q2 = self["tmpw" + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks_weighted, # Explicitly define output chunks drop_axis=avg_axis, # avg dimensions are dropped new_axis=0, - dtype=float) # The new CI dimension is added as first axis - self["tmpw" + '_mc'] = (('CI', 'x', 'time'), q2) + dtype=float, + ) # The new CI dimension is added as first axis + self["tmpw" + "_mc"] = (("CI", "x", "time"), q2) # Clean up the garbage. All arrays with a Monte Carlo dimension. 
if mc_remove_set_flag:
remove_mc_set = [
- 'r_st', 'r_ast', 'r_rst', 'r_rast', 'gamma_mc', 'alpha_mc',
- 'df_mc', 'db_mc']
+ "r_st",
+ "r_ast",
+ "r_rst",
+ "r_rast",
+ "gamma_mc",
+ "alpha_mc",
+ "df_mc",
+ "db_mc",
+ ]

for i in ["tmpf", "tmpb", "tmpw"]:
- remove_mc_set.append(i + '_mc_set')
+ remove_mc_set.append(i + "_mc_set")

if nta:
remove_mc_set.append("talpha_fw_mc")
@@ -4434,27 +4589,28 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None):
pass

def average_double_ended(
- self,
- p_val='p_val',
- p_cov='p_cov',
- st_var=None,
- ast_var=None,
- rst_var=None,
- rast_var=None,
- conf_ints=None,
- mc_sample_size=100,
- ci_avg_time_flag1=False,
- ci_avg_time_flag2=False,
- ci_avg_time_sel=None,
- ci_avg_time_isel=None,
- ci_avg_x_flag1=False,
- ci_avg_x_flag2=False,
- ci_avg_x_sel=None,
- ci_avg_x_isel=None,
- da_random_state=None,
- mc_remove_set_flag=True,
- reduce_memory_usage=False,
- **kwargs):
+ self,
+ p_val="p_val",
+ p_cov="p_cov",
+ st_var=None,
+ ast_var=None,
+ rst_var=None,
+ rast_var=None,
+ conf_ints=None,
+ mc_sample_size=100,
+ ci_avg_time_flag1=False,
+ ci_avg_time_flag2=False,
+ ci_avg_time_sel=None,
+ ci_avg_time_isel=None,
+ ci_avg_x_flag1=False,
+ ci_avg_x_flag2=False,
+ ci_avg_x_sel=None,
+ ci_avg_x_isel=None,
+ da_random_state=None,
+ mc_remove_set_flag=True,
+ reduce_memory_usage=False,
+ **kwargs,
+ ):
"""
Average temperatures from double-ended setups.

@@ -4570,46 +4726,51 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None):
-------

"""
- def create_da_ta2(no, i_splice, direction='fw', chunks=None):
+
+ def create_da_ta2(no, i_splice, direction="fw", chunks=None):
"""create mask array mc, o, nt"""
- if direction == 'fw':
+ if direction == "fw":
arr = da.concatenate(
(
da.zeros(
- (1, i_splice, 1),
- chunks=((1, i_splice, 1)),
- dtype=bool),
+ (1, i_splice, 1), chunks=((1, i_splice, 1)), dtype=bool
+ ),
da.ones(
(1, no - i_splice, 1),
chunks=(1, no - i_splice, 1),
- dtype=bool)),
- axis=1).rechunk((1, chunks[1], 1))
+ dtype=bool,
+ ),
+ ),
+ axis=1,
+ ).rechunk((1, chunks[1], 1))
else:
arr = da.concatenate(
(
- da.ones(
- (1, i_splice, 1),
- chunks=(1, i_splice, 1),
- dtype=bool),
+ da.ones((1, i_splice, 1), chunks=(1, i_splice, 1), dtype=bool),
da.zeros(
(1, no - i_splice, 1),
chunks=((1, no - i_splice, 1)),
- dtype=bool)),
- axis=1).rechunk((1, chunks[1], 1))
+ dtype=bool,
+ ),
+ ),
+ axis=1,
+ ).rechunk((1, chunks[1], 1))

return arr

self.check_deprecated_kwargs(kwargs)

- if (ci_avg_x_flag1 or ci_avg_x_flag2) and (ci_avg_time_flag1 or
- ci_avg_time_flag2):
+ if (ci_avg_x_flag1 or ci_avg_x_flag2) and (
+ ci_avg_time_flag1 or ci_avg_time_flag2
+ ):
raise NotImplementedError(
- 'Incompatible flags. Can not pick '
- 'the right chunks')
+ "Incompatible flags. 
Can not pick " "the right chunks" + ) - elif not (ci_avg_x_flag1 or ci_avg_x_flag2 or ci_avg_time_flag1 or - ci_avg_time_flag2): - raise NotImplementedError('Pick one of the averaging options') + elif not ( + ci_avg_x_flag1 or ci_avg_x_flag2 or ci_avg_time_flag1 or ci_avg_time_flag2 + ): + raise NotImplementedError("Pick one of the averaging options") else: pass @@ -4626,341 +4787,329 @@ def create_da_ta2(no, i_splice, direction='fw', chunks=None): da_random_state=da_random_state, mc_remove_set_flag=False, reduce_memory_usage=reduce_memory_usage, - **kwargs) + **kwargs, + ) for label in ["tmpf", "tmpb"]: if ci_avg_time_sel is not None: - time_dim2 = 'time' + '_avg' - x_dim2 = 'x' + time_dim2 = "time" + "_avg" + x_dim2 = "x" self.coords[time_dim2] = ( (time_dim2,), - self['time'].sel(**{ - 'time': ci_avg_time_sel}).data) - self[label + '_avgsec'] = ( - ('x', time_dim2), - self[label].sel(**{ - 'time': ci_avg_time_sel}).data) - self[label + '_mc_set'] = ( - ('mc', 'x', time_dim2), - self[label + '_mc_set'].sel(**{ - 'time': ci_avg_time_sel}).data) + self["time"].sel(**{"time": ci_avg_time_sel}).data, + ) + self[label + "_avgsec"] = ( + ("x", time_dim2), + self[label].sel(**{"time": ci_avg_time_sel}).data, + ) + self[label + "_mc_set"] = ( + ("mc", "x", time_dim2), + self[label + "_mc_set"].sel(**{"time": ci_avg_time_sel}).data, + ) elif ci_avg_time_isel is not None: - time_dim2 = 'time' + '_avg' - x_dim2 = 'x' + time_dim2 = "time" + "_avg" + x_dim2 = "x" self.coords[time_dim2] = ( (time_dim2,), - self['time'].isel(**{ - 'time': ci_avg_time_isel}).data) - self[label + '_avgsec'] = ( - ('x', time_dim2), - self[label].isel(**{ - 'time': ci_avg_time_isel}).data) - self[label + '_mc_set'] = ( - ('mc', 'x', time_dim2), - self[label - + '_mc_set'].isel(**{ - 'time': ci_avg_time_isel}).data) + self["time"].isel(**{"time": ci_avg_time_isel}).data, + ) + self[label + "_avgsec"] = ( + ("x", time_dim2), + self[label].isel(**{"time": ci_avg_time_isel}).data, + ) + self[label + "_mc_set"] = ( + ("mc", "x", time_dim2), + self[label + "_mc_set"].isel(**{"time": ci_avg_time_isel}).data, + ) elif ci_avg_x_sel is not None: - time_dim2 = 'time' - x_dim2 = 'x_avg' - self.coords[x_dim2] = ( - (x_dim2,), self.x.sel(x=ci_avg_x_sel).data) - self[label + '_avgsec'] = ( - (x_dim2, 'time'), self[label].sel(x=ci_avg_x_sel).data) - self[label + '_mc_set'] = ( - ('mc', x_dim2, 'time'), - self[label + '_mc_set'].sel(x=ci_avg_x_sel).data) + time_dim2 = "time" + x_dim2 = "x_avg" + self.coords[x_dim2] = ((x_dim2,), self.x.sel(x=ci_avg_x_sel).data) + self[label + "_avgsec"] = ( + (x_dim2, "time"), + self[label].sel(x=ci_avg_x_sel).data, + ) + self[label + "_mc_set"] = ( + ("mc", x_dim2, "time"), + self[label + "_mc_set"].sel(x=ci_avg_x_sel).data, + ) elif ci_avg_x_isel is not None: - time_dim2 = 'time' - x_dim2 = 'x_avg' - self.coords[x_dim2] = ( - (x_dim2,), self.x.isel(x=ci_avg_x_isel).data) - self[label + '_avgsec'] = ( + time_dim2 = "time" + x_dim2 = "x_avg" + self.coords[x_dim2] = ((x_dim2,), self.x.isel(x=ci_avg_x_isel).data) + self[label + "_avgsec"] = ( (x_dim2, time_dim2), - self[label].isel(x=ci_avg_x_isel).data) - self[label + '_mc_set'] = ( - ('mc', x_dim2, time_dim2), - self[label + '_mc_set'].isel(x=ci_avg_x_isel).data) + self[label].isel(x=ci_avg_x_isel).data, + ) + self[label + "_mc_set"] = ( + ("mc", x_dim2, time_dim2), + self[label + "_mc_set"].isel(x=ci_avg_x_isel).data, + ) else: - self[label + '_avgsec'] = self[label] - x_dim2 = 'x' - time_dim2 = 'time' + self[label + "_avgsec"] = self[label] + x_dim2 = 
"x" + time_dim2 = "time" - memchunk = self[label + '_mc_set'].chunks + memchunk = self[label + "_mc_set"].chunks # subtract the mean temperature - q = self[label + '_mc_set'] - self[label + '_avgsec'] - self[label + '_mc' + '_avgsec_var'] = ( - q.var(dim='mc', ddof=1)) + q = self[label + "_mc_set"] - self[label + "_avgsec"] + self[label + "_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) if ci_avg_x_flag1: # unweighted mean - self[label + '_avgx1'] = self[label - + '_avgsec'].mean(dim=x_dim2) + self[label + "_avgx1"] = self[label + "_avgsec"].mean(dim=x_dim2) - q = self[label + '_mc_set'] - self[label + '_avgsec'] - qvar = q.var(dim=['mc', x_dim2], ddof=1) - self[label + '_mc_avgx1_var'] = qvar + q = self[label + "_mc_set"] - self[label + "_avgsec"] + qvar = q.var(dim=["mc", x_dim2], ddof=1) + self[label + "_mc_avgx1_var"] = qvar if conf_ints: - new_chunks = ( - len(conf_ints), self[label + '_mc_set'].chunks[2]) - avg_axis = self[label + '_mc_set'].get_axis_num( - ['mc', x_dim2]) - q = self[label + '_mc_set'].data.map_blocks( + new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[2]) + avg_axis = self[label + "_mc_set"].get_axis_num(["mc", x_dim2]) + q = self[label + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks, # drop_axis=avg_axis, # avg dimensions are dropped from input arr - new_axis=0) # The new CI dim is added as firsaxis + new_axis=0, + ) # The new CI dim is added as firsaxis - self[label + '_mc_avgx1'] = (('CI', time_dim2), q) + self[label + "_mc_avgx1"] = (("CI", time_dim2), q) if ci_avg_x_flag2: - q = self[label + '_mc_set'] - self[label + '_avgsec'] + q = self[label + "_mc_set"] - self[label + "_avgsec"] - qvar = q.var(dim=['mc'], ddof=1) + qvar = q.var(dim=["mc"], ddof=1) # Inverse-variance weighting avg_x_var = 1 / (1 / qvar).sum(dim=x_dim2) - self[label + '_mc_avgx2_var'] = avg_x_var + self[label + "_mc_avgx2_var"] = avg_x_var - self[label - + '_mc_avgx2_set'] = (self[label + '_mc_set'] - / qvar).sum(dim=x_dim2) * avg_x_var - self[label + '_avgx2'] = self[label + '_mc_avgx2_set'].mean( - dim='mc') + self[label + "_mc_avgx2_set"] = (self[label + "_mc_set"] / qvar).sum( + dim=x_dim2 + ) * avg_x_var + self[label + "_avgx2"] = self[label + "_mc_avgx2_set"].mean(dim="mc") if conf_ints: - new_chunks = ( - len(conf_ints), self[label + '_mc_set'].chunks[2]) - avg_axis_avgx = self[label + '_mc_set'].get_axis_num('mc') + new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[2]) + avg_axis_avgx = self[label + "_mc_set"].get_axis_num("mc") - qq = self[label + '_mc_avgx2_set'].data.map_blocks( - lambda x: np.percentile( - x, q=conf_ints, axis=avg_axis_avgx), + qq = self[label + "_mc_avgx2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx), chunks=new_chunks, # drop_axis=avg_axis_avgx, # avg dimensions are dropped from input arr new_axis=0, - dtype=float) # The new CI dimension is added as + dtype=float, + ) # The new CI dimension is added as # firsaxis - self[label + '_mc_avgx2'] = (('CI', time_dim2), qq) + self[label + "_mc_avgx2"] = (("CI", time_dim2), qq) if ci_avg_time_flag1 is not None: # unweighted mean - self[label + '_avg1'] = self[label - + '_avgsec'].mean(dim=time_dim2) + self[label + "_avg1"] = self[label + "_avgsec"].mean(dim=time_dim2) - q = self[label + '_mc_set'] - self[label + '_avgsec'] - qvar = q.var(dim=['mc', time_dim2], ddof=1) - self[label + '_mc_avg1_var'] = qvar + q = self[label + "_mc_set"] - self[label + "_avgsec"] + qvar = q.var(dim=["mc", time_dim2], ddof=1) 
+ self[label + "_mc_avg1_var"] = qvar if conf_ints: - new_chunks = ( - len(conf_ints), self[label + '_mc_set'].chunks[1]) - avg_axis = self[label + '_mc_set'].get_axis_num( - ['mc', time_dim2]) - q = self[label + '_mc_set'].data.map_blocks( + new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[1]) + avg_axis = self[label + "_mc_set"].get_axis_num(["mc", time_dim2]) + q = self[label + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks, # drop_axis=avg_axis, # avg dimensions are dropped from input arr - new_axis=0) # The new CI dim is added as firsaxis + new_axis=0, + ) # The new CI dim is added as firsaxis - self[label + '_mc_avg1'] = (('CI', x_dim2), q) + self[label + "_mc_avg1"] = (("CI", x_dim2), q) if ci_avg_time_flag2: - q = self[label + '_mc_set'] - self[label + '_avgsec'] + q = self[label + "_mc_set"] - self[label + "_avgsec"] - qvar = q.var(dim=['mc'], ddof=1) + qvar = q.var(dim=["mc"], ddof=1) # Inverse-variance weighting avg_time_var = 1 / (1 / qvar).sum(dim=time_dim2) - self[label + '_mc_avg2_var'] = avg_time_var + self[label + "_mc_avg2_var"] = avg_time_var - self[label - + '_mc_avg2_set'] = (self[label + '_mc_set'] / qvar).sum( - dim=time_dim2) * avg_time_var - self[label + '_avg2'] = self[label - + '_mc_avg2_set'].mean(dim='mc') + self[label + "_mc_avg2_set"] = (self[label + "_mc_set"] / qvar).sum( + dim=time_dim2 + ) * avg_time_var + self[label + "_avg2"] = self[label + "_mc_avg2_set"].mean(dim="mc") if conf_ints: - new_chunks = ( - len(conf_ints), self[label + '_mc_set'].chunks[1]) - avg_axis_avg2 = self[label + '_mc_set'].get_axis_num('mc') + new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[1]) + avg_axis_avg2 = self[label + "_mc_set"].get_axis_num("mc") - qq = self[label + '_mc_avg2_set'].data.map_blocks( - lambda x: np.percentile( - x, q=conf_ints, axis=avg_axis_avg2), + qq = self[label + "_mc_avg2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), chunks=new_chunks, # drop_axis=avg_axis_avg2, # avg dimensions are dropped from input arr new_axis=0, - dtype=float) # The new CI dimension is added as + dtype=float, + ) # The new CI dimension is added as # firsaxis - self[label + '_mc_avg2'] = (('CI', x_dim2), qq) + self[label + "_mc_avg2"] = (("CI", x_dim2), qq) # Weighted mean of the forward and backward tmpw_var = 1 / ( - 1 / self['tmpf_mc' + '_avgsec_var'] - + 1 / self['tmpb_mc' + '_avgsec_var']) + 1 / self["tmpf_mc" + "_avgsec_var"] + 1 / self["tmpb_mc" + "_avgsec_var"] + ) q = ( - self['tmpf_mc_set'] - / self['tmpf_mc' + '_avgsec_var'] - + self['tmpb_mc_set'] - / self['tmpb_mc' + '_avgsec_var']) * tmpw_var + self["tmpf_mc_set"] / self["tmpf_mc" + "_avgsec_var"] + + self["tmpb_mc_set"] / self["tmpb_mc" + "_avgsec_var"] + ) * tmpw_var - self["tmpw" + '_mc_set'] = q # + self["tmpw" + "_mc_set"] = q # # self["tmpw"] = self["tmpw" + '_mc_set'].mean(dim='mc') - self["tmpw" + '_avgsec'] = \ - (self['tmpf_avgsec'] / - self['tmpf_mc' + '_avgsec_var'] + - self['tmpb_avgsec'] / - self['tmpb_mc' + '_avgsec_var'] - ) * tmpw_var + self["tmpw" + "_avgsec"] = ( + self["tmpf_avgsec"] / self["tmpf_mc" + "_avgsec_var"] + + self["tmpb_avgsec"] / self["tmpb_mc" + "_avgsec_var"] + ) * tmpw_var - q = self["tmpw" + '_mc_set'] - self["tmpw" + '_avgsec'] - self["tmpw" + '_mc' + '_avgsec_var'] = q.var( - dim='mc', ddof=1) + q = self["tmpw" + "_mc_set"] - self["tmpw" + "_avgsec"] + self["tmpw" + "_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) if ci_avg_time_flag1: - self["tmpw" + '_avg1'] = 
\ - self["tmpw" + '_avgsec'].mean(dim=time_dim2) + self["tmpw" + "_avg1"] = self["tmpw" + "_avgsec"].mean(dim=time_dim2) - self["tmpw" + '_mc_avg1_var'] = \ - self["tmpw" + '_mc_set'].var(dim=['mc', time_dim2]) + self["tmpw" + "_mc_avg1_var"] = self["tmpw" + "_mc_set"].var( + dim=["mc", time_dim2] + ) if conf_ints: new_chunks_weighted = ((len(conf_ints),),) + (memchunk[1],) - avg_axis = self["tmpw" + '_mc_set'].get_axis_num( - ['mc', time_dim2]) - q2 = self["tmpw" + '_mc_set'].data.map_blocks( + avg_axis = self["tmpw" + "_mc_set"].get_axis_num(["mc", time_dim2]) + q2 = self["tmpw" + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks_weighted, # Explicitly define output chunks drop_axis=avg_axis, # avg dimensions are dropped new_axis=0, - dtype=float) # The new CI dimension is added as + dtype=float, + ) # The new CI dimension is added as # first axis - self["tmpw" + '_mc_avg1'] = (('CI', x_dim2), q2) + self["tmpw" + "_mc_avg1"] = (("CI", x_dim2), q2) if ci_avg_time_flag2: tmpw_var_avg2 = 1 / ( - 1 / self['tmpf_mc_avg2_var'] - + 1 / self['tmpb_mc_avg2_var']) + 1 / self["tmpf_mc_avg2_var"] + 1 / self["tmpb_mc_avg2_var"] + ) - q = (self['tmpf_mc_avg2_set'] / - self['tmpf_mc_avg2_var'] + - self['tmpb_mc_avg2_set'] / - self['tmpb_mc_avg2_var']) * \ - tmpw_var_avg2 + q = ( + self["tmpf_mc_avg2_set"] / self["tmpf_mc_avg2_var"] + + self["tmpb_mc_avg2_set"] / self["tmpb_mc_avg2_var"] + ) * tmpw_var_avg2 - self["tmpw" + '_mc_avg2_set'] = q # + self["tmpw" + "_mc_avg2_set"] = q # - self["tmpw" + '_avg2'] = \ - (self['tmpf_avg2'] / - self['tmpf_mc_avg2_var'] + - self['tmpb_avg2'] / - self['tmpb_mc_avg2_var'] - ) * tmpw_var_avg2 + self["tmpw" + "_avg2"] = ( + self["tmpf_avg2"] / self["tmpf_mc_avg2_var"] + + self["tmpb_avg2"] / self["tmpb_mc_avg2_var"] + ) * tmpw_var_avg2 - self["tmpw" + '_mc_avg2_var'] = \ - tmpw_var_avg2 + self["tmpw" + "_mc_avg2_var"] = tmpw_var_avg2 if conf_ints: # We first need to know the x-dim-chunk-size new_chunks_weighted = ((len(conf_ints),),) + (memchunk[1],) - avg_axis_avg2 = self["tmpw" - + '_mc_avg2_set'].get_axis_num('mc') - q2 = self["tmpw" + '_mc_avg2_set'].data.map_blocks( - lambda x: np.percentile( - x, q=conf_ints, axis=avg_axis_avg2), + avg_axis_avg2 = self["tmpw" + "_mc_avg2_set"].get_axis_num("mc") + q2 = self["tmpw" + "_mc_avg2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), chunks=new_chunks_weighted, # Explicitly define output chunks drop_axis=avg_axis_avg2, # avg dimensions are dropped new_axis=0, - dtype=float) # The new CI dimension is added as firstax - self["tmpw" + '_mc_avg2'] = (('CI', x_dim2), q2) + dtype=float, + ) # The new CI dimension is added as firstax + self["tmpw" + "_mc_avg2"] = (("CI", x_dim2), q2) if ci_avg_x_flag1: - self["tmpw" + '_avgx1'] = \ - self["tmpw" + '_avgsec'].mean(dim=x_dim2) + self["tmpw" + "_avgx1"] = self["tmpw" + "_avgsec"].mean(dim=x_dim2) - self["tmpw" + '_mc_avgx1_var'] = \ - self["tmpw" + '_mc_set'].var(dim=x_dim2) + self["tmpw" + "_mc_avgx1_var"] = self["tmpw" + "_mc_set"].var(dim=x_dim2) if conf_ints: new_chunks_weighted = ((len(conf_ints),),) + (memchunk[2],) - avg_axis = self["tmpw" + '_mc_set'].get_axis_num( - ['mc', x_dim2]) - q2 = self["tmpw" + '_mc_set'].data.map_blocks( + avg_axis = self["tmpw" + "_mc_set"].get_axis_num(["mc", x_dim2]) + q2 = self["tmpw" + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks_weighted, # Explicitly define output chunks drop_axis=avg_axis, # avg 
dimensions are dropped new_axis=0, - dtype=float) # The new CI dimension is added as + dtype=float, + ) # The new CI dimension is added as # first axis - self["tmpw" + '_mc_avgx1'] = (('CI', time_dim2), q2) + self["tmpw" + "_mc_avgx1"] = (("CI", time_dim2), q2) if ci_avg_x_flag2: tmpw_var_avgx2 = 1 / ( - 1 / self['tmpf_mc_avgx2_var'] - + 1 / self['tmpb_mc_avgx2_var']) + 1 / self["tmpf_mc_avgx2_var"] + 1 / self["tmpb_mc_avgx2_var"] + ) - q = (self['tmpf_mc_avgx2_set'] / - self['tmpf_mc_avgx2_var'] + - self['tmpb_mc_avgx2_set'] / - self['tmpb_mc_avgx2_var']) * \ - tmpw_var_avgx2 + q = ( + self["tmpf_mc_avgx2_set"] / self["tmpf_mc_avgx2_var"] + + self["tmpb_mc_avgx2_set"] / self["tmpb_mc_avgx2_var"] + ) * tmpw_var_avgx2 - self["tmpw" + '_mc_avgx2_set'] = q # + self["tmpw" + "_mc_avgx2_set"] = q # - self["tmpw" + '_avgx2'] = \ - (self['tmpf_avgx2'] / - self['tmpf_mc_avgx2_var'] + - self['tmpb_avgx2'] / - self['tmpb_mc_avgx2_var'] - ) * tmpw_var_avgx2 + self["tmpw" + "_avgx2"] = ( + self["tmpf_avgx2"] / self["tmpf_mc_avgx2_var"] + + self["tmpb_avgx2"] / self["tmpb_mc_avgx2_var"] + ) * tmpw_var_avgx2 - self["tmpw" + '_mc_avgx2_var'] = \ - tmpw_var_avgx2 + self["tmpw" + "_mc_avgx2_var"] = tmpw_var_avgx2 if conf_ints: # We first need to know the x-dim-chunk-size new_chunks_weighted = ((len(conf_ints),),) + (memchunk[2],) - avg_axis_avgx2 = self["tmpw" - + '_mc_avgx2_set'].get_axis_num('mc') - q2 = self["tmpw" + '_mc_avgx2_set'].data.map_blocks( - lambda x: np.percentile( - x, q=conf_ints, axis=avg_axis_avgx2), + avg_axis_avgx2 = self["tmpw" + "_mc_avgx2_set"].get_axis_num("mc") + q2 = self["tmpw" + "_mc_avgx2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx2), chunks=new_chunks_weighted, # Explicitly define output chunks drop_axis=avg_axis_avgx2, # avg dimensions are dropped new_axis=0, - dtype=float) # The new CI dimension is added as firstax - self["tmpw" + '_mc_avgx2'] = (('CI', time_dim2), q2) + dtype=float, + ) # The new CI dimension is added as firstax + self["tmpw" + "_mc_avgx2"] = (("CI", time_dim2), q2) # Clean up the garbage. All arrays with a Monte Carlo dimension. 
if mc_remove_set_flag:
remove_mc_set = [
- 'r_st', 'r_ast', 'r_rst', 'r_rast', 'gamma_mc', 'alpha_mc',
- 'df_mc', 'db_mc', 'x_avg', 'time_avg', 'mc']
+ "r_st",
+ "r_ast",
+ "r_rst",
+ "r_rast",
+ "gamma_mc",
+ "alpha_mc",
+ "df_mc",
+ "db_mc",
+ "x_avg",
+ "time_avg",
+ "mc",
+ ]

for i in ["tmpf", "tmpb", "tmpw"]:
- remove_mc_set.append(i + '_avgsec')
- remove_mc_set.append(i + '_mc_set')
- remove_mc_set.append(i + '_mc_avg2_set')
- remove_mc_set.append(i + '_mc_avgx2_set')
- remove_mc_set.append(i + '_mc_avgsec_var')
+ remove_mc_set.append(i + "_avgsec")
+ remove_mc_set.append(i + "_mc_set")
+ remove_mc_set.append(i + "_mc_avg2_set")
+ remove_mc_set.append(i + "_mc_avgx2_set")
+ remove_mc_set.append(i + "_mc_avgsec_var")

if self.trans_att.size:
remove_mc_set.append("talpha_fw_mc")
@@ -4997,34 +5146,33 @@ def temperature_residuals(self, label=None, sections=None):
The residuals as DataArray
"""
resid_temp = self.ufunc_per_section(
- sections=sections, label=label, temp_err=True, calc_per='all')
- resid_x = self.ufunc_per_section(
- sections=sections, label='x', calc_per='all')
+ sections=sections, label=label, temp_err=True, calc_per="all"
+ )
+ resid_x = self.ufunc_per_section(sections=sections, label="x", calc_per="all")

- resid_ix = np.array(
- [np.argmin(np.abs(ai - self.x.data)) for ai in resid_x])
+ resid_ix = np.array([np.argmin(np.abs(ai - self.x.data)) for ai in resid_x])

resid_sorted = np.full(shape=self[label].shape, fill_value=np.nan)
resid_sorted[resid_ix, :] = resid_temp
resid_da = xr.DataArray(
data=resid_sorted,
- dims=('x', 'time'),
- coords={
- 'x': self.x,
- 'time': self.time})
+ dims=("x", "time"),
+ coords={"x": self.x, "time": self.time},
+ )

return resid_da

def ufunc_per_section(
- self,
- sections=None,
- func=None,
- label=None,
- subtract_from_label=None,
- temp_err=False,
- x_indices=False,
- ref_temp_broadcasted=False,
- calc_per='stretch',
- **func_kwargs):
+ self,
+ sections=None,
+ func=None,
+ label=None,
+ subtract_from_label=None,
+ temp_err=False,
+ x_indices=False,
+ ref_temp_broadcasted=False,
+ calc_per="stretch",
+ **func_kwargs,
+ ):
"""
User function applied to parts of the cable. 
Super useful, many options and slightly
@@ -5154,7 +5302,7 @@ def func(a):
"""
return a

- elif isinstance(func, str) and func == 'var':
+ elif isinstance(func, str) and func == "var":

def func(a):
"""
@@ -5172,12 +5320,15 @@ def func(a):
else:
assert callable(func)

- assert calc_per in ['all', 'section', 'stretch']
+ assert calc_per in ["all", "section", "stretch"]

- if not x_indices and \
- ((label and hasattr(self[label].data, 'chunks')) or
- (subtract_from_label and hasattr(self[subtract_from_label].data,
- 'chunks'))):
+ if not x_indices and (
+ (label and hasattr(self[label].data, "chunks"))
+ or (
+ subtract_from_label
+ and hasattr(self[subtract_from_label].data, "chunks")
+ )
+ ):
concat = da.concatenate
else:
concat = np.concatenate

@@ -5192,10 +5343,9 @@ def func(a):
assert not temp_err
assert not ref_temp_broadcasted
# so it is sliceable with x-indices
- self['_x_indices'] = self.x.astype(int) * 0 + \
- np.arange(self.x.size)
- arg1 = self['_x_indices'].sel(x=stretch).data
- del self['_x_indices']
+ self["_x_indices"] = self.x.astype(int) * 0 + np.arange(self.x.size)
+ arg1 = self["_x_indices"].sel(x=stretch).data
+ del self["_x_indices"]
else:
arg1 = self[label].sel(x=stretch).data

@@ -5227,32 +5377,32 @@ def func(a):
# calculate std wrt mean value
out[k].append(arg1)

- if calc_per == 'stretch':
+ if calc_per == "stretch":
out[k] = [func(argi, **func_kwargs) for argi in out[k]]

- elif calc_per == 'section':
+ elif calc_per == "section":
# flatten the out_dict to sort them
start = [i.start for i in section]
i_sorted = np.argsort(start)
out_flat_sort = [out[k][i] for i in i_sorted]
out[k] = func(concat(out_flat_sort), **func_kwargs)

- if calc_per == 'all':
+ if calc_per == "all":
# flatten the out_dict to sort them
- start = [
- item.start
- for sublist in sections.values()
- for item in sublist]
+ start = [item.start for sublist in sections.values() for item in sublist]
i_sorted = np.argsort(start)
out_flat = [item for sublist in out.values() for item in sublist]
out_flat_sort = [out_flat[i] for i in i_sorted]
out = func(concat(out_flat_sort, axis=0), **func_kwargs)

- if (hasattr(out, 'chunks') and len(out.chunks) > 0 and
- 'x' in self[label].dims):
+ if (
+ hasattr(out, "chunks")
+ and len(out.chunks) > 0
+ and "x" in self[label].dims
+ ):
# also sum the chunksize in the x dimension
# first find out where the x dim is
- ixdim = self[label].dims.index('x')
+ ixdim = self[label].dims.index("x")
c_old = out.chunks
c_new = list(c_old)
c_new[ixdim] = sum(c_old[ixdim])
@@ -5261,9 +5411,7 @@ def func(a):
return out

def resample_datastore(*args, **kwargs):
- raise "ds.resample_datastore() is deprecated. Use " \
- "from dtscalibration import DataStore; DataStore(ds.resample()) " \
- "instead. See example notebook 2."
+ raise NotImplementedError(
+ "ds.resample_datastore() is deprecated. Use "
+ "from dtscalibration import DataStore; DataStore(ds.resample()) "
+ "instead. See example notebook 2."
+ )
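The calc_per switch in ufunc_per_section above selects whether func is applied to each reference stretch, to each section, or once over all reference sections combined. A minimal usage sketch, not part of the diff, assuming a DataStore named ds whose reference sections have already been defined:

import numpy as np

# variance of the Stokes signal, computed separately for each reference stretch
var_per_stretch = ds.ufunc_per_section(label="st", func=np.var, calc_per="stretch")

# the same reduction applied once over all reference sections together
var_all = ds.ufunc_per_section(label="st", func=np.var, calc_per="all")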
class ParameterIndexDoubleEnded: @@ -5297,13 +5445,9 @@ def __init__(self, nt, nx, nta, fix_gamma=False, fix_alpha=False): @property def all(self): - return np.concatenate(( - self.gamma, - self.df, - self.db, - self.alpha, - self.ta.flatten(order='F') - )) + return np.concatenate( + (self.gamma, self.df, self.db, self.alpha, self.ta.flatten(order="F")) + ) @property def npar(self): @@ -5486,7 +5630,9 @@ class ParameterIndexSingleEnded: """ def __init__(self, nt, nx, nta, includes_alpha=False, includes_dalpha=True): - assert not (includes_alpha and includes_dalpha), "Cannot hold both dalpha and alpha" + assert not ( + includes_alpha and includes_dalpha + ), "Cannot hold both dalpha and alpha" self.nt = nt self.nx = nx self.nta = nta @@ -5495,13 +5641,9 @@ def __init__(self, nt, nx, nta, includes_alpha=False, includes_dalpha=True): @property def all(self): - return np.concatenate(( - self.gamma, - self.dalpha, - self.alpha, - self.c, - self.ta.flatten(order='F') - )) + return np.concatenate( + (self.gamma, self.dalpha, self.alpha, self.c, self.ta.flatten(order="F")) + ) @property def npar(self): @@ -5545,14 +5687,17 @@ def taf(self): # ta = p_val[-nt * nta:].reshape((nt, nta), order='F') # self["talpha"] = (('time', 'trans_att'), ta[:, :]) if self.includes_alpha: - return np.arange(1 + self.nx + self.nt, 1 + self.nx + self.nt + self.nt * self.nta - ).reshape((self.nt, self.nta), order='F') + return np.arange( + 1 + self.nx + self.nt, 1 + self.nx + self.nt + self.nt * self.nta + ).reshape((self.nt, self.nta), order="F") elif self.includes_dalpha: - return np.arange(1 + 1 + self.nt, 1 + 1 + self.nt + self.nt * self.nta - ).reshape((self.nt, self.nta), order='F') + return np.arange( + 1 + 1 + self.nt, 1 + 1 + self.nt + self.nt * self.nta + ).reshape((self.nt, self.nta), order="F") else: - return np.arange(1 + self.nt, 1 + self.nt + self.nt * self.nta - ).reshape((self.nt, self.nta), order='F') + return np.arange(1 + self.nt, 1 + self.nt + self.nt * self.nta).reshape( + (self.nt, self.nta), order="F" + ) def get_taf_pars(self, pval): if self.nta > 0: @@ -5582,7 +5727,10 @@ def get_taf_values(self, pval, x, trans_att, axis=""): pval = pval.flatten() assert pval.shape == (self.npar,) arr = self.get_taf_pars(pval) - assert arr.shape == (self.nta, self.nt, ) + assert arr.shape == ( + self.nta, + self.nt, + ) for tai, taxi in zip(arr, trans_att): arr_out[x >= taxi] += tai @@ -5609,21 +5757,22 @@ def get_taf_values(self, pval, x, trans_att, axis=""): def open_datastore( - filename_or_obj, - group=None, - decode_cf=True, - mask_and_scale=None, - decode_times=True, - concat_characters=True, - decode_coords=True, - engine=None, - chunks=None, - lock=None, - cache=None, - drop_variables=None, - backend_kwargs=None, - load_in_memory=False, - **kwargs): + filename_or_obj, + group=None, + decode_cf=True, + mask_and_scale=None, + decode_times=True, + concat_characters=True, + decode_coords=True, + engine=None, + chunks=None, + lock=None, + cache=None, + drop_variables=None, + backend_kwargs=None, + load_in_memory=False, + **kwargs, +): """Load and decode a datastore from a file or file-like object. 
Parameters ---------- @@ -5709,17 +5858,26 @@ def open_datastore( chunks = {} with xr.open_dataset( - filename_or_obj, group=group, decode_cf=decode_cf, - mask_and_scale=mask_and_scale, decode_times=decode_times, - concat_characters=concat_characters, decode_coords=decode_coords, - engine=engine, chunks=chunks, lock=lock, cache=cache, - drop_variables=drop_variables, - backend_kwargs=backend_kwargs) as ds_xr: + filename_or_obj, + group=group, + decode_cf=decode_cf, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + engine=engine, + chunks=chunks, + lock=lock, + cache=cache, + drop_variables=drop_variables, + backend_kwargs=backend_kwargs, + ) as ds_xr: ds = DataStore( data_vars=ds_xr.data_vars, coords=ds_xr.coords, attrs=ds_xr.attrs, - **ds_kwargs) + **ds_kwargs, + ) # to support deprecated st_labels ds = ds.rename_labels(assertion=False) @@ -5734,11 +5892,8 @@ def open_datastore( def open_mf_datastore( - path=None, - paths=None, - combine='by_coords', - load_in_memory=False, - **kwargs): + path=None, paths=None, combine="by_coords", load_in_memory=False, **kwargs +): """ Open a datastore from multiple netCDF files. This script assumes the datastore was split along the time dimension. But only variables with a @@ -5764,11 +5919,10 @@ def open_mf_datastore( if paths is None: paths = sorted(glob.glob(path)) - assert paths, 'No files match found with: ' + path + assert paths, "No files match found with: " + path with open_mfdataset(paths=paths, combine=combine, **kwargs) as xds: - ds = DataStore( - data_vars=xds.data_vars, coords=xds.coords, attrs=xds.attrs) + ds = DataStore(data_vars=xds.data_vars, coords=xds.coords, attrs=xds.attrs) # to support deprecated st_labels ds = ds.rename_labels(assertion=False) @@ -5783,14 +5937,15 @@ def open_mf_datastore( def read_silixa_files( - filepathlist=None, - directory=None, - zip_handle=None, - file_ext='*.xml', - timezone_netcdf='UTC', - silent=False, - load_in_memory='auto', - **kwargs): + filepathlist=None, + directory=None, + zip_handle=None, + file_ext="*.xml", + timezone_netcdf="UTC", + silent=False, + load_in_memory="auto", + **kwargs, +): """Read a folder with measurement files. Each measurement file contains values for a single timestep. Remember to check which timezone you are working in. @@ -5820,26 +5975,25 @@ def read_silixa_files( The newly created datastore. 
""" - assert 'timezone_input_files' not in kwargs, 'The silixa files are ' \ - 'already timezone aware' + assert "timezone_input_files" not in kwargs, ( + "The silixa files are " "already timezone aware" + ) if filepathlist is None and zip_handle is None: filepathlist = sorted(glob.glob(os.path.join(directory, file_ext))) # Make sure that the list of files contains any files - assert len( - filepathlist) >= 1, 'No measurement files found in provided ' \ - 'directory: \n' + \ - str(directory) + assert ( + len(filepathlist) >= 1 + ), "No measurement files found in provided " "directory: \n" + str(directory) elif filepathlist is None and zip_handle: - filepathlist = ziphandle_to_filepathlist( - fh=zip_handle, extension=file_ext) + filepathlist = ziphandle_to_filepathlist(fh=zip_handle, extension=file_ext) # Make sure that the list of files contains any files - assert len( - filepathlist) >= 1, 'No measurement files found in provided ' \ - 'list/directory' + assert len(filepathlist) >= 1, ( + "No measurement files found in provided " "list/directory" + ) xml_version = silixa_xml_version_check(filepathlist) @@ -5848,7 +6002,8 @@ def read_silixa_files( filepathlist, timezone_netcdf=timezone_netcdf, silent=silent, - load_in_memory=load_in_memory) + load_in_memory=load_in_memory, + ) elif xml_version in (6, 7, 8): data_vars, coords, attrs = read_silixa_files_routine_v6( @@ -5856,18 +6011,19 @@ def read_silixa_files( xml_version=xml_version, timezone_netcdf=timezone_netcdf, silent=silent, - load_in_memory=load_in_memory) + load_in_memory=load_in_memory, + ) else: raise NotImplementedError( - 'Silixa xml version ' + '{0} not implemented'.format(xml_version)) + "Silixa xml version " + "{0} not implemented".format(xml_version) + ) ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) return ds -def read_sensortran_files( - directory, timezone_netcdf='UTC', silent=False, **kwargs): +def read_sensortran_files(directory, timezone_netcdf="UTC", silent=False, **kwargs): """Read a folder with measurement files. Each measurement file contains values for a single timestep. Remember to check which timezone you are working in. @@ -5891,23 +6047,21 @@ def read_sensortran_files( The newly created datastore. 
""" - filepathlist_dts = sorted( - glob.glob(os.path.join(directory, '*BinaryRawDTS.dat'))) + filepathlist_dts = sorted(glob.glob(os.path.join(directory, "*BinaryRawDTS.dat"))) # Make sure that the list of files contains any files - assert len( - filepathlist_dts) >= 1, 'No RawDTS measurement files found ' \ - 'in provided directory: \n' + \ - str(directory) + assert ( + len(filepathlist_dts) >= 1 + ), "No RawDTS measurement files found " "in provided directory: \n" + str(directory) - filepathlist_temp = [f.replace('RawDTS', 'Temp') for f in filepathlist_dts] + filepathlist_temp = [f.replace("RawDTS", "Temp") for f in filepathlist_dts] for ii, fname in enumerate(filepathlist_dts): # Check if corresponding temperature file exists if not os.path.isfile(filepathlist_temp[ii]): raise FileNotFoundError( - 'Could not find BinaryTemp ' - + 'file corresponding to {}'.format(fname)) + "Could not find BinaryTemp " + "file corresponding to {}".format(fname) + ) version = sensortran_binary_version_check(filepathlist_dts) @@ -5916,25 +6070,27 @@ def read_sensortran_files( filepathlist_dts, filepathlist_temp, timezone_netcdf=timezone_netcdf, - silent=silent) + silent=silent, + ) else: raise NotImplementedError( - 'Sensortran binary version ' - + '{0} not implemented'.format(version)) + "Sensortran binary version " + "{0} not implemented".format(version) + ) ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) return ds def read_apsensing_files( - filepathlist=None, - directory=None, - file_ext='*.xml', - timezone_netcdf='UTC', - timezone_input_files='UTC', - silent=False, - load_in_memory='auto', - **kwargs): + filepathlist=None, + directory=None, + file_ext="*.xml", + timezone_netcdf="UTC", + timezone_input_files="UTC", + silent=False, + load_in_memory="auto", + **kwargs, +): """Read a folder with measurement files. Each measurement file contains values for a single timestep. Remember to check which timezone you are working in. @@ -5969,57 +6125,59 @@ def read_apsensing_files( datastore : DataStore The newly created datastore. 
""" - if not file_ext == '*.xml': - raise NotImplementedError('Only .xml files are supported for now') + if not file_ext == "*.xml": + raise NotImplementedError("Only .xml files are supported for now") if filepathlist is None: filepathlist = sorted(glob.glob(os.path.join(directory, file_ext))) # Make sure that the list of files contains any files - assert len( - filepathlist) >= 1, 'No measurement files found in provided ' \ - 'directory: \n' + \ - str(directory) + assert ( + len(filepathlist) >= 1 + ), "No measurement files found in provided " "directory: \n" + str(directory) # Make sure that the list of files contains any files - assert len( - filepathlist) >= 1, 'No measurement files found in provided ' \ - 'list/directory' + assert len(filepathlist) >= 1, ( + "No measurement files found in provided " "list/directory" + ) device = apsensing_xml_version_check(filepathlist) - valid_devices = ['CP320'] + valid_devices = ["CP320"] if device in valid_devices: pass else: warnings.warn( - 'AP sensing device ' + "AP sensing device " '"{0}"'.format(device) - + ' has not been tested.\nPlease open an issue on github' - + ' and provide an example file') + + " has not been tested.\nPlease open an issue on github" + + " and provide an example file" + ) data_vars, coords, attrs = read_apsensing_files_routine( filepathlist, timezone_netcdf=timezone_netcdf, silent=silent, - load_in_memory=load_in_memory) + load_in_memory=load_in_memory, + ) ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) return ds def read_sensornet_files( - filepathlist=None, - directory=None, - file_ext='*.ddf', - timezone_netcdf='UTC', - timezone_input_files='UTC', - silent=False, - add_internal_fiber_length=50., - fiber_length=None, - **kwargs): + filepathlist=None, + directory=None, + file_ext="*.ddf", + timezone_netcdf="UTC", + timezone_input_files="UTC", + silent=False, + add_internal_fiber_length=50.0, + fiber_length=None, + **kwargs, +): """Read a folder with measurement files. Each measurement file contains values for a single timestep. Remember to check which timezone you are working in. @@ -6065,41 +6223,44 @@ def read_sensornet_files( if filepathlist is None: # Also look for files in sub-folders filepathlist_unsorted = glob.glob( - os.path.join(directory, '**', file_ext), recursive=True) + os.path.join(directory, "**", file_ext), recursive=True + ) # Make sure that the list of files contains any files - msg = 'No measurement files found in provided directory: \n' + str( - directory) + msg = "No measurement files found in provided directory: \n" + str(directory) assert len(filepathlist_unsorted) >= 1, msg # sort based on dates in filesname. 
A simple sorted() is not sufficient
# as month folders do not sort well
basenames = [os.path.basename(fp) for fp in filepathlist_unsorted]
- dates = [''.join(bn.split(' ')[2:4]) for bn in basenames]
+ dates = ["".join(bn.split(" ")[2:4]) for bn in basenames]

i_sort = np.argsort(dates)
filepathlist = [filepathlist_unsorted[i] for i in i_sort]

# Check measurements are all from the same channel
- chno = [bn.split(' ')[1] for bn in basenames]
- assert len(
- set(chno)
- ) == 1, 'Folder contains measurements from multiple channels'
+ chno = [bn.split(" ")[1] for bn in basenames]
+ assert (
+ len(set(chno)) == 1
+ ), "Folder contains measurements from multiple channels"

# Make sure that the list of files contains any files
- assert len(
- filepathlist) >= 1, 'No measurement files found in provided ' \
- 'list/directory'
+ assert len(filepathlist) >= 1, (
+ "No measurement files found in provided " "list/directory"
+ )

ddf_version = sensornet_ddf_version_check(filepathlist)

valid_versions = [
- 'Halo DTS v1*', 'ORYX F/W v1.02 Oryx Data Collector v3*',
- 'ORYX F/W v4.00 Oryx Data Collector v3*', "Sentinel DTS v5*"]
+ "Halo DTS v1*",
+ "ORYX F/W v1.02 Oryx Data Collector v3*",
+ "ORYX F/W v4.00 Oryx Data Collector v3*",
+ "Sentinel DTS v5*",
+ ]

valid = any([fnmatch.fnmatch(ddf_version, v_) for v_ in valid_versions])

if valid:
- if fnmatch.fnmatch(ddf_version, 'Halo DTS v1*'):
+ if fnmatch.fnmatch(ddf_version, "Halo DTS v1*"):
flip_reverse_measurements = True
elif fnmatch.fnmatch(ddf_version, "Sentinel DTS v5*"):
flip_reverse_measurements = True
@@ -6109,10 +6270,11 @@ def read_sensornet_files(
else:
flip_reverse_measurements = False
warnings.warn(
- 'Sensornet .dff version '
+ "Sensornet .ddf version "
'"{0}"'.format(ddf_version)
- + ' has not been tested.\nPlease open an issue on github'
- + ' and provide an example file')
+ + " has not been tested.\nPlease open an issue on github"
+ + " and provide an example file"
+ )

data_vars, coords, attrs = read_sensornet_files_routine_v3(
filepathlist,
@@ -6121,7 +6283,8 @@ def read_sensornet_files(
silent=silent,
add_internal_fiber_length=add_internal_fiber_length,
fiber_length=fiber_length,
- flip_reverse_measurements=flip_reverse_measurements)
+ flip_reverse_measurements=flip_reverse_measurements,
+ )

ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs)
return ds
@@ -6133,4 +6296,4 @@ def func_fit(p, xs):

def func_cost(p, data, xs):
fit = func_fit(p, xs)
- return np.sum((fit - data)**2)
+ return np.sum((fit - data) ** 2)
diff --git a/src/dtscalibration/datastore_utils.py b/src/dtscalibration/datastore_utils.py
index b0918e60..7156017c 100644
--- a/src/dtscalibration/datastore_utils.py
+++ b/src/dtscalibration/datastore_utils.py
@@ -23,15 +23,21 @@ def check_dims(ds, labels, correct_dims=None):
"""
if not correct_dims:
- assert len(labels) > 1, 'Define the correct dimensions'
+ assert len(labels) > 1, "Define the correct dimensions"

for li in labels[1:]:
- assert ds[labels[0]].dims == ds[li].dims, li + ' doesnot have the correct dimensions.' \
- ' Should be ' + str(ds[labels[0]].dims)
+ assert (
+ ds[labels[0]].dims == ds[li].dims
+ ), li + " does not have the correct dimensions." " Should be " + str(
+ ds[labels[0]].dims
+ )
else:
for li in labels:
- assert ds[li].dims == correct_dims, li + ' doesnot have the correct dimensions. ' \
- 'Should be ' + str(correct_dims)
+ assert (
+ ds[li].dims == correct_dims
+ ), li + " does not have the correct dimensions. " "Should be " + str(
+ correct_dims
+ )

def get_netcdf_encoding(ds, zlib=True, complevel=5, **kwargs):
@@ -75,22 +81,24 @@ check_timestep_allclose(ds, eps=0.01):
-------

"""
- dim = ds.channel_configuration['chfw']['acquisitiontime_label']
+ dim = ds.channel_configuration["chfw"]["acquisitiontime_label"]
dt = ds[dim].data
dtmin = dt.min()
dtmax = dt.max()
dtavg = (dtmin + dtmax) / 2
- assert (dtmax - dtmin) / dtavg < eps, 'Acquisition time is Forward channel not equal for all ' \
- 'time steps'
+ assert (dtmax - dtmin) / dtavg < eps, (
+ "Acquisition time of the forward channel is not equal for all " "time steps"
+ )

if ds.is_double_ended:
- dim = ds.channel_configuration['chbw']['acquisitiontime_label']
+ dim = ds.channel_configuration["chbw"]["acquisitiontime_label"]
dt = ds[dim].data
dtmin = dt.min()
dtmax = dt.max()
dtavg = (dtmin + dtmax) / 2
- assert (dtmax - dtmin) / dtavg < eps, 'Acquisition time Backward channel is not equal ' \
- 'for all time steps'
+ assert (dtmax - dtmin) / dtavg < eps, (
+ "Acquisition time of the backward channel is not equal " "for all time steps"
+ )

def merge_double_ended(ds_fw, ds_bw, cable_length, plot_result=True, verbose=True):
@@ -119,38 +127,36 @@ def merge_double_ended(ds_fw, ds_bw, cable_length, plot_result=True, verbose=Tru
ds : DataStore object
With the two channels merged
"""
- assert (ds_fw.attrs['isDoubleEnded'] == '0'
- and ds_bw.attrs['isDoubleEnded'] == '0'), \
- "(one of the) input DataStores is already double ended"
+ assert (
+ ds_fw.attrs["isDoubleEnded"] == "0" and ds_bw.attrs["isDoubleEnded"] == "0"
+ ), "(one of the) input DataStores is already double ended"

ds_fw, ds_bw = merge_double_ended_times(ds_fw, ds_bw, verbose=verbose)

ds = ds_fw.copy()
ds_bw = ds_bw.copy()
- ds_bw['x'] = cable_length - ds_bw.x.values
+ ds_bw["x"] = cable_length - ds_bw.x.values

# TODO: check if reindexing matters, and should be used. 
# one way to do it is performed below, but this could create artifacts x_resolution = ds.x.values[1] - ds.x.values[0] - ds_bw = ds_bw.reindex( - {'x': ds.x}, method='nearest', tolerance=0.99 * x_resolution) + ds_bw = ds_bw.reindex({"x": ds.x}, method="nearest", tolerance=0.99 * x_resolution) - ds_bw = ds_bw.sortby('x') + ds_bw = ds_bw.sortby("x") - ds['rst'] = (['x', 'time'], ds_bw.st.values) - ds['rast'] = (['x', 'time'], ds_bw.ast.values) + ds["rst"] = (["x", "time"], ds_bw.st.values) + ds["rast"] = (["x", "time"], ds_bw.ast.values) - ds = ds.dropna(dim='x') + ds = ds.dropna(dim="x") - ds.attrs['isDoubleEnded'] = '1' - ds['userAcquisitionTimeBW'] = ( - 'time', ds_bw['userAcquisitionTimeFW'].values) + ds.attrs["isDoubleEnded"] = "1" + ds["userAcquisitionTimeBW"] = ("time", ds_bw["userAcquisitionTimeFW"].values) if plot_result: _, ax = plt.subplots() - ds['st'].isel(time=0).plot(ax=ax, label='Stokes forward') - ds['rst'].isel(time=0).plot(ax=ax, label='Stokes backward') + ds["st"].isel(time=0).plot(ax=ax, label="Stokes forward") + ds["rst"].isel(time=0).plot(ax=ax, label="Stokes backward") ax.legend() return ds @@ -203,17 +209,28 @@ def merge_double_ended_times(ds_fw, ds_bw, verify_timedeltas=True, verbose=True) DataStore object representing the backward measurement channel with only times for which there is also a ds_fw measurement """ - if 'forward channel' in ds_fw.attrs and 'forward channel' in ds_bw.attrs: - assert ds_fw.attrs['forward channel'] < ds_bw.attrs['forward channel'], "ds_fw and ds_bw are swapped" - elif 'forwardMeasurementChannel' in ds_fw.attrs and 'forwardMeasurementChannel' in ds_bw.attrs: - assert ds_fw.attrs['forwardMeasurementChannel'] < ds_bw.attrs['forwardMeasurementChannel'], \ - "ds_fw and ds_bw are swapped" + if "forward channel" in ds_fw.attrs and "forward channel" in ds_bw.attrs: + assert ( + ds_fw.attrs["forward channel"] < ds_bw.attrs["forward channel"] + ), "ds_fw and ds_bw are swapped" + elif ( + "forwardMeasurementChannel" in ds_fw.attrs + and "forwardMeasurementChannel" in ds_bw.attrs + ): + assert ( + ds_fw.attrs["forwardMeasurementChannel"] + < ds_bw.attrs["forwardMeasurementChannel"] + ), "ds_fw and ds_bw are swapped" # Are all dt's within 1.5 seconds from one another? - if (ds_bw.time.size == ds_fw.time.size) and np.all(ds_bw.time.values > ds_fw.time.values): + if (ds_bw.time.size == ds_fw.time.size) and np.all( + ds_bw.time.values > ds_fw.time.values + ): if verify_timedeltas: - dt_ori = (ds_bw.time.values - ds_fw.time.values) / np.array(1000000000, dtype='timedelta64[ns]') - dt_all_close = np.allclose(dt_ori, dt_ori[0], atol=1.5, rtol=0.) 
+            dt_ori = (ds_bw.time.values - ds_fw.time.values) / np.array(
+                1000000000, dtype="timedelta64[ns]"
+            )
+            dt_all_close = np.allclose(dt_ori, dt_ori[0], atol=1.5, rtol=0.0)
        else:
            dt_all_close = True
@@ -228,7 +245,9 @@ def merge_double_ended_times(ds_fw, ds_bw, verify_timedeltas=True, verbose=True)
     times_all = dict(sorted(({**times_fw, **times_bw}).items()))
     times_all_val = list(times_all.values())

-    for (direction, ind), (direction_next, ind_next) in zip(times_all_val[:-1], times_all_val[1:]):
+    for (direction, ind), (direction_next, ind_next) in zip(
+        times_all_val[:-1], times_all_val[1:]
+    ):
         if direction == "fw" and direction_next == "bw":
             iuse_chfw.append(ind)
             iuse_chbw.append(ind_next)
@@ -238,29 +257,39 @@ def merge_double_ended_times(ds_fw, ds_bw, verify_timedeltas=True, verbose=True)

         elif direction == "fw" and direction_next == "fw":
             if verbose:
-                print(f"Missing backward measurement beween {ds_fw.time.values[ind]} and {ds_fw.time.values[ind_next]}")
+                print(
+                    f"Missing backward measurement between {ds_fw.time.values[ind]} and {ds_fw.time.values[ind_next]}"
+                )
         elif direction == "bw" and direction_next == "bw":
             if verbose:
-                print(f"Missing forward measurement beween {ds_bw.time.values[ind]} and {ds_bw.time.values[ind_next]}")
+                print(
+                    f"Missing forward measurement between {ds_bw.time.values[ind]} and {ds_bw.time.values[ind_next]}"
+                )

    # throw out is dt differs from its neighbors
    if verify_timedeltas:
        dt = (
-            (ds_bw.isel(time=iuse_chbw).time.values - ds_fw.isel(time=iuse_chfw).time.values) /
-            np.timedelta64(1, "s"))
+            ds_bw.isel(time=iuse_chbw).time.values
+            - ds_fw.isel(time=iuse_chfw).time.values
+        ) / np.timedelta64(1, "s")
        leaveout = np.zeros_like(dt, dtype=bool)
-        leaveout[1:-1] = np.isclose(dt[:-2], dt[2:], atol=1.5, rtol=0.) * ~np.isclose(dt[:-2], dt[1:-1], atol=1.5, rtol=0.)
+        leaveout[1:-1] = np.isclose(dt[:-2], dt[2:], atol=1.5, rtol=0.0) * ~np.isclose(
+            dt[:-2], dt[1:-1], atol=1.5, rtol=0.0
+        )

        iuse_chfw2 = np.array(iuse_chfw)[~leaveout]
        iuse_chbw2 = np.array(iuse_chbw)[~leaveout]

        if verbose:
-            for itfw, itbw in zip(np.array(iuse_chfw)[leaveout], np.array(iuse_chbw)[leaveout]):
+            for itfw, itbw in zip(
+                np.array(iuse_chfw)[leaveout], np.array(iuse_chbw)[leaveout]
+            ):
                print(
                    "The following measurements do not belong together, as the time difference\n"
                    "between the\forward and backward measurements is more than 1.5 seconds\n"
                    "larger than the neighboring measurements.\n"
-                    f"FW: {ds_fw.isel(time=itfw).time.values} and BW: {ds_bw.isel(time=itbw).time.values}")
+                    f"FW: {ds_fw.isel(time=itfw).time.values} and BW: {ds_bw.isel(time=itbw).time.values}"
+                )
    else:
        iuse_chfw2 = iuse_chfw
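The interleaving walk above pairs each forward shot with the backward shot that directly follows it in time. A self-contained sketch of that pairing on invented timestamps, mirroring the times_fw/times_bw dictionaries and the zip over consecutive entries (the construction of the dictionaries themselves is not shown in the hunk above and is assumed here):

    import numpy as np

    # Invented timestamps: fw and bw shots alternate, as the instrument records them.
    t_fw = np.array(["2020-01-01T00:00:00", "2020-01-01T00:02:00"], dtype="datetime64[ns]")
    t_bw = np.array(["2020-01-01T00:01:00", "2020-01-01T00:03:00"], dtype="datetime64[ns]")

    times_fw = {t: ("fw", i) for i, t in enumerate(t_fw)}
    times_bw = {t: ("bw", i) for i, t in enumerate(t_bw)}
    # Merge both channels and sort chronologically, like times_all above.
    times_all_val = list(dict(sorted({**times_fw, **times_bw}.items())).values())

    iuse_chfw, iuse_chbw = [], []
    for (direction, ind), (direction_next, ind_next) in zip(
        times_all_val[:-1], times_all_val[1:]
    ):
        if direction == "fw" and direction_next == "bw":
            iuse_chfw.append(ind)
            iuse_chbw.append(ind_next)

    print(iuse_chfw, iuse_chbw)  # -> [0, 1] [0, 1]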
@@ -329,27 +358,26 @@ def shift_double_ended(ds, i_shift, verbose=True):
     # TMP2 = ds.tmp.data[i_shift:]

     d2_coords = dict(ds.coords)
-    d2_coords['x'] = (('x',), x2, ds.x.attrs)
+    d2_coords["x"] = (("x",), x2, ds.x.attrs)

     d2_data = dict(ds.data_vars)
     for k in ds.data_vars:
-        if 'x' in ds[k].dims and k in d2_data:
+        if "x" in ds[k].dims and k in d2_data:
             del d2_data[k]

-    new_data = (('st', st), ('ast', ast), ('rst', rst), ('rast', rast))
+    new_data = (("st", st), ("ast", ast), ("rst", rst), ("rast", rast))

     for k, v in new_data:
         d2_data[k] = (ds[k].dims, v, ds[k].attrs)

     not_included = [k for k in ds.data_vars if k not in d2_data]
-    if (not_included and verbose):
-        print('I dont know what to do with the following data', not_included)
+    if not_included and verbose:
+        print("I don't know what to do with the following data", not_included)

     return DataStore(data_vars=d2_data, coords=d2_coords, attrs=ds.attrs)
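A usage sketch tying the two alignment helpers together (hedged: `ds` is assumed to be a merged double-ended DataStore, e.g. from merge_double_ended above, and the function defined below is assumed to return the two suggested shifts it computes as ishift1 and ishift2):

    import numpy as np
    from dtscalibration import shift_double_ended, suggest_cable_shift_double_ended

    irange = np.arange(-10, 10)  # candidate shifts, in number of x-samples
    ishift1, ishift2 = suggest_cable_shift_double_ended(ds, irange, plot_result=True)
    # Apply the shift favoured by the first-derivative criterion.
    ds_aligned = shift_double_ended(ds, ishift1)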


-def suggest_cable_shift_double_ended(
-        ds, irange, plot_result=True, **fig_kwargs):
+def suggest_cable_shift_double_ended(ds, irange, plot_result=True, **fig_kwargs):
     """The cable length was initially configured during the DTS measurement. For double ended
     measurements it is important to enter the correct length so that the forward channel and the
     backward channel are aligned.
@@ -421,12 +449,12 @@ def suggest_cable_shift_double_ended(
         att_dif1 = np.diff(att, n=1, axis=0)
         att_x_dif1 = 0.5 * x2[1:] + 0.5 * x2[:-1]
-        err1_mask = np.logical_and(att_x_dif1 > 1., att_x_dif1 < 150.)
+        err1_mask = np.logical_and(att_x_dif1 > 1.0, att_x_dif1 < 150.0)
         err1.append(np.nansum(np.abs(att_dif1[err1_mask])))

         att_dif2 = np.diff(att, n=2, axis=0)
         att_x_dif2 = x2[1:-1]
-        err2_mask = np.logical_and(att_x_dif2 > 1., att_x_dif2 < 150.)
+        err2_mask = np.logical_and(att_x_dif2 > 1.0, att_x_dif2 < 150.0)
         err2.append(np.nansum(np.abs(att_dif2[err2_mask])))

     ishift1 = irange[np.argmin(err1, axis=0)]
@@ -437,14 +465,14 @@ def suggest_cable_shift_double_ended(
             fig_kwargs = {}

         f, (ax0, ax1) = plt.subplots(2, 1, sharex=False, **fig_kwargs)
-        f.suptitle(f'best shift is {ishift1} or {ishift2}')
+        f.suptitle(f"best shift is {ishift1} or {ishift2}")

         dt = ds.isel(time=0)
         x = dt.x.data
         y = dt.st.data
-        ax0.plot(x, y, label='ST original')
+        ax0.plot(x, y, label="ST original")
         y = dt.rst.data
-        ax0.plot(x, y, label='REV-ST original')
+        ax0.plot(x, y, label="REV-ST original")

         dtsh1 = shift_double_ended(dt, ishift1)
         dtsh2 = shift_double_ended(dt, ishift2)
@@ -452,19 +480,21 @@ def suggest_cable_shift_double_ended(
         x2 = dtsh2.x.data
         y1 = dtsh1.rst.data
         y2 = dtsh2.rst.data
-        ax0.plot(x1, y1, label=f'ST i_shift={ishift1}')
-        ax0.plot(x2, y2, label=f'ST i_shift={ishift2}')
-        ax0.set_xlabel('x (m)')
+        ax0.plot(x1, y1, label=f"ST i_shift={ishift1}")
+        ax0.plot(x2, y2, label=f"ST i_shift={ishift2}")
+        ax0.set_xlabel("x (m)")
         ax0.legend()

         ax2 = ax1.twinx()
-        ax1.plot(irange, err1, c='red', label='1 deriv')
-        ax2.plot(irange, err2, c='blue', label='2 deriv')
+        ax1.plot(irange, err1, c="red", label="1 deriv")
+        ax2.plot(irange, err2, c="blue", label="2 deriv")
         ax1.axvline(
-            ishift1, c='red', linewidth=0.8, label=f'1 deriv. i_shift={ishift1}')
+            ishift1, c="red", linewidth=0.8, label=f"1 deriv. i_shift={ishift1}"
+        )
         ax2.axvline(
-            ishift2, c='blue', linewidth=0.8, label=f'2 deriv. i_shift={ishift1}')
-        ax1.set_xlabel('i_shift')
+            ishift2, c="blue", linewidth=0.8, label=f"2 deriv. i_shift={ishift2}"
+        )
+        ax1.set_xlabel("i_shift")
         ax1.legend(loc=2)  # left axis
         ax2.legend(loc=1)  # right axis
diff --git a/src/dtscalibration/io.py b/src/dtscalibration/io.py
index 1a7130c2..db10c5bd 100644
--- a/src/dtscalibration/io.py
+++ b/src/dtscalibration/io.py
@@ -13,125 +13,117 @@

 # The keys refer to the naming used in the raw files.
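The attribute table that opens this file is keyed by tuples of aliases and is flattened further down into one entry per alias with a dict comprehension. A minimal sketch of that expansion (the two sample entries are invented; the comprehension itself matches the `dim_attrs = {k: v for kl, v in _dim_attrs.items() for k in kl}` line below):

    # Minimal sketch of the tuple-key flattening applied to _dim_attrs below.
    _attrs = {
        ("x", "distance"): {"name": "distance", "units": "m"},
        ("st",): {"name": "st", "units": "-"},
    }
    attrs_flat = {k: v for kl, v in _attrs.items() for k in kl}

    assert attrs_flat["x"] is attrs_flat["distance"]  # aliases share one attrs dict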
# TODO: attrs for st_var and use it in parse_st_var function _dim_attrs = { - ('x', 'distance'): - dict( - name='distance', - description='Length along fiber', - long_description='Starting at connector of forward channel', - units='m'), - ('tmp', 'temperature'): - dict( - name='tmp', - description='Temperature calibrated by device', - units=r'$^\circ$C'), - ('st',): dict(name='st', description='Stokes intensity', units='-'), - ('ast',): dict(name='ast', description='anti-Stokes intensity', units='-'), - ('rst',): - dict(name='rst', description='reverse Stokes intensity', units='-'), - ('rast',): - dict( - name='rast', - description='reverse anti-Stokes intensity', - units='-'), - ('tmpf',): - dict( - name='tmpf', - description='Temperature estimated using the Stokes and anti-Stokes from the Forward channel', - units=r'$^\circ$C'), - ('tmpb',): - dict( - name='tmpb', - description='Temperature estimated using the Stokes and anti-Stokes from the Backward channel', - units=r'$^\circ$C'), - ('tmpw',): - dict( - name='tmpw', - description='Temperature estimated using the Stokes and anti-Stokes from both the Forward and Backward channel.', - units=r'$^\circ$C'), - ('tmpf_var',): - dict( - name='tmpf_var', - description='Uncertainty variance in tmpf estimated with linear-error propagation', - units=r'$^\circ$C$^2$'), - ('tmpb_var',): - dict( - name='tmpb_var', - description='Uncertainty variance in tmpb estimated with linear-error propagation', - units=r'$^\circ$C$^2$'), - ('tmpw_var',): - dict( - name='tmpw_var', - description='Uncertainty variance in tmpw estimated with linear-error propagation', - units=r'$^\circ$C$^2$'), - ('tmpw_var_lower',): - dict( - name='tmpw_var_lower', - description='Lower bound of uncertainty variance in tmpw estimated with linear-error propagation. ' - 'Excludes the parameter uncertainties.', - units=r'$^\circ$C$^2$'), - ('tmpw_var_approx',): - dict( - name='tmpw_var_upper', - description='Upper bound of uncertainty variance in tmpw estimated with linear-error propagation. 
' - 'Excludes the correlation between tmpf and tmpb caused by alpha.', - units=r'$^\circ$C$^2$'), - ('tmpf_mc_var',): - dict( - name='tmpf_mc_var', - description='Uncertainty variance in tmpf estimated with Monte Carlo', - units=r'$^\circ$C$^2$'), - ('tmpb_mc_var',): - dict( - name='tmpb_mc_var', - description='Uncertainty variance in tmpb estimated with Monte Carlo', - units=r'$^\circ$C$^2$'), - ('tmpw_mc_var',): - dict( - name='tmpw_mc_var', - description='Uncertainty variance in tmpw estimated with Monte Carlo', - units=r'$^\circ$C$^2$'), - ('acquisitionTime',): - dict( - name='acquisitionTime', - description='Measurement duration of forward channel', - long_description='Actual measurement duration of forward ' - 'channel', - units='seconds'), - ('userAcquisitionTimeFW',): - dict( - name='userAcquisitionTimeFW', - description='Measurement duration of forward channel', - long_description='Desired measurement duration of forward ' - 'channel', - units='seconds'), - ('userAcquisitionTimeBW',): - dict( - name='userAcquisitionTimeBW', - description='Measurement duration of backward channel', - long_description='Desired measurement duration of backward ' - 'channel', - units='seconds'), - ('trans_att',): - dict( - name='Locations introducing transient directional differential ' - 'attenuation', - description='Locations along the x-dimension introducing transient ' - 'directional differential attenuation', - long_description='Connectors introduce additional differential ' - 'attenuation that is different for the forward ' - 'and backward direction, and varies over time.', - units='m')} + ("x", "distance"): dict( + name="distance", + description="Length along fiber", + long_description="Starting at connector of forward channel", + units="m", + ), + ("tmp", "temperature"): dict( + name="tmp", description="Temperature calibrated by device", units=r"$^\circ$C" + ), + ("st",): dict(name="st", description="Stokes intensity", units="-"), + ("ast",): dict(name="ast", description="anti-Stokes intensity", units="-"), + ("rst",): dict(name="rst", description="reverse Stokes intensity", units="-"), + ("rast",): dict( + name="rast", description="reverse anti-Stokes intensity", units="-" + ), + ("tmpf",): dict( + name="tmpf", + description="Temperature estimated using the Stokes and anti-Stokes from the Forward channel", + units=r"$^\circ$C", + ), + ("tmpb",): dict( + name="tmpb", + description="Temperature estimated using the Stokes and anti-Stokes from the Backward channel", + units=r"$^\circ$C", + ), + ("tmpw",): dict( + name="tmpw", + description="Temperature estimated using the Stokes and anti-Stokes from both the Forward and Backward channel.", + units=r"$^\circ$C", + ), + ("tmpf_var",): dict( + name="tmpf_var", + description="Uncertainty variance in tmpf estimated with linear-error propagation", + units=r"$^\circ$C$^2$", + ), + ("tmpb_var",): dict( + name="tmpb_var", + description="Uncertainty variance in tmpb estimated with linear-error propagation", + units=r"$^\circ$C$^2$", + ), + ("tmpw_var",): dict( + name="tmpw_var", + description="Uncertainty variance in tmpw estimated with linear-error propagation", + units=r"$^\circ$C$^2$", + ), + ("tmpw_var_lower",): dict( + name="tmpw_var_lower", + description="Lower bound of uncertainty variance in tmpw estimated with linear-error propagation. 
" + "Excludes the parameter uncertainties.", + units=r"$^\circ$C$^2$", + ), + ("tmpw_var_approx",): dict( + name="tmpw_var_upper", + description="Upper bound of uncertainty variance in tmpw estimated with linear-error propagation. " + "Excludes the correlation between tmpf and tmpb caused by alpha.", + units=r"$^\circ$C$^2$", + ), + ("tmpf_mc_var",): dict( + name="tmpf_mc_var", + description="Uncertainty variance in tmpf estimated with Monte Carlo", + units=r"$^\circ$C$^2$", + ), + ("tmpb_mc_var",): dict( + name="tmpb_mc_var", + description="Uncertainty variance in tmpb estimated with Monte Carlo", + units=r"$^\circ$C$^2$", + ), + ("tmpw_mc_var",): dict( + name="tmpw_mc_var", + description="Uncertainty variance in tmpw estimated with Monte Carlo", + units=r"$^\circ$C$^2$", + ), + ("acquisitionTime",): dict( + name="acquisitionTime", + description="Measurement duration of forward channel", + long_description="Actual measurement duration of forward " "channel", + units="seconds", + ), + ("userAcquisitionTimeFW",): dict( + name="userAcquisitionTimeFW", + description="Measurement duration of forward channel", + long_description="Desired measurement duration of forward " "channel", + units="seconds", + ), + ("userAcquisitionTimeBW",): dict( + name="userAcquisitionTimeBW", + description="Measurement duration of backward channel", + long_description="Desired measurement duration of backward " "channel", + units="seconds", + ), + ("trans_att",): dict( + name="Locations introducing transient directional differential " "attenuation", + description="Locations along the x-dimension introducing transient " + "directional differential attenuation", + long_description="Connectors introduce additional differential " + "attenuation that is different for the forward " + "and backward direction, and varies over time.", + units="m", + ), +} # Because variations in the names exist between the different file formats. The # tuple as key contains the possible keys, which is expanded below. dim_attrs = {k: v for kl, v in _dim_attrs.items() for k in kl} dim_attrs_apsensing = dict(dim_attrs) -dim_attrs_apsensing['TEMP'] = dim_attrs_apsensing.pop('tmp') -dim_attrs_apsensing['TEMP']['name'] = 'TEMP' -dim_attrs_apsensing.pop('acquisitionTime') -dim_attrs_apsensing.pop('userAcquisitionTimeFW') -dim_attrs_apsensing.pop('userAcquisitionTimeBW') +dim_attrs_apsensing["TEMP"] = dim_attrs_apsensing.pop("tmp") +dim_attrs_apsensing["TEMP"]["name"] = "TEMP" +dim_attrs_apsensing.pop("acquisitionTime") +dim_attrs_apsensing.pop("userAcquisitionTimeFW") +dim_attrs_apsensing.pop("userAcquisitionTimeBW") @contextmanager @@ -160,13 +152,13 @@ def silixa_xml_version_check(filepathlist): """ - sep = ':' + sep = ":" attrs = read_silixa_attrs_singlefile(filepathlist[0], sep) - version_string = attrs['customData:SystemSettings:softwareVersion'] + version_string = attrs["customData:SystemSettings:softwareVersion"] # Get major version from string. 
Tested for Ultima v4, v6, v7 XT-DTS v6
-    major_version = int(version_string.replace(' ', '').split(':')[-1][0])
+    major_version = int(version_string.replace(" ", "").split(":")[-1][0])

     return major_version

@@ -183,10 +175,10 @@

     """
-    sep = ':'
+    sep = ":"
     attrs, _ = read_apsensing_attrs_singlefile(filepathlist[0], sep)

-    return attrs['wellbore:uid']
+    return attrs["wellbore:uid"]


 def sensornet_ddf_version_check(filepathlist):
@@ -205,14 +197,15 @@

     # Obtain metadata fro mthe first file
     _, meta = read_sensornet_single(filepathlist[0])

-    if 'Software version number' in meta.keys():
-        version_string = meta['Software version number']
+    if "Software version number" in meta.keys():
+        version_string = meta["Software version number"]
     else:
         raise ValueError(
-            'Software version number could not be detected in .ddf file'
-            + 'Either file is corrupted or not supported')
+            "Software version number could not be detected in .ddf file. "
+            + "Either the file is corrupted or it is not supported."
+        )

-    ddf_version = version_string.replace(',', '.')
+    ddf_version = version_string.replace(",", ".")

     return ddf_version

@@ -230,19 +223,20 @@

     """
     fname = filepathlist[0]

-    with open(fname, 'rb') as f:
+    with open(fname, "rb") as f:
         f.read(2)
-        version = struct.unpack('<h', f.read(2))[0]
+        version = struct.unpack("<h", f.read(2))[0]

-    assert fiber_length > 0., '`fiber_length` is not defined. Use key' \
-        'word argument in read function.' + \
-        str(fiber_length)
+    assert fiber_length > 0.0, (
+        "`fiber_length` is not defined. Use key"
+        "word argument in read function." + str(fiber_length)
+    )

     fiber_start_index = (np.abs(xraw + add_internal_fiber_length)).argmin()
     fiber_0_index = np.abs(xraw).argmin()
@@ -970,24 +988,21 @@
     fiber_n_indices = fiber_1_index - fiber_0_index
     fiber_n_indices_internal = fiber_0_index - fiber_start_index
     if double_ended_flag:
-        fiber_end_index = np.min(
-            [xraw.size, fiber_1_index + fiber_n_indices_internal])
+        fiber_end_index = np.min([xraw.size, fiber_1_index + fiber_n_indices_internal])
     else:
         fiber_end_index = fiber_1_index

     if double_ended_flag:
         if not flip_reverse_measurements:
             # fiber length how the backward channel is aligned
-            fiber_length_raw = float(meta['fibre end'])
+            fiber_length_raw = float(meta["fibre end"])
             fiber_bw_1_index = np.abs(xraw - fiber_length_raw).argmin()
             fiber_bw_end_index = np.min(
-                [
-                    xraw.size, fiber_bw_1_index +
-                    (fiber_end_index - fiber_1_index)])
+                [xraw.size, fiber_bw_1_index + (fiber_end_index - fiber_1_index)]
+            )
             fiber_bw_start_index = np.max(
-                [
-                    0, fiber_bw_1_index - fiber_n_indices -
-                    fiber_n_indices_internal])
+                [0, fiber_bw_1_index - fiber_n_indices - fiber_n_indices_internal]
+            )

             REV_ST = REV_ST[fiber_bw_start_index:fiber_bw_end_index]
             REV_AST = REV_AST[fiber_bw_start_index:fiber_bw_end_index]
@@ -995,13 +1010,14 @@
         else:
             # Use the fiber indices from the forward channel
             n_indices_internal_left = fiber_0_index - fiber_start_index
-            n_indices_internal_right = np.max(
-                [0, fiber_end_index - fiber_1_index])
+            n_indices_internal_right = np.max([0, fiber_end_index - fiber_1_index])
             n_indices_internal_shortest = np.min(
-                [n_indices_internal_left, n_indices_internal_right])
+                [n_indices_internal_left, n_indices_internal_right]
+            )
             fiber_start_index = fiber_0_index - n_indices_internal_shortest
-            fiber_end_index = fiber_0_index + fiber_n_indices + \
-                n_indices_internal_shortest
+            fiber_end_index = (
+                fiber_0_index + fiber_n_indices + n_indices_internal_shortest
+            )

             REV_ST = REV_ST[fiber_end_index:fiber_start_index:-1]
             REV_AST = REV_AST[fiber_end_index:fiber_start_index:-1]
@@ -1011,83 +1027,102 @@
     AST = AST[fiber_start_index:fiber_end_index]

     data_vars = {
-        'st': (['x', 'time'], ST, dim_attrs['st']),
-        'ast': (['x', 'time'], AST, dim_attrs['ast']),
-        'tmp': (['x', 'time'], TMP, dim_attrs['tmp']),
-        'probe1Temperature':
-            (
-                'time', probe1temperature, {
-                    'name': 'Probe 1 temperature',
-                    'description': 'reference probe 1 '
-                                   'temperature',
-                    'units': r'$^\circ$C'}),
-        'probe2Temperature':
-            (
-                'time', probe2temperature, {
-                    'name': 'Probe 2 temperature',
-                    'description': 'reference probe 2 '
-                                   'temperature',
-                    'units': r'$^\circ$C'}),
-        'referenceTemperature':
-            (
-                'time', referenceTemperature, {
-                    'name': 'reference temperature',
-                    'description': 'Internal reference '
-                                   'temperature',
-                    'units': r'$^\circ$C'}),
-        'gamma_ddf':
-            (
-                'time', gamma_ddf, {
-                    'name': 'gamma ddf',
-                    'description': 'machine '
-                                   'calibrated gamma',
-                    'units': '-'}),
-        'k_internal':
-            (
-                'time', k_internal, {
-                    'name': 'k internal',
-                    'description': 'machine calibrated '
-                                   'internal k',
-                    'units': '-'}),
-        'k_external':
-            (
-                'time', k_external, {
-                    'name': 'reference temperature',
-                    'description': 'machine calibrated '
-                                   'external k',
-                    'units': '-'}),
-        'userAcquisitionTimeFW':
-            ('time', acquisitiontimeFW, dim_attrs['userAcquisitionTimeFW']),
-        'userAcquisitionTimeBW':
-            ('time', acquisitiontimeBW, dim_attrs['userAcquisitionTimeBW'])}
+        "st": (["x", "time"], ST, dim_attrs["st"]),
+        "ast": (["x", "time"], AST, dim_attrs["ast"]),
+        "tmp": (["x", "time"], TMP, dim_attrs["tmp"]),
+        "probe1Temperature": (
+            "time",
+            probe1temperature,
+            {
+                "name": "Probe 1 temperature",
+                "description": "reference probe 1 " "temperature",
+                "units": r"$^\circ$C",
+            },
+        ),
+        "probe2Temperature": (
+            "time",
+            probe2temperature,
+            {
+                "name": "Probe 2 temperature",
+                "description": "reference probe 2 " "temperature",
+                "units": r"$^\circ$C",
+            },
+        ),
+        "referenceTemperature": (
+            "time",
+            referenceTemperature,
+            {
+                "name": "reference temperature",
+                "description": "Internal reference " "temperature",
+                "units": r"$^\circ$C",
+            },
+        ),
+        "gamma_ddf": (
+            "time",
+            gamma_ddf,
+            {
+                "name": "gamma ddf",
+                "description": "machine " "calibrated gamma",
+                "units": "-",
+            },
+        ),
+        "k_internal": (
+            "time",
+            k_internal,
+            {
+                "name": "k internal",
+                "description": "machine calibrated " "internal k",
+                "units": "-",
+            },
+        ),
+        "k_external": (
+            "time",
+            k_external,
+            {
+                "name": "k external",
+                "description": "machine calibrated " "external k",
+                "units": "-",
+            },
+        ),
+        "userAcquisitionTimeFW": (
+            "time",
+            acquisitiontimeFW,
+            dim_attrs["userAcquisitionTimeFW"],
+        ),
+        "userAcquisitionTimeBW": (
+            "time",
+            acquisitiontimeBW,
+            dim_attrs["userAcquisitionTimeBW"],
+        ),
+    }

     if double_ended_flag:
-        data_vars['rst'] = (['x', 'time'], REV_ST, dim_attrs['rst'])
-        data_vars['rast'] = (['x', 'time'], REV_AST, dim_attrs['rast'])
+        data_vars["rst"] = (["x", "time"], REV_ST, dim_attrs["rst"])
+        data_vars["rast"] = (["x", "time"], REV_AST, dim_attrs["rast"])

     filenamelist = [os.path.split(f)[-1] for f in filepathlist]

-    coords = {
-        'x': ('x', x, dim_attrs['x']),
-        'filename': ('time', filenamelist)}
+    coords = {"x": ("x", x, dim_attrs["x"]), "filename": ("time", filenamelist)}

-    dtFW = data_vars['userAcquisitionTimeFW'][1].astype('timedelta64[s]')
-    dtBW = 
data_vars['userAcquisitionTimeBW'][1].astype('timedelta64[s]') + dtFW = data_vars["userAcquisitionTimeFW"][1].astype("timedelta64[s]") + dtBW = data_vars["userAcquisitionTimeBW"][1].astype("timedelta64[s]") if not double_ended_flag: tcoords = coords_time( - np.array(timestamp).astype('datetime64[ns]'), + np.array(timestamp).astype("datetime64[ns]"), timezone_netcdf=timezone_netcdf, timezone_input_files=timezone_input_files, dtFW=dtFW, - double_ended_flag=double_ended_flag) + double_ended_flag=double_ended_flag, + ) else: tcoords = coords_time( - np.array(timestamp).astype('datetime64[ns]'), + np.array(timestamp).astype("datetime64[ns]"), timezone_netcdf=timezone_netcdf, timezone_input_files=timezone_input_files, dtFW=dtFW, dtBW=dtBW, - double_ended_flag=double_ended_flag) + double_ended_flag=double_ended_flag, + ) coords.update(tcoords) @@ -1132,26 +1167,24 @@ def metakey(meta, dict_to_parse, prefix): for key in dict_to_parse: if prefix == "": - - prefix_parse = key.replace('@', '') + prefix_parse = key.replace("@", "") else: - prefix_parse = sep.join([prefix, key.replace('@', '')]) + prefix_parse = sep.join([prefix, key.replace("@", "")]) - if prefix_parse == sep.join(('logData', 'data')): + if prefix_parse == sep.join(("logData", "data")): # skip the LAF , ST data continue - if hasattr(dict_to_parse[key], 'keys'): + if hasattr(dict_to_parse[key], "keys"): # Nested dictionaries, flatten hierarchy. meta.update(metakey(meta, dict_to_parse[key], prefix_parse)) elif isinstance(dict_to_parse[key], list): # if the key has values for the multiple channels for ival, val in enumerate(dict_to_parse[key]): - num_key = prefix_parse + '_' + str(ival) + num_key = prefix_parse + "_" + str(ival) meta.update(metakey(meta, val, num_key)) else: - meta[prefix_parse] = dict_to_parse[key] return meta @@ -1159,19 +1192,17 @@ def metakey(meta, dict_to_parse, prefix): with open_file(filename) as fh: doc_ = xmltodict.parse(fh.read()) - if 'wellLogs' in doc_.keys(): - doc = doc_['wellLogs']['wellLog'] + if "wellLogs" in doc_.keys(): + doc = doc_["wellLogs"]["wellLog"] else: - doc = doc_['logs']['log'] + doc = doc_["logs"]["log"] - return metakey({}, doc, '') + return metakey({}, doc, "") def read_sensortran_files_routine( - filepathlist_dts, - filepathlist_temp, - timezone_netcdf='UTC', - silent=False): + filepathlist_dts, filepathlist_temp, timezone_netcdf="UTC", silent=False +): """ Internal routine that reads sensortran files. Use dtscalibration.read_sensortran_files function instead. @@ -1197,34 +1228,32 @@ def read_sensortran_files_routine( attrs = meta_dts # Add standardised required attributes - attrs['isDoubleEnded'] = '0' + attrs["isDoubleEnded"] = "0" - attrs['forwardMeasurementChannel'] = meta_dts['channel_id'] - 1 - attrs['backwardMeasurementChannel'] = 'N/A' + attrs["forwardMeasurementChannel"] = meta_dts["channel_id"] - 1 + attrs["backwardMeasurementChannel"] = "N/A" # obtain basic data info - nx = meta_temp['num_points'] + nx = meta_temp["num_points"] ntime = len(filepathlist_dts) # print summary if not silent: - print( - '%s files were found,' % ntime - + ' each representing a single timestep') - print('Recorded at %s points along the cable' % nx) + print("%s files were found," % ntime + " each representing a single timestep") + print("Recorded at %s points along the cable" % nx) - print('The measurement is single ended') + print("The measurement is single ended") # Gather data # x has already been read. 
should not change over time
-    x = data_temp['x']
+    x = data_temp["x"]

     # Define all variables
     referenceTemperature = np.zeros(ntime)
     acquisitiontimeFW = np.ones(ntime)

-    timestamp = [''] * ntime
+    timestamp = [""] * ntime
     ST = np.zeros((nx, ntime), dtype=np.int32)
     AST = np.zeros((nx, ntime), dtype=np.int32)
     TMP = np.zeros((nx, ntime))
@@ -1236,73 +1265,92 @@ def read_sensortran_files_routine(
         data_dts, meta_dts = read_sensortran_single(filepathlist_dts[ii])
         data_temp, meta_temp = read_sensortran_single(filepathlist_temp[ii])

-        timestamp[ii] = data_dts['time']
+        timestamp[ii] = data_dts["time"]

-        referenceTemperature[ii] = data_temp['reference_temperature'] - 273.15
+        referenceTemperature[ii] = data_temp["reference_temperature"] - 273.15

-        ST[:, ii] = data_dts['st'][:nx]
-        AST[:, ii] = data_dts['ast'][:nx]
+        ST[:, ii] = data_dts["st"][:nx]
+        AST[:, ii] = data_dts["ast"][:nx]
         # The TMP can vary by 1 or 2 datapoints, dynamically assign the values
-        TMP[:meta_temp['num_points'], ii] = data_temp['tmp'][:nx]
+        TMP[: meta_temp["num_points"], ii] = data_temp["tmp"][:nx]

-        zero_index = (meta_dts['num_points'] - nx) // 2
-        ST_zero[ii] = np.mean(data_dts['st'][nx + zero_index:])
-        AST_zero[ii] = np.mean(data_dts['ast'][nx + zero_index:])
+        zero_index = (meta_dts["num_points"] - nx) // 2
+        ST_zero[ii] = np.mean(data_dts["st"][nx + zero_index :])
+        AST_zero[ii] = np.mean(data_dts["ast"][nx + zero_index :])

     data_vars = {
-        'st': (['x', 'time'], ST, dim_attrs['st']),
-        'ast': (['x', 'time'], AST, dim_attrs['ast']),
-        'tmp':
-            (
-                ['x', 'time'], TMP, {
-                    'name': 'tmp',
-                    'description': 'Temperature calibrated by device',
-                    'units': meta_temp['y_units']}),
-        'referenceTemperature':
-            (
-                'time', referenceTemperature, {
-                    'name': 'reference temperature',
-                    'description': 'Internal reference '
-                                   'temperature',
-                    'units': r'$^\circ$C'}),
-        'st_zero':
-            (
-                ['time'], ST_zero, {
-                    'name': 'ST_zero',
-                    'description': 'Stokes zero count',
-                    'units': meta_dts['y_units']}),
-        'ast_zero':
-            (
-                ['time'], AST_zero, {
-                    'name': 'AST_zero',
-                    'description': 'anit-Stokes zero count',
-                    'units': meta_dts['y_units']}),
-        'userAcquisitionTimeFW':
-            ('time', acquisitiontimeFW, dim_attrs['userAcquisitionTimeFW'])}
+        "st": (["x", "time"], ST, dim_attrs["st"]),
+        "ast": (["x", "time"], AST, dim_attrs["ast"]),
+        "tmp": (
+            ["x", "time"],
+            TMP,
+            {
+                "name": "tmp",
+                "description": "Temperature calibrated by device",
+                "units": meta_temp["y_units"],
+            },
+        ),
+        "referenceTemperature": (
+            "time",
+            referenceTemperature,
+            {
+                "name": "reference temperature",
+                "description": "Internal reference " "temperature",
+                "units": r"$^\circ$C",
+            },
+        ),
+        "st_zero": (
+            ["time"],
+            ST_zero,
+            {
+                "name": "ST_zero",
+                "description": "Stokes zero count",
+                "units": meta_dts["y_units"],
+            },
+        ),
+        "ast_zero": (
+            ["time"],
+            AST_zero,
+            {
+                "name": "AST_zero",
+                "description": "anti-Stokes zero count",
+                "units": meta_dts["y_units"],
+            },
+        ),
+        "userAcquisitionTimeFW": (
+            "time",
+            acquisitiontimeFW,
+            dim_attrs["userAcquisitionTimeFW"],
+        ),
+    }

     filenamelist_dts = [os.path.split(f)[-1] for f in filepathlist_dts]
     filenamelist_temp = [os.path.split(f)[-1] for f in filepathlist_temp]

     coords = {
-        'x':
-            (
-                'x', x, {
-                    'name': 'distance',
-                    'description': 'Length along fiber',
-                    'long_description':
-                        'Starting at connector ' + 'of forward channel',
-                    'units': 'm'}),
-        'filename': ('time', filenamelist_dts),
-        'filename_temp': ('time', filenamelist_temp)}
-
-    dtFW = data_vars['userAcquisitionTimeFW'][1].astype('timedelta64[s]')
+        "x": (
"x", + x, + { + "name": "distance", + "description": "Length along fiber", + "long_description": "Starting at connector " + "of forward channel", + "units": "m", + }, + ), + "filename": ("time", filenamelist_dts), + "filename_temp": ("time", filenamelist_temp), + } + + dtFW = data_vars["userAcquisitionTimeFW"][1].astype("timedelta64[s]") tcoords = coords_time( - np.array(timestamp).astype('datetime64[ns]'), + np.array(timestamp).astype("datetime64[ns]"), timezone_netcdf=timezone_netcdf, - timezone_input_files='UTC', + timezone_input_files="UTC", dtFW=dtFW, - double_ended_flag=False) + double_ended_flag=False, + ) coords.update(tcoords) @@ -1327,54 +1375,51 @@ def read_sensortran_single(fname): meta = {} data = {} - with open(fname, 'rb') as f: - meta['survey_type'] = struct.unpack('