From 2d90428f5e7976ad7f7fc3e2d7a5cab720446662 Mon Sep 17 00:00:00 2001 From: Joshua Bloom Date: Thu, 9 Mar 2023 13:06:20 -0800 Subject: [PATCH] Harmonic oscillation reduction [JSB version] (#313) - Add the ability compute L-S models on normalized data. - Adds periodogram to the outputs (not just the features computed on the periodogram) Completed work by S. Jamal in PR https://github.com/cesium-ml/cesium/pull/285 Co-authored-by: Sara Jamal Co-authored-by: Ari Crellin-Quick --- cesium/features/lomb_scargle.py | 181 +++++++++++++----- .../tests/test_lomb_scargle_features.py | 35 ++++ 2 files changed, 173 insertions(+), 43 deletions(-) diff --git a/cesium/features/lomb_scargle.py b/cesium/features/lomb_scargle.py index f99c3238..4d4d1e6d 100644 --- a/cesium/features/lomb_scargle.py +++ b/cesium/features/lomb_scargle.py @@ -4,11 +4,20 @@ def lomb_scargle_model( - time, signal, error, sys_err=0.05, nharm=8, nfreq=3, tone_control=5.0 + time, + signal, + error, + sys_err=0.05, + nharm=8, + nfreq=3, + tone_control=5.0, + normalize=False, + default_order=1, ): - """Simultaneous fit of a sum of sinusoids by weighted least squares: - y(t) = Sum_k Ck*t^k + Sum_i Sum_j A_ij sin(2*pi*j*fi*(t-t0)+phi_j), - i=[1,nfreq], j=[1,nharm] + """Simultaneous fit of a sum of sinusoids by weighted least squares. + + y(t) = Sum_k Ck*t^k + Sum_i Sum_j A_ij sin(2*pi*j*fi*(t-t0)+phi_j), + i=[1,nfreq], j=[1,nharm] Parameters ---------- @@ -21,11 +30,26 @@ def lomb_scargle_model( error : array_like Array containing measurement error values. - nharm : int - Number of harmonics to fit for each frequency. + sys_err : float, optional + Defaults to 0.05 + + nharm : int, optional + Number of harmonics to fit for each frequency. Defaults to 8. + + nfreq : int, optional + Number of frequencies to fit. Defaults to 3. + + tone_control : float, optional + Defaults to 5.0 + + normalize : boolean, optional + Normalize the timeseries before fitting? This can + help with instabilities seen at large values of `nharm` + Defaults to False. - nfreq : int - Number of frequencies to fit. + detrend_order : int, optional + Order of polynomial to fit to the data while + fitting the dominant frequency. Defaults to 1. Returns ------- @@ -36,13 +60,18 @@ def lomb_scargle_model( fit_lomb_scargle(...) """ - - dy0 = np.sqrt(error**2 + sys_err**2) - - wt = 1.0 / dy0**2 time = time.copy() - min(time) # speeds up lomb_scargle code to have min(time)==0 signal = signal.copy() + # Normalization + mmean = np.nanmean(signal) + mscale = np.nanstd(signal) + if normalize: + signal = (signal - mmean) / mscale + error = error / mscale + + dy0 = np.sqrt(error**2 + sys_err**2) + wt = 1.0 / dy0**2 chi0 = np.dot(signal**2, wt) # TODO parametrize? @@ -57,42 +86,46 @@ def lomb_scargle_model( 8, ] # these numbers "fix" the strange-amplitude effect for i in range(nfreq): + fit = fit_lomb_scargle( + time, + signal, + dy0, + f0, + df, + numf, + tone_control=tone_control, + lambda0_range=lambda0_range, + nharm=nharm, + detrend_order=default_order if i == 0 else 0, + ) if i == 0: - fit = fit_lomb_scargle( - time, - signal, - dy0, - f0, - df, - numf, - tone_control=tone_control, - lambda0_range=lambda0_range, - nharm=nharm, - detrend_order=1, - ) model_dict["trend"] = fit["trend_coef"][1] - else: - fit = fit_lomb_scargle( - time, - signal, - dy0, - f0, - df, - numf, - tone_control=tone_control, - lambda0_range=lambda0_range, - nharm=nharm, - detrend_order=0, - ) - model_dict["freq_fits"].append(fit) - signal -= fit["model"] - model_dict["freq_fits"][-1]["resid"] = signal.copy() + + # remove current model from the (unscaled) signal + # so that the next iteration fits to the residual + norm_residual = signal - fit["model"] + signal = norm_residual + + # store results for the current estimated frequency + # store the rescaled model if user requested normalization + # and it's the first dominant frequency + current_fit = ( + rescale_lomb_model(fit, mmean, mscale) if (normalize & (i == 0)) else fit + ) + model_dict["freq_fits"].append(current_fit) + model_dict["freq_fits"][-1]["resid"] = norm_residual + + if normalize & (i == 0): + model_dict["freq_fits"][-1]["resid"] *= mscale + if i == 0: - model_dict["varrat"] = np.dot(signal**2, wt) / chi0 + model_dict["varrat"] = ( + np.dot(norm_residual**2, wt) / chi0 + ) # no need to rescale model_dict["nfreq"] = nfreq model_dict["nharm"] = nharm - model_dict["chi2"] = fit["chi2"] + model_dict["chi2"] = current_fit["chi2"] model_dict["f0"] = f0 model_dict["df"] = df model_dict["numf"] = numf @@ -100,6 +133,48 @@ def lomb_scargle_model( return model_dict +def rescale_lomb_model(norm_model, mmean, mscale): + """Rescale Lomb-Scargle model if compiled on normalized data. + + Parameters + ---------- + norm_model : dict + Lomb-Scargle model computed on normalized data + + mmean : float64 + mean of input_signal + + mscale : float64 + standard-deviation of input_signal + + Returns + ------- + rescaled_model: dict + rescaled Lomb-Scargle model (in adequacy with the input_signal) + + """ + rescaled_model = norm_model.copy() + # unchanged : 'psd', 'chi0', 'freq', 'chi2', 'trace', 'nu0', 'nu', 'npars', + # 'rel_phase', 'rel_phase_error', 'time0', 'signif' + for param in ["s0", "lambda"]: + rescaled_model[param] /= mscale**2 + for param in [ + "model", + "model_error", + "trend", + "trend_error", + "trend_coef", + "amplitude", + "amplitude_error", + "y_offset", + ]: + rescaled_model[param] *= mscale + for param in ["model", "trend", "trend_coef"]: + rescaled_model[param] += mmean + + return rescaled_model + + def lprob2sigma(lprob): """Translate a log_e(probability) to units of Gaussian sigmas.""" if lprob > -36.0: @@ -131,7 +206,8 @@ def fit_lomb_scargle( lomb_scargle_model in order to produce a fit with multiple distinct frequencies. - Inputs: + Parameters + ---------- time : array_like Array containing time values. @@ -304,6 +380,12 @@ def fit_lomb_scargle( out_dict["nu"] = ntime - npars out_dict["npars"] = npars + allfreqs = [ + f0 + df * ii + (ifreq / freq_zoom - 1 / 2.0) * df for ii in range(len(psd)) + ] + out_dict["freqs_vector"] = np.asarray(allfreqs) + out_dict["psd_vector"] = psd + A0, B0 = soln[0:nharm], soln[nharm:] hat_hat /= np.outer( np.hstack(((1.0 + ii) ** 2, (1.0 + ii) ** 2)), @@ -439,3 +521,16 @@ def get_lomb_trend(lomb_model): def get_lomb_y_offset(lomb_model): """Get the y-intercept of a fitted Lomb-Scargle model.""" return lomb_model["freq_fits"][0]["y_offset"] + + +def get_lomb_psd(lomb_model): + """ + Get the power spectrum of a fitted Lomb-Scargle model per fitted frequency. + """ + dict_psd = {} + for i in range(lomb_model["nfreq"]): + dict_psd[i] = { + "freqs_vector": lomb_model["freq_fits"][i]["freqs_vector"], + "psd_vector": lomb_model["freq_fits"][i]["psd_vector"], + } + return dict_psd diff --git a/cesium/features/tests/test_lomb_scargle_features.py b/cesium/features/tests/test_lomb_scargle_features.py index 90890422..cf70638c 100644 --- a/cesium/features/tests/test_lomb_scargle_features.py +++ b/cesium/features/tests/test_lomb_scargle_features.py @@ -203,6 +203,41 @@ def test_lomb_scargle_regular_multi_freq(): npt.assert_array_less(10.0, all_lomb["freq1_signif"]) +def test_lomb_scargle_irregular_multi_freq_normalize(): + """Use the normalize parameter on irregularly sampled data + and make sure that we still get back the frequencies that + we expect. + """ + frequencies = WAVE_FREQS + amplitudes = np.zeros((len(frequencies), 4)) + amplitudes[:, 0] = [4, 2, 1] + phase = 0.1 + times, values, errors = irregular_periodic(frequencies, amplitudes, phase) + + sys_err = 0.15 + nfreq = len(frequencies) + tone_control = 20.0 + for nharm in range(1, 21): + model_cesium = lomb_scargle.lomb_scargle_model( + times - min(times), + values, + errors, + sys_err=sys_err, + nharm=nharm, + nfreq=nfreq, + tone_control=tone_control, + normalize=True, + ) + for i, frequency in enumerate(frequencies): + npt.assert_allclose( + frequency, model_cesium["freq_fits"][i]["freq"], rtol=1e-1 + ) + # check to see if the power spectrum is returned + assert len(model_cesium["freq_fits"][i]["freqs_vector"]) == len( + model_cesium["freq_fits"][i]["psd_vector"] + ) + + def test_lomb_scargle_irregular_multi_freq(): """Test Lomb-Scargle model features on irregularly-sampled periodic data with multiple frequencies, each with a single harmonic. More difficult than