From 592a624d65b04faad4dbab4fbff6156c04884161 Mon Sep 17 00:00:00 2001 From: Theo Heimel Date: Tue, 2 May 2023 11:28:31 -0400 Subject: [PATCH] make plots look nice, new plots for disc vs binn --- src/__main__.py | 2 +- src/loaders/prec_inn.py | 16 ++-- src/plots.py | 208 +++++++++++++++++++++++++++++++--------- 3 files changed, 172 insertions(+), 54 deletions(-) diff --git a/src/__main__.py b/src/__main__.py index 995dd20..3cd8c00 100644 --- a/src/__main__.py +++ b/src/__main__.py @@ -89,7 +89,7 @@ def main(): print(f" Classifier score: {clf_score:.7f}") print(" Creating plots") - lab_def = ["Comb.", "True", "Gen."] + lab_def = ["Comb", "Truth", "Gen"] labels = params.get('w_labels', lab_def) add_comb = params.get('add_w_comb', True) plots = Plots( diff --git a/src/loaders/prec_inn.py b/src/loaders/prec_inn.py index af15d09..361fc6d 100644 --- a/src/loaders/prec_inn.py +++ b/src/loaders/prec_inn.py @@ -5,6 +5,8 @@ from ..dataset import DiscriminatorData from ..observable import Observable +PARTICLE_NAMES = [r"\mu_1", r"\mu_2", "j_1", "j_2", "j_3"] + def load(params: dict) -> list[DiscriminatorData]: """ Loads the training, test and validation data (truth samples and generated samples) @@ -171,7 +173,7 @@ def compute_observables(true_data: np.ndarray, fake_data: np.ndarray) -> list[Ob Observable( true_data = obs_one_true[i].pt, fake_data = obs_one_fake[i].pt, - tex_label = f"p_{{T,{i}}}", + tex_label = f"p_{{T,{PARTICLE_NAMES[i]}}}", bins = np.linspace( np.min(obs_one_true[i].pt), np.quantile(obs_one_true[i].pt, 0.99), @@ -183,7 +185,7 @@ def compute_observables(true_data: np.ndarray, fake_data: np.ndarray) -> list[Ob Observable( true_data = obs_one_true[i].eta, fake_data = obs_one_fake[i].eta, - tex_label = f"\\eta_{i}", + tex_label = f"\\eta_{{{PARTICLE_NAMES[i]}}}", bins = np.linspace(-6, 6, 50), ) ]) @@ -191,7 +193,7 @@ def compute_observables(true_data: np.ndarray, fake_data: np.ndarray) -> list[Ob observables.append(Observable( true_data = obs_one_true[i].m, fake_data = obs_one_fake[i].m, - tex_label = f"M_{i}", + tex_label = f"M_{{{PARTICLE_NAMES[i]}}}", bins = np.linspace( np.quantile(obs_one_true[i].m, 0.005), np.quantile(obs_one_true[i].m, 0.995), @@ -217,19 +219,19 @@ def compute_observables(true_data: np.ndarray, fake_data: np.ndarray) -> list[Ob Observable( true_data = obs_two_true[(i,j)].delta_r, fake_data = obs_two_fake[(i,j)].delta_r, - tex_label = f"\\Delta R_{{{i},{j}}}", - bins = np.linspace(0, 12, 50), + tex_label = f"\\Delta R_{{{PARTICLE_NAMES[i]},{PARTICLE_NAMES[j]}}}", + bins = np.linspace(0, 8, 50), ), Observable( true_data = obs_two_true[(i,j)].delta_eta, fake_data = obs_two_fake[(i,j)].delta_eta, - tex_label = f"\\Delta \\eta_{{{i},{j}}}", + tex_label = f"\\Delta \\eta_{{{PARTICLE_NAMES[i]},{PARTICLE_NAMES[j]}}}", bins = np.linspace(-10, 10, 50), ), Observable( true_data = obs_two_true[(i,j)].delta_phi, fake_data = obs_two_fake[(i,j)].delta_phi, - tex_label = f"\\Delta \\phi_{{{i},{j}}}", + tex_label = f"\\Delta \\phi_{{{PARTICLE_NAMES[i]},{PARTICLE_NAMES[j]}}}", bins = np.linspace(-np.pi, np.pi, 50), ) ]) diff --git a/src/plots.py b/src/plots.py index 130565c..224cf32 100644 --- a/src/plots.py +++ b/src/plots.py @@ -5,7 +5,7 @@ import matplotlib.pyplot as plt import matplotlib as mpl from matplotlib.backends.backend_pdf import PdfPages -from scipy.stats import lognorm, norm +from scipy.stats import lognorm, norm, binned_statistic from scipy.optimize import curve_fit from .observable import Observable @@ -63,8 +63,8 @@ def __init__( def process_weights(self, weights_true, weights_fake): w_comb = np.concatenate((weights_true, weights_fake), axis=0) - self.p_low = np.percentile(w_comb[w_comb!=0], 0.05) - self.p_high = np.percentile(w_comb[w_comb!=np.inf], 99.95) + self.p_low = np.percentile(w_comb[w_comb!=0], 0.01) + self.p_high = np.percentile(w_comb[w_comb!=np.inf], 99.99) weights_true[weights_true >= self.p_high] = self.p_high weights_fake[weights_fake <= self.p_low] = self.p_low @@ -139,7 +139,7 @@ def plot_single_loss( ax.legend(loc="best", frameon=False, title=self.title) else: self.corner_text(ax, self.title, "right", "top") - plt.savefig(pdf, format="pdf", bbox_inches="tight") + plt.savefig(pdf, format="pdf") plt.close() @@ -169,6 +169,7 @@ def plot_roc(self, file: str): auc = np.trapz(x=fpr, y=tpr, axis=0) fig, ax = plt.subplots(figsize=(4,3.5)) + fig.tight_layout(pad=0.0, w_pad=0.0, h_pad=0.0, rect=(0.11,0.09,1.00,1.00)) if self.bayesian: for i in range(self.weights_fake.shape[1]): ax.plot(fpr[:,i], tpr[:,i], alpha=0.3, color=self.colors[0]) @@ -186,7 +187,7 @@ def plot_roc(self, file: str): "top" ) self.corner_text(ax, self.title, "right", "bottom") - plt.savefig(file, bbox_inches="tight") + plt.savefig(file) plt.close() @@ -209,7 +210,7 @@ def plot_weight_hist(self, file: str): bins=np.linspace(0, 3, 50), xscale="linear", yscale="linear", - secax=True, + secax=False, ) self.plot_single_weight_hist( pdf, @@ -286,7 +287,8 @@ def plot_single_weight_hist( y_combined = np.histogram(weights_combined, bins=bins)[0] y_combined_err = None - fig, ax = plt.subplots(figsize=(4.5, 4.5)) + fig, ax = plt.subplots(figsize=(4, 3.5)) + fig.tight_layout(pad=0.0, w_pad=0.0, h_pad=0.0, rect=(0.11,0.09,1.00,1.00)) if self.add_comb: self.hist_line( ax, @@ -337,7 +339,7 @@ def Dtow(x): if yscale == "linear": ax.set_ylim(bottom=0) ax.legend(frameon=False, title=self.title) - plt.savefig(pdf, format="pdf", bbox_inches="tight") + plt.savefig(pdf, format="pdf") plt.close() @@ -352,8 +354,14 @@ def plot_bgen_weights(self, file: str): assert self.log_gen_weights is not None with PdfPages(file) as pdf: - mean_w_bgen = np.mean(np.exp(self.log_gen_weights), axis=1) - std_w_bgen = np.std(np.exp(self.log_gen_weights), axis=1) + #mean_w_bgen = np.mean(np.exp(self.log_gen_weights), axis=1) + #std_w_bgen = np.std(np.exp(self.log_gen_weights), axis=1) + w_bgen = np.exp(self.log_gen_weights) + mean_w_bgen = np.median(w_bgen, axis=1) + std_w_bgen = ( + np.quantile(w_bgen, 0.841, axis=1) - + np.quantile(w_bgen, 0.159, axis=1) + ) / 2 std_log_w_bgen = np.std(self.log_gen_weights, axis=1) if self.bayesian: mean_w_bgen = np.repeat( @@ -368,60 +376,113 @@ def plot_bgen_weights(self, file: str): w_disc = self.weights_fake.flatten() else: w_disc = self.weights_fake - x_bins = np.linspace(0,4,30) - y_bins = np.linspace(0,3,30) - - fig, ax = plt.subplots(figsize=(4.5,4.0)) + ratio = std_w_bgen / mean_w_bgen + wbinn_bins = np.linspace(0,4,30) + wdisc_bins = np.linspace(0,3,30) + mswbg, _, _ = binned_statistic(w_disc, ratio, "median", wdisc_bins) + mswbg_stds = np.stack([ + binned_statistic(w_disc, ratio, lambda x: np.nanquantile(x, 0.159), wdisc_bins)[0], + binned_statistic(w_disc, ratio, lambda x: np.nanquantile(x, 0.841), wdisc_bins)[0] + ], axis=0) + + fig, ax = plt.subplots(figsize=(4,3.5)) + fig.tight_layout(pad=0.0, w_pad=0.0, h_pad=0.0, rect=(0.11,0.09,1.00,1.00)) _, _, _, img = ax.hist2d( - std_log_w_bgen, + ratio, #std_log_w_bgen, w_disc, - bins=(x_bins, y_bins), + bins=(wbinn_bins, wdisc_bins), rasterized=True, norm = mpl.colors.LogNorm(), density=True, cmap="jet" ) - cb = plt.colorbar(img) - cb.set_label('norm.') - ax.set_xlabel(r"$\sigma(\log p_\text{gen})$") + #cb = plt.colorbar(img) + #cb.set_label('norm.') + ax.set_xlabel(r"$\sigma(p_\text{gen}) / \mu(p_\text{gen})$") ax.set_ylabel(r"$w_\text{disc}$") - plt.savefig(pdf, format="pdf", bbox_inches="tight") + plt.savefig(pdf, format="pdf") + plt.close() + + fig, axs = plt.subplots( + 2, 1, + sharex = True, + figsize = (4, 5), + gridspec_kw = {"height_ratios": (2, 1.5), "hspace": 0.00} + ) + fig.tight_layout(pad=0.0, w_pad=0.0, h_pad=0.0, rect=(0.083,0.07,1.00,0.98)) + _, _, _, img = axs[0].hist2d( + w_disc, + ratio, #std_log_w_bgen, + bins=(wdisc_bins, wbinn_bins), + rasterized=True, + norm = mpl.colors.LogNorm(), + density=True, + cmap="jet" + ) + self.hist_line( + axs[1], + wdisc_bins, + mswbg, + y_err = None, #mswbg_stds, + label = "Shitbull", + color = self.colors[0] + ) + #cb = plt.colorbar(img) + #cb.set_label('norm.') + axs[0].set_ylabel(r"$\sigma(p_\text{gen}) / \mu(p_\text{gen})$") + axs[1].set_ylabel(r"median $\sigma / \mu$") + axs[1].set_xlabel(r"$w_\text{disc}$") + fig.align_ylabels() + self.corner_text(axs[1], self.title, "right", "top", h_offset=0.15) + plt.savefig(pdf, format="pdf") plt.close() fig, ax = plt.subplots(figsize=(4, 3.5)) + fig.tight_layout(pad=0.0, w_pad=0.0, h_pad=0.0, rect=(0.11,0.09,1.00,1.00)) pull = mean_w_bgen * (w_disc - 1) / std_w_bgen - low_lim = np.quantile(pull[np.isfinite(pull)], 0.0001) - high_lim = np.quantile(pull[np.isfinite(pull)], 0.9999) - #lim = max(high_lim, -low_lim) - bins = np.linspace(low_lim, high_lim, 50) + low_lim = np.quantile(pull[np.isfinite(pull)], 0.005) + high_lim = np.quantile(pull[np.isfinite(pull)], 0.995) + lim = max(high_lim, -low_lim) + bins = np.linspace(-lim, lim, 50) y, _ = np.histogram(pull, bins, density=True) self.hist_line( ax, bins, y, y_err = None, - label = "Gen", + label = "Pull", color = self.colors[0] ) x_fit, bins_fit, norm_param = self.fit_norm(bins, y, p0=(y.mean(), y.var())) - ax.plot(bins_fit, x_fit, label=r"$\mu={:.3f}$ $\sigma={:.3f}$".format(norm_param[0], norm_param[1])) - ax.set_xlabel(r"$(\mu(p_\text{gen}) (w_\text{disc} - 1) / \sigma(p_\text{gen}))$") + ax.plot( + bins_fit, + x_fit, + label="Fit", + color=self.colors[1] + ) + ax.set_xlabel(r"$\mu(p_\text{gen}) (w_\text{disc} - 1) / \sigma(p_\text{gen})$") ax.set_ylabel("normalized") #ax.set_yscale("log") ax.set_xlim(bins[0], bins[-1]) - ax.legend(loc='upper left') + ax.legend(loc="upper right", frameon=False, title = self.title) + self.corner_text( + ax, + f"$\\mu={norm_param[0]:.3f}$\n$\\sigma={norm_param[1]:.3f}$", + "left", + "top", + ) #ax.set_ylim(top=2.0e1, bottom=1.0e-4) - plt.savefig(pdf, format="pdf", bbox_inches="tight") + plt.savefig(pdf, format="pdf") plt.close() fig, ax = plt.subplots(figsize=(4, 3.5)) + fig.tight_layout(pad=0.0, w_pad=0.0, h_pad=0.0, rect=(0.11,0.09,1.00,1.00)) log_pull = np.log(w_disc) / std_log_w_bgen - bins = np.linspace( - np.quantile(log_pull[np.isfinite(log_pull)], 0.0001), - np.quantile(log_pull[np.isfinite(log_pull)], 0.9999), - 50 - ) + low_lim = np.quantile(log_pull[np.isfinite(log_pull)], 0.005) + high_lim = np.quantile(log_pull[np.isfinite(log_pull)], 0.995) + lim = max(high_lim, -low_lim) + bins = np.linspace(-lim, lim, 50) y, _ = np.histogram(log_pull, bins, density=True) self.hist_line( ax, @@ -432,20 +493,46 @@ def plot_bgen_weights(self, file: str): color = self.colors[0] ) x_fit, bins_fit, norm_param = self.fit_norm(bins, y, p0=(y.mean(), y.var())) - - ax.plot(bins_fit, x_fit, label=r"$\mu={:.3f}$ ""\n"" $\sigma={:.3f}$".format(norm_param[0], norm_param[1]), color='red') + ax.plot( + bins_fit, + x_fit, + label="Fit", + color=self.colors[1] + ) ax.set_xlabel(r"$\log w_\text{disc} / \sigma(\log p_\text{gen})$") ax.set_ylabel("normalized") #ax.set_yscale("log") ax.set_xlim(bins[0], bins[-1]) #ax.set_ylim(bottom=1.0e-2, top=y.max()+5) - ax.legend(loc='upper left') - plt.savefig(pdf, format="pdf", bbox_inches="tight") + ax.legend(loc="upper right", frameon=False) + self.corner_text( + ax, + f"$\\mu={norm_param[0]:.3f}$\n$\\sigma={norm_param[1]:.3f}$", + "left", + "top", + ) + plt.savefig(pdf, format="pdf") + plt.close() + + fig, ax = plt.subplots(figsize=(4, 3.5)) + fig.tight_layout(pad=0.0, w_pad=0.0, h_pad=0.0, rect=(0.11,0.09,1.00,1.00)) + self.hist_line( + ax, + wdisc_bins, + mswbg, + y_err = None, + label = "", + color = self.colors[1] + ) + ax.set_xlabel(r"$w_\text{disc}$") + ax.set_ylabel(r"median $\sigma(p_\text{gen}) / \mu(p_\text{gen})$") + self.corner_text(ax, self.title, "right", "top") + plt.savefig(pdf, format="pdf") plt.close() def fit_norm(self, bins, y, p0, points=1000): to_fit = norm.pdf - param, cov = curve_fit(to_fit, bins[:-1], y, p0=p0) + param, cov = curve_fit(to_fit, (bins[:-1]+bins[1:])/2, y, p0=p0) bins_fit = np.linspace(bins[0], bins[-1], points) pred = norm.pdf(bins_fit, loc=param[0], scale=param[1]) return pred, bins_fit, param @@ -499,6 +586,25 @@ def plot_single_observable(self, pdf: PdfPages, observable: Observable): true_hist, _ = np.histogram(observable.true_data, bins=bins, density=True) fake_hist, _ = np.histogram(observable.fake_data, bins=bins, density=True) + if self.log_gen_weights is None: + fake_std = None + else: + gen_weights = np.exp(self.log_gen_weights) + mean_w_bgen = np.mean(gen_weights, axis=1, keepdims=True) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + norm_w_bgen = gen_weights / mean_w_bgen + norm_w_bgen[~np.isfinite(norm_w_bgen)] = 1. + hists = np.stack([ + np.histogram(observable.fake_data, bins=bins, weights=norm_w_bgen[:,i])[0] + for i in range(norm_w_bgen.shape[1]) + ], axis=1) + fake_median = np.median(hists, axis=1) + fake_std = np.stack(( + np.quantile(hists, 0.159, axis=1) / fake_median * fake_hist, + np.quantile(hists, 0.841, axis=1) / fake_median * fake_hist + ), axis=0) + lines = [ Line( y = true_hist, @@ -507,6 +613,7 @@ def plot_single_observable(self, pdf: PdfPages, observable: Observable): ), Line( y = fake_hist, + y_err = fake_std, y_ref = true_hist, label = "Gen", color = self.colors[1], @@ -570,6 +677,10 @@ def plot_single_clustering( weights_fake = np.median(self.weights_fake, axis=1) else: weights_fake = self.weights_fake + #mean_w_bgen = np.mean(np.exp(self.log_gen_weights), axis=1) + #std_w_bgen = np.std(np.exp(self.log_gen_weights), axis=1) + #std_log_w_bgen = np.std(self.log_gen_weights, axis=1) + #weights_fake = std_log_w_bgen masks, labels = [], [] for threshold in lower_thresholds: @@ -592,7 +703,7 @@ def plot_single_clustering( y = true_hist, label = "Truth", color = "k", - fill = True + #fill = True ), *[ Line(y=hist, label=label, color=color) @@ -607,11 +718,13 @@ def corner_text( ax: mpl.axes.Axes, text: str, horizontal_pos: str, - vertical_pos: str + vertical_pos: str, + h_offset: float = 0.05, + v_offset: float = 0.05 ): ax.text( - x = 0.95 if horizontal_pos == "right" else 0.05, - y = 0.95 if vertical_pos == "top" else 0.05, + x = 1 - h_offset if horizontal_pos == "right" else h_offset, + y = 1 - v_offset if vertical_pos == "top" else v_offset, s = text, horizontalalignment = horizontal_pos, verticalalignment = vertical_pos, @@ -656,6 +769,7 @@ def hist_plot( figsize = (6, 4.5), gridspec_kw = {"height_ratios": (4, 1, 1)[:n_panels], "hspace": 0.00} ) + fig.tight_layout(pad=0.0, w_pad=0.0, h_pad=0.0, rect=(0.08,0.08,0.99,0.98)) if n_panels == 1: axs = [axs] @@ -705,7 +819,7 @@ def hist_plot( if show_ratios: axs[1].set_ylabel(r"$\frac{\mathrm{Model}}{\mathrm{Truth}}$") axs[1].set_yticks([0.8,1,1.2]) - axs[1].set_ylim([0.75,1.25]) + axs[1].set_ylim([0.71,1.29]) axs[1].axhline(y=1, c="black", ls="--", lw=0.7) axs[1].axhline(y=1.2, c="black", ls="dotted", lw=0.5) axs[1].axhline(y=0.8, c="black", ls="dotted", lw=0.5) @@ -715,14 +829,16 @@ def hist_plot( axs[ax_idx].set_ylabel(r"$w$") axs[ax_idx].axhline(y=1, c="black", ls="--", lw=0.7) ymin, ymax = axs[ax_idx].get_ylim() - axs[ax_idx].set_ylim(ymin, min(ymax, 2)) + ymax = min(ymax, 2.3) + yrange = ymax - ymin + axs[ax_idx].set_ylim(ymin - 0.05*yrange, ymax + 0.05*yrange) unit = "" if observable.unit is None else f" [{observable.unit}]" axs[-1].set_xlabel(f"${{{observable.tex_label}}}${unit}") axs[-1].set_xscale(observable.xscale) axs[-1].set_xlim(bins[0], bins[-1]) - plt.savefig(pdf, format="pdf", bbox_inches="tight") + plt.savefig(pdf, format="pdf") plt.close()