From 9619042fc9e1dd07b1bcdec47d1e30cc76acd89e Mon Sep 17 00:00:00 2001
From: Luigi Favaro <favaro@thphys.uni-heidelberg.de>
Date: Tue, 9 May 2023 18:22:02 +0200
Subject: [PATCH] add calibration curve in plots

---
 src/plots.py | 71 +++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 56 insertions(+), 15 deletions(-)

diff --git a/src/plots.py b/src/plots.py
index 84f9ba3..dddd465 100644
--- a/src/plots.py
+++ b/src/plots.py
@@ -7,13 +7,14 @@
 from matplotlib.backends.backend_pdf import PdfPages
 from scipy.stats import lognorm, norm, binned_statistic
 from scipy.optimize import curve_fit
+from sklearn.calibration import calibration_curve
 
 from .observable import Observable
 
 Line = namedtuple(
     "Line",
-    ["y", "y_err", "y_ref", "y_orig", "label", "color", "fill"],
-    defaults = [None, None, None, None, None, False]
+    ["y", "y_err", "y_ref", "y_orig", "label", "color", "fill", "linestyle"],
+    defaults = [None, None, None, None, None, False, None]
 )
 
 class Plots:
@@ -190,6 +191,28 @@ def plot_roc(self, file: str):
         plt.savefig(file)
         plt.close()
 
+    def plot_calibration_curve(self, file: str):
+        """
+        plot calibration curve
+        """
+        scores = np.concatenate((self.weights_fake, self.weights_true))
+        labels = np.concatenate((
+                    np.zeros_like(self.weights_fake),
+                    np.ones_like(self.weights_true))
+                    )
+        cls_output = scores/(1+scores)
+        prob_true, prob_pred = calibration_curve(labels, cls_output, n_bins=10)
+        
+        fig, ax = plt.subplots(figsize=(4, 3.5))
+        fig.tight_layout(pad=0.0, w_pad=0.0, h_pad=0.0, rect=(0.11,0.09,1.00,1.00))
+
+        ax.plot(prob_true, prob_pred, label='cal. curve')
+        ax.plot(np.linspace(0,1,10), np.linspace(0,1,10))
+        self.corner_text(ax, self.title, "right", "top", h_offset=0.15)
+
+        ax.legend()
+        plt.savefig(file)
+        plt.close()
 
     def plot_weight_hist(self, file: str):
         """
@@ -356,13 +379,16 @@ def plot_bgen_weights(self, file: str):
         with PdfPages(file) as pdf:
             #mean_w_bgen = np.mean(np.exp(self.log_gen_weights), axis=1)
             #std_w_bgen = np.std(np.exp(self.log_gen_weights), axis=1)
-            w_bgen = np.exp(self.log_gen_weights)
-            mean_w_bgen = np.median(w_bgen, axis=1)
+            #w_bgen = np.exp(self.log_gen_weights)
+            w_bgen = self.log_gen_weights
+            mean_w_bgen = np.nanmedian(w_bgen, axis=1)
+
             std_w_bgen = (
                 np.quantile(w_bgen, 0.841, axis=1) -
                 np.quantile(w_bgen, 0.159, axis=1)
             ) / 2
-            std_log_w_bgen = np.std(self.log_gen_weights, axis=1)
+            std_log_w_bgen = np.nanstd(self.log_gen_weights, axis=1)
+
             if self.bayesian:
                 mean_w_bgen = np.repeat(
                     mean_w_bgen[:,None], self.weights_fake.shape[1], axis=1
@@ -377,9 +403,10 @@ def plot_bgen_weights(self, file: str):
             else:
                 w_disc = self.weights_fake
             ratio = std_w_bgen / mean_w_bgen
-            wbinn_bins = np.linspace(0,4,30)
+            wbinn_bins = np.linspace(0,0.01,30)
             wdisc_bins = np.linspace(0,3,30)
-            mswbg, _, _ = binned_statistic(w_disc, ratio, "median", wdisc_bins)
+            print(w_disc.shape, ratio.shape, wdisc_bins.shape)
+            mswbg, _, _ = binned_statistic(w_disc, ratio, lambda x: np.nanmedian(x), wdisc_bins)
             mswbg_stds = np.stack([
                 binned_statistic(w_disc, ratio, lambda x: np.nanquantile(x, 0.159), wdisc_bins)[0],
                 binned_statistic(w_disc, ratio, lambda x: np.nanquantile(x, 0.841), wdisc_bins)[0]
@@ -398,7 +425,7 @@ def plot_bgen_weights(self, file: str):
             )
             #cb = plt.colorbar(img)
             #cb.set_label('norm.')
-            ax.set_xlabel(r"$\sigma(p_\text{model}) / \mu(p_\text{model})$")
+            ax.set_xlabel(r"$\sigma(\log p_\text{model}) / \mu(\log p_\text{model})$")
             ax.set_ylabel(r"$w$")
 
             plt.savefig(pdf, format="pdf")
@@ -430,9 +457,11 @@ def plot_bgen_weights(self, file: str):
             )
             #cb = plt.colorbar(img)
             #cb.set_label('norm.')
-            axs[0].set_ylabel(r"$\sigma(p_\text{model}) / \mu(p_\text{model})$")
+            axs[0].set_ylabel(r"$\sigma(\log p_\text{model}) / \mu(\log p_\text{model})$")
             axs[1].set_ylabel(r"median $\sigma / \mu$")
             axs[1].set_xlabel(r"$w$")
+
+            fig.tight_layout()
             fig.align_ylabels()
             self.corner_text(axs[1], self.title, "right", "top", h_offset=0.15)
             plt.savefig(pdf, format="pdf")
@@ -608,7 +637,7 @@ def plot_single_observable(self, pdf: PdfPages, observable: Observable):
         lines = [
             Line(
                 y = true_hist,
-                label = "Truth",
+                label = "Geant",
                 color = self.colors[0],
             ),
             Line(
@@ -690,6 +719,7 @@ def plot_single_clustering(
             masks.append(weights_fake > threshold)
             labels.append(f"$w > {threshold}$")
         true_hist, _ = np.histogram(observable.true_data, bins=bins, density=True)
+        fake_hist, _ = np.histogram(observable.fake_data, bins=bins, density=True)
         hists = [
             np.histogram(
                 observable.fake_data[mask],
@@ -699,9 +729,15 @@ def plot_single_clustering(
             for mask in masks
         ]
         lines = [
+            Line(
+                y = fake_hist,
+                label = "Gen.",
+                color = "k",
+                linestyle = "--"
+            ),
             Line(
                 y = true_hist,
-                label = "Truth",
+                label = "Geant",
                 color = "k",
                 #fill = True
             ),
@@ -787,7 +823,8 @@ def hist_plot(
                     line.y_err * scale if line.y_err is not None else None,
                     label=line.label,
                     color=line.color,
-                    fill=line.fill
+                    fill=line.fill,
+                    linestyle=line.linestyle,
                 )
 
                 ratio_panels = []
@@ -817,7 +854,7 @@ def hist_plot(
             #self.corner_text(axs[0], self.title, "right", "top")
 
             if show_ratios:
-                axs[1].set_ylabel(r"$\frac{\mathrm{Model}}{\mathrm{Truth}}$")
+                axs[1].set_ylabel(r"$\frac{\mathrm{Model}}{\mathrm{Geant}}$")
                 axs[1].set_yticks([0.8,1,1.2])
                 axs[1].set_ylim([0.71,1.29])
                 axs[1].axhline(y=1, c="black", ls="--", lw=0.7)
@@ -850,7 +887,8 @@ def hist_line(
         y_err: np.ndarray,
         label: str,
         color: str,
-        fill: bool = False
+        fill: bool = False,
+        linestyle: str = None,
     ):
         """
         Plot a stepped line for a histogram, optionally with error bars.
@@ -863,6 +901,7 @@ def hist_line(
             label: Label of the line
             color: Color of the line
             fill: Filled histogram
+            linestyle
         """
 
         dup_last = lambda a: np.append(a, a[-1])
@@ -874,7 +913,8 @@ def hist_line(
                 label = label,
                 facecolor = color,
                 step = "post",
-                alpha = 0.2
+                alpha = 0.2,
+                linestyle = linestyle
             )
         else:
             ax.step(
@@ -884,6 +924,7 @@ def hist_line(
                 color = color,
                 linewidth = 1.0,
                 where = "post",
+                linestyle = linestyle
             )
         if y_err is not None:
             if len(y_err.shape) == 2: