
Commit: add calo high level
luigifvr committed Mar 27, 2023
1 parent c7aace9 commit 8445854
Showing 2 changed files with 228 additions and 2 deletions.
162 changes: 162 additions & 0 deletions src/calo_plotting_helper.py
@@ -0,0 +1,162 @@
# pylint: disable=invalid-name
""" Helper functions that compute the high-level observables of calorimeter showers.
Used for
"CaloFlow: Fast and Accurate Generation of Calorimeter Showers with Normalizing Flows"
by Claudius Krause and David Shih
arxiv:2106.05285
"CaloFlow II: Even Faster and Still Accurate Generation of Calorimeter Showers with
Normalizing Flows"
by Claudius Krause and David Shih
arxiv:2110.11377
Functions inspired by
"CaloGAN: Simulating 3D High Energy Particle Showers in Multi-LayerElectromagnetic
Calorimeters with Generative Adversarial Networks"
by Michela Paganini, Luke de Oliveira, and Benjamin Nachman
arxiv:1712.10321
https://github.com/hep-lbdl/CaloGAN
"""

import numpy as np

# number of voxels per layer in the given dimension, see fig 2 of 1712.10321
PHI_CELLS = {0: 3, 1: 12, 2: 12}
ETA_CELLS = {0: 96, 1: 12, 2: 6}

def to_np_thres(tensor, threshold):
    """ moves tensor to CPU, then to numpy, then applies the threshold """
    ret = tensor.clamp_(0., 1e5).to('cpu').numpy()
    ret = np.where(ret < threshold, np.zeros_like(ret), ret)
    return ret

def layer_split(data):
    """ splits a flattened shower into its 3 calorimeter layers """
    return np.split(data, [288, 432], axis=-1)
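
# Sanity-check sketch (illustrative, assuming a batch of 10 flattened showers):
# the split indices follow from the voxel counts above, 3*96 = 288 for layer 0
# and 288 + 12*12 = 432 after layer 1, leaving 12*6 = 72 voxels for layer 2.
# l0, l1, l2 = layer_split(np.zeros((10, 504)))
# assert l0.shape == (10, 288) and l1.shape == (10, 144) and l2.shape == (10, 72)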

def layer_std(layer1, layer2, total):
    """ helper function for the standard deviation of the layer depth """
    term1 = (layer1 + 4.*layer2) / total
    term2 = ((layer1 + 2.*layer2)/total)**2
    return np.sqrt(term1-term2)
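
# Derivation sketch: treat the layer index d as a random variable taking the
# values 0, 1, 2 with probabilities E_0/E_tot, E_1/E_tot, E_2/E_tot. Then
# <d> = (E_1 + 2*E_2)/E_tot and <d^2> = (E_1 + 4*E_2)/E_tot, i.e. term2 and
# term1 above, so layer_std returns sqrt(<d^2> - <d>^2).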

def energy_sum(data, normalization=1e3):
    """ Returns the energy sum of all energy depositions.
    If len(data.shape) is 3, the sum is taken over the last 2 axes (summing voxels per layer).
    If it is 2, the sum is taken over the last axis (summing voxels of the entire event).
    The default normalization of 1e3 accounts for the MeV to GeV unit conversion.
    """
    if len(data.shape) == 3:
        ret = data.sum(axis=(1, 2))
    else:
        ret = data.sum(axis=-1)
    return ret / normalization
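
# Illustrative check: energy_sum(np.full((10, 504), 2.)) returns ten entries of
# 504 * 2 / 1e3 = 1.008, i.e. a 2 MeV deposit in every voxel sums to about 1 GeV.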

def energy_ratio(data, layer):
    """ Returns the energy sum of the given layer divided by the total energy """
    ret = energy_sum(layer_split(data)[layer]) / energy_sum(data)
    return ret

def layer_sparsity(data, threshold, layer=None):
    """ Returns the sparsity (= fraction of voxels above threshold) of the given layer.
    If data has 3 axes, it is assumed to be a single layer; if it has 2 axes, it is
    assumed to be a full shower and the optional layer argument selects which layer
    to evaluate (the full shower if layer is None).
    """
    if len(data.shape) == 3:
        sparsity = (data > threshold).mean((1, 2))
    else:
        if layer is not None:
            data = layer_split(data)[layer]
        sparsity = (data > threshold).mean(-1)
    return sparsity
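
# e.g. layer_sparsity(np.array([[0., 1., 2., 0.]]), 0.5) returns array([0.5]),
# since 2 of the 4 voxels exceed the threshold.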

def n_brightest_voxel(data, num_brightest, ratio=True):
    """ Returns the energies of the "num_brightest" voxels, by default as the ratio
    to the energy deposited in the layer. num_brightest is a list of ranks,
    e.g. [1, 2] for the brightest and second-brightest voxel.
    """
    if len(data.shape) == 3:
        data = data.reshape(len(data), -1)
    top_n = np.sort(data, axis=1)[:, -max(num_brightest):]
    energies = data.sum(axis=-1).reshape(-1, 1)
    ret = top_n[:, [-num for num in num_brightest]]
    if ratio:
        # note: an earlier version also filtered out events with zero energy,
        # for reasons that are unclear:
        #ret = (ret/ (energies + 1e-16))[np.tile(energies > 0, len(num_brightest))].reshape(
        #    -1, len(num_brightest))
        ret = (ret / (energies + 1e-16)).reshape(-1, len(num_brightest))
    return ret
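
# Illustrative sketch: for data = np.array([[1., 4., 2., 3.]]) and
# num_brightest = [1, 2], np.sort yields [[1, 2, 3, 4]], top_n = [[3, 4]], and
# ret = top_n[:, [-1, -2]] = [[4, 3]] (brightest first); with ratio=True this is
# divided by the layer sum 10, giving [[0.4, 0.3]].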

def ratio_two_brightest(data):
    """ Returns the ratio of the difference of the 2 brightest voxels to their sum. """
    top = np.sort(data, axis=1)[:, -2:]
    # an earlier version filtered out events whose brightest voxel is zero:
    #ret = ((top[:, 1] - top[:, 0]) / (top[:, 0] + top[:, 1] + 1e-16))[top[:, 1] > 0]
    ret = (top[:, 1] - top[:, 0]) / (top[:, 0] + top[:, 1] + 1e-16)
    return ret

def maxdepth_nr(data):
    """ Returns the index of the deepest layer with a non-zero energy deposition """
    _, layer_1, layer_2 = layer_split(data)

    maxdepth = 2 * (energy_sum(layer_2) != 0)
    maxdepth[maxdepth == 0] = 1 * (energy_sum(layer_1[maxdepth == 0]) != 0)
    return maxdepth
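
# e.g. a shower that reaches layer 2 gives 2, one with deposits only in layers
# 0 and 1 gives 1, and one confined to layer 0 gives 0.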

def depth_weighted_energy(data):
    """ Returns the depth-weighted total energy deposit in all 3 layers for a given batch. """
    _, layer_1, layer_2 = layer_split(data)
    ret = energy_sum(layer_1, normalization=1.) + 2. * energy_sum(layer_2, normalization=1.)
    return ret
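
# i.e. l_d = 0*E_0 + 1*E_1 + 2*E_2, with the per-layer energy sums in MeV
# (normalization=1.).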

def depth_weighted_energy_normed_std(data):
    """ Returns the standard deviation of the depth-weighted total energy deposit in all
    3 layers, normalized by the total deposited energy, for a given batch.
    """
    _, layer_1, layer_2 = layer_split(data)
    layer1 = energy_sum(layer_1, normalization=1.)
    layer2 = energy_sum(layer_2, normalization=1.)
    energies = energy_sum(data, normalization=1.)
    return layer_std(layer1, layer2, energies)

def center_of_energy(data, layer, direction):
    """ Returns the center of energy in the direction 'direction' for layer 'layer'. """
    if direction == 'eta':
        bins = np.linspace(-240, 240, ETA_CELLS[layer] + 1)
    elif direction == 'phi':
        bins = np.linspace(-240, 240, PHI_CELLS[layer] + 1)
    else:
        raise ValueError("direction={} not in ['eta', 'phi']".format(direction))
    bin_centers = (bins[1:] + bins[:-1]) / 2.

    if direction == 'phi':
        data = data.reshape(len(data), PHI_CELLS[layer], -1)
        ret = (data * bin_centers.reshape(-1, 1)).sum(axis=(1, 2))
    else:
        data = data.reshape(len(data), -1, ETA_CELLS[layer])
        ret = (data * bin_centers.reshape(1, -1)).sum(axis=(1, 2))
    energies = energy_sum(data, normalization=1.)
    ret = ret / (energies + 1e-8)
    return ret
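
# In formula form: <x> = sum_i E_i * x_i / sum_i E_i, where the x_i are the bin
# centers of the eta or phi cells on the [-240, 240] interval and the E_i are
# the voxel energies of the layer.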

def center_of_energy_std(data, layer, direction):
    """ Returns the standard deviation of the center of energy in the direction
    'direction' for layer 'layer'.
    """
    if direction == 'eta':
        bins = np.linspace(-240, 240, ETA_CELLS[layer] + 1)
    elif direction == 'phi':
        bins = np.linspace(-240, 240, PHI_CELLS[layer] + 1)
    else:
        raise ValueError("direction={} not in ['eta', 'phi']".format(direction))
    bin_centers = (bins[1:] + bins[:-1]) / 2.

    if direction == 'phi':
        data = data.reshape(len(data), PHI_CELLS[layer], -1)
        ret = (data * bin_centers.reshape(-1, 1)).sum(axis=(1, 2))
        ret2 = (data * bin_centers.reshape(-1, 1)**2).sum(axis=(1, 2))
    else:
        data = data.reshape(len(data), -1, ETA_CELLS[layer])
        ret = (data * bin_centers.reshape(1, -1)).sum(axis=(1, 2))
        ret2 = (data * bin_centers.reshape(1, -1)**2).sum(axis=(1, 2))
    energies = energy_sum(data, normalization=1.)
    ret = np.sqrt((ret2 / (energies + 1e-8)) - (ret / (energies + 1e-8)) ** 2)
    return ret
68 changes: 66 additions & 2 deletions src/loaders/calo_inn.py
@@ -1,10 +1,12 @@
import pandas as pd
import numpy as np
import torch
import h5py
from types import SimpleNamespace

from ..dataset import DiscriminatorData
from ..observable import Observable
from ..calo_plotting_helper import *

def load(params: dict) -> list[DiscriminatorData]:
"""
@@ -21,11 +23,12 @@ def load(params: dict) -> list[DiscriminatorData]:
"add_log_energy": params.get("add_log_energy", False),
"add_log_layer_ens": params.get("add_log_layer_ens", False),
"add_logit_step": params.get("add_logit_step", False),
"add_cut": params.get("add_cut", 0.0),
}
datasets_list = [
{'level': 'low', 'normalize': True, 'label': 'Norm.', 'suffix': 'norm'},
{'level': 'low', 'normalize': False, 'label': 'Unnorm.', 'suffix': 'unnorm'},
#{'level': 'high', 'normalize': False, 'label': 'High', 'suffix': 'high'},
{'level': 'high', 'normalize': False, 'label': 'High', 'suffix': 'high'},
]

for dataset in datasets_list:
@@ -96,7 +99,68 @@ def create_data(data_path, dataset_list, **kwargs):
    return data

def create_data_high(data_path, dataset_list, **kwargs):
    pass
    cut = kwargs['add_cut']
    with h5py.File(data_path, "r") as f:
        en_test = f.get('energy')[:] / 1e2
        lay_0 = f.get('layer_0')[:] / 1e5
        lay_1 = f.get('layer_1')[:] / 1e5
        lay_2 = f.get('layer_2')[:] / 1e5

    incident_energy = torch.log10(torch.tensor(en_test)*10.)
    # scale them back to MeV
    layer0 = torch.tensor(lay_0) * 1e5
    layer1 = torch.tensor(lay_1) * 1e5
    layer2 = torch.tensor(lay_2) * 1e5
    layer0 = to_np_thres(layer0.view(layer0.shape[0], -1), cut)
    layer1 = to_np_thres(layer1.view(layer1.shape[0], -1), cut)
    layer2 = to_np_thres(layer2.view(layer2.shape[0], -1), cut)
    # detour to numpy in order to use the same functions as the plotting script
    full_shower = np.concatenate((layer0, layer1, layer2), 1)
    E_0 = energy_sum(layer0)
    E_1 = energy_sum(layer1)
    E_2 = energy_sum(layer2)
    E_tot = E_0 + E_1 + E_2
    f_0 = E_0 / E_tot
    f_1 = E_1 / E_tot
    f_2 = E_2 / E_tot
    l_d = depth_weighted_energy(full_shower)
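    # note: l_d is in MeV (normalization=1.) while E_tot is in GeV, hence the
    # factor 1e3 below; s_d is then the dimensionless energy-weighted mean depth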
    s_d = l_d / (E_tot * 1e3)
    sigma_sd = depth_weighted_energy_normed_std(full_shower)
    E_1b0, E_2b0, E_3b0, E_4b0, E_5b0 = n_brightest_voxel(layer0, [1, 2, 3, 4, 5]).T
    E_1b1, E_2b1, E_3b1, E_4b1, E_5b1 = n_brightest_voxel(layer1, [1, 2, 3, 4, 5]).T
    E_1b2, E_2b2, E_3b2, E_4b2, E_5b2 = n_brightest_voxel(layer2, [1, 2, 3, 4, 5]).T
    ratio_0 = ratio_two_brightest(layer0)
    ratio_1 = ratio_two_brightest(layer1)
    ratio_2 = ratio_two_brightest(layer2)
    sparsity_0 = layer_sparsity(layer0, cut)
    sparsity_1 = layer_sparsity(layer1, cut)
    sparsity_2 = layer_sparsity(layer2, cut)
    phi_0 = center_of_energy(layer0, 0, 'phi')
    phi_1 = center_of_energy(layer1, 1, 'phi')
    phi_2 = center_of_energy(layer2, 2, 'phi')
    eta_0 = center_of_energy(layer0, 0, 'eta')
    eta_1 = center_of_energy(layer1, 1, 'eta')
    eta_2 = center_of_energy(layer2, 2, 'eta')
    sigma_0 = center_of_energy_std(layer0, 0, 'phi')
    sigma_1 = center_of_energy_std(layer1, 1, 'phi')
    sigma_2 = center_of_energy_std(layer2, 2, 'phi')

    # to be log10-processed:
    ret1 = np.vstack([E_0+1e-8, E_1+1e-8, E_2+1e-8, E_tot,
                      f_0+1e-8, f_1+1e-8, f_2+1e-8, l_d+1e-8,
                      sigma_0+1e-8, sigma_1+1e-8, sigma_2+1e-8]).T
    ret1 = np.log10(ret1)
    # without log10 processing:
    ret2 = np.vstack([s_d, sigma_sd,
                      1e1*E_1b0, 1e1*E_2b0, 1e1*E_3b0, 1e1*E_4b0, 1e1*E_5b0,
                      1e1*E_1b1, 1e1*E_2b1, 1e1*E_3b1, 1e1*E_4b1, 1e1*E_5b1,
                      1e1*E_1b2, 1e1*E_2b2, 1e1*E_3b2, 1e1*E_4b2, 1e1*E_5b2,
                      ratio_0, ratio_1, ratio_2, sparsity_0, sparsity_1, sparsity_2,
                      phi_0/1e2, phi_1/1e2, phi_2/1e2, eta_0/1e2, eta_1/1e2, eta_2/1e2]).T
    ret = torch.from_numpy(np.hstack([ret1, ret2]))
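    # ret now has 40 columns: the 11 log10-scaled features from ret1 and the 29
    # linearly scaled ones from ret2; appending the log10 incident energy below
    # gives 41 high-level features per shower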

    ret = torch.cat((ret, incident_energy), 1)
    return ret.numpy()

def split_data(
    data: np.ndarray,
