diff --git a/src/calo_plotting_helper.py b/src/calo_plotting_helper.py new file mode 100644 index 0000000..9be8851 --- /dev/null +++ b/src/calo_plotting_helper.py @@ -0,0 +1,162 @@ +# pylint: disable=invalid-name +""" Helper functions that compute the high-level observables of calorimeter showers. + + Used for + + "CaloFlow: Fast and Accurate Generation of Calorimeter Showers with Normalizing Flows" + by Claudius Krause and David Shih + arxiv:2106.05285 + + "CaloFlow II: Even Faster and Still Accurate Generation of Calorimeter Showers with + Normalizing Flows" + by Claudius Krause and David Shih + arxiv:2110.11377 + + Functions inspired by + "CaloGAN: Simulating 3D High Energy Particle Showers in Multi-LayerElectromagnetic + Calorimeters with Generative Adversarial Networks" + by Michela Paganini, Luke de Oliveira, and Benjamin Nachman + arxiv:1712.10321 + https://github.com/hep-lbdl/CaloGAN +""" + +import numpy as np + +# number of voxel per layer in the given dimension, see fig 2 of 1712.10321 +PHI_CELLS = {0: 3, 1: 12, 2: 12} +ETA_CELLS = {0: 96, 1: 12, 2: 6} + +def to_np_thres(tensor, threshold): + """ moves tensor to CPU, then to numpy, then applies threshold """ + ret = tensor.clamp_(0., 1e5).to('cpu').numpy() + ret = np.where(ret < threshold, np.zeros_like(ret), ret) + return ret + +def layer_split(data): + """ splits data into the 3 layers """ + return np.split(data, [288, 432], axis=-1) + +def layer_std(layer1, layer2, total): + """ helper function for standard deviation of layer depth""" + term1 = (layer1 + 4.*layer2) / total + term2 = ((layer1 + 2.*layer2)/total)**2 + return np.sqrt(term1-term2) + +def energy_sum(data, normalization=1e3): + """ Returns the energy sum of all energy depositions. + If len(data.shape) is 3, the sum is taken over the last 2 axis (summing voxels per layer). + If it is 2, the sum is taken over the last axis (summing voxels of entire event). + normalization of 1e3 accounts for MeV to GeV unit conversion. + """ + if len(data.shape) == 3: + ret = data.sum(axis=(1, 2)) + else: + ret = data.sum(axis=-1) + return ret / normalization + +def energy_ratio(data, layer): + """ Returns the sum of energy of the given layer divided by the total energy """ + ret = energy_sum(layer_split(data)[layer]) / energy_sum(data) + return ret + +def layer_sparsity(data, threshold, layer=None): + """ Returns the sparsity (=fraction of voxel above threshold) of the given layer + Supports either sparsity in given data array (of size 3), assuming it's a layer + or needs a layer_nr if data has size 2 (assuming it's a full shower). + """ + if len(data.shape) == 3: + sparsity = (data > threshold).mean((1, 2)) + else: + if layer is not None: + data = layer_split(data)[layer] + sparsity = (data > threshold).mean(-1) + return sparsity + +def n_brightest_voxel(data, num_brightest, ratio=True): + """ Returns the ratio of the "num_brightest" voxel to the energy deposited in the layer. """ + if len(data.shape) == 3: + data = data.reshape(len(data), -1) + top_n = np.sort(data, axis=1)[:, -max(num_brightest):] + energies = data.sum(axis=-1).reshape(-1, 1) + ret = top_n[:, [-num for num in num_brightest]] + if ratio: + # why did I filter energies>0? + #ret = (ret/ (energies + 1e-16))[np.tile(energies > 0, len(num_brightest))].reshape( + # -1, len(num_brightest)) + ret = (ret/ (energies + 1e-16)).reshape(-1, len(num_brightest)) + return ret + +def ratio_two_brightest(data): + """ Returns the ratio of the difference of the 2 brightest voxels to their sum. """ + top = np.sort(data, axis=1)[:, -2:] + #ret = ((top[:, 1] - top[:, 0]) / (top[:, 0] + top[:, 1] + 1e-16))[top[:, 1] > 0] + ret = ((top[:, 1] - top[:, 0]) / (top[:, 0] + top[:, 1] + 1e-16)) + return ret + +def maxdepth_nr(data): + """ Returns the layer that has the last energy deposition """ + _, layer_1, layer_2 = layer_split(data) + + maxdepth = 2* (energy_sum(layer_2) != 0) + maxdepth[maxdepth == 0] = 1* (energy_sum(layer_1[maxdepth == 0]) != 0) + return maxdepth + +def depth_weighted_energy(data): + """ Returns the depth-weighted total energy deposit in all 3 layers for a given batch. """ + _, layer_1, layer_2 = layer_split(data) + ret = energy_sum(layer_1, normalization=1.) + 2.* energy_sum(layer_2, normalization=1.) + return ret + +def depth_weighted_energy_normed_std(data): + """ Returns the standard deviation of the depth-weighted total energy deposit in all 3 layers + normalized by the total deposited energy for a given batch + """ + _, layer_1, layer_2 = layer_split(data) + layer1 = energy_sum(layer_1, normalization=1.) + layer2 = energy_sum(layer_2, normalization=1.) + energies = energy_sum(data, normalization=1.) + return layer_std(layer1, layer2, energies) + +def center_of_energy(data, layer, direction): + """ Returns the center of energy in the direction 'direction' for layer 'layer'. """ + if direction == 'eta': + bins = np.linspace(-240, 240, ETA_CELLS[layer] + 1) + elif direction == 'phi': + bins = np.linspace(-240, 240, PHI_CELLS[layer] + 1) + else: + raise ValueError("direction={} not in ['eta', 'phi']".format(direction)) + bin_centers = (bins[1:] + bins[:-1]) / 2. + + if direction == 'phi': + data = data.reshape(len(data), PHI_CELLS[layer], -1) + ret = (data * bin_centers.reshape(-1, 1)).sum(axis=(1, 2)) + else: + data = data.reshape(len(data), -1, ETA_CELLS[layer]) + ret = (data * bin_centers.reshape(1, -1)).sum(axis=(1, 2)) + energies = energy_sum(data, normalization=1.) + ret = ret / (energies + 1e-8) + return ret + +def center_of_energy_std(data, layer, direction): + """ Returns the standard deviation of center of energy in the direction + 'direction' for layer 'layer'. + """ + if direction == 'eta': + bins = np.linspace(-240, 240, ETA_CELLS[layer] + 1) + elif direction == 'phi': + bins = np.linspace(-240, 240, PHI_CELLS[layer] + 1) + else: + raise ValueError("direction={} not in ['eta', 'phi']".format(direction)) + bin_centers = (bins[1:] + bins[:-1]) / 2. + + if direction == 'phi': + data = data.reshape(len(data), PHI_CELLS[layer], -1) + ret = (data * bin_centers.reshape(-1, 1)).sum(axis=(1, 2)) + ret2 = (data * bin_centers.reshape(-1, 1)**2).sum(axis=(1, 2)) + else: + data = data.reshape(len(data), -1, ETA_CELLS[layer]) + ret = (data * bin_centers.reshape(1, -1)).sum(axis=(1, 2)) + ret2 = (data * bin_centers.reshape(1, -1)**2).sum(axis=(1, 2)) + energies = energy_sum(data, normalization=1.) + ret = np.sqrt((ret2 / (energies+1e-8)) - (ret / (energies+1e-8)) ** 2) + return ret diff --git a/src/loaders/calo_inn.py b/src/loaders/calo_inn.py index 0802a0e..2b8895f 100644 --- a/src/loaders/calo_inn.py +++ b/src/loaders/calo_inn.py @@ -1,10 +1,12 @@ import pandas as pd import numpy as np +import torch import h5py from types import SimpleNamespace from ..dataset import DiscriminatorData from ..observable import Observable +from ..calo_plotting_helper import * def load(params: dict) -> list[DiscriminatorData]: """ @@ -21,11 +23,12 @@ def load(params: dict) -> list[DiscriminatorData]: "add_log_energy": params.get("add_log_energy", False), "add_log_layer_ens": params.get("add_log_layer_ens", False), "add_logit_step": params.get("add_logit_step", False), + "add_cut": params.get("add_cut", 0.0), } datasets_list = [ {'level': 'low', 'normalize': True, 'label': 'Norm.', 'suffix': 'norm'}, {'level': 'low', 'normalize': False, 'label': 'Unnorm.', 'suffix': 'unnorm'}, - #{'level': 'high', 'normalize': False, 'label': 'High', 'suffix': 'high'}, + {'level': 'high', 'normalize': False, 'label': 'High', 'suffix': 'high'}, ] for dataset in datasets_list: @@ -96,7 +99,68 @@ def create_data(data_path, dataset_list, **kwargs): return data def create_data_high(data_path, dataset_list, **kwargs): - pass + cut = kwargs['add_cut'] + with h5py.File(data_path, "r") as f: + en_test = f.get('energy')[:] / 1e2 + lay_0 = f.get('layer_0')[:] / 1e5 + lay_1 = f.get('layer_1')[:] / 1e5 + lay_2 = f.get('layer_2')[:] / 1e5 + + incident_energy = torch.log10(torch.tensor(en_test)*10.) + # scale them back to MeV + layer0 = torch.tensor(lay_0) * 1e5 + layer1 = torch.tensor(lay_1) * 1e5 + layer2 = torch.tensor(lay_2) * 1e5 + layer0 = to_np_thres(layer0.view(layer0.shape[0], -1), cut) + layer1 = to_np_thres(layer1.view(layer1.shape[0], -1), cut) + layer2 = to_np_thres(layer2.view(layer2.shape[0], -1), cut) + # detour to numpy in order to use same functions as plotting script + full_shower = np.concatenate((layer0, layer1, layer2), 1) + E_0 = energy_sum(layer0) + E_1 = energy_sum(layer1) + E_2 = energy_sum(layer2) + E_tot = E_0 + E_1 + E_2 + f_0 = E_0 / E_tot + f_1 = E_1 / E_tot + f_2 = E_2 / E_tot + l_d = depth_weighted_energy(full_shower) + s_d = l_d / (E_tot * 1e3) + sigma_sd = depth_weighted_energy_normed_std(full_shower) + E_1b0, E_2b0, E_3b0, E_4b0, E_5b0 = n_brightest_voxel(layer0, [1, 2, 3, 4, 5]).T + E_1b1, E_2b1, E_3b1, E_4b1, E_5b1 = n_brightest_voxel(layer1, [1, 2, 3, 4, 5]).T + E_1b2, E_2b2, E_3b2, E_4b2, E_5b2 = n_brightest_voxel(layer2, [1, 2, 3, 4, 5]).T + ratio_0 = ratio_two_brightest(layer0) + ratio_1 = ratio_two_brightest(layer1) + ratio_2 = ratio_two_brightest(layer2) + sparsity_0 = layer_sparsity(layer0, cut) + sparsity_1 = layer_sparsity(layer1, cut) + sparsity_2 = layer_sparsity(layer2, cut) + phi_0 = center_of_energy(layer0, 0, 'phi') + phi_1 = center_of_energy(layer1, 1, 'phi') + phi_2 = center_of_energy(layer2, 2, 'phi') + eta_0 = center_of_energy(layer0, 0, 'eta') + eta_1 = center_of_energy(layer1, 1, 'eta') + eta_2 = center_of_energy(layer2, 2, 'eta') + sigma_0 = center_of_energy_std(layer0, 0, 'phi') + sigma_1 = center_of_energy_std(layer1, 1, 'phi') + sigma_2 = center_of_energy_std(layer2, 2, 'phi') + + # to be log10-processed: + ret1 = np.vstack([E_0+1e-8, E_1+1e-8, E_2+1e-8, E_tot, + f_0+1e-8, f_1+1e-8, f_2+1e-8, l_d+1e-8, + sigma_0+1e-8, sigma_1+1e-8, sigma_2+1e-8]).T + ret1 = np.log10(ret1) + # without log10 processing: + ret2 = np.vstack([s_d, sigma_sd, + 1e1*E_1b0, 1e1*E_2b0, 1e1*E_3b0, 1e1*E_4b0, 1e1*E_5b0, + 1e1*E_1b1, 1e1*E_2b1, 1e1*E_3b1, 1e1*E_4b1, 1e1*E_5b1, + 1e1*E_1b2, 1e1*E_2b2, 1e1*E_3b2, 1e1*E_4b2, 1e1*E_5b2, + ratio_0, ratio_1, ratio_2, sparsity_0, sparsity_1, sparsity_2, + phi_0/1e2, phi_1/1e2, phi_2/1e2, eta_0/1e2, eta_1/1e2, eta_2/1e2]).T + ret = torch.from_numpy(np.hstack([ret1, ret2])) + + ret = torch.cat((ret, incident_energy), 1) + return ret.numpy() def split_data( data: np.ndarray,