From 7de17d49a49ee95307f94feed0206890df81fa97 Mon Sep 17 00:00:00 2001
From: Luigi Favaro
Date: Mon, 27 Mar 2023 13:38:30 +0200
Subject: [PATCH] caloinn loader v1

---
 params/calo_inn.yaml    |  36 ++++++++++++
 src/loaders/calo_inn.py | 100 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 params/calo_inn.yaml

diff --git a/params/calo_inn.yaml b/params/calo_inn.yaml
new file mode 100644
index 0000000..cc80e83
--- /dev/null
+++ b/params/calo_inn.yaml
@@ -0,0 +1,36 @@
+run_name: calo_inn
+
+#Dataset
+loader_module: calo_inn
+loader_params:
+  geant_file: path_to_file.hdf5
+  generated_file: path_to_file.hdf5
+  # Fractions consumed by split_data(); required by the loader.
+  train_split: 0.6
+  test_split: 0.2
+  add_log_energy: True
+  add_log_layer_ens: True
+  add_logit_step: False
+
+# Model
+activation: leaky_relu
+negative_slope: 0.1
+dropout: 0.1
+layers: 5
+hidden_size: 256
+
+# Training
+bayesian: False
+lr: 1.e-3
+betas: [0.9, 0.99]
+weight_decay: 0.0
+epochs: 50
+batch_size: 1024
+lr_scheduler: reduce_on_plateau
+lr_decay_factor: 0.1
+lr_patience: 5
+checkpoint_interval: 5
+
+# Evaluation
+#bayesian_samples: 2
+#lower_cluster_thresholds: [0.01, 0.1]
+#upper_cluster_thresholds: [0.9, 0.99]
diff --git a/src/loaders/calo_inn.py b/src/loaders/calo_inn.py
index e69de29..cbca0e4 100644
--- a/src/loaders/calo_inn.py
+++ b/src/loaders/calo_inn.py
@@ -0,0 +1,100 @@
+import pandas as pd
+import numpy as np
+from types import SimpleNamespace
+
+from ..dataset import DiscriminatorData
+from ..observable import Observable
+
+def load(params: dict) -> list[DiscriminatorData]:
+    """
+    Dataloader for calorimeter shower data.
+
+    Builds one DiscriminatorData object per preprocessing variant:
+    normalized low-level, unnormalized low-level and high-level features.
+
+    Args:
+        params: loader_params section of the run card. Required keys:
+            'geant_file', 'generated_file', 'train_split', 'test_split'.
+            Optional preprocessing flags: 'add_log_energy',
+            'add_log_layer_ens', 'add_logit_step'.
+
+    Returns:
+        List of DiscriminatorData objects, one per preprocessing variant.
+    """
+    datasets = []
+    preproc_kwargs = {
+        "add_log_energy": params.get("add_log_energy", False),
+        "add_log_layer_ens": params.get("add_log_layer_ens", False),
+        "add_logit_step": params.get("add_logit_step", False),
+    }
+    datasets_list = [
+        {'level': 'low', 'normalize': True, 'label': 'Norm.', 'suffix': 'norm'},
+        {'level': 'low', 'normalize': False, 'label': 'Unnorm.', 'suffix': 'unnorm'},
+        {'level': 'high', 'normalize': False, 'label': 'High', 'suffix': 'high'},
+    ]
+
+    for dataset in datasets_list:
+        # Forward the preprocessing flags so create_data* can apply them.
+        if dataset['level'] == 'low':
+            geant_sample = create_data(params['geant_file'], dataset, **preproc_kwargs)
+            gen_sample = create_data(params['generated_file'], dataset, **preproc_kwargs)
+        elif dataset['level'] == 'high':
+            geant_sample = create_data_high(params['geant_file'], dataset, **preproc_kwargs)
+            gen_sample = create_data_high(params['generated_file'], dataset, **preproc_kwargs)
+        else:
+            raise ValueError('Classifier preprocessing running at unknown level.')
+
+        train_true, test_true, val_true = split_data(
+            geant_sample,
+            params["train_split"],
+            params["test_split"]
+        )
+        train_fake, test_fake, val_fake = split_data(
+            gen_sample,
+            params["train_split"],
+            params["test_split"]
+        )
+
+        datasets.append(DiscriminatorData(
+                label = dataset['label'],
+                suffix = dataset['suffix'],
+                dim = geant_sample.shape[-1],
+                train_true = train_true,
+                train_fake = train_fake,
+                test_true = test_true,
+                test_fake = test_fake,
+                val_true = val_true,
+                val_fake = val_fake,
+                observables = [],
+            )
+        )
+    return datasets
+
+# TODO: implement create_data(file_path, dataset, **preproc_kwargs) and
+# create_data_high(file_path, dataset, **preproc_kwargs) used by load().
+
+def split_data(
+    data: np.ndarray,
+    train_split: float,
+    test_split: float
+) -> tuple[np.ndarray, ...]:
+    """
+    Split data along the first axis into train/test/validation parts.
+
+    Args:
+        data: samples to split, shape (n_samples, ...).
+        train_split: fraction of samples used for training (from the start).
+        test_split: fraction of samples used for testing (from the end);
+            the remainder in between is used for validation.
+
+    Returns:
+        Tuple (train_data, test_data, val_data), each a view into data.
+    """
+    n_train = int(train_split * len(data))
+    n_test = int(test_split * len(data))
+    train_data = data[:n_train]
+    # Use explicit end indices: 'data[-n_test:]' would return the FULL
+    # array (and 'data[n_train:-n_test]' an empty one) when n_test == 0.
+    test_data = data[len(data) - n_test:]
+    val_data = data[n_train:len(data) - n_test]
+    return train_data, test_data, val_data