caloinn loader v1
luigifvr committed Mar 27, 2023
1 parent eb173ab commit 7de17d4
Showing 2 changed files with 114 additions and 0 deletions.
34 changes: 34 additions & 0 deletions params/calo_inn.yaml
@@ -0,0 +1,34 @@
run_name: calo_inn

# Dataset
loader_module: calo_inn
loader_params:
geant_file: path_to_file.hdf5
generated_file: path_to_file.hdf5
add_log_energy: True
add_log_layer_ens: True
add_logit_step: False

# Model
activation: leaky_relu
negative_slope: 0.1
dropout: 0.1
layers: 5
hidden_size: 256

# Training
bayesian: False
lr: 1.e-3
betas: [0.9, 0.99]
weight_decay: 0.0
epochs: 50
batch_size: 1024
lr_scheduler: reduce_on_plateau
lr_decay_factor: 0.1
lr_patience: 5
checkpoint_interval: 5

# Evaluation
#bayesian_samples: 2
#lower_cluster_thresholds: [0.01, 0.1]
#upper_cluster_thresholds: [0.9, 0.99]
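For context, `loader_module` and `loader_params` are presumably resolved by the framework's driver roughly as in the sketch below. Note that `load()` in `src/loaders/calo_inn.py` also reads `train_split` and `test_split`, which do not appear under `loader_params` here, so they are presumably injected or defaulted elsewhere. A minimal sketch under those assumptions, not the framework's actual driver code:

import importlib
import yaml

with open("params/calo_inn.yaml") as f:
    config = yaml.safe_load(f)

# Resolve the loader module named in the config (assumed convention:
# loader_module maps to a file under src/loaders/).
loader = importlib.import_module(f"src.loaders.{config['loader_module']}")
datasets = loader.load(config["loader_params"])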
80 changes: 80 additions & 0 deletions src/loaders/calo_inn.py
@@ -0,0 +1,80 @@
import pandas as pd
import numpy as np
from types import SimpleNamespace

from ..dataset import DiscriminatorData
from ..observable import Observable

def load(params: dict) -> list[DiscriminatorData]:
"""
dataloader for calo data
parameters:
args:
return:
"""
datasets = []
preproc_kwargs = {
"add_log_energy": params.get("add_log_energy", False),
"add_log_layer_ens": params.get("add_log_layer_ens", False),
"add_logit_step": params.get("add_logit_step", False),
}
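    # NOTE: preproc_kwargs is presumably consumed by create_data /
    # create_data_high; their definitions are not visible in this hunk.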
datasets_list = [
{'level': 'low', 'normalize': True, 'label': 'Norm.', 'suffix': 'norm'},
{'level': 'low', 'normalize': False, 'label': 'Unnorm.', 'suffix': 'unnorm'},
{'level': 'high', 'normalize': False, 'label': 'High', 'suffix': 'high'},
]

for dataset in datasets_list:
if dataset['level'] == 'low':
geant_sample = create_data(params['geant_file'], dataset)
gen_sample = create_data(params['generated_file'], dataset)
elif dataset['level'] == 'high':
geant_sample = create_data_high(params['geant_file'], dataset)
gen_sample = create_data_high(params['generated_file'], dataset)
else:
raise ValueError('Classifier preprocessing running at unknown level.')

train_true, test_true, val_true = split_data(
geant_sample,
params["train_split"],
params["test_split"]
)
train_fake, test_fake, val_fake = split_data(
            gen_sample,
params["train_split"],
params["test_split"]
)

        datasets.append(DiscriminatorData(
            label=dataset['label'],
            suffix=dataset['suffix'],
            dim=geant_sample.shape[-1],
            train_true=train_true,
            train_fake=train_fake,
            test_true=test_true,
            test_fake=test_fake,
            val_true=val_true,
            val_fake=val_fake,
            observables=[],
        ))
return datasets

#def create_data(data_path):

def split_data(
    data: np.ndarray,
    train_split: float,
    test_split: float
) -> tuple[np.ndarray, ...]:
    """Split into train/test/validation slices along the first axis."""
    n_train = int(train_split * len(data))
    n_test = int(test_split * len(data))
    # Train from the front, test from the back, validation in between;
    # assumes test_split > 0, otherwise data[-0:] would return everything.
    train_data = data[:n_train]
    test_data = data[-n_test:]
    val_data = data[n_train:-n_test]
    return train_data, test_data, val_data
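
The `create_data` and `create_data_high` helpers called in `load()` are not visible in this diff view. Purely as an illustration, a minimal sketch of what a low-level `create_data` might look like, assuming an HDF5 layout with `showers` and `energies` datasets; the dataset names, signature, and preprocessing steps here are assumptions, not the committed implementation:

import h5py

def create_data(data_path: str, dataset: dict, **preproc_kwargs) -> np.ndarray:
    # Hypothetical sketch: HDF5 keys and preprocessing are assumptions.
    with h5py.File(data_path, "r") as f:
        showers = f["showers"][:].reshape(len(f["showers"]), -1)
        energies = f["energies"][:].reshape(-1, 1)
    if dataset["normalize"]:
        # Normalize each shower by its incident energy.
        showers = showers / energies
    return np.concatenate([showers, energies], axis=-1)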

