Commit

working low data preproc
luigifvr committed Mar 27, 2023
1 parent 7de17d4 commit c7aace9
Showing 2 changed files with 44 additions and 9 deletions.
params/calo_inn.yaml (4 additions, 2 deletions)

@@ -3,11 +3,13 @@ run_name: calo_inn
 #Dataset
 loader_module: calo_inn
 loader_params:
-  geant_file: path_to_file.hdf5
-  generated_file: path_to_file.hdf5
+  geant_file: /remote/gpu06/favaro/calo_inn/datasets/cls_data/train_cls_piplus.hdf5
+  generated_file: /remote/gpu06/favaro/calo_inn/datasets/train_piplus.hdf5
   add_log_energy: True
   add_log_layer_ens: True
   add_logit_step: False
+  train_split: 0.6
+  test_split: 0.2

 # Model
 activation: leaky_relu
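
The new train_split / test_split keys are consumed by split_data in src/loaders/calo_inn.py (its body is collapsed in the diff below). A minimal sketch of the partition these fractions imply, assuming a plain contiguous split; the exact implementation is not part of this commit's visible diff:

import numpy as np

# Hypothetical sketch, not the repository's implementation: with
# train_split=0.6 and test_split=0.2 the remaining 20% goes to validation,
# matching the (train, test, val) unpacking used in load().
def split_data(data: np.ndarray, train_split: float, test_split: float):
    n_train = int(train_split * len(data))
    n_test = int(test_split * len(data))
    train = data[:n_train]
    test = data[n_train:n_train + n_test]
    val = data[n_train + n_test:]
    return train, test, val
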
src/loaders/calo_inn.py (40 additions, 7 deletions)

@@ -1,5 +1,6 @@
 import pandas as pd
 import numpy as np
+import h5py
 from types import SimpleNamespace

 from ..dataset import DiscriminatorData
@@ -24,16 +25,16 @@ def load(params: dict) -> list[DiscriminatorData]:
     datasets_list = [
         {'level': 'low', 'normalize': True, 'label': 'Norm.', 'suffix': 'norm'},
         {'level': 'low', 'normalize': False, 'label': 'Unnorm.', 'suffix': 'unnorm'},
-        {'level': 'high', 'normalize': False, 'label': 'High', 'suffix': 'high'},
+        #{'level': 'high', 'normalize': False, 'label': 'High', 'suffix': 'high'},
     ]

     for dataset in datasets_list:
         if dataset['level'] == 'low':
-            geant_sample = create_data(params['geant_file'], dataset)
-            gen_sample = create_data(params['generated_file'], dataset)
+            geant_sample = create_data(params['geant_file'], dataset, **preproc_kwargs)
+            gen_sample = create_data(params['generated_file'], dataset, **preproc_kwargs)
         elif dataset['level'] == 'high':
-            geant_sample = create_data_high(params['geant_file'], dataset)
-            gen_sample = create_data_high(params['generated_file'], dataset)
+            geant_sample = create_data_high(params['geant_file'], dataset, **preproc_kwargs)
+            gen_sample = create_data_high(params['generated_file'], dataset, **preproc_kwargs)
         else:
             raise ValueError('Classifier preprocessing running at unknown level.')

@@ -43,7 +44,7 @@ def load(params: dict) -> list[DiscriminatorData]:
params["test_split"]
)
train_fake, test_fake, val_fake = split_data(
generated_sample,
gen_sample,
params["train_split"],
params["test_split"]
)
@@ -63,8 +64,40 @@ def load(params: dict) -> list[DiscriminatorData]:
         )
     return datasets

+#def create_data(data_path):
+def create_data(data_path, dataset_list, **kwargs):
+    with h5py.File(data_path, "r") as f:
+        en_test = f.get('energy')[:] / 1e2
+        lay_0 = f.get('layer_0')[:] / 1e5
+        lay_1 = f.get('layer_1')[:] / 1e5
+        lay_2 = f.get('layer_2')[:] / 1e5
+    data = np.concatenate((lay_0.reshape(-1, 288), lay_1.reshape(-1, 144), lay_2.reshape(-1, 72)), axis=1)
+
+    en0_t = np.sum(data[:, :288], axis=1, keepdims=True)
+    en1_t = np.sum(data[:, 288:432], axis=1, keepdims=True)
+    en2_t = np.sum(data[:, 432:], axis=1, keepdims=True)
+
+    if dataset_list['normalize']:
+        data[:, :288] /= en0_t + 1e-16
+        data[:, 288:432] /= en1_t + 1e-16
+        data[:, 432:] /= en2_t + 1e-16
+
+    if kwargs['add_log_energy']:
+        data = np.concatenate((data, np.log10(en_test*10).reshape(-1, 1)), axis=1)
+        data = np.nan_to_num(data, posinf=0, neginf=0)
+
+    en0_t = np.log10(en0_t + 1e-8) + 2.
+    en1_t = np.log10(en1_t + 1e-8) + 2.
+    en2_t = np.log10(en2_t + 1e-8) + 2.
+
+    if kwargs['add_log_layer_ens']:
+        data = np.concatenate((data, en0_t, en1_t, en2_t), axis=1)
+    if kwargs['add_logit_step']:
+        raise ValueError('Not implemented yet')
+    return data
+
+def create_data_high(data_path, dataset_list, **kwargs):
+    pass

 def split_data(
     data: np.ndarray,
     train_split: float,
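
Note on the collapsed context above: load() passes **preproc_kwargs into create_data, but the dict's construction is hidden between the hunks. Below is a plausible reconstruction, assuming it simply forwards the three preprocessing flags from loader_params, plus a quick check of the feature count the low-level preprocessing implies; preproc_kwargs as written here is an assumption, not code visible in this diff:

# Assuming the repo root is on the path so the loader is importable:
from src.loaders.calo_inn import create_data

params = {  # values taken from params/calo_inn.yaml above
    'geant_file': '/remote/gpu06/favaro/calo_inn/datasets/cls_data/train_cls_piplus.hdf5',
    'add_log_energy': True,
    'add_log_layer_ens': True,
    'add_logit_step': False,
}

# Hypothetical reconstruction of the collapsed preproc_kwargs, assuming it
# forwards the three YAML flags verbatim.
preproc_kwargs = {
    'add_log_energy': params['add_log_energy'],
    'add_log_layer_ens': params['add_log_layer_ens'],
    'add_logit_step': params['add_logit_step'],
}

dataset = {'level': 'low', 'normalize': True, 'label': 'Norm.', 'suffix': 'norm'}
sample = create_data(params['geant_file'], dataset, **preproc_kwargs)

# 288 + 144 + 72 = 504 calorimeter voxels, +1 log total energy,
# +3 log layer energies: 508 columns when both add_* flags are True.
assert sample.shape[1] == 508
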
