Merge pull request #37 from ewencedr/high-level-classifier
Cedric Dev and joschkabirk authored Sep 15, 2023
2 parents 4e9ce04 + b141910 commit a253ed2
Showing 39 changed files with 13,062 additions and 322 deletions.
2 changes: 1 addition & 1 deletion configs/callbacks/lhco/lhco.yaml
@@ -27,7 +27,7 @@ model_checkpoint:
 
 early_stopping:
   monitor: "val/loss"
-  patience: 200
+  patience: 100
   mode: "min"
 
 model_summary:
9 changes: 7 additions & 2 deletions configs/data/classifier_data.yaml
@@ -1,5 +1,10 @@
 _target_: src.data.classifier_datamodule.ClassifierDataModule
 data_dir: ${paths.data_dir}
-batch_size: 1024
+gendatafile: "idealized_LHCO"
+batch_size: 128
 num_workers: 32
-pin_memory: False
\ No newline at end of file
+pin_memory: False
+gen_jet: "both"
+ref_jet: "both"
+idealized: False
+use_shuffled_data: False
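For orientation: data configs like this one are consumed through Hydra's instantiate mechanism. A minimal sketch of that wiring, assuming the ClassifierDataModule constructor simply accepts these keys as keyword arguments:

import hydra
from omegaconf import OmegaConf

# Load the data config; ${paths.data_dir} would normally be resolved by the
# full Hydra compose, so it is overridden explicitly here.
cfg = OmegaConf.load("configs/data/classifier_data.yaml")

# _target_ names the class to build; the remaining keys become kwargs.
datamodule = hydra.utils.instantiate(cfg, data_dir="./data")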
6 changes: 6 additions & 0 deletions configs/data/hl_classifier.yaml
@@ -0,0 +1,6 @@
_target_: src.data.hl_classifier_datamodule.HLClassifierDataModule
data_dir: ${paths.data_dir}
file_name: cathode_v
batch_size: 128
num_workers: 32
pin_memory: False
4 changes: 4 additions & 0 deletions configs/data/lhco.yaml
@@ -9,10 +9,14 @@ num_particles: 279
 variable_jet_sizes: True
 centering: False
 normalize: True
+normalize_sigma: 5
 conditioning: True
 use_all_data: True
 relative_coords: True
 jet_type: "x"
 shuffle_data: True
 use_calculated_base_distribution: True
 file_suffix_processed_data: ""
+log_pt: False
+pt_standardization: False
+multiplicity_conditioning: False
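The new log_pt and pt_standardization flags suggest optional pT preprocessing. A hypothetical sketch of what such a transform could look like (the actual implementation lives in the LHCO datamodule and is not part of this diff):

import numpy as np

def preprocess_pt(pt: np.ndarray, log_pt: bool = False, pt_standardization: bool = False) -> np.ndarray:
    if log_pt:
        # log-transform compresses the long pT tail; epsilon guards zero-padded particles
        pt = np.log(pt + 1e-8)
    if pt_standardization:
        # rescale to zero mean and unit variance
        pt = (pt - pt.mean()) / pt.std()
    return pt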
89 changes: 89 additions & 0 deletions configs/experiment/jetnet/fm_transformer.yaml
@@ -0,0 +1,89 @@
# @package _global_

# to execute this experiment run:
# python train.py experiment=fm_tops

defaults:
  - override /data: /jetnet/jetnet_tops_30_jedi.yaml
  - override /model: flow_matching_transformer.yaml
  - override /callbacks: jetnet.yaml
  - override /trainer: gpu.yaml

# all parameters below will be merged with parameters from default configurations set above
# this allows you to overwrite only specified parameters

tags: ["flow_matching", "tops30", "uncond", "transformer"]

seed: 12345

trainer:
  min_epochs: 1000
  max_epochs: 10000
  gradient_clip_val: 0.5

model:
  net_config:
    emb: 128
    heads: 2
    depth: 3
    seq_length: 30
    mask: False

data:
  jet_type:
    #- "g"
    #- "q"
    - "t"
    #- "w"
    #- "z"
  batch_size: 1024
  num_particles: ${model.net_config.seq_length}
  variable_jet_sizes: False
  centering: False
  normalize: True
  normalize_sigma: 5
  use_calculated_base_distribution: False
  conditioning_type: False
  conditioning_pt: False
  conditioning_eta: False
  conditioning_mass: False
  conditioning_num_particles: False
  # global_cond_dim needs to be set when using conditioning

callbacks:
  ema:
    decay: 0.999
    apply_ema_every_n_steps: 1
    start_step: 0
    save_ema_weights_in_callback_state: True
    evaluate_ema_weights_instead: True

  jetnet_eval:
    every_n_epochs: 100
    num_jet_samples: -1
    data_type: "val"
    use_ema: ${callbacks.ema.evaluate_ema_weights_instead}
    generation_config:
      ode_solver: "midpoint"
    w_dist_config:
      num_batches: 40
    plot_config:
      plot_w_dists: False
      plot_jet_features: True

  #early_stopping:
  #  monitor: "val/loss"
  #  patience: 2000
  #  mode: "min"

task_name: "fm_tops-${model.net_config.seq_length}-transformer"

logger:
  wandb:
    tags: ${tags}
    group: "fm_tops"
    name: ${task_name}
  comet:
    experiment_name: ${task_name}
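The generation_config above selects a midpoint ODE solver for sampling from the flow-matching model. As a rough sketch (the repository's actual sampler and network interface are assumptions here), a midpoint integrator looks like this:

import torch

def sample_midpoint(velocity_net, x, n_steps=100):
    """Integrate dx/dt = v(x, t) from t=0 (noise) to t=1 (data)."""
    dt = 1.0 / n_steps
    t = torch.zeros(x.shape[0], device=x.device)
    for _ in range(n_steps):
        half = x + 0.5 * dt * velocity_net(x, t)       # Euler half-step
        x = x + dt * velocity_net(half, t + 0.5 * dt)  # evaluate velocity at the midpoint
        t = t + dt
    return x

Compared with plain Euler, the midpoint rule is second-order accurate, so fewer steps are typically needed for comparable sample quality.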
52 changes: 52 additions & 0 deletions configs/experiment/lhco/epic_classifier.yaml
@@ -0,0 +1,52 @@
# @package _global_

# to execute this experiment run:
# python train.py experiment=fm_tops

defaults:
  - override /data: classifier_data.yaml
  - override /model: epic_classifier.yaml
  - override /callbacks: default.yaml
  - override /trainer: gpu.yaml

# all parameters below will be merged with parameters from default configurations set above
# this allows you to overwrite only specified parameters

tags: ["classifier", "LHCO", "EPiC"]

seed: 12345

trainer:
  min_epochs: 10
  max_epochs: 300
  gradient_clip_val: 0.5

model:
  net_config:
    num_sup_sets: 1

callbacks:
  early_stopping:
    monitor: val/loss
    patience: 20
    mode: min
    verbose: true

data:
  #gendatafile: "lhco-xy-10layer-256latent-logpt-new2m_sr-midpoint-300_shuffled"
  gendatafile: "lhco-xy-10layer-256latent-logpt-nnew_sr-midpoint-300"
  #gendatafile: "FPCD_LHCO_SR_shuffled"

task_name: "epic_classifier"

logger:
  wandb:
    tags: ${tags}
    project: "LHCO_epic_classifier"
    group: "lhco_flow_matching"
    name: ${task_name}
  comet:
    project_name: "LHCO_epic_classifier"
    experiment_name: ${task_name}
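The early_stopping block maps onto Lightning's EarlyStopping callback; assuming the default callback config resolves it in the usual way, the equivalent direct construction would be:

from pytorch_lightning.callbacks import EarlyStopping

early_stopping = EarlyStopping(
    monitor="val/loss",  # metric the classifier logs each validation epoch
    patience=20,         # stop after 20 epochs without improvement
    mode="min",          # lower loss is better
    verbose=True,
)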
[file name not captured]
@@ -4,8 +4,8 @@
 # python train.py experiment=fm_tops
 
 defaults:
-  - override /data: classifier_data.yaml
-  - override /model: epic_classifier.yaml
+  - override /data: hl_classifier.yaml
+  - override /model: hl_classifier.yaml
   - override /callbacks: default.yaml
   - override /trainer: gpu.yaml

@@ -14,25 +14,29 @@ defaults:
 
 
 
-tags: ["classifier", "LHCO"]
+tags: ["classifier", "LHCO", "high_level"]
 
 seed: 12345
 
 trainer:
   min_epochs: 10
-  max_epochs: 10
+  max_epochs: 100
   gradient_clip_val: 0.5
 
+task_name: "hl_classifier"
 
-
-task_name: "classifier"
+callbacks:
+  early_stopping:
+    monitor: "val/acc"
+    patience: 10
+    mode: "max"
 
 logger:
   wandb:
     tags: ${tags}
-    project: "LHCO"
-    group: "lhco_flow_matching"
+    project: "LHCO_classification"
+    group: "lhco_hl_classifier"
     name: ${task_name}
   comet:
-    project_name: "LHCO"
+    project_name: "LHCO_classification"
     experiment_name: ${task_name}
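Note that this config monitors "val/acc" with mode: "max", while the EPiC classifier above monitors "val/loss" with mode: "min"; the mode must match the direction in which the metric improves, or the monitor never registers an improvement and training stops as soon as the patience budget runs out. Assuming the same Lightning callback as above:

from pytorch_lightning.callbacks import EarlyStopping

# accuracy improves upward, hence mode="max"
acc_stopper = EarlyStopping(monitor="val/acc", patience=10, mode="max")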
5 changes: 3 additions & 2 deletions configs/experiment/lhco/lhco_both_jets.yaml
@@ -27,8 +27,8 @@ model:
   num_particles: 279
   global_cond_dim: 4 # needs to be calculated when using conditioning
   local_cond_dim: 4
-  layers: 10
-  hidden_dim: 200
+  layers: 6
+  hidden_dim: 128
   latent: 16
   scheduler:
     warmup: ${trainer.min_epochs}
@@ -45,6 +45,7 @@
   use_all_data: False
   shuffle_data: False
   file_suffix_processed_data: ""
+  log_pt: True
 
 callbacks:
   ema:
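The ema callback (its settings are truncated in this hunk, but the new experiment configs in this commit use decay: 0.999 applied every step) keeps an exponential moving average of the model weights for evaluation. The update itself is roughly a sketch like:

import torch

@torch.no_grad()
def update_ema(ema_params, model_params, decay=0.999):
    for ema_p, p in zip(ema_params, model_params):
        # ema = decay * ema + (1 - decay) * current weights
        ema_p.mul_(decay).add_(p, alpha=1.0 - decay)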
122 changes: 122 additions & 0 deletions configs/experiment/lhco/lhco_both_jets2.yaml
@@ -0,0 +1,122 @@
# @package _global_

# to execute this experiment run:
# python train.py experiment=fm_tops

defaults:
  - override /data: lhco.yaml
  - override /model: flow_matching2.yaml
  - override /callbacks: /lhco/lhco.yaml
  - override /trainer: gpu.yaml

# all parameters below will be merged with parameters from default configurations set above
# this allows you to overwrite only specified parameters

tags: ["flow_matching", "LHCO", "cond", "both_jet"]

seed: 12345

trainer:
  min_epochs: 500
  max_epochs: 5000
  gradient_clip_val: 0.5

model:
  num_particles: 279
  global_cond_dim: 5 # needs to be calculated when using conditioning
  local_cond_dim: 5
  layers: 12
  hidden_dim: 150
  latent: 256
  #scheduler:
  #  warmup: ${trainer.min_epochs}
  #  max_iters: ${trainer.max_epochs}
  optimizer:
    _target_: torch.optim.AdamW
    _partial_: true
    lr: 0.0003
    weight_decay: 0.00005

  scheduler:
    _target_: torch.optim.lr_scheduler.CosineAnnealingLR
    _partial_: true
    T_max: ${trainer.max_epochs}

data:
  batch_size: 128
  normalize: True
  normalize_sigma: 1
  conditioning: True
  relative_coords: True
  jet_type: "all"
  num_particles: ${model.num_particles}
  val_fraction: 0.05
  test_fraction: 0.35
  use_all_data: False
  shuffle_data: False
  file_suffix_processed_data: "_masssorted"
  log_pt: True
  pt_standardization: False
  multiplicity_conditioning: True

callbacks:
  ema:
    decay: 0.999
    apply_ema_every_n_steps: 1
    start_step: 0
    save_ema_weights_in_callback_state: True
    evaluate_ema_weights_instead: True

  early_stopping: null

  #lhco_eval:
  #  every_n_epochs: 300 # evaluate every n epochs
  #  num_jet_samples: -1 # jet samples to generate
  #  model_name: "lhco_flow_matching_both_jet"
  #  log_epoch_zero: True
  #  data_type: "val"
  #  w_dist_config:
  #    num_eval_samples: 10_000
  #    num_batches: 40
  #    calculate_efps: False
  #  generation_config:
  #    batch_size: 2048
  #    ode_solver: "midpoint"
  #    ode_steps: 100
  #    verbose: True
  #  plot_config:
  #    plot_efps: False
  #    plottype: ""

  lhco_eval_sr:
    every_n_epochs: 300 # evaluate every n epochs
    num_jet_samples: -1 # jet samples to generate
    model_name: "lhco_flow_matching_both_jet_sr"
    log_epoch_zero: True
    data_type: "val"
    w_dist_config:
      num_eval_samples: 10_000
      num_batches: 40
      calculate_efps: False
    generation_config:
      batch_size: 2048
      ode_solver: "midpoint"
      ode_steps: 100
      verbose: True
    plot_config:
      plot_efps: False
      plottype: ""

task_name: "lhco_flow_matching_both_jet"

logger:
  wandb:
    tags: ${tags}
    project: "LHCO"
    group: "lhco_flow_matching"
    name: ${task_name}
  comet:
    project_name: "LHCO"
    experiment_name: ${task_name}
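The _partial_: true entries make Hydra return factories rather than instances, since the optimizer needs the model parameters at construction time. A sketch of how a LightningModule's configure_optimizers would typically consume them (the repo's exact wiring is an assumption):

import functools
import torch

# what hydra.utils.instantiate returns for the two _partial_ blocks above
optimizer_factory = functools.partial(torch.optim.AdamW, lr=0.0003, weight_decay=0.00005)
scheduler_factory = functools.partial(torch.optim.lr_scheduler.CosineAnnealingLR, T_max=5000)

def configure_optimizers(self):
    optimizer = optimizer_factory(self.parameters())  # bind the model parameters now
    scheduler = scheduler_factory(optimizer)          # cosine anneal over max_epochs
    return [optimizer], [scheduler]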