## Model architecture.
model:
  ## Shape can be: [global, window-based, hybrid].
  shape: global
  ## Model trained on species: [human, canid].
  species: canid
  ## Model trained on chromosome:
  chm: all
  ## Segment of SNPs to process.
  arange:
    ini: 0
    end: 198473
    # [human: 839629, canid: 198473]
  ## If shape is [hybrid], define n_windows; otherwise leave it null.
  n_windows: null
  ## If shape is [global, window-based], define window_size; otherwise leave it null.
  window_size: 2500
  ## If [window-based], define window_cloning; otherwise leave it null.
  window_cloning: false
  ## This mode is only compatible with [window_cloning] set to true.
  ## [window_train_mode] admits 2 modes:
  ##   - [randomized]: a random window is taken from the chromosome range ([ini], [end]).
  ##     This approach is useful for training on very long sequences, e.g.,
  ##     whole chromosomes.
  ##   - [sequential]: a single window is slid across the sequence.
  ##     Number of forward passes = ([end] - [ini]) / [window_size].
  window_train_mode: sequential
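## A minimal sketch of how the two [window_train_mode] modes can be read
## (illustrative Python only; the variable names below are assumptions and
## not taken from this repository):
#   import random
#   import numpy as np
#   ini, end, window_size = 0, 198473, 2500
#   window_train_mode = "sequential"
#   snps = np.zeros((8, end), dtype=np.float32)   # placeholder for real genotype data
#   if window_train_mode == "randomized":
#       # one random window per forward pass, useful for whole chromosomes
#       start = random.randrange(ini, end - window_size)
#       batch = snps[:, start:start + window_size]
#   else:  # "sequential": slide a single window across [ini, end)
#       for start in range(ini, end - window_size + 1, window_size):
#           batch = snps[:, start:start + window_size]
#           # ... one forward pass per window ...
#   # forward passes in sequential mode = (end - ini) // window_size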
  ## Distribution can be: [Unknown, Gaussian, Multi-Bernoulli, Uniform].
  distribution: Gaussian
  ## Definition of the quantizer:
  ## If distribution is [Uniform], define codebook_size.
  quantizer:
    ## Number of embeddings in the codebook.
    codebook_size: 64
    ## Whether to use a different codebook for each window.
    win_independent: false
    ## Defines the rate of the code change.
    beta: 2
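## A minimal sketch of a codebook quantizer consistent with these parameters.
## A VQ-style nearest-neighbour lookup is an assumption about the implementation,
## not code taken from this repository:
#   import torch
#   codebook = torch.nn.Embedding(64, 2)          # codebook_size x latent dimension
#   def quantize(z):                              # z: (batch, latent dimension)
#       d = torch.cdist(z, codebook.weight)       # distance to every code
#       codes = d.argmin(dim=1)                   # nearest code per sample
#       zq = codebook(codes)                      # quantized latent
#       # beta weights the commitment term that pulls z towards its code
#       commit = 2 * torch.mean((z - zq.detach()) ** 2)
#       return zq, codes, commit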
  ## Whether to use a multi-head output at the encoder end.
  multi_head:
    using: false
    ## Number of heads at the encoder end.
    features: 1
  ## If the model is conditioned, define the number of classes.
  conditioning:
    using: false
    num_classes: 7
    only: 0
  ## Whether denoising mode is turned on.
  denoising:
    using: false
    missing: 0
  ## Architecture of the encoder:
  ## Layers must be numbered sequentially (0, 1, 2, ...).
  ## Each layer must have the following parameters: [size, dropout, normalize, activation].
  ## [size] defines the shape of the layer's input tensor.
  ## The actual size of the parameter matrix is [size_{i}] x [size_{i+1}].
  ## The last layer has only one parameter: [size].
  encoder:
    layer0:
      size: 198473
      dropout: 0
      normalize: true
      activation: ReLU
    layer1:
      size: 512
      dropout: 0
      normalize: true
      activation: ReLU
    layer2:
      size: 64
      dropout: 0
      normalize: true
      activation: null
    layer3:
      size: 2
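## A minimal sketch of how an MLP can be assembled from a layer spec like the one
## above (illustrative only; build_mlp is a hypothetical helper, not code from this
## repository):
#   import torch.nn as nn
#   def build_mlp(layers):                        # last entry carries only "size"
#       mods = []
#       for cur, nxt in zip(layers[:-1], layers[1:]):
#           mods.append(nn.Linear(cur["size"], nxt["size"]))    # size_{i} x size_{i+1}
#           if cur.get("normalize"):
#               mods.append(nn.BatchNorm1d(nxt["size"]))
#           if cur.get("activation"):
#               mods.append(getattr(nn, cur["activation"])())   # e.g. nn.ReLU
#           if cur.get("dropout"):
#               mods.append(nn.Dropout(cur["dropout"]))
#       return nn.Sequential(*mods)
#   encoder = build_mlp([
#       {"size": 198473, "dropout": 0, "normalize": True, "activation": "ReLU"},
#       {"size": 512, "dropout": 0, "normalize": True, "activation": "ReLU"},
#       {"size": 64, "dropout": 0, "normalize": True, "activation": None},
#       {"size": 2},
#   ])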
  ## Architecture of the decoder:
  ## Layers must be numbered sequentially (0, 1, 2, ...).
  ## Each layer must have the following parameters: [size, dropout, normalize, activation].
  ## [size] defines the shape of the layer's input tensor.
  ## The actual size of the parameter matrix is [size_{i}] x [size_{i+1}].
  ## The last layer has only one parameter: [size].
  decoder:
    layer0:
      size: 2
      dropout: 0
      normalize: true
      activation: ReLU
    layer1:
      size: 64
      dropout: 0
      normalize: true
      activation: ReLU
    layer2:
      size: 512
      dropout: 0
      normalize: true
      activation: Sigmoid
    layer3:
      size: 198473
## Hyperparameters for training.
hyperparams:
  epochs: 2000
  batch_size: 512
  single_ancestry: true
  granular_simulation: false
  slide: 15
  loss:
    ## Variational beta.
    beta: 1
    ## Whether [varloss] is used.
    varloss: false
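## A minimal sketch of how the variational beta usually enters the loss (the
## standard beta-VAE form; whether this repository uses exactly this expression
## is an assumption):
#   import torch
#   import torch.nn.functional as F
#   def elbo_loss(x_hat, x, mu, logvar, beta=1.0):
#       recon = F.mse_loss(x_hat, x, reduction="mean")
#       kl = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
#       return recon + beta * kl                  # beta scales the KL term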
  ## Optimizer algorithm can be: [Adam, AdamW, RAdam, QHAdam, Yogi, DiffGrad].
  optimizer:
    algorithm: QHAdam
    lr: 0.0025
    weight_decay: 0.0001
  ## Scheduler can be: [null, plateau, step, multistep, exponential].
  scheduler:
    method: plateau
    factor: 0.1
    patience: 5
    threshold: 0.0001
    mode: rel
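## A minimal sketch of building the optimizer and plateau scheduler from these
## values. Adam is shown because it ships with torch.optim; QHAdam, Yogi and
## DiffGrad would come from a third-party package such as torch_optimizer
## (an assumption about this repository's dependencies):
#   import torch
#   net = torch.nn.Linear(4, 2)                   # placeholder module
#   opt = torch.optim.Adam(net.parameters(), lr=0.0025, weight_decay=0.0001)
#   sched = torch.optim.lr_scheduler.ReduceLROnPlateau(
#       opt, factor=0.1, patience=5, threshold=0.0001, threshold_mode="rel")
#   # after each validation pass: sched.step(val_loss)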
## TR (training) simulation can be: [offline, online].
## [offline] loads a dataset from disk.
## [online] performs simulation on the fly.
## [n_batches] determines the length of an epoch, in batches, for [online] simulation.
## In [offline] simulation, [n_batches, device, mode] are ignored.
## [mode] can be: [uniform, exponential, pre-defined, fix].
training:
  simulation: online
  num_generation_max: 400
  n_batches: 5
  device: cuda
  mode: uniform
  balanced: true
  monitor: wandb
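## A minimal sketch of how [n_batches] can define the epoch length in [online]
## simulation (illustrative only; simulate_batch is a hypothetical stand-in for
## the on-the-fly simulator):
#   import numpy as np
#   def simulate_batch(batch_size, mode, num_generation_max):
#       return np.zeros((batch_size, 198473), dtype=np.float32)
#   for epoch in range(2000):                     # hyperparams.epochs
#       for _ in range(5):                        # training.n_batches per epoch
#           batch = simulate_batch(512, "uniform", 400)
#           # ... one training step on the simulated batch ...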
## VD (validation) simulation can be: [offline].
## [offline] loads a dataset from disk.
validation:
  simulation: online
  num_generation_max: 400
  n_batches: 1
  device: cuda
  mode: uniform
  balanced: true
  ## Scheduler sets the rate at which validation is done. E.g.:
  ##   1: validation is done at the end of every epoch.
  ##   100: validation is done every 100 epochs.
  scheduler: 5
## TS (testing) simulation can be: [offline].
## [offline] loads a dataset from disk.
testing:
  simulation: offline
  ## Scheduler sets the rate at which testing is done. E.g.:
  ##   1: testing is done at the end of every epoch.
  ##   100: testing is done every 100 epochs.
  scheduler: 150
## Checkpoint params.
checkpointing:
  ## Scheduler sets the rate at which checkpointing is done. E.g.:
  ##   1: checkpointing is done at the end of every epoch.
  ##   100: checkpointing is done every 100 epochs.
  scheduler: 5
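## A minimal sketch of how the per-epoch schedulers above are typically applied
## (illustrative only; the validate/test/save_checkpoint calls are hypothetical):
#   for epoch in range(1, 2001):
#       # ... train for one epoch ...
#       if epoch % 5 == 0:                        # validation.scheduler
#           pass                                  # validate()
#       if epoch % 150 == 0:                      # testing.scheduler
#           pass                                  # test()
#       if epoch % 5 == 0:                        # checkpointing.scheduler
#           pass                                  # save_checkpoint()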