## Model architecture.
model:
  ## Shape can be: [global, window-based, hybrid].
  shape: global
  ## Model trained on species: [human, canid].
  species: canid
  ## Model trained on chromosome:
  chm: all
  ## Segment of SNPs to process.
  arange:
    ini: 0
    end: 198473
    # [human: 839629, canid: 198473]
  ## If shape is [hybrid], define n_windows; otherwise leave it null.
  n_windows: null
  ## If shape is [global, window-based], define window_size; otherwise leave it null.
  window_size: 2500
  ## If [window-based], define window_cloning; otherwise leave it null.
  window_cloning: false
  ## This mode is only compatible with [window_cloning] set to true.
  ## [window_train_mode] admits 2 modes:
  ##   - [randomized]: a random window is taken from the chromosome range ([ini], [end]).
  ##     This approach is useful for training on very long sequences, e.g.,
  ##     whole chromosomes.
  ##   - [sequential]: a single window is slid across the sequence.
  ##     Number of forward passes = ([end] - [ini]) / [window_size].
  window_train_mode: sequential
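## A minimal sketch of how the two [window_train_mode] modes can be read
## (illustrative Python only; the variable names below are assumptions and
## not taken from this repository):
#   import random
#   import numpy as np
#   ini, end, window_size = 0, 198473, 2500
#   window_train_mode = "sequential"
#   snps = np.zeros((8, end), dtype=np.float32)   # placeholder for real genotype data
#   if window_train_mode == "randomized":
#       # one random window per forward pass, useful for whole chromosomes
#       start = random.randrange(ini, end - window_size)
#       batch = snps[:, start:start + window_size]
#   else:  # "sequential": slide a single window across [ini, end)
#       for start in range(ini, end - window_size + 1, window_size):
#           batch = snps[:, start:start + window_size]
#           # ... one forward pass per window ...
#   # forward passes in sequential mode = (end - ini) // window_size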
  ## Distribution can be: [Unknown, Gaussian, Multi-Bernoulli, Uniform].
  distribution: Gaussian
  ## Definition of the quantizer:
  ## If distribution is [Uniform], define codebook_size.
  quantizer:
    ## Number of embeddings in the codebook.
    codebook_size: 64
    ## Whether to use a different codebook for each window.
    win_independent: false
    ## Defines the rate of the code change.
    beta: 2
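## A minimal sketch of a codebook quantizer consistent with these parameters.
## A VQ-style nearest-neighbour lookup is an assumption about the implementation,
## not code taken from this repository:
#   import torch
#   codebook = torch.nn.Embedding(64, 2)          # codebook_size x latent dimension
#   def quantize(z):                              # z: (batch, latent dimension)
#       d = torch.cdist(z, codebook.weight)       # distance to every code
#       codes = d.argmin(dim=1)                   # nearest code per sample
#       zq = codebook(codes)                      # quantized latent
#       # beta weights the commitment term that pulls z towards its code
#       commit = 2 * torch.mean((z - zq.detach()) ** 2)
#       return zq, codes, commit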
  ## Whether to use a multi-head output at the encoder end.
  multi_head:
    using: false
    ## Number of heads at the encoder end.
    features: 1
  ## If the model is conditioned, define the number of classes.
  conditioning:
    using: false
    num_classes: 7
    only: 0
  ## Whether denoising mode is turned on.
  denoising:
    using: false
    missing: 0
  ## Architecture of the encoder:
  ## Layers must be numbered sequentially (0, 1, 2, ...).
  ## Each layer must have the following parameters: [size, dropout, normalize, activation].
  ## [size] defines the shape of the layer's input tensor.
  ## The actual size of the parameter matrix is [size_{i}] x [size_{i+1}].
  ## The last layer has only one parameter: [size].
  encoder:
    layer0:
      size: 198473
      dropout: 0
      normalize: true
      activation: ReLU
    layer1:
      size: 512
      dropout: 0
      normalize: true
      activation: ReLU
    layer2:
      size: 64
      dropout: 0
      normalize: true
      activation: null
    layer3:
      size: 2
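## A minimal sketch of how an MLP can be assembled from a layer spec like the one
## above (illustrative only; build_mlp is a hypothetical helper, not code from this
## repository):
#   import torch.nn as nn
#   def build_mlp(layers):                        # last entry carries only "size"
#       mods = []
#       for cur, nxt in zip(layers[:-1], layers[1:]):
#           mods.append(nn.Linear(cur["size"], nxt["size"]))    # size_{i} x size_{i+1}
#           if cur.get("normalize"):
#               mods.append(nn.BatchNorm1d(nxt["size"]))
#           if cur.get("activation"):
#               mods.append(getattr(nn, cur["activation"])())   # e.g. nn.ReLU
#           if cur.get("dropout"):
#               mods.append(nn.Dropout(cur["dropout"]))
#       return nn.Sequential(*mods)
#   encoder = build_mlp([
#       {"size": 198473, "dropout": 0, "normalize": True, "activation": "ReLU"},
#       {"size": 512, "dropout": 0, "normalize": True, "activation": "ReLU"},
#       {"size": 64, "dropout": 0, "normalize": True, "activation": None},
#       {"size": 2},
#   ])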
  ## Architecture of the decoder:
  ## Layers must be numbered sequentially (0, 1, 2, ...).
  ## Each layer must have the following parameters: [size, dropout, normalize, activation].
  ## [size] defines the shape of the layer's input tensor.
  ## The actual size of the parameter matrix is [size_{i}] x [size_{i+1}].
  ## The last layer has only one parameter: [size].
  decoder:
    layer0:
      size: 2
      dropout: 0
      normalize: true
      activation: ReLU
    layer1:
      size: 64
      dropout: 0
      normalize: true
      activation: ReLU
    layer2:
      size: 512
      dropout: 0
      normalize: true
      activation: Sigmoid
    layer3:
      size: 198473
## Hyperparameters for training.
hyperparams:
  epochs: 2000
  batch_size: 512
  single_ancestry: true
  granular_simulation: false
  slide: 15
  loss:
    ## Variational beta.
    beta: 1
    ## Whether [varloss] is used.
    varloss: false
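## A minimal sketch of how the variational beta usually enters the loss (the
## standard beta-VAE form; whether this repository uses exactly this expression
## is an assumption):
#   import torch
#   import torch.nn.functional as F
#   def elbo_loss(x_hat, x, mu, logvar, beta=1.0):
#       recon = F.mse_loss(x_hat, x, reduction="mean")
#       kl = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
#       return recon + beta * kl                  # beta scales the KL term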
  ## Optimizer algorithm can be: [Adam, AdamW, RAdam, QHAdam, Yogi, DiffGrad].
  optimizer:
    algorithm: QHAdam
    lr: 0.0025
    weight_decay: 0.0001
  ## Scheduler can be: [null, plateau, step, multistep, exponential].
  scheduler:
    method: plateau
    factor: 0.1
    patience: 5
    threshold: 0.0001
    mode: rel
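## A minimal sketch of building the optimizer and plateau scheduler from these
## values. Adam is shown because it ships with torch.optim; QHAdam, Yogi and
## DiffGrad would come from a third-party package such as torch_optimizer
## (an assumption about this repository's dependencies):
#   import torch
#   net = torch.nn.Linear(4, 2)                   # placeholder module
#   opt = torch.optim.Adam(net.parameters(), lr=0.0025, weight_decay=0.0001)
#   sched = torch.optim.lr_scheduler.ReduceLROnPlateau(
#       opt, factor=0.1, patience=5, threshold=0.0001, threshold_mode="rel")
#   # after each validation pass: sched.step(val_loss)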
## TR (training) simulation can be: [offline, online].
## [offline] loads a dataset from disk.
## [online] performs simulation on the fly.
## [n_batches] determines the length of an epoch, in batches, for [online] simulation.
## In [offline] simulation, [n_batches, device, mode] are ignored.
## [mode] can be: [uniform, exponential, pre-defined, fix].
training:
  simulation: online
  num_generation_max: 400
  n_batches: 5
  device: cuda
  mode: uniform
  balanced: true
  monitor: wandb
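## A minimal sketch of how [n_batches] can define the epoch length in [online]
## simulation (illustrative only; simulate_batch is a hypothetical stand-in for
## the on-the-fly simulator):
#   import numpy as np
#   def simulate_batch(batch_size, mode, num_generation_max):
#       return np.zeros((batch_size, 198473), dtype=np.float32)
#   for epoch in range(2000):                     # hyperparams.epochs
#       for _ in range(5):                        # training.n_batches per epoch
#           batch = simulate_batch(512, "uniform", 400)
#           # ... one training step on the simulated batch ...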
## VD (validation) simulation can be: [offline].
## [offline] loads a dataset from disk.
validation:
  simulation: online
  num_generation_max: 400
  n_batches: 1
  device: cuda
  mode: uniform
  balanced: true
  ## Scheduler sets the rate at which validation is done. E.g.:
  ##   1: validation is done at the end of every epoch.
  ##   100: validation is done every 100 epochs.
  scheduler: 5
## TS (testing) simulation can be: [offline].
## [offline] loads a dataset from disk.
testing:
  simulation: offline
  ## Scheduler sets the rate at which testing is done. E.g.:
  ##   1: testing is done at the end of every epoch.
  ##   100: testing is done every 100 epochs.
  scheduler: 150
## Checkpoint params.
checkpointing:
  ## Scheduler sets the rate at which checkpointing is done. E.g.:
  ##   1: checkpointing is done at the end of every epoch.
  ##   100: checkpointing is done every 100 epochs.
  scheduler: 5
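## A minimal sketch of how the per-epoch schedulers above are typically applied
## (illustrative only; the validate/test/save_checkpoint calls are hypothetical):
#   for epoch in range(1, 2001):
#       # ... train for one epoch ...
#       if epoch % 5 == 0:                        # validation.scheduler
#           pass                                  # validate()
#       if epoch % 150 == 0:                      # testing.scheduler
#           pass                                  # test()
#       if epoch % 5 == 0:                        # checkpointing.scheduler
#           pass                                  # save_checkpoint()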