-
Notifications
You must be signed in to change notification settings - Fork 43
/
base.yaml
71 lines (66 loc) · 2.09 KB
/
base.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Common settings
common:
  # Number of historical images
  img_history_size: 2
  # Number of future actions to predict
  action_chunk_size: 64
  # Number of cameras to be used in the model
  num_cameras: 3
  # Dimension of the state/action vector; the same space is used for both
  # state and action. This MUST match configs/state_vec.py.
  state_dim: 128
# Dataset buffering and sampling settings
dataset:
  # Data is extracted from the raw dataset and stored in an on-disk buffer
  # by a producer. During training, a consumer reads data randomly from
  # that buffer, and the producer replaces data that the consumer has
  # already read with new data.

  # Path to the buffer (at least 400GB)
  buf_path: /path/to/buffer
  # Number of chunks in the buffer
  buf_num_chunks: 512
  # Number of samples (steps, not episodes) in each chunk
  buf_chunk_size: 512

  # Episodes shorter than `epsd_len_thresh_low` are filtered out
  epsd_len_thresh_low: 32
  # For episodes longer than `epsd_len_thresh_high`, a random sample of
  # `epsd_len_thresh_high` steps is drawn each time the episode is loaded,
  # to better balance the training datasets
  epsd_len_thresh_high: 2048
  # How to fit the image size
  image_aspect_ratio: pad
  # Maximum number of language tokens
  tokenizer_max_length: 1024
# Model architecture, noise scheduler, and EMA settings
model:
  # Config for condition adaptors
  lang_adaptor: mlp2x_gelu
  img_adaptor: mlp2x_gelu
  state_adaptor: mlp3x_gelu
  lang_token_dim: 4096
  img_token_dim: 1152
  # Dimension of the action or proprioception vector.
  # A `state` refers to an action or a proprioception vector.
  # NOTE(review): presumably this should agree with `state_dim` — confirm.
  state_token_dim: 128

  # Config for RDT structure
  rdt:
    # 1B model: num_heads 32, hidden_size 2048
    hidden_size: 2048
    depth: 28
    num_heads: 32
    cond_pos_embed_type: multimodal

  # For noise scheduler
  noise_scheduler:
    type: ddpm
    num_train_timesteps: 1000
    num_inference_timesteps: 5
    beta_schedule: squaredcos_cap_v2  # Critical choice
    prediction_type: sample
    clip_sample: false

  # For EMA (params averaging)
  # We do not use EMA currently
  ema:
    update_after_step: 0
    inv_gamma: 1.0
    power: 0.75
    min_value: 0.0
    max_value: 0.9999