-
Notifications
You must be signed in to change notification settings - Fork 61
/
cfg.yaml
124 lines (116 loc) · 2.73 KB
/
cfg.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# Dataset generation / loading settings.
# NOTE(review): keys set to null presumably fall back to the consumer's
# defaults - confirm against the code that reads this file.
dataset:
  dataset_name: "nbody_digits3_len20_size64_r0_train20k"
  # Split sizes: 20000 train / 1000 val / 1000 test samples.
  num_train_samples: 20000
  num_val_samples: 1000
  num_test_samples: 1000
  digit_num: null
  img_size: 64
  raw_img_size: 128
  seq_len: 20
  raw_seq_len_multiplier: 5
  distractor_num: null
  distractor_size: 5
  max_velocity_scale: 2.0
  initial_velocity_range: [0.0, 2.0]
  # The next four ranges are degenerate here (lower bound == upper bound).
  random_acceleration_range: [0.0, 0.0]
  scale_variation_range: [1.0, 1.0]
  rotation_angle_range: [-0, 0]
  illumination_factor_range: [1.0, 1.0]
  period: 5
  global_rotation_prob: 0.5
  index_range: [0, 40000]
  mnist_data_path: null
  # N-Body params
  nbody_acc_mode: "r0"
  nbody_G: 0.05
  nbody_softening_distance: 10.0
  nbody_mass: null
# Sequence layout: how each data sequence is split into input/target frames.
layout:
  # in_len + out_len (10 + 10) equals data_seq_len (20) in this config.
  in_len: 10
  out_len: 10
  # Nested key that shares the section's name; it must stay indented under
  # `layout:`, otherwise it becomes a duplicate top-level key.
  # NOTE(review): "NTHWC" presumably names the tensor axis order - confirm.
  layout: "NTHWC"
  data_seq_len: 20
# Optimization settings: batch sizes, optimizer, LR schedule, early stopping.
optim:
  # NOTE(review): total/micro = 32/2 presumably implies gradient
  # accumulation across micro-batches and/or devices - confirm with the
  # training script.
  total_batch_size: 32
  micro_batch_size: 2
  seed: 0
  method: "adamw"
  lr: 0.001
  wd: 1.0e-05
  gradient_clip_val: 1.0
  max_epochs: 100
  # scheduler
  lr_scheduler_mode: "cosine"
  min_lr_ratio: 1.0e-3
  warmup_min_lr_ratio: 0.0
  warmup_percentage: 0.2
  # early stopping
  early_stop: true
  early_stop_mode: "min"
  early_stop_patience: 20
  save_top_k: 1
# Logging / monitoring switches.
logging:
  logging_prefix: "Cuboid_NBody"
  monitor_lr: true
  monitor_device: false
  # NOTE(review): -1 presumably disables gradient-norm tracking - confirm.
  track_grad_norm: -1
  use_wandb: false
# Trainer-level settings.
trainer:
  check_val_every_n_epoch: 1
  # NOTE(review): presumably the logging interval as a fraction of total
  # training steps - confirm with the training script.
  log_step_ratio: 0.001
  precision: 32
# Visualization settings: example indices per split (index 0 for each here).
vis:
  train_example_data_idx_list: [0]
  val_example_data_idx_list: [0]
  test_example_data_idx_list: [0]
  eval_example_only: false
# Model hyper-parameters.
model:
  # Both shapes are [10, 64, 64, 1]; 10 matches layout.in_len / out_len and
  # 64 matches dataset.img_size.
  # NOTE(review): presumably (T, H, W, C) without the batch axis - confirm.
  input_shape: [10, 64, 64, 1]
  target_shape: [10, 64, 64, 1]
  base_units: 64
  # block_units: null
  scale_alpha: 1.0
  # Encoder / decoder structure.
  enc_depth: [4, 4]
  dec_depth: [4, 4]
  enc_use_inter_ffn: true
  dec_use_inter_ffn: true
  dec_hierarchical_pos_embed: false
  downsample: 2
  downsample_type: "patch_merge"
  upsample_type: "upsample"
  # Global-vector options; num_global_vectors is 0 in this config.
  num_global_vectors: 0
  use_dec_self_global: false
  dec_self_update_global: true
  use_dec_cross_global: false
  use_global_vector_ffn: false
  use_global_self_attn: false
  separate_global_qkv: false
  global_dim_ratio: 1
  # Attention patterns.
  self_pattern: "axial"
  cross_self_pattern: "axial"
  cross_pattern: "cross_1x1"
  dec_cross_last_n_frames: null
  # Dropout rates.
  attn_drop: 0.1
  proj_drop: 0.1
  ffn_drop: 0.1
  num_heads: 4
  ffn_activation: "gelu"
  gated_ffn: false
  norm_layer: "layer_norm"
  padding_type: "zeros"
  pos_embed_type: "t+hw"
  use_relative_pos: true
  self_attn_use_final_proj: true
  dec_use_first_self_attn: false
  z_init_method: "zeros"
  # Initial downsampling / final upsampling stages.
  initial_downsample_type: "conv"
  initial_downsample_activation: "leaky"
  initial_downsample_scale: 2
  initial_downsample_conv_layers: 2
  final_upsample_conv_layers: 1
  checkpoint_level: 0
  # Init modes are quoted on purpose so they stay strings ("0"), not ints.
  attn_linear_init_mode: "0"
  ffn_linear_init_mode: "0"
  conv_init_mode: "0"
  down_up_linear_init_mode: "0"
  norm_init_mode: "0"