forked from sming256/OpenTAD
-
Notifications
You must be signed in to change notification settings - Fork 0
/
thumos_internvideo6b.py
94 lines (87 loc) · 2.74 KB
/
thumos_internvideo6b.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Base configs this file extends: dataset defaults first, then model defaults.
_base_ = [
    "../_base_/datasets/thumos-14/features_i3d_pad.py",  # dataset config
    "../_base_/models/videomambasuite.py",  # model config
]

# Truncation length handed to RandomTrunc in the training pipeline below.
trunc_len = 2304
# Feature directory shared by the train, val and test splits.
data_path = "data/thumos-14/features/thumos14_6b/"

dataset = {
    # Training split: the only pipeline that applies RandomTrunc.
    "train": {
        "data_path": data_path,
        "block_list": None,
        "offset_frames": 8,
        "pipeline": [
            {"type": "LoadFeats", "feat_format": "pt", "suffix": "_spatial_feature"},
            {"type": "ConvertToTensor", "keys": ["feats", "gt_segments", "gt_labels"]},
            {
                "type": "RandomTrunc",
                "trunc_len": trunc_len,
                "trunc_thresh": 0.5,
                "crop_ratio": [0.9, 1.0],
            },
            {"type": "Rearrange", "keys": ["feats"], "ops": "t c -> c t"},
            {
                "type": "Collect",
                "inputs": "feats",
                "keys": ["masks", "gt_segments", "gt_labels"],
            },
        ],
    },
    # Validation split: same steps as training, minus RandomTrunc.
    "val": {
        "data_path": data_path,
        "block_list": None,
        "offset_frames": 8,
        "pipeline": [
            {"type": "LoadFeats", "feat_format": "pt", "suffix": "_spatial_feature"},
            {"type": "ConvertToTensor", "keys": ["feats", "gt_segments", "gt_labels"]},
            {"type": "Rearrange", "keys": ["feats"], "ops": "t c -> c t"},
            {
                "type": "Collect",
                "inputs": "feats",
                "keys": ["masks", "gt_segments", "gt_labels"],
            },
        ],
    },
    # Test split: only "feats" is converted and only "masks" is collected,
    # i.e. no ground-truth keys appear in this pipeline.
    "test": {
        "data_path": data_path,
        "block_list": None,
        "offset_frames": 8,
        "pipeline": [
            {"type": "LoadFeats", "feat_format": "pt", "suffix": "_spatial_feature"},
            {"type": "ConvertToTensor", "keys": ["feats"]},
            {"type": "Rearrange", "keys": ["feats"], "ops": "t c -> c t"},
            {"type": "Collect", "inputs": "feats", "keys": ["masks"]},
        ],
    },
}
# Model settings given here: only the projection settings.
# NOTE(review): in_channels=3200 presumably matches the channel size of the
# loaded features; confirm against the feature files.
model = {
    "projection": {"in_channels": 3200, "input_pdrop": 0.1},
}
# Per-split loader settings plus gradient clipping and EMA switches.
solver = {
    "train": {"batch_size": 2, "num_workers": 2},
    "val": {"batch_size": 1, "num_workers": 1},
    "test": {"batch_size": 1, "num_workers": 1},
    "clip_grad_norm": 1,
    "ema": True,
}
# Optimizer choice and its hyper-parameters.
optimizer = {
    "type": "AdamW",
    "lr": 1e-4,
    "weight_decay": 0.05,
    "paramwise": True,
}

# Learning-rate schedule: 5 warmup epochs, 50 epochs in total.
scheduler = {
    "type": "LinearWarmupCosineAnnealingLR",
    "warmup_epoch": 5,
    "max_epoch": 50,
}

# Raw predictions are neither loaded from disk nor saved.
inference = {
    "load_from_raw_predictions": False,
    "save_raw_prediction": False,
}
# Post-processing of predictions: NMS settings and whether to save the result dict.
post_processing = {
    "nms": {
        "use_soft_nms": True,
        "sigma": 0.5,
        "max_seg_num": 2000,
        "min_score": 0.001,
        "multiclass": True,
        "voting_thresh": 0.7,  # a value of 0 disables voting
    },
    "save_dict": False,
}
# Logging, checkpointing and validation cadence.
workflow = {
    "logging_interval": 20,
    "checkpoint_interval": 1,
    "val_loss_interval": 1,
    "val_eval_interval": 1,
    "val_start_epoch": 24,
}

# Output directory for this experiment.
work_dir = "exps/thumos/videomambasuite_internvideo6b"