Skip to content

Commit

Permalink
Add configs for inference (open-mmlab#15)
Browse files Browse the repository at this point in the history
* add configs for inference

* remove unused pipeline
  • Loading branch information
JoannaLXY authored Jul 13, 2020
1 parent 25746b3 commit 6071697
Show file tree
Hide file tree
Showing 6 changed files with 271 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Inference-only config: Recognizer3D with a ResNet3d (depth 50) backbone and
# an I3DHead. Only a test pipeline and a `data.test` entry are defined.

# model settings
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3d',
        pretrained2d=True,
        pretrained='torchvision://resnet50',
        depth=50,
        conv_cfg=dict(type='Conv3d'),
        norm_eval=False,
        inflate=((1, 1, 1), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0), (0, 1, 0)),
        zero_init_residual=False),
    cls_head=dict(
        type='I3DHead',
        num_classes=400,
        in_channels=2048,
        spatial_type='avg',
        dropout_ratio=0.5,
        init_std=0.01))
# model training and testing settings
# No training settings apply here; the name is still declared so that code
# reading `train_cfg` from this config finds it defined.
train_cfg = None
test_cfg = dict(average_clips=None)
# dataset settings
dataset_type = 'VideoDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Steps are listed in the order they are applied to a video sample.
test_pipeline = [
    dict(type='DecordInit', num_threads=1),
    # One clip of 32 frames, taking every 2nd frame.
    dict(
        type='SampleFrames',
        clip_len=32,
        frame_interval=2,
        num_clips=1,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    # Only the image tensor is kept; no meta information is collected.
    dict(type='Collect', keys=['imgs'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=1,
    workers_per_gpu=2,
    test=dict(
        type=dataset_type,
        # Left unset in this config -- presumably supplied by the caller at
        # inference time (TODO confirm against the inference entry point).
        ann_file=None,
        data_prefix=None,
        pipeline=test_pipeline))
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Inference-only config: Recognizer3D with a ResNet2Plus1d (depth 34) backbone
# and an I3DHead. Only a test pipeline and a `data.test` entry are defined.

# model settings
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet2Plus1d',
        depth=34,
        pretrained=None,
        pretrained2d=False,
        norm_eval=False,
        conv_cfg=dict(type='Conv2plus1d'),
        norm_cfg=dict(type='SyncBN', requires_grad=True, eps=1e-3),
        act_cfg=dict(type='ReLU'),
        conv1_kernel=(3, 7, 7),
        conv1_stride_t=1,
        pool1_stride_t=1,
        inflate=(1, 1, 1, 1),
        spatial_strides=(1, 2, 2, 2),
        temporal_strides=(1, 2, 2, 2),
        zero_init_residual=False),
    cls_head=dict(
        type='I3DHead',
        num_classes=400,
        in_channels=512,
        spatial_type='avg',
        dropout_ratio=0.5,
        init_std=0.01))
# model training and testing settings
train_cfg = None
test_cfg = dict(average_clips=None)
# dataset settings
dataset_type = 'VideoDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Steps are listed in the order they are applied to a video sample.
test_pipeline = [
    dict(type='DecordInit'),
    # Ten clips of 8 frames each, taking every 8th frame.
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=10,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    # Collect only 'imgs', matching the ToTensor step below (which converts
    # just 'imgs'). A 'label' key is not gathered, so the pipeline does not
    # depend on the input sample carrying a ground-truth label.
    dict(type='Collect', keys=['imgs'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=1,
    workers_per_gpu=2,
    test=dict(
        type=dataset_type,
        # Left unset in this config -- presumably supplied by the caller at
        # inference time (TODO confirm against the inference entry point).
        ann_file=None,
        data_prefix=None,
        pipeline=test_pipeline))
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Inference-only config: Recognizer3D with a ResNet3dSlowFast backbone (two
# depth-50 'resnet3d' pathways) and a SlowFastHead. Only a test pipeline and a
# `data.test` entry are defined.

# model settings
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3dSlowFast',
        pretrained=None,
        resample_rate=8,  # tau
        speed_ratio=8,  # alpha
        channel_ratio=8,  # beta_inv
        slow_pathway=dict(
            type='resnet3d',
            depth=50,
            pretrained=None,
            lateral=True,
            conv1_kernel=(1, 7, 7),
            dilations=(1, 1, 1, 1),
            conv1_stride_t=1,
            pool1_stride_t=1,
            inflate=(0, 0, 1, 1),
            norm_eval=False),
        fast_pathway=dict(
            type='resnet3d',
            depth=50,
            pretrained=None,
            lateral=False,
            base_channels=8,
            conv1_kernel=(5, 7, 7),
            conv1_stride_t=1,
            pool1_stride_t=1,
            norm_eval=False)),
    cls_head=dict(
        type='SlowFastHead',
        in_channels=2304,  # 2048+256
        num_classes=400,
        spatial_type='avg',
        dropout_ratio=0.5))

# model training and testing settings
# No training settings apply here; the name is still declared so that code
# reading `train_cfg` from this config finds it defined.
train_cfg = None
test_cfg = dict(average_clips=None)
# dataset settings
dataset_type = 'VideoDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)

# Steps are listed in the order they are applied to a video sample.
test_pipeline = [
    dict(type='DecordInit'),
    # Ten clips of 32 frames each, taking every 2nd frame.
    dict(
        type='SampleFrames',
        clip_len=32,
        frame_interval=2,
        num_clips=10,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    # Only the image tensor is kept; no meta information is collected.
    dict(type='Collect', keys=['imgs'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]

data = dict(
    videos_per_gpu=1,
    workers_per_gpu=2,
    test=dict(
        type=dataset_type,
        # Left unset in this config -- presumably supplied by the caller at
        # inference time (TODO confirm against the inference entry point).
        ann_file=None,
        data_prefix=None,
        pipeline=test_pipeline))
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Inference-only config: Recognizer3D with a ResNet3dSlowOnly (depth 50)
# backbone and an I3DHead. Only a test pipeline and a `data.test` entry are
# defined.

# model settings
model = {
    'type': 'Recognizer3D',
    'backbone': {
        'type': 'ResNet3dSlowOnly',
        'depth': 50,
        'pretrained': None,
        'lateral': False,
        'conv1_kernel': (1, 7, 7),
        'conv1_stride_t': 1,
        'pool1_stride_t': 1,
        'inflate': (0, 0, 1, 1),
        'norm_eval': False,
    },
    'cls_head': {
        'type': 'I3DHead',
        'in_channels': 2048,
        'num_classes': 400,
        'spatial_type': 'avg',
        'dropout_ratio': 0.5,
    },
}

# model training and testing settings
train_cfg = None
test_cfg = {'average_clips': None}

# dataset settings
dataset_type = 'VideoDataset'
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_bgr': False,
}

# Steps are listed in the order they are applied to a video sample.
test_pipeline = [
    {'type': 'DecordInit'},
    # Ten clips of 4 frames each, taking every 16th frame.
    {
        'type': 'SampleFrames',
        'clip_len': 4,
        'frame_interval': 16,
        'num_clips': 10,
        'test_mode': True,
    },
    {'type': 'DecordDecode'},
    {'type': 'Resize', 'scale': (-1, 256)},
    {'type': 'ThreeCrop', 'crop_size': 256},
    # Normalisation parameters are spliced in from img_norm_cfg.
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'FormatShape', 'input_format': 'NCTHW'},
    # Only the image tensor is kept; no meta information is collected.
    {'type': 'Collect', 'keys': ['imgs'], 'meta_keys': []},
    {'type': 'ToTensor', 'keys': ['imgs']},
]

data = {
    'videos_per_gpu': 1,
    'workers_per_gpu': 2,
    'test': {
        'type': dataset_type,
        # Annotation file and data prefix are left unset in this config.
        'ann_file': None,
        'data_prefix': None,
        'pipeline': test_pipeline,
    },
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Inference-only config: Recognizer2D with a ResNetTSM (depth 50) backbone and
# a TSMHead. Only a test pipeline and a `data.test` entry are defined.

# model settings
model = {
    'type': 'Recognizer2D',
    'backbone': {
        'type': 'ResNetTSM',
        'pretrained': 'torchvision://resnet50',
        'depth': 50,
        'norm_eval': False,
        'shift_div': 8,
    },
    'cls_head': {
        'type': 'TSMHead',
        'num_classes': 400,
        'in_channels': 2048,
        'spatial_type': 'avg',
        'consensus': {'type': 'AvgConsensus', 'dim': 1},
        'dropout_ratio': 0.5,
        'init_std': 0.001,
        'is_shift': True,
    },
}

# model training and testing settings
train_cfg = None
test_cfg = {'average_clips': None}

# dataset settings
dataset_type = 'VideoDataset'
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_bgr': False,
}

# Steps are listed in the order they are applied to a video sample.
test_pipeline = [
    {'type': 'DecordInit', 'num_threads': 1},
    # Eight single-frame clips.
    {
        'type': 'SampleFrames',
        'clip_len': 1,
        'frame_interval': 1,
        'num_clips': 8,
        'test_mode': True,
    },
    {'type': 'DecordDecode'},
    {'type': 'Resize', 'scale': (-1, 256)},
    {'type': 'CenterCrop', 'crop_size': 224},
    # Normalisation parameters are spliced in from img_norm_cfg.
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'FormatShape', 'input_format': 'NCHW'},
    # Only the image tensor is kept; no meta information is collected.
    {'type': 'Collect', 'keys': ['imgs'], 'meta_keys': []},
    {'type': 'ToTensor', 'keys': ['imgs']},
]

data = {
    'videos_per_gpu': 1,
    'workers_per_gpu': 2,
    'test': {
        'type': dataset_type,
        # Annotation file and data prefix are left unset in this config.
        'ann_file': None,
        'data_prefix': None,
        'pipeline': test_pipeline,
    },
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
dict(type='DecordDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(type='TenCrop', crop_size=224),
dict(type='Flip', flip_ratio=0),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCHW'),
dict(type='Collect', keys=['imgs'], meta_keys=[]),
Expand Down

0 comments on commit 6071697

Please sign in to comment.