specify a start index for sampling frames (open-mmlab#21)
dreamerlin authored Jul 14, 2020
1 parent b44845f commit ad1364b
Showing 7 changed files with 204 additions and 13 deletions.
15 changes: 9 additions & 6 deletions mmaction/datasets/pipelines/loading.py
@@ -23,6 +23,8 @@ class SampleFrames(object):
frame_interval (int): Temporal interval of adjacent sampled frames.
Default: 1.
num_clips (int): Number of clips to be sampled. Default: 1.
start_index (int): Specify a start index for frames in consideration of
different filename format. Default: 1.
temporal_jitter (bool): Whether to apply temporal jittering.
Default: False.
twice_sample (bool): Whether to use twice sample when testing.
@@ -39,6 +41,7 @@ def __init__(self,
clip_len,
frame_interval=1,
num_clips=1,
start_index=1,
temporal_jitter=False,
twice_sample=False,
out_of_bound_opt='loop',
@@ -47,6 +50,7 @@
self.clip_len = clip_len
self.frame_interval = frame_interval
self.num_clips = num_clips
self.start_index = start_index
self.temporal_jitter = temporal_jitter
self.twice_sample = twice_sample
self.out_of_bound_opt = out_of_bound_opt
@@ -144,7 +148,6 @@ def __call__(self, results):
else:
total_frames = results['total_frames']

# TODO: index in different mode may be different
clip_offsets = self._sample_clips(total_frames)
frame_inds = clip_offsets[:, None] + np.arange(
self.clip_len)[None, :] * self.frame_interval
@@ -166,7 +169,7 @@ def __call__(self, results):
frame_inds = new_inds
else:
raise ValueError('Illegal out_of_bound option.')
frame_inds = np.concatenate(frame_inds)
frame_inds = np.concatenate(frame_inds) + self.start_index
results['frame_inds'] = frame_inds.astype(np.int)
results['clip_len'] = self.clip_len
results['frame_interval'] = self.frame_interval
@@ -186,6 +189,8 @@ class DenseSampleFrames(SampleFrames):
frame_interval (int): Temporal interval of adjacent sampled frames.
Default: 1.
num_clips (int): Number of clips to be sampled. Default: 1.
start_index (int): Specify a start index for frames in consideration of
different filename format. Default: 1.
sample_range (int): Total sample range for dense sample.
Default: 64.
num_sample_positions (int): Number of sample start positions, which is
@@ -200,6 +205,7 @@ def __init__(self,
clip_len,
frame_interval=1,
num_clips=1,
start_index=1,
sample_range=64,
num_sample_positions=10,
temporal_jitter=False,
@@ -209,6 +215,7 @@
clip_len,
frame_interval,
num_clips,
start_index,
temporal_jitter,
out_of_bound_opt=out_of_bound_opt,
test_mode=test_mode)
@@ -585,10 +592,6 @@ def __call__(self, results):
results['frame_inds'] = np.squeeze(results['frame_inds'])

for frame_idx in results['frame_inds']:
# temporary solution for frame index offset.
# TODO: add offset attributes in datasets.
if frame_idx == 0:
frame_idx += 1
if modality == 'RGB':
filepath = osp.join(directory, filename_tmpl.format(frame_idx))
img_bytes = self.file_client.get(filepath)
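For context when reading the hunks above: _sample_clips still produces 0-based clip offsets, and the only behavioural change is that the concatenated frame_inds are shifted by self.start_index (default 1) before being written back into results. With test_mode=True and temporal_jitter=False the sampling is deterministic, which is why the updated tests can assert exact index arrays. The snippet below is a minimal sketch, not part of this commit, that reproduces the numbers asserted in tests/test_loading.py (total_frames=6, clip_len=1, num_clips=8); the import path and the bare total_frames-only input dict are assumptions about the surrounding API.

from mmaction.datasets.pipelines import SampleFrames  # assumed import path

# Minimal input; real pipeline results carry more keys (frame_dir,
# filename_tmpl, modality, ...), but only total_frames drives the sampling.
results = dict(total_frames=6)

# Rawframe-style data: files are named img_00001.jpg onwards, so the default
# start_index=1 keeps the sampled indices aligned with the filenames.
sampler = SampleFrames(clip_len=1, frame_interval=1, num_clips=8,
                       temporal_jitter=False, test_mode=True)
print(sampler(dict(results))['frame_inds'])   # -> [1 2 2 3 4 5 5 6]

# Video-style data: decoded frames are 0-based, so pass start_index=0.
sampler = SampleFrames(clip_len=1, frame_interval=1, num_clips=8,
                       start_index=0, temporal_jitter=False, test_mode=True)
print(sampler(dict(results))['frame_inds'])   # -> [0 1 1 2 3 4 4 5]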
Binary file added tests/data/test_imgs/img_00006.jpg
Binary file added tests/data/test_imgs/img_00007.jpg
Binary file added tests/data/test_imgs/img_00008.jpg
Binary file added tests/data/test_imgs/img_00009.jpg
Binary file added tests/data/test_imgs/img_00010.jpg
202 changes: 195 additions & 7 deletions tests/test_loading.py
@@ -183,6 +183,28 @@ def check_monotonous(arr):
assert len(sample_frames_results['frame_inds']) == 8
sample_frames_results = sample_frames(frame_result)
assert len(sample_frames_results['frame_inds']) == 8
assert_array_equal(sample_frames_results['frame_inds'],
np.array([1, 2, 2, 3, 4, 5, 5, 6]))

# Sample Frame with no temporal_jitter to get clip_offsets
# clip_len=1, frame_interval=1, num_clips=8, start_index=0
video_result = copy.deepcopy(self.video_results)
frame_result = copy.deepcopy(self.frame_results)
frame_result['total_frames'] = 6
config = dict(
clip_len=1,
frame_interval=1,
num_clips=8,
start_index=0,
temporal_jitter=False,
test_mode=True)
sample_frames = SampleFrames(**config)
sample_frames_results = sample_frames(video_result)
assert self.check_keys_contain(sample_frames_results.keys(),
target_keys)
assert len(sample_frames_results['frame_inds']) == 8
sample_frames_results = sample_frames(frame_result)
assert len(sample_frames_results['frame_inds']) == 8
assert_array_equal(sample_frames_results['frame_inds'],
np.array([0, 1, 1, 2, 3, 4, 4, 5]))

@@ -205,7 +227,7 @@ def check_monotonous(arr):
sample_frames_results = sample_frames(frame_result)
assert len(sample_frames_results['frame_inds']) == 6
assert_array_equal(sample_frames_results['frame_inds'],
[0, 1, 2, 3, 4, 0])
[1, 2, 3, 4, 5, 1])

# Sample Frame with no temporal_jitter to get avg_interval <= 0
# clip_len=12, frame_interval=1, num_clips=20
@@ -245,7 +267,7 @@ def check_monotonous(arr):
sample_frames_results = sample_frames(frame_result)
assert len(sample_frames_results['frame_inds']) == 8
assert_array_equal(sample_frames_results['frame_inds'],
np.array([0, 1, 2, 2, 3, 4, 4, 5]))
np.array([1, 2, 3, 3, 4, 5, 5, 6]))

# Sample Frame with no temporal_jitter to get clip_offsets zero
# clip_len=12, frame_interval=1, num_clips=2
@@ -411,14 +433,57 @@ def test_pyav_init(self):
def test_pyav_decode(self):
target_keys = ['frame_inds', 'imgs', 'original_shape']

# test PyAV with 2 dim input
# test PyAV with 2 dim input and start_index = 0
video_result = copy.deepcopy(self.video_results)
video_result['frame_inds'] = np.arange(0, self.total_frames,
2)[:, np.newaxis]
pyav_init = PyAVInit()
pyav_init_result = pyav_init(video_result)
video_result['video_reader'] = pyav_init_result['video_reader']

pyav_decode = PyAVDecode()
pyav_decode_result = pyav_decode(video_result)
assert self.check_keys_contain(pyav_decode_result.keys(), target_keys)
assert pyav_decode_result['original_shape'] == (256, 340)
assert np.shape(pyav_decode_result['imgs']) == (len(
video_result['frame_inds']), 256, 340, 3)

# test PyAV with 1 dim input and start_index = 0
video_result = copy.deepcopy(self.video_results)
video_result['frame_inds'] = np.arange(0, self.total_frames, 5)
pyav_init = PyAVInit()
pyav_init_result = pyav_init(video_result)
video_result['video_reader'] = pyav_init_result['video_reader']

pyav_decode = PyAVDecode()
pyav_decode_result = pyav_decode(video_result)
assert self.check_keys_contain(pyav_decode_result.keys(), target_keys)
assert pyav_decode_result['original_shape'] == (256, 340)
assert np.shape(pyav_decode_result['imgs']) == (len(
video_result['frame_inds']), 256, 340, 3)

# PyAV with multi thread and start_index = 0
video_result = copy.deepcopy(self.video_results)
video_result['frame_inds'] = np.arange(0, self.total_frames, 5)
pyav_init = PyAVInit()
pyav_init_result = pyav_init(video_result)
video_result['video_reader'] = pyav_init_result['video_reader']

pyav_decode = PyAVDecode(multi_thread=True)
pyav_decode_result = pyav_decode(video_result)
assert self.check_keys_contain(pyav_decode_result.keys(), target_keys)
assert pyav_decode_result['original_shape'] == (256, 340)
assert np.shape(pyav_decode_result['imgs']) == (len(
video_result['frame_inds']), 256, 340, 3)

# test PyAV with 2 dim input
video_result = copy.deepcopy(self.video_results)
video_result['frame_inds'] = np.arange(1, self.total_frames,
2)[:, np.newaxis]
pyav_init = PyAVInit()
pyav_init_result = pyav_init(video_result)
video_result['video_reader'] = pyav_init_result['video_reader']

pyav_decode = PyAVDecode()
pyav_decode_result = pyav_decode(video_result)
assert self.check_keys_contain(pyav_decode_result.keys(), target_keys)
@@ -469,9 +534,40 @@ def test_decord_init(self):
def test_decord_decode(self):
target_keys = ['frame_inds', 'imgs', 'original_shape']

# test Decord with 2 dim input
# test Decord with 2 dim input and start_index = 0
video_result = copy.deepcopy(self.video_results)
video_result['frame_inds'] = np.arange(1, self.total_frames,
video_result['frame_inds'] = np.arange(0, self.total_frames,
3)[:, np.newaxis]
decord_init = DecordInit()
decord_init_result = decord_init(video_result)
video_result['video_reader'] = decord_init_result['video_reader']

decord_decode = DecordDecode()
decord_decode_result = decord_decode(video_result)
assert self.check_keys_contain(decord_decode_result.keys(),
target_keys)
assert decord_decode_result['original_shape'] == (256, 340)
assert np.shape(decord_decode_result['imgs']) == (len(
video_result['frame_inds']), 256, 340, 3)

# test Decord with 1 dim input and start_index = 0
video_result = copy.deepcopy(self.video_results)
video_result['frame_inds'] = np.arange(0, self.total_frames, 3)
decord_init = DecordInit()
decord_init_result = decord_init(video_result)
video_result['video_reader'] = decord_init_result['video_reader']

decord_decode = DecordDecode()
decord_decode_result = decord_decode(video_result)
assert self.check_keys_contain(decord_decode_result.keys(),
target_keys)
assert decord_decode_result['original_shape'] == (256, 340)
assert np.shape(decord_decode_result['imgs']) == (len(
video_result['frame_inds']), 256, 340, 3)

# test Decord with 2 dim input and start_index = 0
video_result = copy.deepcopy(self.video_results)
video_result['frame_inds'] = np.arange(0, self.total_frames,
3)[:, np.newaxis]
decord_init = DecordInit()
decord_init_result = decord_init(video_result)
@@ -512,7 +608,7 @@ def test_opencv_init(self):
def test_opencv_decode(self):
target_keys = ['frame_inds', 'imgs', 'original_shape']

# test OpenCV with 2 dim input
# test OpenCV with 2 dim input when start_index = 0
video_result = copy.deepcopy(self.video_results)
video_result['frame_inds'] = np.arange(0, self.total_frames,
2)[:, np.newaxis]
@@ -528,6 +624,29 @@ def test_opencv_decode(self):
assert np.shape(opencv_decode_result['imgs']) == (len(
video_result['frame_inds']), 256, 340, 3)

# test OpenCV with 2 dim input
video_result = copy.deepcopy(self.video_results)
video_result['frame_inds'] = np.arange(1, self.total_frames,
2)[:, np.newaxis]
opencv_init = OpenCVInit()
opencv_init_result = opencv_init(video_result)
video_result['video_reader'] = opencv_init_result['video_reader']

opencv_decode = OpenCVDecode()
opencv_decode_result = opencv_decode(video_result)
assert self.check_keys_contain(opencv_decode_result.keys(),
target_keys)
assert opencv_decode_result['original_shape'] == (256, 340)
assert np.shape(opencv_decode_result['imgs']) == (len(
video_result['frame_inds']), 256, 340, 3)

# test OpenCV with 1 dim input when start_index = 0
video_result = copy.deepcopy(self.video_results)
video_result['frame_inds'] = np.arange(0, self.total_frames, 3)
opencv_init = OpenCVInit()
opencv_init_result = opencv_init(video_result)
video_result['video_reader'] = opencv_init_result['video_reader']

# test OpenCV with 1 dim input
video_result = copy.deepcopy(self.video_results)
video_result['frame_inds'] = np.arange(1, self.total_frames, 3)
@@ -546,10 +665,37 @@ def test_opencv_decode(self):
def test_frame_selector(self):
target_keys = ['frame_inds', 'imgs', 'original_shape', 'modality']

# test frame selector with 2 dim input
# test frame selector with 2 dim input when start_index = 0
inputs = copy.deepcopy(self.frame_results)
inputs['frame_inds'] = np.arange(0, self.total_frames, 2)[:,
np.newaxis]
# since the test images start with index 1, we add 1 to frame_inds
# in order to pass the CI
inputs['frame_inds'] = inputs['frame_inds'] + 1
frame_selector = FrameSelector(io_backend='disk')
results = frame_selector(inputs)
assert self.check_keys_contain(results.keys(), target_keys)
assert np.shape(results['imgs']) == (len(inputs['frame_inds']), 240,
320, 3)
assert results['original_shape'] == (240, 320)

# test frame selector with 2 dim input
inputs = copy.deepcopy(self.frame_results)
inputs['frame_inds'] = np.arange(1, self.total_frames, 2)[:,
np.newaxis]
frame_selector = FrameSelector(io_backend='disk')
results = frame_selector(inputs)
assert self.check_keys_contain(results.keys(), target_keys)
assert np.shape(results['imgs']) == (len(inputs['frame_inds']), 240,
320, 3)
assert results['original_shape'] == (240, 320)

# test frame selector with 1 dim input when start_index = 0
inputs = copy.deepcopy(self.frame_results)
inputs['frame_inds'] = np.arange(0, self.total_frames, 5)
# since the test images start with index 1, we add 1 to frame_inds
# in order to pass the CI
inputs['frame_inds'] = inputs['frame_inds'] + 1
frame_selector = FrameSelector(io_backend='disk')
results = frame_selector(inputs)
assert self.check_keys_contain(results.keys(), target_keys)
@@ -567,6 +713,19 @@ def test_frame_selector(self):
320, 3)
assert results['original_shape'] == (240, 320)

# test frame selector with 1 dim input when start_index = 0
inputs = copy.deepcopy(self.frame_results)
inputs['frame_inds'] = np.arange(0, self.total_frames, 2)
# since the test images start with index 1, we add 1 to frame_inds
# in order to pass the CI
inputs['frame_inds'] = inputs['frame_inds'] + 1
frame_selector = FrameSelector(io_backend='disk')
results = frame_selector(inputs)
assert self.check_keys_contain(results.keys(), target_keys)
assert np.shape(results['imgs']) == (len(inputs['frame_inds']), 240,
320, 3)
assert results['original_shape'] == (240, 320)

# test frame selector with 1 dim input
inputs = copy.deepcopy(self.frame_results)
inputs['frame_inds'] = np.arange(1, self.total_frames, 2)
@@ -577,6 +736,20 @@ def test_frame_selector(self):
320, 3)
assert results['original_shape'] == (240, 320)

# test frame selector with 1 dim input for flow images
# when start_index = 0
inputs = copy.deepcopy(self.flow_frame_results)
inputs['frame_inds'] = np.arange(0, self.total_frames, 2)
# since the test images start with index 1, we add 1 to frame_inds
# in order to pass the CI
inputs['frame_inds'] = inputs['frame_inds'] + 1
frame_selector = FrameSelector(io_backend='disk')
results = frame_selector(inputs)
assert self.check_keys_contain(results.keys(), target_keys)
assert np.shape(results['imgs']) == (len(inputs['frame_inds']) * 2,
240, 320)
assert results['original_shape'] == (240, 320)

# test frame selector with 1 dim input for flow images
inputs = copy.deepcopy(self.flow_frame_results)
inputs['frame_inds'] = np.arange(1, self.total_frames, 2)
@@ -587,6 +760,21 @@ def test_frame_selector(self):
240, 320)
assert results['original_shape'] == (240, 320)

# test frame selector in turbojpeg decoding backend
# when start_index = 0
inputs = copy.deepcopy(self.frame_results)
inputs['frame_inds'] = np.arange(0, self.total_frames, 5)
# since the test images start with index 1, we add 1 to frame_inds
# in order to pass the CI
inputs['frame_inds'] = inputs['frame_inds'] + 1
frame_selector = FrameSelector(
io_backend='disk', decoding_backend='turbojpeg')
results = frame_selector(inputs)
assert self.check_keys_contain(results.keys(), target_keys)
assert np.shape(results['imgs']) == (len(inputs['frame_inds']), 240,
320, 3)
assert results['original_shape'] == (240, 320)

# test frame selector in turbojpeg decoding backend
inputs = copy.deepcopy(self.frame_results)
inputs['frame_inds'] = np.arange(1, self.total_frames, 5)
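Taken together, the new tests cover both index conventions: the decoder paths (PyAVDecode, DecordDecode, OpenCVDecode) now accept 0-based frame_inds, while the FrameSelector tests add 1 to 0-based indices because the checked-in test images start at img_00001.jpg. As a hedged sketch of how this plays out in practice, the configs below show where the new argument would typically sit in a pipeline; the transform names come from the tests above, but the clip_len/frame_interval values and the overall layout are illustrative assumptions rather than part of this commit.

# Video dataset: frames come from a decoder, so 0-based indices are natural
# and the old "bump index 0 up to 1" workaround removed from loading.py is
# no longer needed.
video_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1,
         start_index=0),
    dict(type='DecordDecode'),
]

# Rawframe dataset: files are img_00001.jpg, img_00002.jpg, ..., so the
# default start_index=1 already matches the filename template.
rawframe_pipeline = [
    dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1),
    dict(type='FrameSelector', io_backend='disk'),
]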
