forked from OpenGVLab/Ask-Anything
-
Notifications
You must be signed in to change notification settings - Fork 0
/
util.py
54 lines (44 loc) · 1.81 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from decord import VideoReader
from decord import cpu
import numpy as np
import torchvision.transforms as transforms
from transforms import (
GroupNormalize, GroupScale, GroupCenterCrop,
Stack, ToTorchFormatTensor
)
def loadvideo_decord(sample, sample_rate_scale=1,new_width=384, new_height=384, clip_len=8, frame_sample_rate=2,num_segment=1):
fname = sample
vr = VideoReader(fname, width=new_width, height=new_height,
num_threads=1, ctx=cpu(0))
# handle temporal segments
converted_len = int(clip_len * frame_sample_rate)
seg_len = len(vr) //num_segment
duration = max(len(vr) // vr.get_avg_fps(),8)
all_index = []
for i in range(num_segment):
index = np.linspace(0, seg_len, num=int(duration))
index = np.clip(index, 0, seg_len - 1).astype(np.int64)
index = index + i*seg_len
all_index.extend(list(index))
all_index = all_index[::int(sample_rate_scale)]
vr.seek(0)
buffer = vr.get_batch(all_index).asnumpy()
return buffer
def loadvideo_decord_origin(sample, sample_rate_scale=1,new_width=384, new_height=384, clip_len=8, frame_sample_rate=2,num_segment=1):
fname = sample
vr = VideoReader(fname,
num_threads=1, ctx=cpu(0))
# handle temporal segments
converted_len = int(clip_len * frame_sample_rate)
seg_len = len(vr) //num_segment
duration = max(len(vr) // vr.get_avg_fps(),8)
all_index = []
for i in range(num_segment):
index = np.linspace(0, seg_len, num=int(duration))
index = np.clip(index, 0, seg_len - 1).astype(np.int64)
index = index + i*seg_len
all_index.extend(list(index))
all_index = all_index[::int(sample_rate_scale)]
vr.seek(0)
buffer = vr.get_batch(all_index).asnumpy()
return buffer