Skip to content

Commit

Permalink
Merge pull request #1062 from luxonis/fix/video_recording
Browse files Browse the repository at this point in the history
Improve mp4 video recording
  • Loading branch information
daniilpastukhov authored Jun 26, 2023
2 parents 207299f + 1a9b6ea commit f0684a0
Show file tree
Hide file tree
Showing 5 changed files with 180 additions and 35 deletions.
12 changes: 12 additions & 0 deletions depthai_sdk/sdk_tests/components/nn/test_nn_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,15 @@ def callback(packet):
if not oak_camera.poll():
raise RuntimeError('Polling failed')
time.sleep(0.1)


def test_encoded_output():
with OakCamera() as oak_camera:
camera = oak_camera.create_camera('color', '1080p', encode='h264')

oak_camera.callback(camera.out.encoded, lambda x: print(x))
oak_camera.start(blocking=False)

for i in range(10):
oak_camera.poll()
time.sleep(0.1)
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
import time

import depthai as dai
import pytest

from depthai_sdk.oak_camera import OakCamera
import depthai as dai


def test_stereo_output():
with OakCamera() as oak_camera:
if dai.CameraBoardSocket.LEFT not in oak_camera.sensors:
pytest.skip('Looks like camera does not have mono pair, skipping...')
else:
stereo = oak_camera.create_stereo('400p')
stereo = oak_camera.create_stereo('800p', encode='h264')

oak_camera.callback([stereo.out.depth, stereo.out.disparity,
stereo.out.rectified_left, stereo.out.rectified_right], callback=lambda x: None)
stereo.out.rectified_left, stereo.out.rectified_right,
stereo.out.encoded], callback=lambda x: None)
oak_camera.start(blocking=False)

for i in range(10):
Expand Down
46 changes: 31 additions & 15 deletions depthai_sdk/sdk_tests/test_examples.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,43 @@
import os
import subprocess
import sys
import time
from pathlib import Path

import cv2
from pyvirtualdisplay import Display

EXAMPLES_DIR = Path(__file__).parent.parent.parent / "examples"
EXAMPLES_DIR = Path(__file__).parents[1] / 'examples'
os.environ['DISPLAY'] = '' # Hide the display

# Create a temporary directory for the tests
Path('/tmp/depthai_sdk_tests').mkdir(exist_ok=True)
os.chdir('/tmp/depthai_sdk_tests')


def test_examples():
python_executable = Path(sys.executable)
for example in EXAMPLES_DIR.rglob("**/*.py"):
print(list(EXAMPLES_DIR.rglob("**/*.py")))
print(EXAMPLES_DIR.absolute())
for example in [list(EXAMPLES_DIR.rglob("**/*.py"))[0]]:
print(f"Running example: {example.name}")

result = subprocess.Popen(f"{python_executable} {example}", stdout=subprocess.PIPE, stderr=subprocess.PIPE,
env={"DISPLAY": ""}, shell=True)

time.sleep(5)
result.kill()
time.sleep(5)
print('Stderr: ', result.stderr.read().decode())

# if result.returncode and result.returncode != 0:
# assert False, f"{example} raised an exception: {result.stderr}"

cv2.destroyAllWindows()
print(f'{EXAMPLES_DIR.parent}:{EXAMPLES_DIR.parent}/depthai_sdk')
with Display(visible=False, size=(800, 600)):
result = subprocess.Popen(f"{python_executable} {example}",
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env={
'DISPLAY': '',
'PYTHONPATH': f'{os.environ["PYTHONPATH"]}:{EXAMPLES_DIR.parent}'
},
shell=True)

time.sleep(5)
result.kill()
time.sleep(5)
print('Stderr: ', result.stderr.read().decode())

# if result.returncode and result.returncode != 0:
# assert False, f"{example} raised an exception: {result.stderr}"

cv2.destroyAllWindows()
2 changes: 1 addition & 1 deletion depthai_sdk/src/depthai_sdk/recorders/video_recorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def update(self, path: Path, device: dai.Device, xouts: List['XoutFrames']):
else:
try:
from .video_writers.av_writer import AvWriter
self._writers[xout_name] = AvWriter(self.path, xout_name, fourcc, xout.fps)
self._writers[xout_name] = AvWriter(self.path, xout_name, fourcc, xout.fps, xout._frame_shape)
except Exception as e:
# TODO here can be other errors, not only import error
logging.warning(f'Exception while creating AvWriter: {e}.'
Expand Down
147 changes: 131 additions & 16 deletions depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,128 @@
from datetime import timedelta
import os
from fractions import Fraction
from pathlib import Path
from typing import Tuple, Union

import depthai as dai
import numpy as np

from depthai_sdk.recorders.video_writers import BaseWriter
from depthai_sdk.recorders.video_writers.utils import create_writer_dir
from .base_writer import BaseWriter

START_CODE_PREFIX = b"\x00\x00\x01"
KEYFRAME_NAL_TYPE = 5
NAL_TYPE_BITS = 5 # nal unit type is encoded in lower 5 bits


def is_keyframe(encoded_frame: np.array) -> bool:
"""
Check if encoded frame is a keyframe.
Args:
encoded_frame: Encoded frame.
Returns:
True if encoded frame is a keyframe, False otherwise.
"""
byte_stream = bytearray(encoded_frame)
size = len(byte_stream)

pos = 0
while pos < size:
retpos = byte_stream.find(START_CODE_PREFIX, pos)
if retpos == -1:
return False

# Skip start code
pos = retpos + 3

# Extract the first 5 bits
type_ = byte_stream[pos] >> 3

if type_ == KEYFRAME_NAL_TYPE:
return True

return False


class AvWriter(BaseWriter):
def __init__(self, path: Path, name: str, fourcc: str, fps: float):
def __init__(self, path: Path, name: str, fourcc: str, fps: float, frame_shape: Tuple[int, int]):
"""
Args:
path: Path to the folder where the file will be created.
name: Name of the file without extension.
fourcc: Stream codec.
fps: Frames per second of the stream.
frame_shape: Width and height of the frames.
"""
super().__init__(path, name)

self.start_ts = None
self.frame_shape = frame_shape

self._fps = fps
self._fourcc = fourcc
self._stream = None

def _create_stream(self, fourcc, fps) -> None:
"""Create stream in file with given fourcc and fps, works in-place."""
stream = self._file.add_stream(fourcc, rate=int(fps))
stream.time_base = Fraction(1, 1000 * 1000) # Microseconds
def _create_stream(self, fourcc: str, fps: float) -> None:
"""
Create stream in file with given fourcc and fps, works in-place.
Args:
fourcc: Stream codec.
fps: Frames per second of the stream.
"""
self._stream = self._file.add_stream(fourcc, rate=int(fps))
self._stream.time_base = Fraction(1, 1000 * 1000) # Microseconds

# We need to set pixel format for MJEPG, for H264/H265 it's yuv420p by default
if fourcc == 'mjpeg':
stream.pix_fmt = 'yuvj420p'
self._stream.pix_fmt = 'yuvj420p'

if self.frame_shape is not None:
self._stream.width = self.frame_shape[0]
self._stream.height = self.frame_shape[1]

def create_file_for_buffer(self, subfolder: str, buf_name: str): # independent of type of frames
def create_file_for_buffer(self, subfolder: str, buf_name: str) -> None: # independent of type of frames
self.create_file(subfolder)

def create_file(self, subfolder: str):
def create_file(self, subfolder: str) -> None:
"""
Create file for writing frames.
Args:
subfolder: Subfolder relative to the main folder where the file will be created.
"""
path_to_file = create_writer_dir(self.path / subfolder, self.name, 'mp4')
self._create_file(path_to_file)

def _create_file(self, path_to_file: str):
def _create_file(self, path_to_file: str) -> None:
"""
Create av container for writing frames.
Args:
path_to_file: Path to the file.
"""
global av
import av as av
self._file = av.open(path_to_file, 'w')
import av
self._file = av.open(str(Path(path_to_file).with_suffix('.h264')), 'w')
self._create_stream(self._fourcc, self._fps)

def write(self, frame: dai.ImgFrame) -> None:
"""
Write packet bytes to h264 file.
Args:
frame: ImgFrame from depthai pipeline.
"""
if self._file is None:
self.create_file(subfolder='')

packet = av.Packet(frame.getData()) # Create new packet with byte array
frame_data = frame.getData()

if self.start_ts is None and not is_keyframe(frame_data):
return

packet = av.Packet(frame_data) # Create new packet with byte array

# Set frame timestamp
if self.start_ts is None:
Expand All @@ -51,9 +131,44 @@ def write(self, frame: dai.ImgFrame) -> None:
ts = int((frame.getTimestampDevice() - self.start_ts).total_seconds() * 1e6) # To microsec
packet.dts = ts
packet.pts = ts

self._file.mux_one(packet) # Mux the Packet into container

def close(self):
def close(self) -> None:
"""
Close the file and remux it to mp4.
"""
if self._file is not None:
p = self._stream.encode(None)
self._file.mux(p)
self._file.close()

# Remux the stream to finalize the output file
self.remux_video(str(self._file.name))

def remux_video(self, input_file: Union[Path, str]) -> None:
"""
Remuxes h264 file to mp4.
Args:
input_file: path to h264 file.
"""

mp4_file = str(Path(input_file).with_suffix('.mp4'))

with av.open(mp4_file, "w", format="mp4") as output_container, \
av.open(input_file, "r", format="h264") as input_container:
input_stream = input_container.streams[0]
output_stream = output_container.add_stream(template=input_stream, rate=self._fps)

if self.frame_shape:
output_stream.width = self.frame_shape[0]
output_stream.height = self.frame_shape[1]

frame_time = (1 / self._fps) * input_stream.time_base.denominator
for i, packet in enumerate(input_container.demux(input_stream)):
packet.dts = i * frame_time
packet.pts = i * frame_time
packet.stream = output_stream
output_container.mux_one(packet)

os.remove(input_file)

0 comments on commit f0684a0

Please sign in to comment.