diff --git a/depthai_sdk/sdk_tests/components/nn/test_nn_component.py b/depthai_sdk/sdk_tests/components/nn/test_nn_component.py
index 5e95f5556..e5156d092 100644
--- a/depthai_sdk/sdk_tests/components/nn/test_nn_component.py
+++ b/depthai_sdk/sdk_tests/components/nn/test_nn_component.py
@@ -91,3 +91,15 @@ def callback(packet):
             if not oak_camera.poll():
                 raise RuntimeError('Polling failed')
             time.sleep(0.1)
+
+
+def test_encoded_output():
+    with OakCamera() as oak_camera:
+        camera = oak_camera.create_camera('color', '1080p', encode='h264')
+
+        oak_camera.callback(camera.out.encoded, lambda x: print(x))
+        oak_camera.start(blocking=False)
+
+        for i in range(10):
+            oak_camera.poll()
+            time.sleep(0.1)
diff --git a/depthai_sdk/sdk_tests/components/stereo/test_stereo_component.py b/depthai_sdk/sdk_tests/components/stereo/test_stereo_component.py
index 2b7bf91c3..f0ef137de 100644
--- a/depthai_sdk/sdk_tests/components/stereo/test_stereo_component.py
+++ b/depthai_sdk/sdk_tests/components/stereo/test_stereo_component.py
@@ -1,8 +1,9 @@
 import time
 
+import depthai as dai
 import pytest
+
 from depthai_sdk.oak_camera import OakCamera
-import depthai as dai
 
 
 def test_stereo_output():
@@ -10,10 +11,11 @@ def test_stereo_output():
         if dai.CameraBoardSocket.LEFT not in oak_camera.sensors:
             pytest.skip('Looks like camera does not have mono pair, skipping...')
         else:
-            stereo = oak_camera.create_stereo('400p')
+            stereo = oak_camera.create_stereo('800p', encode='h264')
 
             oak_camera.callback([stereo.out.depth, stereo.out.disparity,
-                                 stereo.out.rectified_left, stereo.out.rectified_right], callback=lambda x: None)
+                                 stereo.out.rectified_left, stereo.out.rectified_right,
+                                 stereo.out.encoded], callback=lambda x: None)
             oak_camera.start(blocking=False)
 
             for i in range(10):
diff --git a/depthai_sdk/sdk_tests/test_examples.py b/depthai_sdk/sdk_tests/test_examples.py
index 9bb6946db..efca9213c 100644
--- a/depthai_sdk/sdk_tests/test_examples.py
+++ b/depthai_sdk/sdk_tests/test_examples.py
@@ -1,27 +1,43 @@
+import os
 import subprocess
 import sys
 import time
 from pathlib import Path
 
 import cv2
+from pyvirtualdisplay import Display
 
-EXAMPLES_DIR = Path(__file__).parent.parent.parent / "examples"
+EXAMPLES_DIR = Path(__file__).parents[1] / 'examples'
+
+os.environ['DISPLAY'] = ''  # Hide the display
+
+# Create a temporary directory for the tests
+Path('/tmp/depthai_sdk_tests').mkdir(exist_ok=True)
+os.chdir('/tmp/depthai_sdk_tests')
 
 
 def test_examples():
     python_executable = Path(sys.executable)
-    for example in EXAMPLES_DIR.rglob("**/*.py"):
+    print(list(EXAMPLES_DIR.rglob("**/*.py")))
+    print(EXAMPLES_DIR.absolute())
+    for example in [list(EXAMPLES_DIR.rglob("**/*.py"))[0]]:
         print(f"Running example: {example.name}")
-
-        result = subprocess.Popen(f"{python_executable} {example}", stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                                  env={"DISPLAY": ""}, shell=True)
-
-        time.sleep(5)
-        result.kill()
-        time.sleep(5)
-        print('Stderr: ', result.stderr.read().decode())
-
-        # if result.returncode and result.returncode != 0:
-        #     assert False, f"{example} raised an exception: {result.stderr}"
-
-        cv2.destroyAllWindows()
+        print(f'{EXAMPLES_DIR.parent}:{EXAMPLES_DIR.parent}/depthai_sdk')
+        with Display(visible=False, size=(800, 600)):
+            result = subprocess.Popen(f"{python_executable} {example}",
+                                      stdout=subprocess.PIPE,
+                                      stderr=subprocess.PIPE,
+                                      env={
+                                          'DISPLAY': '',
+                                          'PYTHONPATH': f'{os.environ.get("PYTHONPATH", "")}:{EXAMPLES_DIR.parent}'
+                                      },
+                                      shell=True)
+
+            time.sleep(5)
+            result.kill()
+            time.sleep(5)
+            print('Stderr: ', result.stderr.read().decode())
+
+            # if result.returncode and result.returncode != 0:
+            #     assert False, f"{example} raised an exception: {result.stderr}"
+
+            cv2.destroyAllWindows()
diff --git a/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py b/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py
index d6f2b7330..0582736bd 100644
--- a/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py
+++ b/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py
@@ -50,7 +50,7 @@ def update(self, path: Path, device: dai.Device, xouts: List['XoutFrames']):
             else:
                 try:
                     from .video_writers.av_writer import AvWriter
-                    self._writers[xout_name] = AvWriter(self.path, xout_name, fourcc, xout.fps)
+                    self._writers[xout_name] = AvWriter(self.path, xout_name, fourcc, xout.fps, xout._frame_shape)
                 except Exception as e:
                     # TODO here can be other errors, not only import error
                     logging.warning(f'Exception while creating AvWriter: {e}.'
diff --git a/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py b/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py
index caec89b19..3b0838d5d 100644
--- a/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py
+++ b/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py
@@ -1,48 +1,128 @@
-from datetime import timedelta
+import os
 from fractions import Fraction
 from pathlib import Path
+from typing import Tuple, Union
 
 import depthai as dai
+import numpy as np
 
-from depthai_sdk.recorders.video_writers import BaseWriter
 from depthai_sdk.recorders.video_writers.utils import create_writer_dir
 
+from .base_writer import BaseWriter
+
+START_CODE_PREFIX = b"\x00\x00\x01"
+KEYFRAME_NAL_TYPE = 5
+NAL_TYPE_BITS = 5  # nal unit type is encoded in lower 5 bits
+
+
+def is_keyframe(encoded_frame: np.array) -> bool:
+    """
+    Check if encoded frame is a keyframe.
+
+    Args:
+        encoded_frame: Encoded frame.
+
+    Returns:
+        True if encoded frame is a keyframe, False otherwise.
+    """
+    byte_stream = bytearray(encoded_frame)
+    size = len(byte_stream)
+
+    pos = 0
+    while pos < size:
+        retpos = byte_stream.find(START_CODE_PREFIX, pos)
+        if retpos == -1:
+            return False
+
+        # Skip start code
+        pos = retpos + 3
+
+        # NAL unit type is encoded in the lower 5 bits
+        type_ = byte_stream[pos] & 0x1F
+
+        if type_ == KEYFRAME_NAL_TYPE:
+            return True
+
+    return False
+
 
 class AvWriter(BaseWriter):
-    def __init__(self, path: Path, name: str, fourcc: str, fps: float):
+    def __init__(self, path: Path, name: str, fourcc: str, fps: float, frame_shape: Tuple[int, int]):
+        """
+        Args:
+            path: Path to the folder where the file will be created.
+            name: Name of the file without extension.
+            fourcc: Stream codec.
+            fps: Frames per second of the stream.
+            frame_shape: Width and height of the frames.
+        """
         super().__init__(path, name)
 
         self.start_ts = None
+        self.frame_shape = frame_shape
+
         self._fps = fps
         self._fourcc = fourcc
+        self._stream = None
 
-    def _create_stream(self, fourcc, fps) -> None:
-        """Create stream in file with given fourcc and fps, works in-place."""
-        stream = self._file.add_stream(fourcc, rate=int(fps))
-        stream.time_base = Fraction(1, 1000 * 1000)  # Microseconds
+    def _create_stream(self, fourcc: str, fps: float) -> None:
+        """
+        Create stream in file with given fourcc and fps, works in-place.
+
+        Args:
+            fourcc: Stream codec.
+            fps: Frames per second of the stream.
+        """
+        self._stream = self._file.add_stream(fourcc, rate=int(fps))
+        self._stream.time_base = Fraction(1, 1000 * 1000)  # Microseconds
 
         # We need to set pixel format for MJEPG, for H264/H265 it's yuv420p by default
         if fourcc == 'mjpeg':
-            stream.pix_fmt = 'yuvj420p'
+            self._stream.pix_fmt = 'yuvj420p'
+
+        if self.frame_shape is not None:
+            self._stream.width = self.frame_shape[0]
+            self._stream.height = self.frame_shape[1]
 
-    def create_file_for_buffer(self, subfolder: str, buf_name: str):  # independent of type of frames
+    def create_file_for_buffer(self, subfolder: str, buf_name: str) -> None:  # independent of type of frames
         self.create_file(subfolder)
 
-    def create_file(self, subfolder: str):
+    def create_file(self, subfolder: str) -> None:
+        """
+        Create file for writing frames.
+
+        Args:
+            subfolder: Subfolder relative to the main folder where the file will be created.
+        """
         path_to_file = create_writer_dir(self.path / subfolder, self.name, 'mp4')
         self._create_file(path_to_file)
 
-    def _create_file(self, path_to_file: str):
+    def _create_file(self, path_to_file: str) -> None:
+        """
+        Create av container for writing frames.
+
+        Args:
+            path_to_file: Path to the file.
+        """
         global av
-        import av as av
+        import av
 
-        self._file = av.open(path_to_file, 'w')
+        self._file = av.open(str(Path(path_to_file).with_suffix('.h264')), 'w')
         self._create_stream(self._fourcc, self._fps)
 
     def write(self, frame: dai.ImgFrame) -> None:
+        """
+        Write packet bytes to h264 file.
+
+        Args:
+            frame: ImgFrame from depthai pipeline.
+        """
         if self._file is None:
             self.create_file(subfolder='')
 
-        packet = av.Packet(frame.getData())  # Create new packet with byte array
+        frame_data = frame.getData()
+
+        if self.start_ts is None and not is_keyframe(frame_data):
+            return
+
+        packet = av.Packet(frame_data)  # Create new packet with byte array
 
         # Set frame timestamp
         if self.start_ts is None:
@@ -51,9 +131,44 @@ def write(self, frame: dai.ImgFrame) -> None:
         ts = int((frame.getTimestampDevice() - self.start_ts).total_seconds() * 1e6)  # To microsec
         packet.dts = ts
         packet.pts = ts
-
         self._file.mux_one(packet)  # Mux the Packet into container
 
-    def close(self):
+    def close(self) -> None:
+        """
+        Close the file and remux it to mp4.
+        """
         if self._file is not None:
+            p = self._stream.encode(None)
+            self._file.mux(p)
             self._file.close()
+
+            # Remux the stream to finalize the output file
+            self.remux_video(str(self._file.name))
+
+    def remux_video(self, input_file: Union[Path, str]) -> None:
+        """
+        Remuxes h264 file to mp4.
+
+        Args:
+            input_file: path to h264 file.
+        """
+
+        mp4_file = str(Path(input_file).with_suffix('.mp4'))
+
+        with av.open(mp4_file, "w", format="mp4") as output_container, \
+                av.open(input_file, "r", format="h264") as input_container:
+            input_stream = input_container.streams[0]
+            output_stream = output_container.add_stream(template=input_stream, rate=self._fps)
+
+            if self.frame_shape:
+                output_stream.width = self.frame_shape[0]
+                output_stream.height = self.frame_shape[1]
+
+            frame_time = (1 / self._fps) * input_stream.time_base.denominator
+            for i, packet in enumerate(input_container.demux(input_stream)):
+                packet.dts = i * frame_time
+                packet.pts = i * frame_time
+                packet.stream = output_stream
+                output_container.mux_one(packet)
+
+        os.remove(input_file)