Merge pull request #1062 from luxonis/fix/video_recording

Improve mp4 video recording
luxonis · Jun 26, 2023 · f0684a0 · f0684a0
2 parents 207299f + 1a9b6ea
commit f0684a0
Show file tree

Hide file tree

Showing 5 changed files with 180 additions and 35 deletions.
diff --git a/depthai_sdk/sdk_tests/components/nn/test_nn_component.py b/depthai_sdk/sdk_tests/components/nn/test_nn_component.py
@@ -91,3 +91,15 @@ def callback(packet):
             if not oak_camera.poll():
                 raise RuntimeError('Polling failed')
             time.sleep(0.1)
+
+
+def test_encoded_output():
+    with OakCamera() as oak_camera:
+        camera = oak_camera.create_camera('color', '1080p', encode='h264')
+
+        oak_camera.callback(camera.out.encoded, lambda x: print(x))
+        oak_camera.start(blocking=False)
+
+        for i in range(10):
+            oak_camera.poll()
+            time.sleep(0.1)
diff --git a/depthai_sdk/sdk_tests/components/stereo/test_stereo_component.py b/depthai_sdk/sdk_tests/components/stereo/test_stereo_component.py
@@ -1,19 +1,21 @@
 import time
 
+import depthai as dai
 import pytest
+
 from depthai_sdk.oak_camera import OakCamera
-import depthai as dai
 
 
 def test_stereo_output():
     with OakCamera() as oak_camera:
         if dai.CameraBoardSocket.LEFT not in oak_camera.sensors:
             pytest.skip('Looks like camera does not have mono pair, skipping...')
         else:
-            stereo = oak_camera.create_stereo('400p')
+            stereo = oak_camera.create_stereo('800p', encode='h264')
 
             oak_camera.callback([stereo.out.depth, stereo.out.disparity,
-                                 stereo.out.rectified_left, stereo.out.rectified_right], callback=lambda x: None)
+                                 stereo.out.rectified_left, stereo.out.rectified_right,
+                                 stereo.out.encoded], callback=lambda x: None)
             oak_camera.start(blocking=False)
 
             for i in range(10):

diff --git a/depthai_sdk/sdk_tests/test_examples.py b/depthai_sdk/sdk_tests/test_examples.py
@@ -1,27 +1,43 @@
+import os
 import subprocess
 import sys
 import time
 from pathlib import Path
 
 import cv2
+from pyvirtualdisplay import Display
 
-EXAMPLES_DIR = Path(__file__).parent.parent.parent / "examples"
+EXAMPLES_DIR = Path(__file__).parents[1] / 'examples'
+os.environ['DISPLAY'] = ''  # Hide the display
+
+# Create a temporary directory for the tests
+Path('/tmp/depthai_sdk_tests').mkdir(exist_ok=True)
+os.chdir('/tmp/depthai_sdk_tests')
 
 
 def test_examples():
     python_executable = Path(sys.executable)
-    for example in EXAMPLES_DIR.rglob("**/*.py"):
+    print(list(EXAMPLES_DIR.rglob("**/*.py")))
+    print(EXAMPLES_DIR.absolute())
+    for example in [list(EXAMPLES_DIR.rglob("**/*.py"))[0]]:
         print(f"Running example: {example.name}")
-
-        result = subprocess.Popen(f"{python_executable} {example}", stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                                  env={"DISPLAY": ""}, shell=True)
-
-        time.sleep(5)
-        result.kill()
-        time.sleep(5)
-        print('Stderr: ', result.stderr.read().decode())
-
-        # if result.returncode and result.returncode != 0:
-        #     assert False, f"{example} raised an exception: {result.stderr}"
-
-        cv2.destroyAllWindows()
+        print(f'{EXAMPLES_DIR.parent}:{EXAMPLES_DIR.parent}/depthai_sdk')
+        with Display(visible=False, size=(800, 600)):
+            result = subprocess.Popen(f"{python_executable} {example}",
+                                      stdout=subprocess.PIPE,
+                                      stderr=subprocess.PIPE,
+                                      env={
+                                          'DISPLAY': '',
+                                          'PYTHONPATH': f'{os.environ["PYTHONPATH"]}:{EXAMPLES_DIR.parent}'
+                                      },
+                                      shell=True)
+
+            time.sleep(5)
+            result.kill()
+            time.sleep(5)
+            print('Stderr: ', result.stderr.read().decode())
+
+            # if result.returncode and result.returncode != 0:
+            #     assert False, f"{example} raised an exception: {result.stderr}"
+
+            cv2.destroyAllWindows()
diff --git a/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py b/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py
@@ -50,7 +50,7 @@ def update(self, path: Path, device: dai.Device, xouts: List['XoutFrames']):
             else:
                 try:
                     from .video_writers.av_writer import AvWriter
-                    self._writers[xout_name] = AvWriter(self.path, xout_name, fourcc, xout.fps)
+                    self._writers[xout_name] = AvWriter(self.path, xout_name, fourcc, xout.fps, xout._frame_shape)
                 except Exception as e:
                     # TODO here can be other errors, not only import error
                     logging.warning(f'Exception while creating AvWriter: {e}.'

diff --git a/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py b/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py
@@ -1,48 +1,128 @@
-from datetime import timedelta
+import os
 from fractions import Fraction
 from pathlib import Path
+from typing import Tuple, Union
 
 import depthai as dai
+import numpy as np
 
-from depthai_sdk.recorders.video_writers import BaseWriter
 from depthai_sdk.recorders.video_writers.utils import create_writer_dir
+from .base_writer import BaseWriter
+
+START_CODE_PREFIX = b"\x00\x00\x01"
+KEYFRAME_NAL_TYPE = 5
+NAL_TYPE_BITS = 5  # nal unit type is encoded in lower 5 bits
+
+
+def is_keyframe(encoded_frame: np.array) -> bool:
+    """
+    Check if encoded frame is a keyframe.
+    Args:
+        encoded_frame: Encoded frame.
+
+    Returns:
+        True if encoded frame is a keyframe, False otherwise.
+    """
+    byte_stream = bytearray(encoded_frame)
+    size = len(byte_stream)
+
+    pos = 0
+    while pos < size:
+        retpos = byte_stream.find(START_CODE_PREFIX, pos)
+        if retpos == -1:
+            return False
+
+        # Skip start code
+        pos = retpos + 3
+
+        # Extract the first 5 bits
+        type_ = byte_stream[pos] >> 3
+
+        if type_ == KEYFRAME_NAL_TYPE:
+            return True
+
+    return False
 
 
 class AvWriter(BaseWriter):
-    def __init__(self, path: Path, name: str, fourcc: str, fps: float):
+    def __init__(self, path: Path, name: str, fourcc: str, fps: float, frame_shape: Tuple[int, int]):
+        """
+        Args:
+            path: Path to the folder where the file will be created.
+            name: Name of the file without extension.
+            fourcc: Stream codec.
+            fps: Frames per second of the stream.
+            frame_shape: Width and height of the frames.
+        """
         super().__init__(path, name)
 
         self.start_ts = None
+        self.frame_shape = frame_shape
+
         self._fps = fps
         self._fourcc = fourcc
+        self._stream = None
 
-    def _create_stream(self, fourcc, fps) -> None:
-        """Create stream in file with given fourcc and fps, works in-place."""
-        stream = self._file.add_stream(fourcc, rate=int(fps))
-        stream.time_base = Fraction(1, 1000 * 1000)  # Microseconds
+    def _create_stream(self, fourcc: str, fps: float) -> None:
+        """
+        Create stream in file with given fourcc and fps, works in-place.
+
+        Args:
+            fourcc: Stream codec.
+            fps: Frames per second of the stream.
+        """
+        self._stream = self._file.add_stream(fourcc, rate=int(fps))
+        self._stream.time_base = Fraction(1, 1000 * 1000)  # Microseconds
 
         # We need to set pixel format for MJEPG, for H264/H265 it's yuv420p by default
         if fourcc == 'mjpeg':
-            stream.pix_fmt = 'yuvj420p'
+            self._stream.pix_fmt = 'yuvj420p'
+
+        if self.frame_shape is not None:
+            self._stream.width = self.frame_shape[0]
+            self._stream.height = self.frame_shape[1]
 
-    def create_file_for_buffer(self, subfolder: str, buf_name: str):  # independent of type of frames
+    def create_file_for_buffer(self, subfolder: str, buf_name: str) -> None:  # independent of type of frames
         self.create_file(subfolder)
 
-    def create_file(self, subfolder: str):
+    def create_file(self, subfolder: str) -> None:
+        """
+        Create file for writing frames.
+
+        Args:
+            subfolder: Subfolder relative to the main folder where the file will be created.
+        """
         path_to_file = create_writer_dir(self.path / subfolder, self.name, 'mp4')
         self._create_file(path_to_file)
 
-    def _create_file(self, path_to_file: str):
+    def _create_file(self, path_to_file: str) -> None:
+        """
+        Create av container for writing frames.
+
+        Args:
+            path_to_file: Path to the file.
+        """
         global av
-        import av as av
-        self._file = av.open(path_to_file, 'w')
+        import av
+        self._file = av.open(str(Path(path_to_file).with_suffix('.h264')), 'w')
         self._create_stream(self._fourcc, self._fps)
 
     def write(self, frame: dai.ImgFrame) -> None:
+        """
+        Write packet bytes to h264 file.
+
+        Args:
+            frame: ImgFrame from depthai pipeline.
+        """
         if self._file is None:
             self.create_file(subfolder='')
 
-        packet = av.Packet(frame.getData())  # Create new packet with byte array
+        frame_data = frame.getData()
+
+        if self.start_ts is None and not is_keyframe(frame_data):
+            return
+
+        packet = av.Packet(frame_data)  # Create new packet with byte array
 
         # Set frame timestamp
         if self.start_ts is None:
@@ -51,9 +131,44 @@ def write(self, frame: dai.ImgFrame) -> None:
         ts = int((frame.getTimestampDevice() - self.start_ts).total_seconds() * 1e6)  # To microsec
         packet.dts = ts
         packet.pts = ts
-
         self._file.mux_one(packet)  # Mux the Packet into container
 
-    def close(self):
+    def close(self) -> None:
+        """
+        Close the file and remux it to mp4.
+        """
         if self._file is not None:
+            p = self._stream.encode(None)
+            self._file.mux(p)
             self._file.close()
+
+        # Remux the stream to finalize the output file
+        self.remux_video(str(self._file.name))
+
+    def remux_video(self, input_file: Union[Path, str]) -> None:
+        """
+        Remuxes h264 file to mp4.
+
+        Args:
+            input_file: path to h264 file.
+        """
+
+        mp4_file = str(Path(input_file).with_suffix('.mp4'))
+
+        with av.open(mp4_file, "w", format="mp4") as output_container, \
+                av.open(input_file, "r", format="h264") as input_container:
+            input_stream = input_container.streams[0]
+            output_stream = output_container.add_stream(template=input_stream, rate=self._fps)
+
+            if self.frame_shape:
+                output_stream.width = self.frame_shape[0]
+                output_stream.height = self.frame_shape[1]
+
+            frame_time = (1 / self._fps) * input_stream.time_base.denominator
+            for i, packet in enumerate(input_container.demux(input_stream)):
+                packet.dts = i * frame_time
+                packet.pts = i * frame_time
+                packet.stream = output_stream
+                output_container.mux_one(packet)
+
+        os.remove(input_file)