Gaze estimator experiment in v3 #529

Open
wants to merge 14 commits into base: master
8 changes: 8 additions & 0 deletions .gitmodules
@@ -0,0 +1,8 @@
[submodule "depthai-core"]
path = depthai-core
url = https://github.com/luxonis/depthai-core.git
branch = v3_develop
[submodule "gen3-gaze-estimation-cpp/depthai-core"]
path = gen3-gaze-estimation-cpp/depthai-core
url = https://github.com/luxonis/depthai-core.git
branch = v3_develop
1 change: 1 addition & 0 deletions depthai-core
Submodule depthai-core added at 777c2c
2 changes: 1 addition & 1 deletion gen2-gaze-estimation/MultiMsgSync.py
@@ -39,8 +39,8 @@ def get_msgs(self):
        seq_remove = [] # Arr of sequence numbers to get deleted

        for seq, msgs in self.msgs.items():
            print(seq)
            seq_remove.append(seq) # Will get removed from dict if we find synced msgs pair

            # Check if we have both detections and color frame with this sequence number
            if "color" in msgs and "len" in msgs:

Binary file not shown.
16 changes: 9 additions & 7 deletions gen2-gaze-estimation/main.py
@@ -40,11 +40,13 @@ def create_output(name: str, output: dai.Node.Output):
print("Creating Face Detection Neural Network...")
face_det_nn = pipeline.create(dai.node.MobileNetDetectionNetwork)
face_det_nn.setConfidenceThreshold(0.5)
face_det_nn.setBlobPath(blobconverter.from_zoo(
    name="face-detection-retail-0004",
    shaves=6,
    version=openvino_version
))
#face_det_nn.setBlobPath(blobconverter.from_zoo(
#    name="face-detection-retail-0004",
#    shaves=6,
#    version=openvino_version
#))
face_det_nn.setBlobPath("face-detection-retail-0004.blob")

# Link Face ImageManip -> Face detection NN node
face_det_manip.out.link(face_det_nn.input)

@@ -148,7 +150,6 @@ def create_output(name: str, output: dai.Node.Output):
script.inputs['none'].setQueueSize(1)

create_output('gaze', gaze_nn.out)

#==================================================

with dai.Device(pipeline) as device:
@@ -170,6 +171,7 @@ def create_output(name: str, output: dai.Node.Output):

        msgs = sync.get_msgs()
        if msgs is not None:
            print("adasd")
            frame = msgs["color"].getCvFrame()
            dets = msgs["detection"].detections
            for i, detection in enumerate(dets):
@@ -195,4 +197,4 @@ def create_output(name: str, output: dai.Node.Output):
            cv2.imshow("Lasers", frame)

        if cv2.waitKey(1) == ord('q'):
            break
            break
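Note on the blob change above: main.py now loads face-detection-retail-0004.blob from the working directory instead of downloading it with blobconverter. A minimal sketch of a fallback pattern that keeps both paths working; the get_blob() helper and its defaults are illustrative, not part of this PR, and openvino_version is assumed to be the variable already defined earlier in main.py:

from pathlib import Path
import blobconverter

def get_blob(name, shaves=6, version=openvino_version):
    # Prefer a blob that ships next to the script; otherwise pull it from the model zoo
    local = Path(__file__).parent / f"{name}.blob"
    if local.exists():
        return str(local)
    return blobconverter.from_zoo(name=name, shaves=shaves, version=version)

# face_det_nn is the MobileNetDetectionNetwork node created earlier in main.py
face_det_nn.setBlobPath(get_blob("face-detection-retail-0004"))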
2 changes: 2 additions & 0 deletions gen3-gaze-estimation-cpp/.gitignore
@@ -0,0 +1,2 @@
.vscode/
build/
4 changes: 4 additions & 0 deletions gen3-gaze-estimation-cpp/.gitmodules
@@ -0,0 +1,4 @@
[submodule "depthai-core"]
path = depthai-core
url = https://github.com/luxonis/depthai-core.git
branch = v3_develop
55 changes: 55 additions & 0 deletions gen3-gaze-estimation-cpp/CMakeLists.txt
@@ -0,0 +1,55 @@
cmake_minimum_required(VERSION 3.4)

# Add depthai-core dependency
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/depthai-core EXCLUDE_FROM_ALL)

# Create a project with name 'gen3'
set(TARGET_NAME gen3)
project(${TARGET_NAME})

# Dependencies (optional, only used for example)
find_package(OpenCV REQUIRED)

# Add source files
add_executable("${TARGET_NAME}"
    src/main.cpp
)

# Link with libraries
target_link_libraries(${TARGET_NAME}
    PUBLIC
        depthai::core
        ${OpenCV_LIBS} # optional, used for example
)

# Copy files to /build
file(COPY script.py DESTINATION "${CMAKE_CURRENT_BINARY_DIR}")
file(COPY face-detection-retail-0004.blob DESTINATION "${CMAKE_CURRENT_BINARY_DIR}")
file(COPY gaze-estimation-adas-0002.blob DESTINATION "${CMAKE_CURRENT_BINARY_DIR}")
file(COPY head-pose-estimation-adas-0001.blob DESTINATION "${CMAKE_CURRENT_BINARY_DIR}")
file(COPY landmarks-regression-retail-0009.blob DESTINATION "${CMAKE_CURRENT_BINARY_DIR}")


# Treat missing return statements as compile errors
if(NOT MSVC)
    target_compile_options(${TARGET_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Werror=return-type>)
endif()

# Set compiler features (C++17)
set_property(TARGET ${TARGET_NAME} PROPERTY CXX_STANDARD 17)


# Windows - Add runtime dependencies
if(WIN32)
    if(CMAKE_VERSION VERSION_LESS "3.21")
        message(WARNING "CMake version less than 3.21 - automatic DLL handling not available. Make sure to copy required DLLs to the same folder as .exe")
    else()
        # TARGET_RUNTIME_DLLS generator expression available since CMake 3.21
        set(depthai_dll_libraries "$<TARGET_RUNTIME_DLLS:${TARGET_NAME}>")
        # Copy the required dlls
        add_custom_command(TARGET ${TARGET_NAME} POST_BUILD COMMAND
            ${CMAKE_COMMAND} -E copy_if_different ${depthai_dll_libraries} $<TARGET_FILE_DIR:${TARGET_NAME}>
            COMMAND_EXPAND_LISTS
        )
    endif()
endif()
1 change: 1 addition & 0 deletions gen3-gaze-estimation-cpp/depthai-core
Submodule depthai-core added at a851d0
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
152 changes: 152 additions & 0 deletions gen3-gaze-estimation-cpp/script.py
@@ -0,0 +1,152 @@
import time
sync = {} # Dict of messages

def find_in_dict(target_seq, name):
    if str(target_seq) in sync:
        return sync[str(target_seq)][name]

def add_to_dict(det, seq, name):
    sync[str(seq)][name] = det

def correct_bb(bb):
    if bb.xmin < 0: bb.xmin = 0.001
    if bb.ymin < 0: bb.ymin = 0.001
    if bb.xmax > 1: bb.xmax = 0.999
    if bb.ymax > 1: bb.ymax = 0.999

def check_gaze_est(seq):
    dict = sync[str(seq)]

    if "left" in dict and "right" in dict and "angles" in dict:
        # node.warn("GOT ALL 3")
        # Send to gaze estimation NN
        node.io['to_gaze_left'].send(dict['left'])
        node.io['to_gaze_right'].send(dict['right'])
        head_pose = NNData(6)
        head_pose.setLayer("head_pose_angles", dict['angles'])
        node.io['to_gaze_head'].send(head_pose)

        # Clear previous results, up to and including this sequence number
        for sq in list(sync):
            del sync[str(sq)]
            if str(seq) == str(sq):
                return

PAD = 0.15
PAD2x = PAD * 2
def get_eye_coords(x, y, det):
    xdelta = det.xmax - det.xmin
    ydelta = det.ymax - det.ymin

    xmin = x - PAD
    xmax = xmin + PAD2x
    ymin = y - PAD
    ymax = ymin + PAD2x

    xmin2 = det.xmin + xdelta * xmin
    xmax2 = det.xmin + xdelta * xmax
    ymin2 = det.ymin + ydelta * ymin
    ymax2 = det.ymin + ydelta * ymax
    ret = (xmin2, ymin2, xmax2, ymax2)
    # node.warn(f"Eye: {x}/{y}, Crop eyes: {ret}, det {det.xmin}, {det.ymin}, {det.xmax}, {det.ymax}")
    return ret

while True:
    time.sleep(0.001)

    preview = node.io['preview'].tryGet()
    if preview is not None:
        sync[str(preview.getSequenceNum())] = {
            "frame": preview
        }
        # node.warn(f"New frame, {len(sync)}")

    face_dets = node.io['face_det_in'].tryGet()
    if face_dets is not None:
        passthrough = node.io['face_pass'].get()
        seq = passthrough.getSequenceNum()

        # No detections, carry on
        if len(face_dets.detections) == 0:
            del sync[str(seq)]
            continue

        #node.warn(f"New detection {seq}")
        if len(sync) == 0: continue
        img = find_in_dict(seq, "frame")
        if img is None: continue

        add_to_dict(face_dets.detections[0], seq, "detections")

        for det in face_dets.detections:
            correct_bb(det)

            # To head pose estimation model
            cfg1 = ImageManipConfig()
            cfg1.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax)
            cfg1.setResize(60, 60)
            cfg1.setKeepAspectRatio(False)
            node.io['headpose_cfg'].send(cfg1)
            node.io['headpose_img'].send(img)

            # To face landmark detection model
            cfg2 = ImageManipConfig()
            cfg2.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax)
            cfg2.setResize(48, 48)
            cfg2.setKeepAspectRatio(False)
            node.io['landmark_cfg'].send(cfg2)
            node.io['landmark_img'].send(img)
            break # Only 1 face at a time currently supported

    headpose = node.io['headpose_in'].tryGet()
    if headpose is not None:
        passthrough = node.io['headpose_pass'].get()
        seq = passthrough.getSequenceNum()
        # Face rotation in degrees
        y = headpose.getLayerFp16('angle_y_fc')[0]
        p = headpose.getLayerFp16('angle_p_fc')[0]
        r = headpose.getLayerFp16('angle_r_fc')[0]
        angles = [y,p,r]
        # node.warn(f"angles {angles}")
        add_to_dict(angles, seq, "angles")
        check_gaze_est(seq)

    landmark_in = node.io['landmark_in'].tryGet()
    if landmark_in is not None:
        passthrough = node.io['landmark_pass'].get()
        seq = passthrough.getSequenceNum()

        img = find_in_dict(seq, "frame")
        det = find_in_dict(seq, "detections")
        if img is None or det is None: continue

        landmarks = landmark_in.getFirstLayerFp16()

        # We need to crop left and right eye out of the face frame
        left_cfg = ImageManipConfig()
        left_cfg.setCropRect(*get_eye_coords(landmarks[0], landmarks[1], det))
        left_cfg.setResize(60, 60)
        left_cfg.setKeepAspectRatio(False)
        node.io['left_manip_cfg'].send(left_cfg)
        node.io['left_manip_img'].send(img)

        right_cfg = ImageManipConfig()
        right_cfg.setCropRect(*get_eye_coords(landmarks[2], landmarks[3], det))
        right_cfg.setResize(60, 60)
        right_cfg.setKeepAspectRatio(False)
        node.io['right_manip_cfg'].send(right_cfg)
        node.io['right_manip_img'].send(img)

    left_eye = node.io['left_eye_in'].tryGet()
    if left_eye is not None:
        # node.warn("LEFT EYE GOT")
        seq = left_eye.getSequenceNum()
        add_to_dict(left_eye, seq, "left")
        check_gaze_est(seq)

    right_eye = node.io['right_eye_in'].tryGet()
    if right_eye is not None:
        # node.warn("RIGHT EYE GOT")
        seq = right_eye.getSequenceNum()
        add_to_dict(right_eye, seq, "right")
        check_gaze_est(seq)
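The get_eye_coords() helper above takes an eye landmark that is normalized to the face crop, pads it by PAD on each side, and maps the resulting square back into full-frame coordinates so the ImageManip nodes can crop the eyes. A standalone sketch of the same arithmetic with made-up numbers (the face box and landmark values below are purely illustrative):

PAD = 0.15  # same padding as in script.py above

def eye_crop(x, y, xmin, ymin, xmax, ymax):
    # (x, y): eye landmark, normalized to the face crop (0..1)
    # (xmin, ymin, xmax, ymax): face detection, normalized to the full frame (0..1)
    w, h = xmax - xmin, ymax - ymin
    ex_min, ey_min = x - PAD, y - PAD                      # pad around the landmark (face-crop coords)
    ex_max, ey_max = ex_min + 2 * PAD, ey_min + 2 * PAD
    # scale the padded square back into full-frame coordinates
    return (xmin + w * ex_min, ymin + h * ey_min,
            xmin + w * ex_max, ymin + h * ey_max)

# Face detected at (0.4, 0.3)-(0.6, 0.7), left eye at (0.3, 0.4) inside that crop:
print(eye_crop(0.3, 0.4, 0.4, 0.3, 0.6, 0.7))
# -> (0.43, 0.40, 0.49, 0.52), i.e. a square spanning 2*PAD of the face crop around the eye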
57 changes: 57 additions & 0 deletions gen3-gaze-estimation-cpp/src/MultiMsgSync.cpp
@@ -0,0 +1,57 @@
// Color frame (ImgFrame), object detection (ImgDetections) and gaze (NNData)
// messages all arrive at the host with some additional delay.
// For each ImgFrame there's one ImgDetections msg, which has multiple detections, and for each
// detection there's an NNData msg which contains the gaze results.
// How it works:
// Every ImgFrame, ImgDetections and NNData message has its own sequence number, by which we can sync messages.

#include <map>
#include <memory>
#include <string>
#include <utility>
#include <depthai/depthai.hpp>

class TwoStageHostSeqSync {
public:
    TwoStageHostSeqSync() {
        msgs.clear();
    }
    // name: color, detection, landmarks or gaze
    void add_msg(std::shared_ptr<dai::MessageQueue> msg, std::string name) {
        int64_t f = -1;
        if(name == "gaze" || name == "landmarks")
            f = msg->get<dai::NNData>()->getSequenceNum();
        else if(name == "color")
            f = msg->get<dai::ImgFrame>()->getSequenceNum();
        else
            f = msg->get<dai::ImgDetections>()->getSequenceNum();
        auto seq = std::to_string(f);
        msgs[seq][name].push_back(msg);
    }

    std::pair<std::map<std::string, std::vector<std::shared_ptr<dai::MessageQueue>>>, int> get_msgs() {
        // std::cout << "msgs size: " << msgs.size() << "\n";
        std::vector<std::string> seq_remove;

        for(auto it = msgs.begin(); it != msgs.end(); it++) {
            auto seq = it->first;
            auto r_msgs = it->second;

            seq_remove.push_back(seq); // Will get removed from dict if we find synced msgs pairs
            // Check if we have both detections and color frame with this sequence number
            if(r_msgs.count("color") > 0 && r_msgs.count("detection") > 0) {
                // Check if the detected face has finished gaze inference
                if(0 < r_msgs["gaze"].size()) {
                    // We have synced msgs, remove previous msgs (memory cleaning)
                    for(auto rm : seq_remove) {
                        msgs[rm].clear();
                    }
                    return {r_msgs, 0}; // Return synced msgs
                }
            }
        }
        return {msgs["-1"], -1}; // No synced msgs
    }

private:
    std::map<std::string, std::map<std::string, std::vector<std::shared_ptr<dai::MessageQueue>>>> msgs;
};
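For readers more familiar with the Python side of this demo, the sequence-number sync that TwoStageHostSeqSync implements can be sketched roughly as below: group incoming messages by getSequenceNum(), report a group once it holds a color frame, a detection and a gaze result, and drop everything up to that point. This is an illustrative Python analog of the class above, not code from this PR:

class TwoStageSync:
    def __init__(self):
        self.msgs = {}  # {seq: {"color": [...], "detection": [...], "gaze": [...]}}

    def add_msg(self, msg, name):
        seq = str(msg.getSequenceNum())
        self.msgs.setdefault(seq, {}).setdefault(name, []).append(msg)

    def get_msgs(self):
        for seq in list(self.msgs):
            group = self.msgs[seq]
            # Synced once the frame, the detections and a gaze result are all present
            if "color" in group and "detection" in group and group.get("gaze"):
                # Drop this group and every older one to keep memory bounded
                for old in list(self.msgs):
                    del self.msgs[old]
                    if old == seq:
                        break
                return group
        return None  # nothing synced yet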
53 changes: 53 additions & 0 deletions gen3-gaze-estimation-cpp/src/bbox.cpp
@@ -0,0 +1,53 @@
#include <array>
#include <vector>
#include <depthai/depthai.hpp>

class Point {
    // Used within the BoundingBox class when dealing with points.
public:
    Point(float x, float y) : x(x), y(y) {}
    // Denormalize the point to pixel coordinates (0..frame width, 0..frame height)
    std::array<int, 2> denormalize(std::vector<int> frame_shape) {
        return {(int)(x * (float)frame_shape[1]), (int)(y * (float)frame_shape[0])};
    }

private:
    float x, y;
};


class BoundingBox {
    // This class helps with bounding box calculations. It can be used to calculate relative bounding boxes,
    // map points from relative to absolute coordinates and vice versa, crop frames, etc.
public:
    BoundingBox(dai::ImgDetection bbox) {
        xmin = bbox.xmin, ymin = bbox.ymin, xmax = bbox.xmax, ymax = bbox.ymax;
        width = xmax - xmin, height = ymax - ymin;
    }


    std::array<int, 4> denormalize(std::vector<int> frame_shape) {
        /*
        Denormalize the bounding box to pixel coordinates (0..frame width, 0..frame height).
        Useful when you want to draw the bounding box on the frame.
        */
        return {
            (int)(frame_shape[1] * xmin), (int)(frame_shape[0] * ymin),
            (int)(frame_shape[1] * xmax), (int)(frame_shape[0] * ymax)
        };
    }

    Point map_point(float x, float y) {
        /*
        Useful when you have a point inside the bounding box, and you want to map it to the frame.
        Example: You run face detection, create BoundingBox from the result, and also run
        facial landmarks detection on the cropped frame of the face. The landmarks are relative
        to the face bounding box, but you want to draw them on the original frame.
        */
        float mapped_x = xmin + width * x, mapped_y = ymin + height * y;
        return Point(mapped_x, mapped_y);
    }
private:
    float xmin, ymin, xmax, ymax, width, height;
};
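BoundingBox::map_point() above is what lets landmarks predicted on the cropped face be drawn back onto the original frame. A tiny Python sketch of the same mapping; the face box, landmark and frame size below are made up for illustration:

def map_point(face, x, y):
    # face = (xmin, ymin, xmax, ymax), normalized to the full frame
    # (x, y) = a landmark normalized to the face crop (0..1)
    xmin, ymin, xmax, ymax = face
    return (xmin + (xmax - xmin) * x, ymin + (ymax - ymin) * y)

def denormalize(point, frame_w, frame_h):
    # Normalized frame coordinates -> pixel coordinates
    return (int(point[0] * frame_w), int(point[1] * frame_h))

# Right eye at (0.7, 0.35) inside a face detected at (0.4, 0.3)-(0.6, 0.7), on a 1920x1080 frame:
eye = map_point((0.4, 0.3, 0.6, 0.7), 0.7, 0.35)  # -> (0.54, 0.44)
print(denormalize(eye, 1920, 1080))               # -> (1036, 475)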