# merge_ensemble_results.py

#!/usr/bin/env python3
# Copyright © Niantic, Inc. 2022.

import logging
from argparse import ArgumentParser
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path
from typing import List

_logger = logging.getLogger(__name__)


@dataclass
class FrameResult:
    """Pose estimate for a single query frame, mirroring one pose-file line.

    A default-constructed instance is an empty placeholder: zero inliers,
    the quaternion ``[1, 0, 0, 0]``, a zero translation and zero errors.
    """

    # Number of inliers; competing estimates for a frame are ranked by it.
    inlier_count: int = 0
    # Four rotation components, kept in the order used by the pose file.
    # A factory is required so instances never share the same list object.
    quaternion: List[float] = field(default_factory=lambda: [1] + [0] * 3)
    # Three translation components, kept in the order used by the pose file.
    translation: List[float] = field(default_factory=lambda: [0] * 3)
    # Rotation error, copied through from the pose file.
    r_err: float = 0
    # Translation error, copied through from the pose file.
    t_err: float = 0


if __name__ == "__main__":
    # Merge the pose files written by several networks into a single file that
    # keeps, for every image, the estimate with the highest inlier count.
    logging.basicConfig(level=logging.INFO)
    parser = ArgumentParser(
        description="Merge results created by multiple nets trained on clustered datasets, "
        "keeping the best pose for each image (in terms of inlier count)."
    )
    parser.add_argument(
        "poses_path",
        type=Path,
        help="Path to a folder containing the estimated poses for each network.",
    )
    parser.add_argument(
        "out_file",
        type=Path,
        help="Path to the output file containing the best pose for each image.",
    )
    parser.add_argument(
        "--poses_suffix",
        type=str,
        default=".txt",
        help="Suffix to select a subset of pose files.",
    )

    args = parser.parse_args()

    poses_path: Path = args.poses_path
    out_file: Path = args.out_file

    # Sorted so that ties in inlier count are resolved deterministically: the
    # estimate from the first file (in sorted order) wins.
    pose_files = sorted(poses_path.glob(f"poses_*{args.poses_suffix}"))
    _logger.info(f"Found {len(pose_files)} pose files.")

    # Every pose line holds: image name, 4 quaternion values, 3 translation
    # values, rotation error, translation error, inlier count.
    expected_fields = 11

    # Best result seen so far for each image. A plain dict is used on purpose:
    # with a defaultdict, merely looking up an unseen image would insert a
    # placeholder result, and a frame whose estimates all report zero inliers
    # would end up written out as that made-up placeholder pose.
    frame_poses = {}

    for in_file in pose_files:
        _logger.info(f"Parsing results from: {in_file}")
        with in_file.open("r") as f:
            for line_number, line in enumerate(f, start=1):
                tokens = line.split()
                if not tokens:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                if len(tokens) != expected_fields:
                    raise ValueError(
                        f"{in_file}:{line_number}: expected {expected_fields} "
                        f"whitespace-separated fields, found {len(tokens)}."
                    )

                img = tokens[0]
                # Convert to the appropriate datatypes while building the result.
                current_result = FrameResult(
                    inlier_count=int(tokens[10]),
                    quaternion=[float(x) for x in tokens[1:5]],
                    translation=[float(x) for x in tokens[5:8]],
                    r_err=float(tokens[8]),
                    t_err=float(tokens[9]),
                )

                # Keep the first estimate seen for an image, then replace it
                # only when a later one has strictly more inliers.
                best_so_far = frame_poses.get(img)
                if (
                    best_so_far is None
                    or best_so_far.inlier_count < current_result.inlier_count
                ):
                    frame_poses[img] = current_result

    _logger.info(f"Found results for {len(frame_poses)} query frames.")

    # Save the output, one line per image, in the same field order as the input.
    with out_file.open("w") as f:
        for img_name in sorted(frame_poses):
            frame_result = frame_poses[img_name]
            f.write(
                f"{img_name} "
                f"{' '.join(str(x) for x in frame_result.quaternion)} "
                f"{' '.join(str(x) for x in frame_result.translation)} "
                f"{frame_result.r_err} {frame_result.t_err} {frame_result.inlier_count}\n"
            )
    _logger.info(f"Saved merged poses to: {out_file}")