be2rlab · warmhammer · Jul 5, 2024 · Jul 5, 2024 · Jul 5, 2024 · Jul 5, 2024
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,6 @@ Readme_files/*
 concept-graphs/yolov8l-world.pt
 concept-graphs/outputs/*
 assets.zip
+.venv
+.ipynb_checkpoints
+__pycache__
diff --git a/concept-graphs/conceptgraph/dataset/dataconfigs/replica/replica.yaml b/concept-graphs/conceptgraph/dataset/dataconfigs/replica/replica.yaml
@@ -21,13 +21,4 @@ mapping:
   mapping_window_size: 5
   pixels: 1000
   iters_first: 1500
-  iters: 60
-camera_params:
-  image_height: 680
-  image_width: 1200
-  fx: 600.0
-  fy: 600.0
-  cx: 599.5
-  cy: 339.5
-  png_depth_scale: 6553.5 #for depth image in png format
-  crop_edge: 0
+  iters: 60
diff --git a/concept-graphs/conceptgraph/dataset/dataconfigs/replica/replica_cad.yaml b/concept-graphs/conceptgraph/dataset/dataconfigs/replica/replica_cad.yaml
@@ -0,0 +1,24 @@
+dataset_name: 'replica_cad'  # matched case-insensitively by get_dataset() -> ReplicaCADDataset
+meshing:
+  eval_rec: True
+tracking:
+  vis_freq: 50
+  vis_inside_freq: 25
+  ignore_edge_W: 100
+  ignore_edge_H: 100
+  seperate_LR: False  # NOTE(review): key is spelled "seperate"; the consumer is not visible here, so verify before renaming
+  const_speed_assumption: True
+  lr: 0.001
+  pixels: 200
+  iters: 10
+mapping:
+  every_frame: 5
+  vis_freq: 50
+  vis_inside_freq: 30
+  mesh_freq: 50
+  ckpt_freq: 500
+  keyframe_every: 50
+  mapping_window_size: 5
+  pixels: 1000
+  iters_first: 1500
+  iters: 60  # NOTE(review): no camera_params section in this file (replica_semantic.yaml has one) - confirm intrinsics are supplied elsewhere
diff --git a/concept-graphs/conceptgraph/dataset/dataconfigs/replica/replica_cad_semantic.yaml b/concept-graphs/conceptgraph/dataset/dataconfigs/replica/replica_cad_semantic.yaml
@@ -0,0 +1,24 @@
+dataset_name: 'replica_cad'  # same name as replica_cad.yaml, so get_dataset() also returns ReplicaCADDataset for this file
+meshing:
+  eval_rec: True
+tracking:
+  vis_freq: 50
+  vis_inside_freq: 25
+  ignore_edge_W: 100
+  ignore_edge_H: 100
+  seperate_LR: False  # NOTE(review): key is spelled "seperate"; the consumer is not visible here, so verify before renaming
+  const_speed_assumption: True
+  lr: 0.001
+  pixels: 200
+  iters: 10
+mapping:
+  every_frame: 5
+  vis_freq: 50
+  vis_inside_freq: 30
+  mesh_freq: 50
+  ckpt_freq: 500
+  keyframe_every: 50
+  mapping_window_size: 5
+  pixels: 1000
+  iters_first: 1500
+  iters: 60  # NOTE(review): every value here equals replica_cad.yaml and there is no camera_params section - confirm both are intended
diff --git a/concept-graphs/conceptgraph/dataset/dataconfigs/replica/replica_semantic.yaml b/concept-graphs/conceptgraph/dataset/dataconfigs/replica/replica_semantic.yaml
@@ -0,0 +1,33 @@
+dataset_name: 'semantic_replica'  # matched case-insensitively by get_dataset() -> SemanticReplicaDataset
+meshing:
+  eval_rec: True
+tracking:
+  vis_freq: 50
+  vis_inside_freq: 25
+  ignore_edge_W: 100
+  ignore_edge_H: 100
+  seperate_LR: False  # NOTE(review): key is spelled "seperate"; the consumer is not visible here, so verify before renaming
+  const_speed_assumption: True
+  lr: 0.001
+  pixels: 200
+  iters: 10
+mapping:
+  every_frame: 5
+  vis_freq: 50
+  vis_inside_freq: 30
+  mesh_freq: 50
+  ckpt_freq: 500
+  keyframe_every: 50
+  mapping_window_size: 5
+  pixels: 1000
+  iters_first: 1500
+  iters: 60
+camera_params:
+  image_height: 480  # equals the desired_height default of SemanticReplicaDataset
+  image_width: 640  # equals the desired_width default of SemanticReplicaDataset
+  fx: 320.0  # = image_width / 2
+  fy: 320.0  # same value as fx
+  cx: 319.5  # = (image_width - 1) / 2
+  cy: 239.5  # = (image_height - 1) / 2
+  png_depth_scale: 1000 # scale for depth stored as PNG (presumably raw value / 1000 = metres - confirm)
+  crop_edge: 0
diff --git a/concept-graphs/conceptgraph/dataset/datasets_common.py b/concept-graphs/conceptgraph/dataset/datasets_common.py
@@ -20,8 +20,10 @@
 import torch
 import torch.nn.functional as F
 import yaml
+from PIL import Image
 from natsort import natsorted
 from scipy.spatial.transform import Rotation as R
+from torchvision.transforms.functional import pil_to_tensor
 
 from gradslam.datasets import datautils
 from gradslam.geometry.geometryutils import relative_transformation
@@ -43,6 +45,7 @@ def as_intrinsics_matrix(intrinsics):
     K[1, 2] = intrinsics[3]
     return K
 
+
 def from_intrinsics_matrix(K: torch.Tensor) -> tuple[float, float, float, float]:
     '''
     Get fx, fy, cx, cy from the intrinsics matrix
@@ -481,6 +484,138 @@ def load_poses(self):
     def read_embedding_from_file(self, embedding_file_path):
         embedding = torch.load(embedding_file_path)
         return embedding.permute(0, 2, 3, 1)  # (1, H, W, embedding_dim)
+
+
+class SemanticReplicaDataset(GradSLAMDataset):
+    def __init__(
+        self,
+        config_dict,
+        basedir,
+        sequence,
+        stride: Optional[int] = None,
+        start: Optional[int] = 0,
+        end: Optional[int] = -1,
+        desired_height: Optional[int] = 480,
+        desired_width: Optional[int] = 640,
+        load_embeddings: Optional[bool] = False,
+        embedding_dir: Optional[str] = "semantic_class",
+        embedding_dim: Optional[int] = 512,
+        **kwargs,
+    ):
+        self.input_folder = os.path.join(basedir, sequence)
+        self.pose_path = os.path.join(self.input_folder, "Sequence_1/traj_w_c.txt")
+        super().__init__(
+            config_dict,
+            stride=stride,
+            start=start,
+            end=end,
+            desired_height=desired_height,
+            desired_width=desired_width,
+            load_embeddings=load_embeddings,
+            embedding_dir=embedding_dir,
+            embedding_dim=embedding_dim,
+            **kwargs,
+        )
+
+    def get_filepaths(self):
+        color_paths = natsorted(glob.glob(f"{self.input_folder}/Sequence_1/rgb/rgb*.png"))
+        depth_paths = natsorted(glob.glob(f"{self.input_folder}/Sequence_1/depth/depth*.png"))
+        embedding_paths = None
+        if self.load_embeddings:
+            embedding_paths = natsorted(
+                glob.glob(f"{self.input_folder}/Sequence_1/{self.embedding_dir}/semantic_class*.png")
+            )
+
+        return color_paths, depth_paths, embedding_paths
+
+    def load_poses(self):
+        poses = []
+        with open(self.pose_path, "r") as f:
+            lines = f.readlines()
+        for i in range(self.num_imgs):
+            line = lines[i]
+            # c2w = np.diag([1, -1, -1, 1]) @ np.array(list(map(float, line.split()))).reshape(4, 4)
+            c2w = np.array(list(map(float, line.split()))).reshape(4, 4)
+            # c2w[:3, 1] *= -1
+            # c2w[:3, 2] *= -1
+            c2w = torch.from_numpy(c2w).float()
+            poses.append(c2w)
+        return poses
+
+    def read_embedding_from_file(self, embedding_file_path):
+        with torch.no_grad():
+            semantic_image = Image.open(embedding_file_path)
+            semantic_tensor = pil_to_tensor(semantic_image)
+
+            embedding = F.one_hot(semantic_tensor.long(), num_classes=self.embedding_dim)
+
+        return embedding
+
+
+class ReplicaCADDataset(GradSLAMDataset):
+    def __init__(
+        self,
+        config_dict,
+        basedir,
+        sequence,
+        stride: Optional[int] = None,
+        start: Optional[int] = 0,
+        end: Optional[int] = -1,
+        desired_height: Optional[int] = 480,
+        desired_width: Optional[int] = 640,
+        load_embeddings: Optional[bool] = False,
+        embedding_dir: Optional[str] = "semantic_class",
+        embedding_dim: Optional[int] = 512,
+        **kwargs,
+    ):
+        self.input_folder = os.path.join(basedir, sequence)
+        self.pose_path = os.path.join(self.input_folder, "traj.txt")
+        super().__init__(
+            config_dict,
+            stride=stride,
+            start=start,
+            end=end,
+            desired_height=desired_height,
+            desired_width=desired_width,
+            load_embeddings=load_embeddings,
+            embedding_dir=embedding_dir,
+            embedding_dim=embedding_dim,
+            **kwargs,
+        )
+
+    def get_filepaths(self):
+        color_paths = natsorted(glob.glob(f"{self.input_folder}/results/frame*.jpg"))
+        depth_paths = natsorted(glob.glob(f"{self.input_folder}/results/depth*.png"))
+        embedding_paths = None
+        if self.load_embeddings:
+            embedding_paths = natsorted(
+                glob.glob(f"{self.input_folder}/results/semantic*.png")
+            )
+
+        return color_paths, depth_paths, embedding_paths
+
+    def load_poses(self):
+        poses = []
+        with open(self.pose_path, "r") as f:
+            lines = f.readlines()
+        for i in range(self.num_imgs):
+            line = lines[i]
+            # c2w = np.diag([1, -1, -1, 1]) @ np.array(list(map(float, line.split()))).reshape(4, 4)
+            c2w = np.array(list(map(float, line.split()))).reshape(4, 4)
+            # c2w[:3, 1] *= -1
+            # c2w[:3, 2] *= -1
+            c2w = torch.from_numpy(c2w).float()
+            poses.append(c2w)
+        return poses
+
+    def read_embedding_from_file(self, embedding_file_path):
+        with torch.no_grad():
+            semantic_image = Image.open(embedding_file_path).convert('I')
+            semantic_tensor = pil_to_tensor(semantic_image)
+
+            embedding = F.one_hot(semantic_tensor.long(), num_classes=self.embedding_dim)
+
+        return embedding
 
 
 class ScannetDataset(GradSLAMDataset):
@@ -607,6 +742,7 @@ def read_embedding_from_file(self, embedding_file_path):
             embedding = torch.load(embedding_file_path, map_location="cpu")
         return embedding.permute(0, 2, 3, 1)  # (1, H, W, embedding_dim)
 
+
 class AzureKinectDataset(GradSLAMDataset):
     def __init__(
         self,
@@ -1003,6 +1139,7 @@ def load_poses(self):
 
         return poses
 
+
 def load_dataset_config(path, default_path=None):
     """
     Loads config file.
@@ -1087,6 +1224,7 @@ def common_dataset_to_batch(dataset):
         embeddings = embeddings.float()
     return colors, depths, intrinsics, poses, embeddings
 
+
 @measure_time
 def get_dataset(dataconfig, basedir, sequence, **kwargs):
     config_dict = load_dataset_config(dataconfig)
@@ -1096,6 +1234,10 @@ def get_dataset(dataconfig, basedir, sequence, **kwargs):
         return ReplicaDataset(config_dict, basedir, sequence, **kwargs)
     elif config_dict["dataset_name"].lower() in ["light"]:
         return ReplicaDataset(config_dict, basedir, sequence, **kwargs)
+    elif config_dict["dataset_name"].lower() in ["semantic_replica"]:
+        return SemanticReplicaDataset(config_dict, basedir, sequence, **kwargs)
+    elif config_dict["dataset_name"].lower() in ["replica_cad"]:
+        return ReplicaCADDataset(config_dict, basedir, sequence, **kwargs)
     elif config_dict["dataset_name"].lower() in ["azure", "azurekinect"]:
         return AzureKinectDataset(config_dict, basedir, sequence, **kwargs)
     elif config_dict["dataset_name"].lower() in ["scannet"]:

diff --git a/concept-graphs/conceptgraph/scripts/eval_replica_semseg.py b/concept-graphs/conceptgraph/scripts/eval_replica_semseg.py
@@ -13,7 +13,8 @@
 
 import open_clip
 
-from chamferdist.chamfer import knn_points
+# knn_points used to come from chamferdist.chamfer; the pytorch3d implementation is used instead.
+from pytorch3d.ops.knn import knn_points
 from gradslam.structures.pointclouds import Pointclouds
 
 from conceptgraph.dataset.replica_constants import (
@@ -54,6 +55,7 @@ def get_parser():
     )
     return parser
 
+
 def eval_replica(
     scene_id: str,
     scene_id_: str,
@@ -86,8 +88,8 @@ def eval_replica(
     assert gt_class.min() >= 0
     assert gt_class.max() < len(REPLICA_EXISTING_CLASSES)
 
-    # transform pred_xyz and gt_xyz according to the first pose in gt_poses
-    gt_xyz = gt_xyz @ gt_poses[0, :3, :3].t() + gt_poses[0, :3, 3]
+    # Alignment of gt_xyz to the first pose in gt_poses is disabled; gt_xyz is used as loaded.
+    # gt_xyz = gt_xyz @ gt_poses[0, :3, :3].t() + gt_poses[0, :3, 3]
 
     # Get the set of classes that are used for evaluation
     all_class_index = np.arange(len(class_names))
@@ -197,6 +199,7 @@ def eval_replica(
 
     # Resample the pred_xyz and pred_class based on slam_nn_in_pred
     pred_xyz = slam_xyz
+    # pred_xyz = pred_xyz @ gt_poses[0, :3, :3].t() + gt_poses[0, :3, 3]
     pred_class = pred_class[idx_slam_to_pred.cpu()]
     pred_color = pred_color[idx_slam_to_pred.cpu()]
 
@@ -207,6 +210,16 @@ def eval_replica(
     # pred_pcd.colors = o3d.utility.Vector3dVector(class2color[pred_class.numpy()])
     # o3d.visualization.draw_geometries([pred_pcd])
 
+    # # GT point cloud in open3d
+    # print("GT pointcloud")
+    # gt_pcd = o3d.geometry.PointCloud()
+    # gt_pcd.points = o3d.utility.Vector3dVector(gt_xyz.numpy())
+    # gt_pcd.colors = o3d.utility.Vector3dVector(class2color[gt_class.numpy()])
+    # o3d.visualization.draw_geometries([gt_pcd])
+
+    # print("Merged pointcloud")
+    # o3d.visualization.draw_geometries([gt_pcd, pred_pcd])
+
     # Compute the associations between the predicted and ground truth point clouds
     idx_pred_to_gt, idx_gt_to_pred = compute_pred_gt_associations(
         pred_xyz.unsqueeze(0).cuda().contiguous().float(),
@@ -233,7 +246,8 @@ def eval_replica(
     assert confmatrix.sum(0)[ignore_index].sum() == 0
     assert confmatrix.sum(1)[ignore_index].sum() == 0
 
-    '''Visualization for debugging'''
+    # '''Visualization for debugging'''
+    # print('GT point cloud in open3d')
     # class2color = get_random_colors(len(class_names))
 
     # # GT point cloud in open3d