Skip to content

Commit

Permalink
datasets/nuscenes: fix frame_time scaling factor
Browse files — browse the repository at this point in the history
  • Loading branch information
d4l3k committed Sep 11, 2023
1 parent 7510446 commit 1600b33
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,4 @@ dmypy.json
experiments/

profile*.svg
*.pt
4 changes: 3 additions & 1 deletion torchdrive/datasets/nuscenes_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,8 @@ def _getitem(self, sample_data: SampleData) -> Dict[str, object]:

cam_T = rotation_mat.inverse().matmul(cam_T)

timestamp = sample_data["timestamp"]
# timestamp is in microseconds; convert it to seconds
timestamp = sample_data["timestamp"] / 1e6

# Get the image
img_path = os.path.join(self.dataroot, sample_data["filename"]) # current image
Expand Down Expand Up @@ -387,6 +388,7 @@ def __getitem__(self, idx: int) -> Optional[Batch]:
)
for batch in dl:
torch.save(batch, "nuscenes_batch.pt")
print(batch)
break
elif cmd == "bulk":
ds = NuscenesDataset(dataroot, version=version)
Expand Down
21 changes: 16 additions & 5 deletions torchdrive/tasks/voxel.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,9 +440,17 @@ def _losses(
align_corners=False,
)

if ctx.log_text:
ctx.add_scalars(
f"semantic_vel/{cam}/abs",
{
"max": semantic_vel.abs().amax(),
"mean": semantic_vel.abs().mean(),
}
)
if ctx.log_img:
ctx.add_image(
f"{cam}/semantic_vel",
f"semantic_vel/{cam}",
normalize_img(semantic_vel[0]),
)
ctx.add_image(
Expand Down Expand Up @@ -552,7 +560,7 @@ def _losses(
semantic_classes=cam_sem.float().sigmoid(),
semantic_target=semantic_targets[cam],
mask=primary_mask,
per_pixel_weights=per_pixel_weights * 0.5,
per_pixel_weights=per_pixel_weights * 0.1,
)
losses[f"semantic-cam/{cam}"] = semantic_loss.mean(
dim=(1, 2, 3)
Expand All @@ -573,7 +581,7 @@ def _losses(
frame_time=frame_time,
primary_color=primary_color,
primary_mask=primary_mask,
per_pixel_weights=per_pixel_weights * 0.5,
per_pixel_weights=per_pixel_weights * 0.1,
)

camera_overlap = self.camera_overlap
Expand All @@ -588,7 +596,7 @@ def _losses(
cam_features=primary_colors,
cam_masks=primary_masks,
cam_pix_weights=cam_pix_weights,
loss_scale=0.5,
loss_scale=0.1,
primary_depth=cam_depth,
losses=losses,
h=h,
Expand Down Expand Up @@ -675,6 +683,9 @@ def _sfm_loss(
world_to_src_cam = batch.world_to_cam(cam, src_frame)
time = frame_time[:, src_frame]

if ctx.log_text:
ctx.add_scalar(f"frame_time_max/{offset}", time.abs().amax())

src_color = batch.color[cam][:, src_frame]
src_color = F.interpolate(
src_color.float(),
Expand Down Expand Up @@ -847,7 +858,7 @@ def _semantic_loss(
scales=3,
mask=mask,
)
sem_loss = sem_loss * per_pixel_weights * 100 * 1000 * 1000
sem_loss = sem_loss * per_pixel_weights * 100 * 1000

if ctx.log_text:
pred_min, pred_max = semantic_classes.aminmax()
Expand Down

0 comments on commit 1600b33

Please sign in to comment.