From a2e78141156328f9259e64f9288ee1e865bb9e14 Mon Sep 17 00:00:00 2001
From: Leonardo Gigli <leonardo.gigli@thecrossproduct.com>
Date: Thu, 18 Apr 2024 17:46:10 +0200
Subject: [PATCH] updated lidar_hd_pre_transform function; the goal is to also
 work with point clouds that have custom sets of features

---
 configs/datamodule/hdf5_datamodule.yaml       |  3 +
 .../20220607_151_dalles_proto.yaml            |  4 +
 docs/source/apidoc/default_config.yml         |  3 +
 myria3d/pctl/points_pre_transform/lidar_hd.py | 81 ++++++++++---------
 4 files changed, 52 insertions(+), 39 deletions(-)

diff --git a/configs/datamodule/hdf5_datamodule.yaml b/configs/datamodule/hdf5_datamodule.yaml
index acbd93ed..c0151300 100755
--- a/configs/datamodule/hdf5_datamodule.yaml
+++ b/configs/datamodule/hdf5_datamodule.yaml
@@ -11,6 +11,9 @@ points_pre_transform:
   _target_: functools.partial
   _args_:
     - "${get_method:myria3d.pctl.points_pre_transform.lidar_hd.lidar_hd_pre_transform}"
+  pos_keys: ${dataset_description.pos_keys}
+  features_keys: ${dataset_description.features_keys}
+  color_keys: ${dataset_description.color_keys}
 
 pre_filter:
   _target_: functools.partial
diff --git a/configs/dataset_description/20220607_151_dalles_proto.yaml b/configs/dataset_description/20220607_151_dalles_proto.yaml
index f6d60956..24918c56 100644
--- a/configs/dataset_description/20220607_151_dalles_proto.yaml
+++ b/configs/dataset_description/20220607_151_dalles_proto.yaml
@@ -13,6 +13,10 @@ classification_preprocessing_dict: {3: 5, 4: 5, 160: 64, 161: 64, 162: 64, 0: 1,
 # classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order).
 classification_dict: {1: "unclassified", 2: "ground", 5: vegetation, 6: "building", 9: water, 17: bridge, 64: lasting_above}
 
+pos_keys: ["X", "Y", "Z"]
+features_keys: ["Intensity", "ReturnNumber", "NumberOfReturns"]
+color_keys: ["Red", "Green", "Blue", "Infrared"]
+
 # class_weights for the CrossEntropyLoss with format "[[w1,w2,w3...,wk]]" with w_i a float e.g. 1.0
 # Balanced CE: arbitrary weights based on heuristic.
 # class_weights: [2.5,1.0,1.0,5.0,20.0,20.0,20.0] normalized so they sum to 7 to preserve scale of CELoss
diff --git a/docs/source/apidoc/default_config.yml b/docs/source/apidoc/default_config.yml
index eb6d004a..2053a9cf 100644
--- a/docs/source/apidoc/default_config.yml
+++ b/docs/source/apidoc/default_config.yml
@@ -111,6 +111,9 @@ datamodule:
     _target_: functools.partial
     _args_:
     - ${get_method:myria3d.pctl.points_pre_transform.lidar_hd.lidar_hd_pre_transform}
+    pos_keys: ${dataset_description.pos_keys}
+    features_keys: ${dataset_description.features_keys}
+    color_keys: ${dataset_description.color_keys}
   pre_filter:
     _target_: functools.partial
     _args_:
diff --git a/myria3d/pctl/points_pre_transform/lidar_hd.py b/myria3d/pctl/points_pre_transform/lidar_hd.py
index dcd7e4ad..73e47ea7 100644
--- a/myria3d/pctl/points_pre_transform/lidar_hd.py
+++ b/myria3d/pctl/points_pre_transform/lidar_hd.py
@@ -1,79 +1,82 @@
 # function to turn points loaded via pdal into a pyg Data object, with additional channels
+from typing import List
 import numpy as np
+import torch
 from torch_geometric.data import Data
 
 COLORS_NORMALIZATION_MAX_VALUE = 255.0 * 256.0
 RETURN_NUMBER_NORMALIZATION_MAX_VALUE = 7.0
 
 
-def lidar_hd_pre_transform(points):
+def lidar_hd_pre_transform(points, pos_keys: List[str], features_keys: List[str], color_keys: List[str]):
     """Turn pdal points into torch-geometric Data object.
 
     Builds a composite (average) color channel on the fly.     Calculate NDVI on the fly.
 
     Args:
         las_filepath (str): path to the LAS file.
-
+        pos_keys (List[str]): list of keys for the point coordinates (e.g. X, Y, Z)
+        features_keys (List[str]): list of keys for the non-color point features (color channels are given by color_keys)
     Returns:
         Data: the point cloud formatted for later deep learning training.
 
     """
+
+    features = pos_keys + features_keys + color_keys
     # Positions and base features
-    pos = np.asarray([points["X"], points["Y"], points["Z"]], dtype=np.float32).transpose()
+    pos = np.asarray([points[k] for k in pos_keys], dtype=np.float32).transpose()
     # normalization
-    occluded_points = points["ReturnNumber"] > 1
+    if "ReturnNumber" in features:
+        occluded_points = points["ReturnNumber"] > 1
+        points["ReturnNumber"] = (points["ReturnNumber"]) / (RETURN_NUMBER_NORMALIZATION_MAX_VALUE)
+        points["NumberOfReturns"] = (points["NumberOfReturns"]) / (
+            RETURN_NUMBER_NORMALIZATION_MAX_VALUE
+        )
+    else:
+        occluded_points = np.zeros(pos.shape[0], dtype=np.bool_)
 
-    points["ReturnNumber"] = (points["ReturnNumber"]) / (RETURN_NUMBER_NORMALIZATION_MAX_VALUE)
-    points["NumberOfReturns"] = (points["NumberOfReturns"]) / (
-        RETURN_NUMBER_NORMALIZATION_MAX_VALUE
-    )
-
-    for color in ["Red", "Green", "Blue", "Infrared"]:
+    for color in color_keys:
         assert points[color].max() <= COLORS_NORMALIZATION_MAX_VALUE
         points[color][:] = points[color] / COLORS_NORMALIZATION_MAX_VALUE
         points[color][occluded_points] = 0.0
 
     # Additional features :
     # Average color, that will be normalized on the fly based on single-sample
-    rgb_avg = (
-        np.asarray([points["Red"], points["Green"], points["Blue"]], dtype=np.float32)
-        .transpose()
-        .mean(axis=1)
-    )
+    if "Red" in color_keys and "Green" in color_keys and "Blue" in color_keys:
+        rgb_avg = (
+            np.asarray([points["Red"], points["Green"], points["Blue"]], dtype=np.float32)
+            .transpose()
+            .mean(axis=1)
+        )
+    else:
+        rgb_avg = None
 
     # NDVI
-    ndvi = (points["Infrared"] - points["Red"]) / (points["Infrared"] + points["Red"] + 10**-6)
+    if "Infrared" in color_keys and "Red" in color_keys:
+        ndvi = (points["Infrared"] - points["Red"]) / (points["Infrared"] + points["Red"] + 10**-6)
+    else:
+        ndvi = None
+
+    additional_color_features = []
+    additional_color_keys = []
+    if rgb_avg is not None:
+        additional_color_features.append(rgb_avg)
+        additional_color_keys.append("rgb_avg")
+    if ndvi is not None:
+        additional_color_features.append(ndvi)
+        additional_color_keys.append("ndvi")
 
-    # todo
     x = np.stack(
         [
             points[name]
-            for name in [
-                "Intensity",
-                "ReturnNumber",
-                "NumberOfReturns",
-                "Red",
-                "Green",
-                "Blue",
-                "Infrared",
-            ]
+            for name in features_keys + color_keys
         ]
-        + [rgb_avg, ndvi],
+        + additional_color_features,
         axis=0,
     ).transpose()
-    x_features_names = [
-        "Intensity",
-        "ReturnNumber",
-        "NumberOfReturns",
-        "Red",
-        "Green",
-        "Blue",
-        "Infrared",
-        "rgb_avg",
-        "ndvi",
-    ]
+    x_features_names = [s.encode('utf-8') for s in (features_keys + color_keys + additional_color_keys)]
     y = points["Classification"]
 
-    data = Data(pos=pos, x=x, y=y, x_features_names=x_features_names)
+    data = Data(pos=torch.from_numpy(pos), x=torch.from_numpy(x), y=y, x_features_names=x_features_names)
 
     return data