Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V2.3.0 eval time interpolation #28

Merged
merged 16 commits into from
Jun 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/cicd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
python -m
myria3d.predict
--config-path /inputs/
--config-name predict_config_V2.1.0.yaml
--config-name predict_config_V2.3.0.yaml
predict.src_las=/inputs/792000_6272000_subset_buildings.las
predict.output_dir=/outputs/
predict.ckpt_path=/inputs/RandLaNet_Buildings_B2V0.5_epoch_033.ckpt
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
_convert_: all # For omegaconf struct to be converted to python dictionaries
# classification_preprocessing_dict = {source_class_code_int: target_class_code_int},
# 3: medium vegetation -> vegetation
# 4: high vegetation -> vegetation
# 160: antenna -> lasting_above
# 161: wind_turbines -> lasting_above
# 162: pylon -> lasting_above

# Expected classification dict:
# classification_preprocessing_dict: {3: 5, 4: 5, 160: 64, 161: 64, 162: 64}

# Temporary classification dict to take care of abnormal classes that are to be corrected in the dataset.
classification_preprocessing_dict: {3: 5, 4: 5, 160: 64, 161: 64, 162: 64, 0: 1, 7: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 66: 1, 67: 1, 77: 1, 155: 1, 204: 1}
# classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order).
classification_dict: {1: "unclassified", 2: "ground", 5: vegetation, 6: "building", 9: water, 17: bridge, 64: lasting_above}

# Input and output dims of neural net are dataset dependant:
d_in: 12
num_classes: 7

# loading functions for data object prepared via myria3d/datamodule/data.py
# Nota: In this function, artefacts as well as synthetic points (65, 66) are filtered out.
load_las_func:
_target_: functools.partial
_args_:
- "${get_method:myria3d.data.loading.FrenchLidarDataLogic.load_las}"
2 changes: 1 addition & 1 deletion configs/datamodule/transforms/default.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
defaults:
- preparations: default.yaml
- preparations: train.yaml
- augmentations: default.yaml
- normalizations: default.yaml

Expand Down
23 changes: 23 additions & 0 deletions configs/datamodule/transforms/preparations/predict.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
ToTensor:
_target_: myria3d.data.transforms.ToTensor

CopyFullPos:
_target_: myria3d.data.transforms.CopyFullPos

GridSampling:
_target_: torch_geometric.transforms.GridSampling
_args_:
- 0.25

FixedPoints:
_target_: torch_geometric.transforms.FixedPoints
_args_:
- 12500
replace: False
allow_duplicates: True

CopySampledPos:
_target_: myria3d.data.transforms.CopySampledPos

Center:
_target_: torch_geometric.transforms.Center
33 changes: 33 additions & 0 deletions configs/datamodule/transforms/preparations/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
ToTensor:
_target_: myria3d.data.transforms.ToTensor

TargetTransform:
_target_: myria3d.data.transforms.TargetTransform
_args_:
- ${datamodule.dataset_description.classification_preprocessing_dict}
- ${datamodule.dataset_description.classification_dict}

CopyFullPreparedTargets:
_target_: myria3d.data.transforms.CopyFullPreparedTargets

CopyFullPos:
_target_: myria3d.data.transforms.CopyFullPos

GridSampling:
_target_: torch_geometric.transforms.GridSampling
_args_:
- 0.25

FixedPoints:
_target_: torch_geometric.transforms.FixedPoints
_args_:
- 12500
replace: False
allow_duplicates: True

# For interpolation
CopySampledPos:
_target_: myria3d.data.transforms.CopySampledPos

Center:
_target_: torch_geometric.transforms.Center
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
EmptySubtileFilter:
_target_: myria3d.data.transforms.EmptySubtileFilter

ToTensor:
_target_: myria3d.data.transforms.ToTensor

Expand All @@ -22,8 +19,5 @@ FixedPoints:
replace: False
allow_duplicates: True

CopySampledPos:
_target_: myria3d.data.transforms.CopySampledPos

Center:
_target_: torch_geometric.transforms.Center
5 changes: 1 addition & 4 deletions configs/experiment/RandLaNet_base_run_FR.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
defaults:
- override /datamodule/dataset_description: 20220504_proto23dalles

datamodule:
augment: true

logger:
comet:
experiment_name: "RandLaNet - FR Data"
Expand All @@ -15,7 +12,7 @@ trainer:
min_epochs: 30
max_epochs: 150
check_val_every_n_epoch: 1
val_check_interval: 0.25
val_check_interval: 1.0
# gpus: [1]

callbacks:
Expand Down
26 changes: 26 additions & 0 deletions configs/experiment/predict.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# @package _global_

# Those are the parameters you need to override from a training configuration in order to perform
# an inference.
# This file should not be used directly if the trained model's config is different than the default config.

defaults:
- override /datamodule/transforms/preparations: predict.yaml

seed: 12345

task:
task_name: "predict"

predict:
src_las: "/path/to/input.las" # Any glob pattern can be used to predict on multiple files.
output_dir: "/path/to/output_dir/" # Predictions are saved in a new file which shares src_las basename.
ckpt_path: "/path/to/lightning_model.ckpt" # Checkpoint of trained model.
# + other params in configs/predict/default.yaml

hydra:
verbose: false

datamodule:
# larger batch size for speed
batch_size: 50
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

# all parameters below will be merged with parameters from default configurations

defaults:
- override /datamodule/transforms/preparations: test.yaml

seed: 12345

task:
Expand All @@ -13,8 +16,10 @@ hydra:
verbose: false

datamodule:
# test_data_dir: "path/to_dir/with/annotated/files.las" # override
batch_size: 50
# override if needed
# test_data_dir: "path/to_dir/with/annotated/files.las"
# larger batch size for speed
batch_size: 50

model:
ckpt_path: "path/to/checkpoint" # override
Expand Down
2 changes: 1 addition & 1 deletion configs/predict/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ output_dir: "/path/to/output_dir/" # Predictions are saved in a new file which
ckpt_path: "/path/to/lightning_model.ckpt" # Checkpoint of trained model.
gpus: 0 # 0 for none, 1 for one, [gpu_id] to specify which gpu to use e.g [1]

# Speifying the ouptut:
# Specifying the output:
# A list of string matching class names to select specific probas to save
# OR keyword "all" to save all probabilities.
# In addition, these dimensions are always created:
Expand Down
45 changes: 43 additions & 2 deletions docs/source/apidoc/default_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ ignore_warnings: true
trainer:
_target_: pytorch_lightning.Trainer
gpus: 0
min_epochs: 30
max_epochs: 30
min_epochs: 40
max_epochs: 40
log_every_n_steps: 1
check_val_every_n_epoch: 1
weights_summary: null
Expand All @@ -31,10 +31,51 @@ datamodule:
_target_: functools.partial
_args_:
- ${get_method:myria3d.data.loading.FrenchLidarDataLogic.load_las}
transforms:
preparations:
ToTensor:
_target_: myria3d.data.transforms.ToTensor
TargetTransform:
_target_: myria3d.data.transforms.TargetTransform
_args_:
- ${datamodule.dataset_description.classification_preprocessing_dict}
- ${datamodule.dataset_description.classification_dict}
GridSampling:
_target_: torch_geometric.transforms.GridSampling
_args_:
- 0.25
FixedPoints:
_target_: torch_geometric.transforms.FixedPoints
_args_:
- 12500
replace: false
allow_duplicates: true
Center:
_target_: torch_geometric.transforms.Center
augmentations:
x_flip:
_target_: torch_geometric.transforms.RandomFlip
_args_:
- 0
p: 0.5
y_flip:
_target_: torch_geometric.transforms.RandomFlip
_args_:
- 1
p: 0.5
normalizations:
NormalizePos:
_target_: myria3d.data.transforms.NormalizePos
StandardizeFeatures:
_target_: myria3d.data.transforms.StandardizeFeatures
augmentations_list: '${oc.dict.values: datamodule.transforms.augmentations}'
preparations_list: '${oc.dict.values: datamodule.transforms.preparations}'
normalizations_list: '${oc.dict.values: datamodule.transforms.normalizations}'
_target_: myria3d.data.datamodule.DataModule
prepared_data_dir: ${oc.env:PREPARED_DATA_DIR}
test_data_dir: ${datamodule.prepared_data_dir}/test/
batch_size: 16
prefetch_factor: 2
num_workers: 1
subtile_width_meters: 50
subtile_overlap: ${predict.subtile_overlap}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ myria3d.models.modules
PointNet
-------------

.. autoclass:: myria3d.models.modules.point_net.PointNet
.. autoclass:: myria3d.models.modules.point_net2.PointNet2
:members:

RandLA-Net
Expand Down
7 changes: 0 additions & 7 deletions docs/source/background/data_optimization.md

This file was deleted.

32 changes: 32 additions & 0 deletions docs/source/background/general_design.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# General design of the package

Here are a few challenges relative to training 3D segmentation models for Aerial High Density Lidar, and the strategies we adopt in Myria3D in order to face them.


## Subsampling is important to improve point cloud structure

**Situation**:
- Point Cloud data, and aerial Lidar data, represent rich data with a level of detail that might hinder the detection of objects with generally simple structures such as buildings, ground, and trees. On the other hand, smaller, more varied objects might benefit from denser point clouds.
- Another point to consider is that some 3D semantic segmentation architectures - including the RandLa-Net architecture we leverage - need fixed-size point clouds. This means that either subsampling or padding is required. This kind of implementation reduces flexibility and is suboptimal, but to our knowledge there are no alternative RandLa-Net implementations in pytorch that can accept different-size point clouds within the same batch.

**Strategy**:
- We leverage torch_geometric [GridSampling](https://pytorch-geometric.readthedocs.io/en/latest/modules/transforms.html#torch_geometric.transforms.GridSampling) and [FixedPoints](https://pytorch-geometric.readthedocs.io/en/latest/modules/transforms.html#torch_geometric.transforms.FixedPoints) to (i) simplify local point structures with a 0.25m resolution, and (ii) get a fixed-size point cloud that can be fed to the model. Grid Sampling has the effect of reducing point cloud size by around a third, with most reductions expected to occur in vegetation.

## Speed is of the essence
**Situation**:

- Short training times allow for faster iterations, more frequent feedback on architecture design, and less time spent on doomed solutions. As a result, we want to be parsimonious in terms of the operations performed during a train forward pass of a model.

**Strategy**:
- During training and validation phases, we perform supervision and back-propagation on the subsampled point cloud directly. Our hypothesis is that the gain we may expect from interpolating predicted logits to the full point cloud (usually from N'=~12500 to N~30000 on average for a 50mx50m sample) is tiny compared to the computational cost of such an operation (time of a forward pass multiplied by x5 to x10 with a batch size of 32 on CPU).


## Evaluation is key to select the right approach

**Situation**:

- Evaluation of models must be reliable in order to compare solutions. For semantic segmentation models on point cloud, this means that performance metrics (i.e. mean and by-class Intersection-over-Union) should be computed based on a confusion matrix that is computed from all points in all point clouds in the test dataset.

**Strategy**:
- During test phase, we **do** interpolate logits back to each sample (point cloud) before computing performance metrics. Interestingly, this enables comparing different subsampling approaches and interpolation methods in a robust way. The interpolation step is triggered in `eval` mode only, and is also leveraged during inference.
- This differentiated approach between `train` and `eval` modes has the drawback of requiring full (non-subsampled) positions and targets as well as subsampled, non-normalized positions to be copied and saved at data preparation time to allow for the interpolation.
4 changes: 1 addition & 3 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,8 @@ Myria3D > Documentation
:caption: Background

background/interpolation
background/data_optimization
background/general_design

.. TODO: ensure that all docstrings are in third-person mode.
.. TODO: find a way to document hydra config ; perhaps by switching to a full dataclasses mode.

.. toctree::
:maxdepth: 1
Expand Down
2 changes: 1 addition & 1 deletion docs/source/tutorials/make_predictions.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Refer to [this tutorial](./setup_install.md) for how to setup a virtual environm
To run inference, you will need:
- A source point cloud in LAS format on which to infer classes and probabilities.
- A checkpoint of a trained lightning module implementing model logic (class `myria3d.models.model.Model`)
- A minimal yaml configuration specifying parameters. We use [hydra](https://hydra.cc/) to manage configurations, and this yaml results from the model training. The `datamodule` and `model` parameters groups must match datset characteristics and model training settings. The `predict` parameters group specifies path to models and data as well as batch size (N=50 works well, the larger the faster) and use of gpu (optionnal).
- A minimal yaml configuration specifying parameters. We use [hydra](https://hydra.cc/) to manage configurations, and this yaml results from the model training. The `datamodule` and `model` parameters groups must match dataset characteristics and model training settings. The `predict` parameters group specifies the path to models and data as well as batch size (N=50 works well, the larger the faster) and use of gpu (optional). For hints on what to modify, see the `experiment/predict.yaml` file.

## Run inference from installed package

Expand Down
4 changes: 2 additions & 2 deletions myria3d/callbacks/logging_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,9 @@ def on_test_batch_end(
batch_idx: int,
dataloader_idx: int,
):
"""Log IoU for each class. Loop in case of multiple files in a single batch."""
"""Log IoU for each class."""
logits = outputs["logits"]
targets = batch.y
targets = outputs["targets"]
self.log_iou(logits, targets, "test", self.test_iou_by_class_dict)

def log_iou(self, logits, targets, phase: str, iou_dict):
Expand Down
13 changes: 8 additions & 5 deletions myria3d/data/datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,25 @@ class DataModule(LightningDataModule):

def __init__(self, **kwargs):
super().__init__()
# TODO: try to use save_hyperparameters to lighten this code.
# paths
self.prepared_data_dir = kwargs.get("prepared_data_dir")
self.test_data_dir = kwargs.get("test_data_dir")

# compute
self.num_workers = kwargs.get("num_workers", 0)

self.prefetch_factor = kwargs.get("prefetch_factor", 2)
# data preparation
self.subtile_width_meters = kwargs.get("subtile_width_meters", 50)
self.subtile_overlap = kwargs.get("subtile_overlap", 0)
self.batch_size = kwargs.get("batch_size", 32)
self.prefetch_factor = kwargs.get("prefetch_factor", 2)
self.augmentation_transforms = kwargs.get("augmentation_transforms", [])

# segmentation task
self.dataset_description = kwargs.get("dataset_description")
self.classification_dict = self.dataset_description.get("classification_dict")
self.classification_preprocessing_dict = self.dataset_description.get(
"classification_preprocessing_dict"
)
self.load_las = self.dataset_description.get("load_las_func")
# transforms
t = kwargs.get("transforms")
self.preparation_transforms = t.get("preparations_list")
self.augmentation_transforms = t.get("augmentations_list")
Expand Down Expand Up @@ -233,6 +234,8 @@ def yield_transformed_subtile_data(self):
# TODO: change to process time function
for xy_min_corner in centers:
data = self.extract_subtile_from_tile_data(tile_data, xy_min_corner)
if len(data.pos) < 50:
continue
if self.transform:
data = self.transform(data)
if data and (len(data.pos) > 50):
Expand Down
3 changes: 2 additions & 1 deletion myria3d/data/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ def split_and_save(self, filepath: str, output_subdir_path: str) -> None:
# Ignore if empty
break
subtile_data = self._extract_by_y(data_x_band)
self._save(subtile_data, output_subdir_path, idx)
if subtile_data and subtile_data.pos.shape[0] > 50:
self._save(subtile_data, output_subdir_path, idx)
idx += 1

def _find_file_in_dir(self, input_data_dir: str, basename: str) -> str:
Expand Down
Loading