From da1ad291ea23374a3b4c967c6239d786ba10a068 Mon Sep 17 00:00:00 2001
From: Charles GAYDON <charles.gaydon@gmail.com>
Date: Mon, 16 Jan 2023 12:17:55 +0100
Subject: [PATCH 01/10] Points Budget is the new default config

---
 configs/config.yaml                           |  2 +-
 configs/datamodule/transforms/default.yaml    |  2 +-
 .../{default.yaml => fixed_num_points.yaml}   |  0
 ...om_subsampling.yaml => points_budget.yaml} |  0
 ...yG-Overfit.yaml => RandLaNet-Overfit.yaml} |  2 +-
 .../RandLaNet-PyG-Overfit-NoRS.yaml           | 25 -------------------
 ...ml => RandLaNet_base_run_FR-MultiGPU.yaml} |  2 +-
 ...-SQRTOfInverseFreqencyClassWeighting.yaml} |  5 ++--
 ...t_NoRS.yaml => RandLaNet_base_run_FR.yaml} |  8 +++---
 docs/source/guides/train_new_model.md         | 10 ++++----
 10 files changed, 16 insertions(+), 40 deletions(-)
 rename configs/datamodule/transforms/preparations/{default.yaml => fixed_num_points.yaml} (100%)
 rename configs/datamodule/transforms/preparations/{no_random_subsampling.yaml => points_budget.yaml} (100%)
 rename configs/experiment/{RandLaNet-PyG-Overfit.yaml => RandLaNet-Overfit.yaml} (94%)
 delete mode 100755 configs/experiment/RandLaNet-PyG-Overfit-NoRS.yaml
 rename configs/experiment/{RandLaNet_base_run_FR_pyg_randla_net-MultiGPU.yaml => RandLaNet_base_run_FR-MultiGPU.yaml} (84%)
 rename configs/experiment/{RandLaNet_base_run_FR_pyg_randla_net-SQRT-ICFW.yaml => RandLaNet_base_run_FR-SQRTOfInverseFreqencyClassWeighting.yaml} (52%)
 rename configs/experiment/{RandLaNet_base_run_FR_pyg_randla_net_NoRS.yaml => RandLaNet_base_run_FR.yaml} (57%)

diff --git a/configs/config.yaml b/configs/config.yaml
index 134841b2..80463192 100755
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -29,7 +29,7 @@ defaults:
   - datamodule: hdf5_datamodule.yaml
   - dataset_description: 20220607_151_dalles_proto.yaml  # describes input features and classes
   - callbacks: default.yaml # set this to null if you don't want to use callbacks
-  - model: pyg_randla_net_model.yaml  # other option is pyg_randla_net_model
+  - model: pyg_randla_net_model.yaml
   
   - logger: comet # set logger here or use command line (e.g. `python run.py logger=wandb`)
   - task: default.yaml
diff --git a/configs/datamodule/transforms/default.yaml b/configs/datamodule/transforms/default.yaml
index c5f2e980..59605c5c 100644
--- a/configs/datamodule/transforms/default.yaml
+++ b/configs/datamodule/transforms/default.yaml
@@ -1,5 +1,5 @@
 defaults:
-  - preparations: default.yaml
+  - preparations: points_budget.yaml
   - augmentations: none.yaml
   - normalizations: default.yaml
 
diff --git a/configs/datamodule/transforms/preparations/default.yaml b/configs/datamodule/transforms/preparations/fixed_num_points.yaml
similarity index 100%
rename from configs/datamodule/transforms/preparations/default.yaml
rename to configs/datamodule/transforms/preparations/fixed_num_points.yaml
diff --git a/configs/datamodule/transforms/preparations/no_random_subsampling.yaml b/configs/datamodule/transforms/preparations/points_budget.yaml
similarity index 100%
rename from configs/datamodule/transforms/preparations/no_random_subsampling.yaml
rename to configs/datamodule/transforms/preparations/points_budget.yaml
diff --git a/configs/experiment/RandLaNet-PyG-Overfit.yaml b/configs/experiment/RandLaNet-Overfit.yaml
similarity index 94%
rename from configs/experiment/RandLaNet-PyG-Overfit.yaml
rename to configs/experiment/RandLaNet-Overfit.yaml
index e017fbac..384b1667 100755
--- a/configs/experiment/RandLaNet-PyG-Overfit.yaml
+++ b/configs/experiment/RandLaNet-Overfit.yaml
@@ -8,7 +8,7 @@ defaults:
 
 logger:
   comet:
-    experiment_name: "RandLaNetOverfit"
+    experiment_name: "RandLaNet-Overfit"
 
 trainer:
   min_epochs: 100
diff --git a/configs/experiment/RandLaNet-PyG-Overfit-NoRS.yaml b/configs/experiment/RandLaNet-PyG-Overfit-NoRS.yaml
deleted file mode 100755
index 5d298370..00000000
--- a/configs/experiment/RandLaNet-PyG-Overfit-NoRS.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-# @package _global_
-
-# Nota : call "python myria3d/pctl/dataset/toy_dataset.py" to create a toy dataset before running this.
-defaults:
-  - override /model: pyg_randla_net_model.yaml
-  - override /datamodule/transforms/preparations: no_random_subsampling.yaml
-
-
-logger:
-  comet:
-    experiment_name: "RandLaNetOverfit"
-
-trainer:
-  min_epochs: 100
-  max_epochs: 100
-  overfit_batches: 1
-  num_sanity_val_steps: 0
-
-datamodule:
-  batch_size: 12
-  num_workers: 2
-  # runtime.cwd is where application is run from e.g. where run.py is.
-  data_dir: "${hydra:runtime.cwd}/tests/data/"
-  split_csv_path: "${hydra:runtime.cwd}/tests/data/toy_dataset_src/toy_dataset_split.csv"
-  hdf5_file_path: "${hydra:runtime.cwd}/tests/data/toy_dataset.hdf5"
diff --git a/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net-MultiGPU.yaml b/configs/experiment/RandLaNet_base_run_FR-MultiGPU.yaml
similarity index 84%
rename from configs/experiment/RandLaNet_base_run_FR_pyg_randla_net-MultiGPU.yaml
rename to configs/experiment/RandLaNet_base_run_FR-MultiGPU.yaml
index 20cffcf5..5a9e8727 100755
--- a/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net-MultiGPU.yaml
+++ b/configs/experiment/RandLaNet_base_run_FR-MultiGPU.yaml
@@ -1,6 +1,6 @@
 # @package _global_
 defaults:
-  - RandLaNet_base_run_FR_pyg_randla_net.yaml
+  - RandLaNet_base_run_FR.yaml
 
 logger:
   comet:
diff --git a/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net-SQRT-ICFW.yaml b/configs/experiment/RandLaNet_base_run_FR-SQRTOfInverseFreqencyClassWeighting.yaml
similarity index 52%
rename from configs/experiment/RandLaNet_base_run_FR_pyg_randla_net-SQRT-ICFW.yaml
rename to configs/experiment/RandLaNet_base_run_FR-SQRTOfInverseFreqencyClassWeighting.yaml
index 9bd08ddb..131ce2b1 100755
--- a/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net-SQRT-ICFW.yaml
+++ b/configs/experiment/RandLaNet_base_run_FR-SQRTOfInverseFreqencyClassWeighting.yaml
@@ -1,11 +1,12 @@
 # @package _global_
 defaults:
-  - RandLaNet_base_run_FR_pyg_randla_net.yaml
+  - RandLaNet_base_run_FR.yaml
   - override /model/criterion: WeightedCrossEntropyLoss.yaml
 
 logger:
   comet:
-    experiment_name: "RandLaNet_base_run_FR_pyg_randla_net-SQRT-ICFW"
+    experiment_name: "RandLaNet_base_run_FR-SQRT-ICFW"
 
 dataset_description:
+  # Sqrt(Inverse Frequency) of classes in defaut dataset (a.k.a. `151proto`).
   class_weights: [0.19,0.08,0.08,0.36,1.13,3.11,2.05]
\ No newline at end of file
diff --git a/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net_NoRS.yaml b/configs/experiment/RandLaNet_base_run_FR.yaml
similarity index 57%
rename from configs/experiment/RandLaNet_base_run_FR_pyg_randla_net_NoRS.yaml
rename to configs/experiment/RandLaNet_base_run_FR.yaml
index 0ec696e5..7799ed7d 100755
--- a/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net_NoRS.yaml
+++ b/configs/experiment/RandLaNet_base_run_FR.yaml
@@ -1,11 +1,11 @@
 # @package _global_
 defaults:
-  - RandLaNet_base_run_FR_pyg_randla_net.yaml
-  - override /datamodule/transforms/preparations: no_random_subsampling.yaml
+  - RandLaNet_base_run_FR.yaml
+  - override /datamodule/transforms/augmentations: light.yaml
 
 logger:
   comet:
-    experiment_name: "RandLaNet_base_run_FR_pyg_randla_net_NoRS-(BS10xMAX40000pts)"
+    experiment_name: "RandLaNet_base_run_FR-(BatchSize10xBudget(300pts-40000pts))"
 
 
 # Smaller BS : 10 x 40 000 (max) == 400 000 pts i.e. previous budget of 32 x 12 500pts.
@@ -16,5 +16,5 @@ trainer:
   num_sanity_val_steps: 2
   min_epochs: 100
   max_epochs: 150
-  accumulate_grad_batches: 3  # b/c larger clouds will not fit in memory with original BS.
+  accumulate_grad_batches: 3  # b/c larger clouds will not fit in memory with original Batch Size
   # gpus: [1]
diff --git a/docs/source/guides/train_new_model.md b/docs/source/guides/train_new_model.md
index 12fa7cea..64bcead0 100644
--- a/docs/source/guides/train_new_model.md
+++ b/docs/source/guides/train_new_model.md
@@ -17,18 +17,18 @@ To test your setup and logging capabilities, you can try overfitting on a single
 To overfit on a single batch for 30 epochs, run:
 
 ```bash
-python run.py experiment=RandLaNetDebug
+python run.py experiment=RandLaNet-Overfit
 ```
 
 ## Training
 
-Define your experiment hyperparameters in an experiment file in the `configs/experiment` folder. You may stem from one of the provided experiment file (e.g. `RandLaNet_base_run_FR_pyg_randla_net.yaml`). In particular, you will need to define `dataset_description` to specify your classification task - see config `20220607_151_dalles_proto.yaml` for an example.
+Define your experiment hyperparameters in an experiment file in the `configs/experiment` folder. You may stem from one of the provided experiment file (e.g. `RandLaNet_base_run_FR.yaml`). In particular, you will need to define `dataset_description` to specify your classification task - see config `20220607_151_dalles_proto.yaml` for an example.
 
 
 To run the full training and validation for French Lidar HD, run:
 
 ```bash
-python run.py experiment=RandLaNet_base_run_FR_pyg_randla_net
+python run.py experiment=RandLaNet_base_run_FR
 ```
 
 After training, you model best checkpoints and hydra config will be saved in a `DATE/TIME/` subfolder of the `LOGS_DIR` you specified, with an associated hydra `config.yaml`.
@@ -40,7 +40,7 @@ You can perfom this automatically before training by setting `trainer.auto_lr_fi
 
 ### Multi-GPUs
 
-Multi-GPUs training is supported. Refer to e.g. experiment file `RandLaNet_base_run_FR_pyg_randla_net-MultiGPU.yaml` for pytorch lightning flags to activate it. 
+Multi-GPUs training is supported. Refer to e.g. experiment file `RandLaNet_base_run_FR-MultiGPU.yaml` for pytorch lightning flags to activate it. 
 Multi-GPUs training effectively reduces training time by the number of GPUs used. Batch size might need to be reduced to keep a constant number of steps per epoch in DDP.
 
 ## Testing the model
@@ -57,7 +57,7 @@ task.task_name="test" \
 model.ckpt_path={/path/to/checkpoint.ckpt} \
 trainer.gpus={0 for none, [i] to use GPU number i} \
 ```
-ARguments `config-path` and `config-name` means you are using the saved configuration from your training, which contains the path to the prepared HDF5 dataset. 
+Arguments `config-path` and `config-name` means you are using the saved configuration from your training, which contains the path to the prepared HDF5 dataset. 
 
 If you are using defaut configurations, you can call test using a custom experiment:
 

From f1e5f73ed58711a5af9590bc5d6d94098cb9c8de Mon Sep 17 00:00:00 2001
From: Charles GAYDON <charles.gaydon@gmail.com>
Date: Mon, 16 Jan 2023 14:35:52 +0100
Subject: [PATCH 02/10] Implement DropPointsByClass as well as its tests - make
 it the default behavior

Tests pass for DropPointsByClass

Copy full position BEFORE dropping by class to avoid error at knn_interpolate

More tests for DropPointsByClass creation

Flake8
---
 .../preparations/fixed_num_points.yaml        | 15 +++++
 .../preparations/points_budget.yaml           | 15 +++++
 ...0220204_BuildingValidation_and_Ground.yaml |  1 +
 .../20220607_151_dalles_proto.yaml            |  8 ++-
 docs/source/tutorials/make_predictions.md     |  8 ++-
 docs/source/tutorials/prepare_dataset.md      | 13 +++--
 myria3d/pctl/transforms/transforms.py         | 56 +++++++++----------
 tests/myria3d/data/test_transforms.py         | 36 ++++++++++--
 8 files changed, 112 insertions(+), 40 deletions(-)

diff --git a/configs/datamodule/transforms/preparations/fixed_num_points.yaml b/configs/datamodule/transforms/preparations/fixed_num_points.yaml
index 94d0709c..410b372f 100644
--- a/configs/datamodule/transforms/preparations/fixed_num_points.yaml
+++ b/configs/datamodule/transforms/preparations/fixed_num_points.yaml
@@ -1,12 +1,17 @@
 # default preparations with grid sampling and random sampling.
 
 train:
+
   TargetTransform:
     _target_: myria3d.pctl.transforms.transforms.TargetTransform
     _args_:
       - ${dataset_description.classification_preprocessing_dict}
       - ${dataset_description.classification_dict}
 
+  DropPointsByClass:
+    _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
+    classes_to_drop: ${dataset_description.classes_to_drop}
+
   GridSampling:
     _target_: torch_geometric.transforms.GridSampling
     _args_:
@@ -23,12 +28,17 @@ train:
     _target_: torch_geometric.transforms.Center
 
 eval:
+
   TargetTransform:
     _target_: myria3d.pctl.transforms.transforms.TargetTransform
     _args_:
       - ${dataset_description.classification_preprocessing_dict}
       - ${dataset_description.classification_dict}
 
+  DropPointsByClass:
+    _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
+    classes_to_drop: ${dataset_description.classes_to_drop}
+
   CopyFullPreparedTargets:
     _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets
 
@@ -55,9 +65,14 @@ eval:
     _target_: torch_geometric.transforms.Center
   
 predict:
+
   CopyFullPos:
     _target_: myria3d.pctl.transforms.transforms.CopyFullPos
 
+  DropPointsByClass:
+    _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
+    classes_to_drop: ${dataset_description.classes_to_drop}
+
   GridSampling:
     _target_: torch_geometric.transforms.GridSampling
     _args_:
diff --git a/configs/datamodule/transforms/preparations/points_budget.yaml b/configs/datamodule/transforms/preparations/points_budget.yaml
index 88b3ccbe..e954aea1 100644
--- a/configs/datamodule/transforms/preparations/points_budget.yaml
+++ b/configs/datamodule/transforms/preparations/points_budget.yaml
@@ -1,12 +1,17 @@
 # default preparations with grid sampling and random sampling.
 
 train:
+
   TargetTransform:
     _target_: myria3d.pctl.transforms.transforms.TargetTransform
     _args_:
       - ${dataset_description.classification_preprocessing_dict}
       - ${dataset_description.classification_dict}
 
+  DropPointsByClass:
+    _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
+    classes_to_drop: ${dataset_description.classes_to_drop}
+
   GridSampling:
     _target_: torch_geometric.transforms.GridSampling
     _args_:
@@ -26,6 +31,7 @@ train:
     _target_: torch_geometric.transforms.Center
 
 eval:
+
   TargetTransform:
     _target_: myria3d.pctl.transforms.transforms.TargetTransform
     _args_:
@@ -35,6 +41,10 @@ eval:
   CopyFullPreparedTargets:
     _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets
 
+  DropPointsByClass:
+    _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
+    classes_to_drop: ${dataset_description.classes_to_drop}
+
   CopyFullPos:
     _target_: myria3d.pctl.transforms.transforms.CopyFullPos
 
@@ -61,9 +71,14 @@ eval:
     _target_: torch_geometric.transforms.Center
   
 predict:
+
   CopyFullPos:
     _target_: myria3d.pctl.transforms.transforms.CopyFullPos
 
+  DropPointsByClass:
+    _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
+    classes_to_drop: ${dataset_description.classes_to_drop}
+
   GridSampling:
     _target_: torch_geometric.transforms.GridSampling
     _args_:
diff --git a/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml b/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml
index 8f955119..6d7a68b2 100644
--- a/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml
+++ b/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml
@@ -2,6 +2,7 @@ _convert_: all  # For omegaconf struct to be converted to python dictionnaries
 # classification_preprocessing_dict = {source_class_code_int: target_class_code_int},
 classification_preprocessing_dict: {59: 6, 50: 1}
 # classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order).
+classes_to_drop: []
 classification_dict: {1: "unclassified", 2: "ground", 6: "building"}
 
 # Input and output dims of neural net are dataset dependant:
diff --git a/configs/dataset_description/20220607_151_dalles_proto.yaml b/configs/dataset_description/20220607_151_dalles_proto.yaml
index ee9f5704..cddb1bc1 100644
--- a/configs/dataset_description/20220607_151_dalles_proto.yaml
+++ b/configs/dataset_description/20220607_151_dalles_proto.yaml
@@ -6,10 +6,12 @@ _convert_: all  # For omegaconf struct to be converted to python dictionnaries
 # 161: wind_turbines -> lasting_above
 # 162: pylon -> lasting_above
 
-# Expectded classification dict:
+# Noise points have class 65 - this will be the defaut for inference in production.
+# Dropped points will be ignored in the inference process but still included in the final output cloud.
+classes_to_drop: [65]
+# Reduced classification dict:
 # classification_preprocessing_dict: {3: 5, 4: 5, 64:1, 65:1, 160: 64, 161: 64, 162: 64}
-
-# Additionnaly, artefacts as well as synthetic points (65, 66) are set to "unclassified"
+# Complete classification dict since some trash classes are leftover.
 classification_preprocessing_dict: {3: 5, 4: 5, 160: 64, 161: 64, 162: 64, 0: 1, 7: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 64: 1, 65: 1, 66: 1, 67: 1, 77: 1, 155: 1, 204: 1}
 
 # classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order).
diff --git a/docs/source/tutorials/make_predictions.md b/docs/source/tutorials/make_predictions.md
index a1218e74..41abd538 100644
--- a/docs/source/tutorials/make_predictions.md
+++ b/docs/source/tutorials/make_predictions.md
@@ -75,4 +75,10 @@ One can control for which classes to save the probabilities. This is achieved wi
 To improve spatial regularity of the predicted probabilities, one can make inference on square receptive fields that have a non-null overlap with each other. This has the effect of smoothing out irregular predictions. The resulting classification is better looking, with more homogeneous predictions at the object level.
 
 To define an overlap between successive 50m*50m receptive fields, set `predict.subtile_overlap={value}`.
-This, however, comes with a large computation price. For instance, `predict.subtile_overlap=25` means a 25m overlap on both x and y axes, which multiplies inference time by a factor of 4.
\ No newline at end of file
+This, however, comes with a large computation price. For instance, `predict.subtile_overlap=25` means a 25m overlap on both x and y axes, which multiplies inference time by a factor of 4.
+
+### Ignoring artefacts points during inference
+
+Lidar acquisition may have produced artefacts points. If these points were identified with one (or several) classification code(s), they can be ignored during inference. These points will still be present in the output cloud, but will not negatively disturb model inference. Note that they will still have class probabilities, obtained from their non-artefacts closest neighboors.
+
+In the configuration, data transforms are used to drop these points according to the `dataset_description.classes_to_drop` parameter. By default, `dataset_description.classes_to_drop=[65]` where 65 is the defaut code used to flag artefact. Note: you may need to use quotes when overriding this parameter via CLI.
diff --git a/docs/source/tutorials/prepare_dataset.md b/docs/source/tutorials/prepare_dataset.md
index efe56286..c9646672 100644
--- a/docs/source/tutorials/prepare_dataset.md
+++ b/docs/source/tutorials/prepare_dataset.md
@@ -6,10 +6,16 @@ The loading function is dataset dependant, and is `lidar_hd_pre_transform` by de
 
 It is adapted to the French Lidar HD data provided by IGN (see [the official page](https://geoservices.ign.fr/lidarhd) - link in French). Return number and color information (RGBI) are scaled to 0-1 interval, a NDVI and an average color ((R+G+B)/3) dimension are created, and points that may be occluded (as indicated by higher return number) have their color set to 0.
 
-You may want to implement your own logic (e.g. with custom, additional features) in directory `points_pre_transform`. It then needs to be referenced similarly to `lidar_hd_pre_transform`.
+You may want to implement your own logic (e.g. with custom, additional features) in directory `points_pre_transform`. It then needs to be referenced similarly to `lidar_hd_pre_transform`. 
 
+If you use your own classification convention , you will need to create a `dataset_description` configuration (for an example see `configs/dataset_description/20220607_151_dalles_proto.yaml`).
 
-## Using your own data
+Additionnaly, you can control cloud sampling parameters via two configurations:
+- `configs/datamodule/transforms/preparations/points_budget.yaml`: (defaut) allows variable cloud size within lower and higher boundaries. 
+- `configs/datamodule/transforms/preparations/fixed_num_points.yaml`: (alternative) samples all clouds to a fixed size, allowing for duplicated points.
+
+
+## Preparing the dataset
 
 Input point clouds need to be splitted in subtiles that can be digested by segmentation models. We found that a receptive field of 50m*50m was a good balance between context and memory intensity. For faster training, this split can be done once, to avoid loading large file in memory multiple times.
 
@@ -21,8 +27,7 @@ These will be composed into a single file dataset for which you can specify a pa
 
 Once this is done, you do not need sources anymore, and simply specifying the path to the HDF5 dataset is enough.
 
-
-It's also possible to create the hdf5 file without a whole training, just fill the `datamodule.hdf5_file_path` parameter as before to specify the file path, but use `task=create_hdf5` instead of `task=fit`.
+It's also possible to create the hdf5 file without training any model: just fill the `datamodule.hdf5_file_path` parameter as before to specify the file path, but use `task=create_hdf5` instead of `task=fit`.
 
 
 ## Getting started quickly with a toy dataset
diff --git a/myria3d/pctl/transforms/transforms.py b/myria3d/pctl/transforms/transforms.py
index 46613b63..da7d565f 100755
--- a/myria3d/pctl/transforms/transforms.py
+++ b/myria3d/pctl/transforms/transforms.py
@@ -26,16 +26,13 @@ def __call__(self, data: Data):
 
 
 def subsample_data(data, num_nodes, choice):
+    # TODO: get num_nodes from data.num_nodes instead to simplify signature
     for key, item in data:
         if key == "num_nodes":
             data.num_nodes = choice.size(0)
         elif bool(re.search("edge", key)):
             continue
-        elif (
-            torch.is_tensor(item)
-            and item.size(0) == num_nodes
-            and item.size(0) != 1
-        ):
+        elif torch.is_tensor(item) and item.size(0) == num_nodes and item.size(0) != 1:
             data[key] = item[choice]
     return data
 
@@ -67,10 +64,7 @@ def __call__(self, data):
             return data
 
         choice = torch.cat(
-            [
-                torch.randperm(num_nodes)
-                for _ in range(math.ceil(self.num / num_nodes))
-            ],
+            [torch.randperm(num_nodes) for _ in range(math.ceil(self.num / num_nodes))],
             dim=0,
         )[: self.num]
 
@@ -124,9 +118,7 @@ def __call__(self, data: Data):
         data.x[:, idx] = self.standardize_channel(data.x[:, idx])
         return data
 
-    def standardize_channel(
-        self, channel_data: torch.Tensor, clamp_sigma: int = 3
-    ):
+    def standardize_channel(self, channel_data: torch.Tensor, clamp_sigma: int = 3):
         """Sample-wise standardization y* = (y-y_mean)/y_std. clamping to ignore large values."""
         mean = channel_data.mean()
         std = channel_data.std() + 10**-6
@@ -189,9 +181,7 @@ def __init__(
 
         # Set to attribute to log potential type errors
         self.classification_dict = classification_dict
-        self.classification_preprocessing_dict = (
-            classification_preprocessing_dict
-        )
+        self.classification_preprocessing_dict = classification_preprocessing_dict
 
     def __call__(self, data: Data):
         data.y = self.transform(data.y)
@@ -218,20 +208,30 @@ def transform(self, y):
 
     def _set_preprocessing_mapper(self, classification_preprocessing_dict):
         """Set mapper from source classification code to another code."""
-        d = {
-            key: value
-            for key, value in classification_preprocessing_dict.items()
-        }
-        self.preprocessing_mapper = np.vectorize(
-            lambda class_code: d.get(class_code, class_code)
-        )
+        d = {key: value for key, value in classification_preprocessing_dict.items()}
+        self.preprocessing_mapper = np.vectorize(lambda class_code: d.get(class_code, class_code))
 
     def _set_mapper(self, classification_dict):
         """Set mapper from source classification code to consecutive integers."""
-        d = {
-            class_code: class_index
-            for class_index, class_code in enumerate(
-                classification_dict.keys()
-            )
-        }
+        d = {class_code: class_index for class_index, class_code in enumerate(classification_dict.keys())}
         self.mapper = np.vectorize(lambda class_code: d.get(class_code))
+
+
+class DropPointsByClass(BaseTransform):
+    """Drop points"""
+
+    def __init__(self, classes_to_drop=None):
+        self.classes_to_drop = classes_to_drop
+        if np.isscalar(self.classes_to_drop):
+            self.classes_to_drop = [self.classes_to_drop]
+        if self.classes_to_drop:
+            self.classes_to_drop = torch.Tensor(self.classes_to_drop)
+
+    def __call__(self, data):
+        if self.classes_to_drop:
+            choice = torch.logical_not(torch.isin(data.y, self.classes_to_drop))
+            data = subsample_data(data, num_nodes=data.num_nodes, choice=choice)
+        return data
+
+    def __repr__(self):
+        return "{}()".format(self.__class__.__name__)
diff --git a/tests/myria3d/data/test_transforms.py b/tests/myria3d/data/test_transforms.py
index 157f6603..f3779907 100644
--- a/tests/myria3d/data/test_transforms.py
+++ b/tests/myria3d/data/test_transforms.py
@@ -1,8 +1,9 @@
 import numpy as np
 import pytest
 import torch_geometric
+import torch
 
-from myria3d.pctl.transforms.transforms import TargetTransform
+from myria3d.pctl.transforms.transforms import TargetTransform, DropPointsByClass
 
 
 def test_TargetTransform_with_valid_config():
@@ -22,10 +23,37 @@ def test_TargetTransform_throws_type_error_if_invalid_classification_dict():
     classification_dict = {1: "unclassified", 2: "ground", 6: "building"}
     tt = TargetTransform(classification_preprocessing_dict, classification_dict)
 
-    invalid_input_data = torch_geometric.data.Data(
-        x=None, y=np.array([1, 1, 1, 2, 99999, 1])
-    )
+    invalid_input_data = torch_geometric.data.Data(x=None, y=np.array([1, 1, 1, 2, 99999, 1]))
     with pytest.raises(TypeError):
         # error content:
         # int() argument must be a string, a bytes-like object or a number, not 'NoneType'
         _ = tt(invalid_input_data)
+
+
+def test_DropPointsByClass():
+    # points with class 65 are droped.
+    y = torch.Tensor([1, 65, 65, 2, 65])
+    x = torch.rand((5, 3))
+    data = torch_geometric.data.Data(x=x, y=y)
+    drop_transforms = DropPointsByClass([65])
+    transformed_data = drop_transforms(data)
+    assert torch.equal(transformed_data.y, torch.Tensor([1, 2]))
+    assert transformed_data.x.size(0) == 2
+
+    # No modification
+    x = torch.rand((3, 3))
+    y = torch.Tensor([1, 2, 3])
+    data = torch_geometric.data.Data(x=x, y=y)
+    transformed_data = drop_transforms(data)
+    assert torch.equal(data.x, transformed_data.x)
+    assert torch.equal(data.y, transformed_data.y)
+
+
+def test_DropPointsByClass_creation():
+    scalar = 42
+    a = DropPointsByClass(scalar)
+    b = DropPointsByClass([scalar])
+    assert torch.equal(a.classes_to_drop, b.classes_to_drop)
+
+    c = DropPointsByClass(None)
+    assert c.classes_to_drop is None

From 836fbe6f860a2a20e1b2b4102ae4de10196bb959 Mon Sep 17 00:00:00 2001
From: CharlesGaydon <charles.gaydon@gmail.com>
Date: Mon, 16 Jan 2023 16:23:45 +0100
Subject: [PATCH 03/10] Give access to input classification during inference
 for DropPointsByClass

---
 myria3d/pctl/dataset/iterable.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/myria3d/pctl/dataset/iterable.py b/myria3d/pctl/dataset/iterable.py
index 7dcad706..24d077a6 100644
--- a/myria3d/pctl/dataset/iterable.py
+++ b/myria3d/pctl/dataset/iterable.py
@@ -20,12 +20,8 @@ class InferenceDataset(IterableDataset):
     def __init__(
         self,
         las_file: str,
-        points_pre_transform: Callable[
-            [ArrayLike], Data
-        ] = lidar_hd_pre_transform,
-        pre_filter: Optional[
-            Callable[[Data], bool]
-        ] = pre_filter_below_n_points,
+        points_pre_transform: Callable[[ArrayLike], Data] = lidar_hd_pre_transform,
+        pre_filter: Optional[Callable[[Data], bool]] = pre_filter_below_n_points,
         transform: Optional[Callable[[Data], Data]] = None,
         tile_width: Number = 1000,
         subtile_width: Number = 50,
@@ -57,7 +53,7 @@ def get_iterator(self):
         ):
             sample_data = self.points_pre_transform(sample_points)
             sample_data["x"] = torch.from_numpy(sample_data["x"])
-            # sample_data["y"] = torch.from_numpy(sample_data["y"])  # No need in inference.
+            sample_data["y"] = torch.LongTensor(sample_data["y"])  # Need input classification for DropPointsByClass
             sample_data["pos"] = torch.from_numpy(sample_data["pos"])
             # for final interpolation - should be kept as a np.ndarray to be batched as a list later.
             sample_data["idx_in_original_cloud"] = idx_in_original_cloud

From 669ee4cd4f7343f447134f9fa66aa4ce696814bb Mon Sep 17 00:00:00 2001
From: CharlesGaydon <charles.gaydon@gmail.com>
Date: Mon, 16 Jan 2023 17:45:32 +0100
Subject: [PATCH 04/10] Do not map code for artefact to unclassified class in
 order to ignore it

---
 configs/dataset_description/20220607_151_dalles_proto.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/dataset_description/20220607_151_dalles_proto.yaml b/configs/dataset_description/20220607_151_dalles_proto.yaml
index cddb1bc1..9bf378f7 100644
--- a/configs/dataset_description/20220607_151_dalles_proto.yaml
+++ b/configs/dataset_description/20220607_151_dalles_proto.yaml
@@ -12,7 +12,7 @@ classes_to_drop: [65]
 # Reduced classification dict:
 # classification_preprocessing_dict: {3: 5, 4: 5, 64:1, 65:1, 160: 64, 161: 64, 162: 64}
 # Complete classification dict since some trash classes are leftover.
-classification_preprocessing_dict: {3: 5, 4: 5, 160: 64, 161: 64, 162: 64, 0: 1, 7: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 64: 1, 65: 1, 66: 1, 67: 1, 77: 1, 155: 1, 204: 1}
+classification_preprocessing_dict: {3: 5, 4: 5, 160: 64, 161: 64, 162: 64, 0: 1, 7: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 64: 1, 66: 1, 67: 1, 77: 1, 155: 1, 204: 1}
 
 # classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order).
 classification_dict: {1: "unclassified", 2: "ground", 5: vegetation, 6: "building", 9: water, 17: bridge, 64: lasting_above}

From 8591ff25194e8315e132c833cb833a49ded985ba Mon Sep 17 00:00:00 2001
From: CharlesGaydon <charles.gaydon@gmail.com>
Date: Tue, 31 Jan 2023 09:30:55 +0100
Subject: [PATCH 05/10] C=65 is default code to be ignored. Its class will
 remain an artefact

---
 .../preparations/fixed_num_points.yaml        | 13 +++-----
 .../preparations/points_budget.yaml           | 16 ++++-----
 ...0220204_BuildingValidation_and_Ground.yaml |  1 -
 .../20220607_151_dalles_proto.yaml            |  9 ++---
 configs/model/criterion/CrossEntropyLoss.yaml |  3 +-
 .../criterion/WeightedCrossEntropyLoss.yaml   |  1 +
 docs/source/tutorials/make_predictions.md     |  4 +--
 myria3d/models/interpolation.py               | 33 ++++++++++++-------
 myria3d/pctl/transforms/transforms.py         | 24 ++++++--------
 9 files changed, 49 insertions(+), 55 deletions(-)

diff --git a/configs/datamodule/transforms/preparations/fixed_num_points.yaml b/configs/datamodule/transforms/preparations/fixed_num_points.yaml
index 410b372f..a7534fa2 100644
--- a/configs/datamodule/transforms/preparations/fixed_num_points.yaml
+++ b/configs/datamodule/transforms/preparations/fixed_num_points.yaml
@@ -10,7 +10,6 @@ train:
 
   DropPointsByClass:
     _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
-    classes_to_drop: ${dataset_description.classes_to_drop}
 
   GridSampling:
     _target_: torch_geometric.transforms.GridSampling
@@ -29,21 +28,20 @@ train:
 
 eval:
 
+  CopyFullPos:
+    _target_: myria3d.pctl.transforms.transforms.CopyFullPos
+
   TargetTransform:
     _target_: myria3d.pctl.transforms.transforms.TargetTransform
     _args_:
       - ${dataset_description.classification_preprocessing_dict}
       - ${dataset_description.classification_dict}
 
-  DropPointsByClass:
-    _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
-    classes_to_drop: ${dataset_description.classes_to_drop}
-
   CopyFullPreparedTargets:
     _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets
 
-  CopyFullPos:
-    _target_: myria3d.pctl.transforms.transforms.CopyFullPos
+  DropPointsByClass:
+    _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
 
   GridSampling:
     _target_: torch_geometric.transforms.GridSampling
@@ -71,7 +69,6 @@ predict:
 
   DropPointsByClass:
     _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
-    classes_to_drop: ${dataset_description.classes_to_drop}
 
   GridSampling:
     _target_: torch_geometric.transforms.GridSampling
diff --git a/configs/datamodule/transforms/preparations/points_budget.yaml b/configs/datamodule/transforms/preparations/points_budget.yaml
index e954aea1..f34b843d 100644
--- a/configs/datamodule/transforms/preparations/points_budget.yaml
+++ b/configs/datamodule/transforms/preparations/points_budget.yaml
@@ -10,7 +10,6 @@ train:
 
   DropPointsByClass:
     _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
-    classes_to_drop: ${dataset_description.classes_to_drop}
 
   GridSampling:
     _target_: torch_geometric.transforms.GridSampling
@@ -38,16 +37,15 @@ eval:
       - ${dataset_description.classification_preprocessing_dict}
       - ${dataset_description.classification_dict}
 
-  CopyFullPreparedTargets:
-    _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets
-
   DropPointsByClass:
     _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
-    classes_to_drop: ${dataset_description.classes_to_drop}
 
   CopyFullPos:
     _target_: myria3d.pctl.transforms.transforms.CopyFullPos
 
+  CopyFullPreparedTargets:
+    _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets
+
   GridSampling:
     _target_: torch_geometric.transforms.GridSampling
     _args_:
@@ -63,7 +61,6 @@ eval:
     _args_:
       - 40000
 
-  # For interpolation
   CopySampledPos:
     _target_: myria3d.pctl.transforms.transforms.CopySampledPos
 
@@ -72,12 +69,11 @@ eval:
   
 predict:
 
-  CopyFullPos:
-    _target_: myria3d.pctl.transforms.transforms.CopyFullPos
-
   DropPointsByClass:
     _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
-    classes_to_drop: ${dataset_description.classes_to_drop}
+
+  CopyFullPos:
+    _target_: myria3d.pctl.transforms.transforms.CopyFullPos
 
   GridSampling:
     _target_: torch_geometric.transforms.GridSampling
diff --git a/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml b/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml
index 6d7a68b2..8f955119 100644
--- a/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml
+++ b/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml
@@ -2,7 +2,6 @@ _convert_: all  # For omegaconf struct to be converted to python dictionnaries
 # classification_preprocessing_dict = {source_class_code_int: target_class_code_int},
 classification_preprocessing_dict: {59: 6, 50: 1}
 # classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order).
-classes_to_drop: []
 classification_dict: {1: "unclassified", 2: "ground", 6: "building"}
 
 # Input and output dims of neural net are dataset dependant:
diff --git a/configs/dataset_description/20220607_151_dalles_proto.yaml b/configs/dataset_description/20220607_151_dalles_proto.yaml
index 9bf378f7..f1d1de24 100644
--- a/configs/dataset_description/20220607_151_dalles_proto.yaml
+++ b/configs/dataset_description/20220607_151_dalles_proto.yaml
@@ -5,13 +5,8 @@ _convert_: all  # For omegaconf struct to be converted to python dictionnaries
 # 160: antenna -> lasting_above
 # 161: wind_turbines -> lasting_above
 # 162: pylon -> lasting_above
-
-# Noise points have class 65 - this will be the defaut for inference in production.
-# Dropped points will be ignored in the inference process but still included in the final output cloud.
-classes_to_drop: [65]
-# Reduced classification dict:
-# classification_preprocessing_dict: {3: 5, 4: 5, 64:1, 65:1, 160: 64, 161: 64, 162: 64}
-# Complete classification dict since some trash classes are leftover.
+# 65: noise -> kept as 65, the artefact code (to ignore these points in the inference process, but they will still be included in the final output cloud).
+# Some trash classes were left in this dataset. We do not drop them (i.e. map them to 65) to avoid unintended conflict in production.
 classification_preprocessing_dict: {3: 5, 4: 5, 160: 64, 161: 64, 162: 64, 0: 1, 7: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 64: 1, 66: 1, 67: 1, 77: 1, 155: 1, 204: 1}
 
 # classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order).
diff --git a/configs/model/criterion/CrossEntropyLoss.yaml b/configs/model/criterion/CrossEntropyLoss.yaml
index 5cd60a5b..63f22ffe 100644
--- a/configs/model/criterion/CrossEntropyLoss.yaml
+++ b/configs/model/criterion/CrossEntropyLoss.yaml
@@ -1,2 +1,3 @@
 _target_: torch.nn.CrossEntropyLoss
-label_smoothing: 0.0
\ No newline at end of file
+label_smoothing: 0.0
+ignore_index: 65  # artefacts are mapped to 65 by convention
\ No newline at end of file
diff --git a/configs/model/criterion/WeightedCrossEntropyLoss.yaml b/configs/model/criterion/WeightedCrossEntropyLoss.yaml
index 76d04362..893dc4bf 100644
--- a/configs/model/criterion/WeightedCrossEntropyLoss.yaml
+++ b/configs/model/criterion/WeightedCrossEntropyLoss.yaml
@@ -1,5 +1,6 @@
 _target_: torch.nn.CrossEntropyLoss
 label_smoothing: 0.0
+ignore_index: 65  # artefacts are mapped to 65 by convention
 weight:
   _target_: torch.FloatTensor
   _args_: 
diff --git a/docs/source/tutorials/make_predictions.md b/docs/source/tutorials/make_predictions.md
index 41abd538..e04926ea 100644
--- a/docs/source/tutorials/make_predictions.md
+++ b/docs/source/tutorials/make_predictions.md
@@ -79,6 +79,6 @@ This, however, comes with a large computation price. For instance, `predict.subt
 
 ### Ignoring artefacts points during inference
 
-Lidar acquisition may have produced artefacts points. If these points were identified with one (or several) classification code(s), they can be ignored during inference. These points will still be present in the output cloud, but will not negatively disturb model inference. Note that they will still have class probabilities, obtained from their non-artefacts closest neighboors.
+Lidar acquisition may have produced artefact points. If these points were identified with one (or several) classification code(s), they can be ignored during inference. These points will still be present in the output cloud, but will not negatively disturb model inference. They will keep their original class in the predicted classification dimension, and will have null probabilities and entropy.
 
-In the configuration, data transforms are used to drop these points according to the `dataset_description.classes_to_drop` parameter. By default, `dataset_description.classes_to_drop=[65]` where 65 is the defaut code used to flag artefact. Note: you may need to use quotes when overriding this parameter via CLI.
+In the configuration, data transforms are used to drop points with class 65. By convention, 65 flags Lidar artefact points. Additional classes may be mapped to 65 via the `dataset_description.classification_preprocessing_dict` parameter, so that they are ignored during inference as well. Note: you may need to use quotes when overriding this parameter via CLI.
diff --git a/myria3d/models/interpolation.py b/myria3d/models/interpolation.py
index b5c85fca..c2394ac5 100644
--- a/myria3d/models/interpolation.py
+++ b/myria3d/models/interpolation.py
@@ -59,21 +59,23 @@ def load_full_las_for_update(self, src_las: str) -> np.ndarray:
         Args:
             filepath (str): Path to LAS for which predictions are made.
         """
-        # self.current_f = filepath
+        # The predicted classification channel is created as a copy of Classification and is not reset afterwards.
+        # Slight risk of interaction with previous values, but it is expected that all non-artefact values are updated.
+
         pipeline = get_pdal_reader(src_las)
         for proba_channel_to_create in self.probas_to_save:
             pipeline |= pdal.Filter.ferry(dimensions=f"=>{proba_channel_to_create}")
             pipeline |= pdal.Filter.assign(value=f"{proba_channel_to_create}=0")
 
         if self.predicted_classification_channel:
-            # Copy from Classification to preserve data type.
+            # Copy from Classification to preserve data type
+            # Also preserves values of artefacts.
             if self.predicted_classification_channel != "Classification":
                 pipeline |= pdal.Filter.ferry(dimensions=f"Classification=>{self.predicted_classification_channel}")
-            # Reset channel.
-            pipeline |= pdal.Filter.assign(value=f"{self.predicted_classification_channel}=0")
 
         if self.entropy_channel:
-            pipeline |= pdal.Filter.ferry(dimensions=f"=>{self.entropy_channel}") | pdal.Filter.assign(value=f"{self.entropy_channel}=0")
+            pipeline |= pdal.Filter.ferry(dimensions=f"=>{self.entropy_channel}")
+            pipeline |= pdal.Filter.assign(value=f"{self.entropy_channel}=0")
 
         pipeline.execute()
         return pipeline.arrays[0]
@@ -99,12 +101,14 @@ def reduce_predicted_logits(self, nb_points) -> torch.Tensor:
         del self.idx_in_full_cloud_list
 
         # We scatter_sum logits based on idx, in case there are multiple predictions for a point.
-        # scatter_sum reorders logitsbased on index,they therefore match las order.
+        # scatter_sum reorders logits based on index; they therefore match las order.
         reduced_logits = torch.zeros((nb_points, logits.size(1)))
         scatter_sum(logits, torch.from_numpy(idx_in_full_cloud), out=reduced_logits, dim=0)
         # reduced_logits contains logits ordered by their idx in original cloud !
-        # Warning : some points may not contain any predictions if they were in small areas.
-        return reduced_logits
+        # We need to select the points for which we have a prediction via idx_in_full_cloud.
+        # NB1 : some points may not contain any predictions if they were in small areas.
+
+        return reduced_logits[idx_in_full_cloud], idx_in_full_cloud
 
     @torch.no_grad()
     def reduce_predictions_and_save(self, raw_path: str, output_dir: str) -> str:
@@ -122,7 +126,7 @@ def reduce_predictions_and_save(self, raw_path: str, output_dir: str) -> str:
         basename = os.path.basename(raw_path)
         # Read number of points only from las metadata in order to minimize memory usage
         nb_points = get_pdal_info_metadata(raw_path)["count"]
-        logits = self.reduce_predicted_logits(nb_points)
+        logits, idx_in_full_cloud = self.reduce_predicted_logits(nb_points)
 
         probas = torch.nn.Softmax(dim=1)(logits)
 
@@ -137,10 +141,12 @@ def reduce_predictions_and_save(self, raw_path: str, output_dir: str) -> str:
 
         for idx, class_name in enumerate(self.classification_dict.values()):
             if class_name in self.probas_to_save:
-                las[class_name] = probas[:, idx]
+                # NB: Values for which we do not have a prediction (i.e. artefacts) get null probabilities.
+                las[class_name][idx_in_full_cloud] = probas[:, idx]
 
         if self.predicted_classification_channel:
-            las[self.predicted_classification_channel] = preds
+            # NB: Values for which we do not have a prediction (i.e. artefacts) keep their original class.
+            las[self.predicted_classification_channel][idx_in_full_cloud] = preds
             log.info(
                 f"Saving predicted classes to channel {self.predicted_classification_channel}."
                 "Channel name can be changed by setting `predict.interpolator.predicted_classification_channel`."
@@ -148,11 +154,14 @@ def reduce_predictions_and_save(self, raw_path: str, output_dir: str) -> str:
             del preds
 
         if self.entropy_channel:
-            las[self.entropy_channel] = Categorical(probs=probas).entropy()
+            # NB: Values for which we do not have a prediction (i.e. artefacts) get null entropy.
+            las[self.entropy_channel][idx_in_full_cloud] = Categorical(probs=probas).entropy()
             log.info(
                 f"Saving Shannon entropy of probabilities to channel {self.entropy_channel}."
                 "Channel name can be changed by setting `predict.interpolator.entropy_channel`"
             )
+        del idx_in_full_cloud
+
         os.makedirs(output_dir, exist_ok=True)
         out_f = os.path.join(output_dir, basename)
         out_f = os.path.abspath(out_f)
diff --git a/myria3d/pctl/transforms/transforms.py b/myria3d/pctl/transforms/transforms.py
index da7d565f..ad9f665c 100755
--- a/myria3d/pctl/transforms/transforms.py
+++ b/myria3d/pctl/transforms/transforms.py
@@ -11,6 +11,8 @@
 
 log = utils.get_logger(__name__)
 
+COMMON_CODE_FOR_ALL_ARTEFACTS = 65
+
 
 class ToTensor(BaseTransform):
     """Turn np.arrays specified by their keys into Tensor."""
@@ -214,24 +216,18 @@ def _set_preprocessing_mapper(self, classification_preprocessing_dict):
     def _set_mapper(self, classification_dict):
         """Set mapper from source classification code to consecutive integers."""
         d = {class_code: class_index for class_index, class_code in enumerate(classification_dict.keys())}
+        d.update({65: 65})  # code 65 is for artefacts and is used in DropPointsByClass.
         self.mapper = np.vectorize(lambda class_code: d.get(class_code))
 
 
 class DropPointsByClass(BaseTransform):
-    """Drop points"""
-
-    def __init__(self, classes_to_drop=None):
-        self.classes_to_drop = classes_to_drop
-        if np.isscalar(self.classes_to_drop):
-            self.classes_to_drop = [self.classes_to_drop]
-        if self.classes_to_drop:
-            self.classes_to_drop = torch.Tensor(self.classes_to_drop)
+    """Drop points with class 65 (i.e. artefacts, which are mapped to code 65 by convention)"""
 
     def __call__(self, data):
-        if self.classes_to_drop:
-            choice = torch.logical_not(torch.isin(data.y, self.classes_to_drop))
-            data = subsample_data(data, num_nodes=data.num_nodes, choice=choice)
+        points_to_drop = torch.isin(data.y, COMMON_CODE_FOR_ALL_ARTEFACTS)
+        if points_to_drop.sum() > 0:
+            points_to_keep = torch.logical_not(points_to_drop)
+            data = subsample_data(data, num_nodes=data.num_nodes, choice=points_to_keep)
+            # Here we also subsample these idx since we do not need to interpolate these points back
+            data.idx_in_original_cloud = data.idx_in_original_cloud[points_to_keep]
         return data
-
-    def __repr__(self):
-        return "{}()".format(self.__class__.__name__)

From 4d3a0f28e9d98460e6523f959fc86863790f3e4e Mon Sep 17 00:00:00 2001
From: CharlesGaydon <charles.gaydon@gmail.com>
Date: Tue, 31 Jan 2023 09:37:43 +0100
Subject: [PATCH 06/10] Update default config

---
 .github/workflows/cicd.yaml                   |  2 +-
 docs/source/apidoc/default_config.yml         | 48 ++++++++++++-------
 package_metadata.yaml                         |  2 +-
 run.py                                        |  2 +-
 ..._Myria3DV3.1.0_predict_config_V3.3.0.yaml} | 13 +++--
 5 files changed, 42 insertions(+), 25 deletions(-)
 rename trained_model_assets/{proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.2.0.yaml => proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml} (95%)

diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml
index b00bbc76..d723cd97 100644
--- a/.github/workflows/cicd.yaml
+++ b/.github/workflows/cicd.yaml
@@ -49,7 +49,7 @@ jobs:
         myria3d
         python run.py
         --config-path /inputs/
-        --config-name proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.2.0
+        --config-name proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0
         predict.ckpt_path=/inputs/proto151_V2.0_epoch_100_Myria3DV3.1.0.ckpt
         predict.src_las=/inputs/792000_6272000_subset_buildings.las
         predict.output_dir=/outputs/
diff --git a/docs/source/apidoc/default_config.yml b/docs/source/apidoc/default_config.yml
index dad8a66b..acee234b 100644
--- a/docs/source/apidoc/default_config.yml
+++ b/docs/source/apidoc/default_config.yml
@@ -25,16 +25,20 @@ datamodule:
           _args_:
           - ${dataset_description.classification_preprocessing_dict}
           - ${dataset_description.classification_dict}
+        DropPointsByClass:
+          _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
         GridSampling:
           _target_: torch_geometric.transforms.GridSampling
           _args_:
           - 0.25
-        FixedPoints:
-          _target_: torch_geometric.transforms.FixedPoints
+        MinimumNumNodes:
+          _target_: myria3d.pctl.transforms.transforms.MinimumNumNodes
           _args_:
-          - 12500
-          replace: false
-          allow_duplicates: true
+          - 300
+        MaximumNumNodes:
+          _target_: myria3d.pctl.transforms.transforms.MaximumNumNodes
+          _args_:
+          - 40000
         Center:
           _target_: torch_geometric.transforms.Center
       eval:
@@ -43,37 +47,45 @@ datamodule:
           _args_:
           - ${dataset_description.classification_preprocessing_dict}
           - ${dataset_description.classification_dict}
-        CopyFullPreparedTargets:
-          _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets
+        DropPointsByClass:
+          _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
         CopyFullPos:
           _target_: myria3d.pctl.transforms.transforms.CopyFullPos
+        CopyFullPreparedTargets:
+          _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets
         GridSampling:
           _target_: torch_geometric.transforms.GridSampling
           _args_:
           - 0.25
-        FixedPoints:
-          _target_: torch_geometric.transforms.FixedPoints
+        MinimumNumNodes:
+          _target_: myria3d.pctl.transforms.transforms.MinimumNumNodes
+          _args_:
+          - 300
+        MaximumNumNodes:
+          _target_: myria3d.pctl.transforms.transforms.MaximumNumNodes
           _args_:
-          - 12500
-          replace: false
-          allow_duplicates: true
+          - 40000
         CopySampledPos:
           _target_: myria3d.pctl.transforms.transforms.CopySampledPos
         Center:
           _target_: torch_geometric.transforms.Center
       predict:
+        DropPointsByClass:
+          _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
         CopyFullPos:
           _target_: myria3d.pctl.transforms.transforms.CopyFullPos
         GridSampling:
           _target_: torch_geometric.transforms.GridSampling
           _args_:
           - 0.25
-        FixedPoints:
-          _target_: torch_geometric.transforms.FixedPoints
+        MinimumNumNodes:
+          _target_: myria3d.pctl.transforms.transforms.MinimumNumNodes
+          _args_:
+          - 300
+        MaximumNumNodes:
+          _target_: myria3d.pctl.transforms.transforms.MaximumNumNodes
           _args_:
-          - 12500
-          replace: false
-          allow_duplicates: true
+          - 40000
         CopySampledPos:
           _target_: myria3d.pctl.transforms.transforms.CopySampledPos
         Center:
@@ -137,7 +149,6 @@ dataset_description:
     57: 1
     58: 1
     64: 1
-    65: 1
     66: 1
     67: 1
     77: 1
@@ -208,6 +219,7 @@ model:
   criterion:
     _target_: torch.nn.CrossEntropyLoss
     label_smoothing: 0.0
+    ignore_index: 65
   _target_: myria3d.models.model.Model
   d_in: ${dataset_description.d_in}
   num_classes: ${dataset_description.num_classes}
diff --git a/package_metadata.yaml b/package_metadata.yaml
index c9074a07..ed4ff614 100644
--- a/package_metadata.yaml
+++ b/package_metadata.yaml
@@ -1,4 +1,4 @@
-__version__: "3.2.5"
+__version__: "3.3.0"
 __name__: "myria3d"
 __url__: "https://github.com/IGNF/myria3d"
 __description__: "Multiclass Semantic Segmentation for Lidar Point Cloud"
diff --git a/run.py b/run.py
index 3a8791fc..af1bdaf3 100755
--- a/run.py
+++ b/run.py
@@ -20,7 +20,7 @@
 
 TASK_NAME_DETECTION_STRING = "task.task_name="
 DEFAULT_DIRECTORY = "trained_model_assets/"
-DEFAULT_CONFIG_FILE = "proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.2.0.yaml"
+DEFAULT_CONFIG_FILE = "proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml"
 DEFAULT_CHECKPOINT = "proto151_V2.0_epoch_100_Myria3DV3.1.0.ckpt"
 DEFAULT_ENV = "placeholder.env"
 
diff --git a/trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.2.0.yaml b/trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml
similarity index 95%
rename from trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.2.0.yaml
rename to trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml
index fbc32d7a..e89f39b1 100644
--- a/trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.2.0.yaml
+++ b/trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml
@@ -28,6 +28,8 @@ datamodule:
           _args_:
           - ${dataset_description.classification_preprocessing_dict}
           - ${dataset_description.classification_dict}
+        DropPointsByClass:
+          _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
         GridSampling:
           _target_: torch_geometric.transforms.GridSampling
           _args_:
@@ -48,10 +50,12 @@ datamodule:
           _args_:
           - ${dataset_description.classification_preprocessing_dict}
           - ${dataset_description.classification_dict}
-        CopyFullPreparedTargets:
-          _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets
+        DropPointsByClass:
+          _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
         CopyFullPos:
           _target_: myria3d.pctl.transforms.transforms.CopyFullPos
+        CopyFullPreparedTargets:
+          _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets
         GridSampling:
           _target_: torch_geometric.transforms.GridSampling
           _args_:
@@ -69,6 +73,8 @@ datamodule:
         Center:
           _target_: torch_geometric.transforms.Center
       predict:
+        DropPointsByClass:
+          _target_: myria3d.pctl.transforms.transforms.DropPointsByClass
         CopyFullPos:
           _target_: myria3d.pctl.transforms.transforms.CopyFullPos
         GridSampling:
@@ -156,7 +162,6 @@ dataset_description:
     57: 1
     58: 1
     64: 1
-    65: 1
     66: 1
     67: 1
     77: 1
@@ -270,6 +275,6 @@ predict:
     _target_: myria3d.models.interpolation.Interpolator
     interpolation_k: 10
     classification_dict: ${dataset_description.classification_dict}
-    probas_to_save: [building,ground,vegetation,unclassified]
+    probas_to_save: [building,ground]
     predicted_classification_channel: confidence
     entropy_channel: entropy

From ed26538ae9f8c4dfc2c3cf1e0614adf913906afb Mon Sep 17 00:00:00 2001
From: CharlesGaydon <charles.gaydon@gmail.com>
Date: Tue, 31 Jan 2023 09:41:23 +0100
Subject: [PATCH 07/10] Update default config call in cicd

---
 .github/workflows/cicd.yaml | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml
index d723cd97..a2b0e179 100644
--- a/.github/workflows/cicd.yaml
+++ b/.github/workflows/cicd.yaml
@@ -36,42 +36,39 @@ jobs:
         pytest -rA -v
         --ignore=actions-runner
 
-
-    # Always run with --ipc=host and --shm-size=2gb (at least) to enable sufficient shared memory when predicting on large data
-    # predict.subtile_overlap specifies overlap between adjacent samples (in meters).
-    - name: Example inference run via Docker with inference-time subtiles overlap to smooth-out results.
+    # IMPORTANT: Always run images with --ipc=host and --shm-size=2gb (at least) to enable 
+    # sufficient shared memory when predicting on large files.
+    - name: Example inference run via Docker with default config and checkpoint
       run: >
         docker run
-        -v /var/data/cicd/CICD_github_assets/myria3d_V3.2.0/inputs/:/inputs/
-        -v /var/data/cicd/CICD_github_assets/myria3d_V3.2.0/outputs/:/outputs/
+        -v /var/data/cicd/CICD_github_assets/myria3d_V3.3.0/inputs/:/inputs/
+        -v /var/data/cicd/CICD_github_assets/myria3d_V3.3.0/outputs/:/outputs/
         --ipc=host
         --shm-size=2gb
         myria3d
         python run.py
-        --config-path /inputs/
-        --config-name proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0
-        predict.ckpt_path=/inputs/proto151_V2.0_epoch_100_Myria3DV3.1.0.ckpt
         predict.src_las=/inputs/792000_6272000_subset_buildings.las
         predict.output_dir=/outputs/
-        predict.subtile_overlap=25
-        datamodule.batch_size=10
-        predict.interpolator.probas_to_save=[building,unclassified]
         task.task_name=predict
 
-    - name: Example inference run via Docker with default config and checkpoint
+    # predict.subtile_overlap specifies overlap between adjacent samples (in meters).
+    - name: Example inference run via Docker with inference-time subtiles overlap to smooth-out results.
       run: >
         docker run
-        -v /var/data/cicd/CICD_github_assets/myria3d_V3.2.0/inputs/:/inputs/
-        -v /var/data/cicd/CICD_github_assets/myria3d_V3.2.0/outputs/:/outputs/
+        -v /var/data/cicd/CICD_github_assets/myria3d_V3.3.0/inputs/:/inputs/
+        -v /var/data/cicd/CICD_github_assets/myria3d_V3.3.0/outputs/:/outputs/
         --ipc=host
         --shm-size=2gb
         myria3d
         python run.py
+        --config-path /inputs/
+        --config-name proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0
+        predict.ckpt_path=/inputs/proto151_V2.0_epoch_100_Myria3DV3.1.0.ckpt
         predict.src_las=/inputs/792000_6272000_subset_buildings.las
         predict.output_dir=/outputs/
         predict.subtile_overlap=25
         datamodule.batch_size=10
-        predict.interpolator.probas_to_save=[building,unclassified]
+        predict.interpolator.probas_to_save=[building,ground]
         task.task_name=predict
 
     - name: Check code neatness (linter)

From 3bbc77b5a7bc246c0e20c34e80f666b8a2efc61d Mon Sep 17 00:00:00 2001
From: CharlesGaydon <charles.gaydon@gmail.com>
Date: Tue, 31 Jan 2023 09:44:51 +0100
Subject: [PATCH 08/10] Update unit test for DropPointsByClass

---
 myria3d/pctl/transforms/transforms.py |  3 ++-
 tests/myria3d/data/test_transforms.py | 12 +-----------
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/myria3d/pctl/transforms/transforms.py b/myria3d/pctl/transforms/transforms.py
index ad9f665c..e3b6db70 100755
--- a/myria3d/pctl/transforms/transforms.py
+++ b/myria3d/pctl/transforms/transforms.py
@@ -229,5 +229,6 @@ def __call__(self, data):
             points_to_keep = torch.logical_not(points_to_drop)
             data = subsample_data(data, num_nodes=data.num_nodes, choice=points_to_keep)
             # Here we also subsample these idx since we do not need to interpolate these points back
-            data.idx_in_original_cloud = data.idx_in_original_cloud[points_to_keep]
+            if "idx_in_original_cloud" in data:
+                data.idx_in_original_cloud = data.idx_in_original_cloud[points_to_keep]
         return data
diff --git a/tests/myria3d/data/test_transforms.py b/tests/myria3d/data/test_transforms.py
index f3779907..988444a5 100644
--- a/tests/myria3d/data/test_transforms.py
+++ b/tests/myria3d/data/test_transforms.py
@@ -35,7 +35,7 @@ def test_DropPointsByClass():
     y = torch.Tensor([1, 65, 65, 2, 65])
     x = torch.rand((5, 3))
     data = torch_geometric.data.Data(x=x, y=y)
-    drop_transforms = DropPointsByClass([65])
+    drop_transforms = DropPointsByClass()
     transformed_data = drop_transforms(data)
     assert torch.equal(transformed_data.y, torch.Tensor([1, 2]))
     assert transformed_data.x.size(0) == 2
@@ -47,13 +47,3 @@ def test_DropPointsByClass():
     transformed_data = drop_transforms(data)
     assert torch.equal(data.x, transformed_data.x)
     assert torch.equal(data.y, transformed_data.y)
-
-
-def test_DropPointsByClass_creation():
-    scalar = 42
-    a = DropPointsByClass(scalar)
-    b = DropPointsByClass([scalar])
-    assert torch.equal(a.classes_to_drop, b.classes_to_drop)
-
-    c = DropPointsByClass(None)
-    assert c.classes_to_drop is None

From 9d6fa0c2b45ff24e8b71cdeac00b7f63bca2267f Mon Sep 17 00:00:00 2001
From: CharlesGaydon <charles.gaydon@gmail.com>
Date: Tue, 7 Feb 2023 06:44:02 +0100
Subject: [PATCH 09/10] Change typing of reduce_predicted_logits

---
 myria3d/models/interpolation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/myria3d/models/interpolation.py b/myria3d/models/interpolation.py
index c2394ac5..c526cf41 100644
--- a/myria3d/models/interpolation.py
+++ b/myria3d/models/interpolation.py
@@ -1,6 +1,6 @@
 import logging
 import os
-from typing import Dict, List, Literal, Optional, Union
+from typing import Dict, List, Literal, Optional, Tuple, Union
 
 import numpy as np
 import pdal
@@ -86,7 +86,7 @@ def store_predictions(self, logits, idx_in_original_cloud) -> None:
         self.idx_in_full_cloud_list += idx_in_original_cloud
 
     @torch.no_grad()
-    def reduce_predicted_logits(self, nb_points) -> torch.Tensor:
+    def reduce_predicted_logits(self, nb_points) -> Tuple[torch.Tensor, np.ndarray]:
         """Interpolate logits to points without predictions using an inverse-distance weightning scheme.
 
         Returns:

From 51e5815441ebb17a04ccfac9378e2168e6236161 Mon Sep 17 00:00:00 2001
From: CharlesGaydon <charles.gaydon@gmail.com>
Date: Tue, 7 Feb 2023 07:06:24 +0100
Subject: [PATCH 10/10] Update documentation for making inference

---
 docs/source/guides/development.md         | 15 ++++++++-------
 docs/source/tutorials/make_predictions.md |  8 ++++----
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/docs/source/guides/development.md b/docs/source/guides/development.md
index 9b619659..022d730d 100644
--- a/docs/source/guides/development.md
+++ b/docs/source/guides/development.md
@@ -2,9 +2,9 @@
 
 ## Code versionning
 
-Package version follows semantic versionning conventions and is defined in `setup.py`. 
+Package version follows semantic versioning conventions and is defined in `package_metadata.yaml`.
 
-Releases are generated when new high-level functionnality are implemented (e.g. a new step in the production process), with a documentation role. Production-ready code is fast-forwarded in the `prod` branch when needed to match the `main` branch. When updating the `prod` branch, one should move the tag `prod-release-tag` alongside to the [related release](https://github.com/IGNF/myria3d/releases/tag/prod-release-tag).
+Releases are created when new high-level functionalities are implemented (e.g. a new step in the production process), with a documentation role. A `prod-release-tag` is created that tracks an _arbitrary_ commit, and serves as a means to make a few models, model cards, and configs accessible via its associated [release](https://github.com/IGNF/myria3d/releases/tag/prod-release-tag).
 
 ## Tests
 
@@ -17,15 +17,16 @@ python -m pytest -rA -v
 
 ## Continuous Integration (CI)
 
-New features are developped in ad-hoc branches (e.g. `dev-Vx.y.z-Feature-Name`).
+New features are developed in ad-hoc branches (e.g. `2023MMDD-Feature-Name`).
 
-CI tests are run for pull request to merge on either the `main` branches, and on pushes to `main`, and `prod` branches. The CI workflow builds a docker image, runs linting, and tests the code.
+CI tests are run on pushes and pull requests to the `main` branch. The workflow builds a docker image, runs linting, and tests the code.
 
 ## Continuous Delivery (CD)
 
-When the event is a push and not a merge request, this means that there was either a direct push to `main`|`prod` or that a merge request was accepted. In this case, if the CI workflow passes (i.e. tests pass and code is PEP8 compliant), the created docker image is tagged with the branch name, resulting in e.g. a `myria3d:prod` image that is up to date with the branch content. 
-See [../tutorials/use.md] for how to leverage such image to run the app.
+In case of push / accepted merge to the `main` branch, and if the CI workflow is successful (i.e. docker build is complete, tests pass, and code is PEP8 compliant), a docker image is pushed to an in-house Nexus image repository.
 
-Additionnaly, pushes on the `main` branch build this library documentation, which is hosted on Github pages.
+Additionally, images may be built for feature branches, for further testing / staging. Details are in workflow `cicd.yaml`.
 
+See [this tutorial](../tutorials/use.md) for how to leverage such an image to run the app.
 
+Additionally, pushes to the `main` branch build this library's documentation, which is hosted on GitHub Pages.
diff --git a/docs/source/tutorials/make_predictions.md b/docs/source/tutorials/make_predictions.md
index e04926ea..8341fd5c 100644
--- a/docs/source/tutorials/make_predictions.md
+++ b/docs/source/tutorials/make_predictions.md
@@ -1,15 +1,15 @@
 # Performing inference on new data
 
-Refer to [this tutorial](./install_on_linux.md) for how to setup a virtual environment and install the library.
+Refer to the tutorials ([Linux](./install_on_linux.md), [Windows](./install_on_wsl2.md)) for installation instructions.
 
 To run inference, you will need:
 - A source cloud point in LAS format on which to infer classes and probabilites. Sample data from the French "Lidar HD" project can be downloaded at [this address](https://geoservices.ign.fr/lidarhd).
 - A checkpoint of a trained lightning module implementing model logic (class `myria3d.models.model.Model`)
 - A minimal yaml configuration specifying parameters. We use [hydra](https://hydra.cc/) to manage configurations, and this yaml results from the model training. The `datamodule` and `model` parameters groups must match dataset characteristics and model training settings.  The `predict` parameters group specifies path to models and data as well as batch size (N=50 works well, the larger the faster) and use of gpu (optionnal). For hints on what to modify, see the `experiment/predict.yaml` file.
 
-## Run inference from installed package
+> **A default model and its configuration are embedded directly in code under folder `trained_model_assets`.** They are expected to always be compatible with the code base, and updated as needed in case of e.g. change of configuration format or model implementation.
 
-From the package root, run `pip install -e .` to install the package locally and freeze its current version.
+## Run inference from source
 
 Then, fill out the {missing parameters} below and run: 
 
@@ -56,7 +56,7 @@ docker run \
 --ipc=host \
 --gpus=all \
 --shm-size=2gb \
-myria3d.predict {...config paths & options...}
+python run.py {...config paths & options...}
 ```
 
 ## Additional options for prediction