From da1ad291ea23374a3b4c967c6239d786ba10a068 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Mon, 16 Jan 2023 12:17:55 +0100 Subject: [PATCH 01/10] Points Budget is the new defaut config --- configs/config.yaml | 2 +- configs/datamodule/transforms/default.yaml | 2 +- .../{default.yaml => fixed_num_points.yaml} | 0 ...om_subsampling.yaml => points_budget.yaml} | 0 ...yG-Overfit.yaml => RandLaNet-Overfit.yaml} | 2 +- .../RandLaNet-PyG-Overfit-NoRS.yaml | 25 ------------------- ...ml => RandLaNet_base_run_FR-MultiGPU.yaml} | 2 +- ...-SQRTOfInverseFreqencyClassWeighting.yaml} | 5 ++-- ...t_NoRS.yaml => RandLaNet_base_run_FR.yaml} | 8 +++--- docs/source/guides/train_new_model.md | 10 ++++---- 10 files changed, 16 insertions(+), 40 deletions(-) rename configs/datamodule/transforms/preparations/{default.yaml => fixed_num_points.yaml} (100%) rename configs/datamodule/transforms/preparations/{no_random_subsampling.yaml => points_budget.yaml} (100%) rename configs/experiment/{RandLaNet-PyG-Overfit.yaml => RandLaNet-Overfit.yaml} (94%) delete mode 100755 configs/experiment/RandLaNet-PyG-Overfit-NoRS.yaml rename configs/experiment/{RandLaNet_base_run_FR_pyg_randla_net-MultiGPU.yaml => RandLaNet_base_run_FR-MultiGPU.yaml} (84%) rename configs/experiment/{RandLaNet_base_run_FR_pyg_randla_net-SQRT-ICFW.yaml => RandLaNet_base_run_FR-SQRTOfInverseFreqencyClassWeighting.yaml} (52%) rename configs/experiment/{RandLaNet_base_run_FR_pyg_randla_net_NoRS.yaml => RandLaNet_base_run_FR.yaml} (57%) diff --git a/configs/config.yaml b/configs/config.yaml index 134841b2..80463192 100755 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -29,7 +29,7 @@ defaults: - datamodule: hdf5_datamodule.yaml - dataset_description: 20220607_151_dalles_proto.yaml # describes input features and classes - callbacks: default.yaml # set this to null if you don't want to use callbacks - - model: pyg_randla_net_model.yaml # other option is pyg_randla_net_model + - model: pyg_randla_net_model.yaml - 
logger: comet # set logger here or use command line (e.g. `python run.py logger=wandb`) - task: default.yaml diff --git a/configs/datamodule/transforms/default.yaml b/configs/datamodule/transforms/default.yaml index c5f2e980..59605c5c 100644 --- a/configs/datamodule/transforms/default.yaml +++ b/configs/datamodule/transforms/default.yaml @@ -1,5 +1,5 @@ defaults: - - preparations: default.yaml + - preparations: points_budget.yaml - augmentations: none.yaml - normalizations: default.yaml diff --git a/configs/datamodule/transforms/preparations/default.yaml b/configs/datamodule/transforms/preparations/fixed_num_points.yaml similarity index 100% rename from configs/datamodule/transforms/preparations/default.yaml rename to configs/datamodule/transforms/preparations/fixed_num_points.yaml diff --git a/configs/datamodule/transforms/preparations/no_random_subsampling.yaml b/configs/datamodule/transforms/preparations/points_budget.yaml similarity index 100% rename from configs/datamodule/transforms/preparations/no_random_subsampling.yaml rename to configs/datamodule/transforms/preparations/points_budget.yaml diff --git a/configs/experiment/RandLaNet-PyG-Overfit.yaml b/configs/experiment/RandLaNet-Overfit.yaml similarity index 94% rename from configs/experiment/RandLaNet-PyG-Overfit.yaml rename to configs/experiment/RandLaNet-Overfit.yaml index e017fbac..384b1667 100755 --- a/configs/experiment/RandLaNet-PyG-Overfit.yaml +++ b/configs/experiment/RandLaNet-Overfit.yaml @@ -8,7 +8,7 @@ defaults: logger: comet: - experiment_name: "RandLaNetOverfit" + experiment_name: "RandLaNet-Overfit" trainer: min_epochs: 100 diff --git a/configs/experiment/RandLaNet-PyG-Overfit-NoRS.yaml b/configs/experiment/RandLaNet-PyG-Overfit-NoRS.yaml deleted file mode 100755 index 5d298370..00000000 --- a/configs/experiment/RandLaNet-PyG-Overfit-NoRS.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# @package _global_ - -# Nota : call "python myria3d/pctl/dataset/toy_dataset.py" to create a toy dataset before 
running this. -defaults: - - override /model: pyg_randla_net_model.yaml - - override /datamodule/transforms/preparations: no_random_subsampling.yaml - - -logger: - comet: - experiment_name: "RandLaNetOverfit" - -trainer: - min_epochs: 100 - max_epochs: 100 - overfit_batches: 1 - num_sanity_val_steps: 0 - -datamodule: - batch_size: 12 - num_workers: 2 - # runtime.cwd is where application is run from e.g. where run.py is. - data_dir: "${hydra:runtime.cwd}/tests/data/" - split_csv_path: "${hydra:runtime.cwd}/tests/data/toy_dataset_src/toy_dataset_split.csv" - hdf5_file_path: "${hydra:runtime.cwd}/tests/data/toy_dataset.hdf5" diff --git a/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net-MultiGPU.yaml b/configs/experiment/RandLaNet_base_run_FR-MultiGPU.yaml similarity index 84% rename from configs/experiment/RandLaNet_base_run_FR_pyg_randla_net-MultiGPU.yaml rename to configs/experiment/RandLaNet_base_run_FR-MultiGPU.yaml index 20cffcf5..5a9e8727 100755 --- a/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net-MultiGPU.yaml +++ b/configs/experiment/RandLaNet_base_run_FR-MultiGPU.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - RandLaNet_base_run_FR_pyg_randla_net.yaml + - RandLaNet_base_run_FR.yaml logger: comet: diff --git a/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net-SQRT-ICFW.yaml b/configs/experiment/RandLaNet_base_run_FR-SQRTOfInverseFreqencyClassWeighting.yaml similarity index 52% rename from configs/experiment/RandLaNet_base_run_FR_pyg_randla_net-SQRT-ICFW.yaml rename to configs/experiment/RandLaNet_base_run_FR-SQRTOfInverseFreqencyClassWeighting.yaml index 9bd08ddb..131ce2b1 100755 --- a/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net-SQRT-ICFW.yaml +++ b/configs/experiment/RandLaNet_base_run_FR-SQRTOfInverseFreqencyClassWeighting.yaml @@ -1,11 +1,12 @@ # @package _global_ defaults: - - RandLaNet_base_run_FR_pyg_randla_net.yaml + - RandLaNet_base_run_FR.yaml - override /model/criterion: WeightedCrossEntropyLoss.yaml logger: 
comet: - experiment_name: "RandLaNet_base_run_FR_pyg_randla_net-SQRT-ICFW" + experiment_name: "RandLaNet_base_run_FR-SQRT-ICFW" dataset_description: + # Sqrt(Inverse Frequency) of classes in defaut dataset (a.k.a. `151proto`). class_weights: [0.19,0.08,0.08,0.36,1.13,3.11,2.05] \ No newline at end of file diff --git a/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net_NoRS.yaml b/configs/experiment/RandLaNet_base_run_FR.yaml similarity index 57% rename from configs/experiment/RandLaNet_base_run_FR_pyg_randla_net_NoRS.yaml rename to configs/experiment/RandLaNet_base_run_FR.yaml index 0ec696e5..7799ed7d 100755 --- a/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net_NoRS.yaml +++ b/configs/experiment/RandLaNet_base_run_FR.yaml @@ -1,11 +1,11 @@ # @package _global_ defaults: - - RandLaNet_base_run_FR_pyg_randla_net.yaml - - override /datamodule/transforms/preparations: no_random_subsampling.yaml + - RandLaNet_base_run_FR.yaml + - override /datamodule/transforms/augmentations: light.yaml logger: comet: - experiment_name: "RandLaNet_base_run_FR_pyg_randla_net_NoRS-(BS10xMAX40000pts)" + experiment_name: "RandLaNet_base_run_FR-(BatchSize10xBudget(300pts-40000pts))" # Smaller BS : 10 x 40 000 (max) == 400 000 pts i.e. previous budget of 32 x 12 500pts. @@ -16,5 +16,5 @@ trainer: num_sanity_val_steps: 2 min_epochs: 100 max_epochs: 150 - accumulate_grad_batches: 3 # b/c larger clouds will not fit in memory with original BS. 
+ accumulate_grad_batches: 3 # b/c larger clouds will not fit in memory with original Batch Size # gpus: [1] diff --git a/docs/source/guides/train_new_model.md b/docs/source/guides/train_new_model.md index 12fa7cea..64bcead0 100644 --- a/docs/source/guides/train_new_model.md +++ b/docs/source/guides/train_new_model.md @@ -17,18 +17,18 @@ To test your setup and logging capabilities, you can try overfitting on a single To overfit on a single batch for 30 epochs, run: ```bash -python run.py experiment=RandLaNetDebug +python run.py experiment=RandLaNet-Overfit ``` ## Training -Define your experiment hyperparameters in an experiment file in the `configs/experiment` folder. You may stem from one of the provided experiment file (e.g. `RandLaNet_base_run_FR_pyg_randla_net.yaml`). In particular, you will need to define `dataset_description` to specify your classification task - see config `20220607_151_dalles_proto.yaml` for an example. +Define your experiment hyperparameters in an experiment file in the `configs/experiment` folder. You may stem from one of the provided experiment file (e.g. `RandLaNet_base_run_FR.yaml`). In particular, you will need to define `dataset_description` to specify your classification task - see config `20220607_151_dalles_proto.yaml` for an example. To run the full training and validation for French Lidar HD, run: ```bash -python run.py experiment=RandLaNet_base_run_FR_pyg_randla_net +python run.py experiment=RandLaNet_base_run_FR ``` After training, you model best checkpoints and hydra config will be saved in a `DATE/TIME/` subfolder of the `LOGS_DIR` you specified, with an associated hydra `config.yaml`. @@ -40,7 +40,7 @@ You can perfom this automatically before training by setting `trainer.auto_lr_fi ### Multi-GPUs -Multi-GPUs training is supported. Refer to e.g. experiment file `RandLaNet_base_run_FR_pyg_randla_net-MultiGPU.yaml` for pytorch lightning flags to activate it. +Multi-GPUs training is supported. Refer to e.g. 
experiment file `RandLaNet_base_run_FR-MultiGPU.yaml` for pytorch lightning flags to activate it. Multi-GPUs training effectively reduces training time by the number of GPUs used. Batch size might need to be reduced to keep a constant number of steps per epoch in DDP. ## Testing the model @@ -57,7 +57,7 @@ task.task_name="test" \ model.ckpt_path={/path/to/checkpoint.ckpt} \ trainer.gpus={0 for none, [i] to use GPU number i} \ ``` -ARguments `config-path` and `config-name` means you are using the saved configuration from your training, which contains the path to the prepared HDF5 dataset. +Arguments `config-path` and `config-name` means you are using the saved configuration from your training, which contains the path to the prepared HDF5 dataset. If you are using defaut configurations, you can call test using a custom experiment: From f1e5f73ed58711a5af9590bc5d6d94098cb9c8de Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Mon, 16 Jan 2023 14:35:52 +0100 Subject: [PATCH 02/10] Implement DropPointsByClass as well as its tests - make it the defaut behavior Tests pass for DropPointsByClass Copy full position BEFORE droping by class to avoid error at knn_interpolate More test for DropPointsByClass creation Flake8 --- .../preparations/fixed_num_points.yaml | 15 +++++ .../preparations/points_budget.yaml | 15 +++++ ...0220204_BuildingValidation_and_Ground.yaml | 1 + .../20220607_151_dalles_proto.yaml | 8 ++- docs/source/tutorials/make_predictions.md | 8 ++- docs/source/tutorials/prepare_dataset.md | 13 +++-- myria3d/pctl/transforms/transforms.py | 56 +++++++++---------- tests/myria3d/data/test_transforms.py | 36 ++++++++++-- 8 files changed, 112 insertions(+), 40 deletions(-) diff --git a/configs/datamodule/transforms/preparations/fixed_num_points.yaml b/configs/datamodule/transforms/preparations/fixed_num_points.yaml index 94d0709c..410b372f 100644 --- a/configs/datamodule/transforms/preparations/fixed_num_points.yaml +++ 
b/configs/datamodule/transforms/preparations/fixed_num_points.yaml @@ -1,12 +1,17 @@ # default preparations with grid sampling and random sampling. train: + TargetTransform: _target_: myria3d.pctl.transforms.transforms.TargetTransform _args_: - ${dataset_description.classification_preprocessing_dict} - ${dataset_description.classification_dict} + DropPointsByClass: + _target_: myria3d.pctl.transforms.transforms.DropPointsByClass + classes_to_drop: ${dataset_description.classes_to_drop} + GridSampling: _target_: torch_geometric.transforms.GridSampling _args_: @@ -23,12 +28,17 @@ train: _target_: torch_geometric.transforms.Center eval: + TargetTransform: _target_: myria3d.pctl.transforms.transforms.TargetTransform _args_: - ${dataset_description.classification_preprocessing_dict} - ${dataset_description.classification_dict} + DropPointsByClass: + _target_: myria3d.pctl.transforms.transforms.DropPointsByClass + classes_to_drop: ${dataset_description.classes_to_drop} + CopyFullPreparedTargets: _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets @@ -55,9 +65,14 @@ eval: _target_: torch_geometric.transforms.Center predict: + CopyFullPos: _target_: myria3d.pctl.transforms.transforms.CopyFullPos + DropPointsByClass: + _target_: myria3d.pctl.transforms.transforms.DropPointsByClass + classes_to_drop: ${dataset_description.classes_to_drop} + GridSampling: _target_: torch_geometric.transforms.GridSampling _args_: diff --git a/configs/datamodule/transforms/preparations/points_budget.yaml b/configs/datamodule/transforms/preparations/points_budget.yaml index 88b3ccbe..e954aea1 100644 --- a/configs/datamodule/transforms/preparations/points_budget.yaml +++ b/configs/datamodule/transforms/preparations/points_budget.yaml @@ -1,12 +1,17 @@ # default preparations with grid sampling and random sampling. 
train: + TargetTransform: _target_: myria3d.pctl.transforms.transforms.TargetTransform _args_: - ${dataset_description.classification_preprocessing_dict} - ${dataset_description.classification_dict} + DropPointsByClass: + _target_: myria3d.pctl.transforms.transforms.DropPointsByClass + classes_to_drop: ${dataset_description.classes_to_drop} + GridSampling: _target_: torch_geometric.transforms.GridSampling _args_: @@ -26,6 +31,7 @@ train: _target_: torch_geometric.transforms.Center eval: + TargetTransform: _target_: myria3d.pctl.transforms.transforms.TargetTransform _args_: @@ -35,6 +41,10 @@ eval: CopyFullPreparedTargets: _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets + DropPointsByClass: + _target_: myria3d.pctl.transforms.transforms.DropPointsByClass + classes_to_drop: ${dataset_description.classes_to_drop} + CopyFullPos: _target_: myria3d.pctl.transforms.transforms.CopyFullPos @@ -61,9 +71,14 @@ eval: _target_: torch_geometric.transforms.Center predict: + CopyFullPos: _target_: myria3d.pctl.transforms.transforms.CopyFullPos + DropPointsByClass: + _target_: myria3d.pctl.transforms.transforms.DropPointsByClass + classes_to_drop: ${dataset_description.classes_to_drop} + GridSampling: _target_: torch_geometric.transforms.GridSampling _args_: diff --git a/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml b/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml index 8f955119..6d7a68b2 100644 --- a/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml +++ b/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml @@ -2,6 +2,7 @@ _convert_: all # For omegaconf struct to be converted to python dictionnaries # classification_preprocessing_dict = {source_class_code_int: target_class_code_int}, classification_preprocessing_dict: {59: 6, 50: 1} # classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order). 
+classes_to_drop: [] classification_dict: {1: "unclassified", 2: "ground", 6: "building"} # Input and output dims of neural net are dataset dependant: diff --git a/configs/dataset_description/20220607_151_dalles_proto.yaml b/configs/dataset_description/20220607_151_dalles_proto.yaml index ee9f5704..cddb1bc1 100644 --- a/configs/dataset_description/20220607_151_dalles_proto.yaml +++ b/configs/dataset_description/20220607_151_dalles_proto.yaml @@ -6,10 +6,12 @@ _convert_: all # For omegaconf struct to be converted to python dictionnaries # 161: wind_turbines -> lasting_above # 162: pylon -> lasting_above -# Expectded classification dict: +# Noise points have class 65 - this will be the defaut for inference in production. +# Dropped points will be ignored in the inference process but still included in the final output cloud. +classes_to_drop: [65] +# Reduced classification dict: # classification_preprocessing_dict: {3: 5, 4: 5, 64:1, 65:1, 160: 64, 161: 64, 162: 64} - -# Additionnaly, artefacts as well as synthetic points (65, 66) are set to "unclassified" +# Complete classification dict since some trash classes are leftover. classification_preprocessing_dict: {3: 5, 4: 5, 160: 64, 161: 64, 162: 64, 0: 1, 7: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 64: 1, 65: 1, 66: 1, 67: 1, 77: 1, 155: 1, 204: 1} # classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order). diff --git a/docs/source/tutorials/make_predictions.md b/docs/source/tutorials/make_predictions.md index a1218e74..41abd538 100644 --- a/docs/source/tutorials/make_predictions.md +++ b/docs/source/tutorials/make_predictions.md @@ -75,4 +75,10 @@ One can control for which classes to save the probabilities. This is achieved wi To improve spatial regularity of the predicted probabilities, one can make inference on square receptive fields that have a non-null overlap with each other. 
This has the effect of smoothing out irregular predictions. The resulting classification is better looking, with more homogeneous predictions at the object level. To define an overlap between successive 50m*50m receptive fields, set `predict.subtile_overlap={value}`. -This, however, comes with a large computation price. For instance, `predict.subtile_overlap=25` means a 25m overlap on both x and y axes, which multiplies inference time by a factor of 4. \ No newline at end of file +This, however, comes with a large computation price. For instance, `predict.subtile_overlap=25` means a 25m overlap on both x and y axes, which multiplies inference time by a factor of 4. + +### Ignoring artefacts points during inference + +Lidar acquisition may have produced artefacts points. If these points were identified with one (or several) classification code(s), they can be ignored during inference. These points will still be present in the output cloud, but will not negatively disturb model inference. Note that they will still have class probabilities, obtained from their non-artefacts closest neighboors. + +In the configuration, data transforms are used to drop these points according to the `dataset_description.classes_to_drop` parameter. By default, `dataset_description.classes_to_drop=[65]` where 65 is the defaut code used to flag artefact. Note: you may need to use quotes when overriding this parameter via CLI. diff --git a/docs/source/tutorials/prepare_dataset.md b/docs/source/tutorials/prepare_dataset.md index efe56286..c9646672 100644 --- a/docs/source/tutorials/prepare_dataset.md +++ b/docs/source/tutorials/prepare_dataset.md @@ -6,10 +6,16 @@ The loading function is dataset dependant, and is `lidar_hd_pre_transform` by de It is adapted to the French Lidar HD data provided by IGN (see [the official page](https://geoservices.ign.fr/lidarhd) - link in French). 
Return number and color information (RGBI) are scaled to 0-1 interval, a NDVI and an average color ((R+G+B)/3) dimension are created, and points that may be occluded (as indicated by higher return number) have their color set to 0. -You may want to implement your own logic (e.g. with custom, additional features) in directory `points_pre_transform`. It then needs to be referenced similarly to `lidar_hd_pre_transform`. +You may want to implement your own logic (e.g. with custom, additional features) in directory `points_pre_transform`. It then needs to be referenced similarly to `lidar_hd_pre_transform`. +If you use your own classification convention , you will need to create a `dataset_description` configuration (for an example see `configs/dataset_description/20220607_151_dalles_proto.yaml`). -## Using your own data +Additionnaly, you can control cloud sampling parameters via two configurations: +- `configs/datamodule/transforms/preparations/points_budget.yaml`: (defaut) allows variable cloud size within lower and higher boundaries. +- `configs/datamodule/transforms/preparations/fixed_num_points.yaml`: (alternative) samples all clouds to a fixed size, allowing for duplicated points. + + +## Preparing the dataset Input point clouds need to be splitted in subtiles that can be digested by segmentation models. We found that a receptive field of 50m*50m was a good balance between context and memory intensity. For faster training, this split can be done once, to avoid loading large file in memory multiple times. @@ -21,8 +27,7 @@ These will be composed into a single file dataset for which you can specify a pa Once this is done, you do not need sources anymore, and simply specifying the path to the HDF5 dataset is enough. - -It's also possible to create the hdf5 file without a whole training, just fill the `datamodule.hdf5_file_path` parameter as before to specify the file path, but use `task=create_hdf5` instead of `task=fit`. 
+It's also possible to create the hdf5 file without training any model: just fill the `datamodule.hdf5_file_path` parameter as before to specify the file path, but use `task=create_hdf5` instead of `task=fit`. ## Getting started quickly with a toy dataset diff --git a/myria3d/pctl/transforms/transforms.py b/myria3d/pctl/transforms/transforms.py index 46613b63..da7d565f 100755 --- a/myria3d/pctl/transforms/transforms.py +++ b/myria3d/pctl/transforms/transforms.py @@ -26,16 +26,13 @@ def __call__(self, data: Data): def subsample_data(data, num_nodes, choice): + # TODO: get num_nodes from data.num_nodes instead to simplify signature for key, item in data: if key == "num_nodes": data.num_nodes = choice.size(0) elif bool(re.search("edge", key)): continue - elif ( - torch.is_tensor(item) - and item.size(0) == num_nodes - and item.size(0) != 1 - ): + elif torch.is_tensor(item) and item.size(0) == num_nodes and item.size(0) != 1: data[key] = item[choice] return data @@ -67,10 +64,7 @@ def __call__(self, data): return data choice = torch.cat( - [ - torch.randperm(num_nodes) - for _ in range(math.ceil(self.num / num_nodes)) - ], + [torch.randperm(num_nodes) for _ in range(math.ceil(self.num / num_nodes))], dim=0, )[: self.num] @@ -124,9 +118,7 @@ def __call__(self, data: Data): data.x[:, idx] = self.standardize_channel(data.x[:, idx]) return data - def standardize_channel( - self, channel_data: torch.Tensor, clamp_sigma: int = 3 - ): + def standardize_channel(self, channel_data: torch.Tensor, clamp_sigma: int = 3): """Sample-wise standardization y* = (y-y_mean)/y_std. 
clamping to ignore large values.""" mean = channel_data.mean() std = channel_data.std() + 10**-6 @@ -189,9 +181,7 @@ def __init__( # Set to attribute to log potential type errors self.classification_dict = classification_dict - self.classification_preprocessing_dict = ( - classification_preprocessing_dict - ) + self.classification_preprocessing_dict = classification_preprocessing_dict def __call__(self, data: Data): data.y = self.transform(data.y) @@ -218,20 +208,30 @@ def transform(self, y): def _set_preprocessing_mapper(self, classification_preprocessing_dict): """Set mapper from source classification code to another code.""" - d = { - key: value - for key, value in classification_preprocessing_dict.items() - } - self.preprocessing_mapper = np.vectorize( - lambda class_code: d.get(class_code, class_code) - ) + d = {key: value for key, value in classification_preprocessing_dict.items()} + self.preprocessing_mapper = np.vectorize(lambda class_code: d.get(class_code, class_code)) def _set_mapper(self, classification_dict): """Set mapper from source classification code to consecutive integers.""" - d = { - class_code: class_index - for class_index, class_code in enumerate( - classification_dict.keys() - ) - } + d = {class_code: class_index for class_index, class_code in enumerate(classification_dict.keys())} self.mapper = np.vectorize(lambda class_code: d.get(class_code)) + + +class DropPointsByClass(BaseTransform): + """Drop points""" + + def __init__(self, classes_to_drop=None): + self.classes_to_drop = classes_to_drop + if np.isscalar(self.classes_to_drop): + self.classes_to_drop = [self.classes_to_drop] + if self.classes_to_drop: + self.classes_to_drop = torch.Tensor(self.classes_to_drop) + + def __call__(self, data): + if self.classes_to_drop: + choice = torch.logical_not(torch.isin(data.y, self.classes_to_drop)) + data = subsample_data(data, num_nodes=data.num_nodes, choice=choice) + return data + + def __repr__(self): + return 
"{}()".format(self.__class__.__name__) diff --git a/tests/myria3d/data/test_transforms.py b/tests/myria3d/data/test_transforms.py index 157f6603..f3779907 100644 --- a/tests/myria3d/data/test_transforms.py +++ b/tests/myria3d/data/test_transforms.py @@ -1,8 +1,9 @@ import numpy as np import pytest import torch_geometric +import torch -from myria3d.pctl.transforms.transforms import TargetTransform +from myria3d.pctl.transforms.transforms import TargetTransform, DropPointsByClass def test_TargetTransform_with_valid_config(): @@ -22,10 +23,37 @@ def test_TargetTransform_throws_type_error_if_invalid_classification_dict(): classification_dict = {1: "unclassified", 2: "ground", 6: "building"} tt = TargetTransform(classification_preprocessing_dict, classification_dict) - invalid_input_data = torch_geometric.data.Data( - x=None, y=np.array([1, 1, 1, 2, 99999, 1]) - ) + invalid_input_data = torch_geometric.data.Data(x=None, y=np.array([1, 1, 1, 2, 99999, 1])) with pytest.raises(TypeError): # error content: # int() argument must be a string, a bytes-like object or a number, not 'NoneType' _ = tt(invalid_input_data) + + +def test_DropPointsByClass(): + # points with class 65 are droped. 
+ y = torch.Tensor([1, 65, 65, 2, 65]) + x = torch.rand((5, 3)) + data = torch_geometric.data.Data(x=x, y=y) + drop_transforms = DropPointsByClass([65]) + transformed_data = drop_transforms(data) + assert torch.equal(transformed_data.y, torch.Tensor([1, 2])) + assert transformed_data.x.size(0) == 2 + + # No modification + x = torch.rand((3, 3)) + y = torch.Tensor([1, 2, 3]) + data = torch_geometric.data.Data(x=x, y=y) + transformed_data = drop_transforms(data) + assert torch.equal(data.x, transformed_data.x) + assert torch.equal(data.y, transformed_data.y) + + +def test_DropPointsByClass_creation(): + scalar = 42 + a = DropPointsByClass(scalar) + b = DropPointsByClass([scalar]) + assert torch.equal(a.classes_to_drop, b.classes_to_drop) + + c = DropPointsByClass(None) + assert c.classes_to_drop is None From 836fbe6f860a2a20e1b2b4102ae4de10196bb959 Mon Sep 17 00:00:00 2001 From: CharlesGaydon Date: Mon, 16 Jan 2023 16:23:45 +0100 Subject: [PATCH 03/10] Givre access to input classification during inference for DropPointsByClass --- myria3d/pctl/dataset/iterable.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/myria3d/pctl/dataset/iterable.py b/myria3d/pctl/dataset/iterable.py index 7dcad706..24d077a6 100644 --- a/myria3d/pctl/dataset/iterable.py +++ b/myria3d/pctl/dataset/iterable.py @@ -20,12 +20,8 @@ class InferenceDataset(IterableDataset): def __init__( self, las_file: str, - points_pre_transform: Callable[ - [ArrayLike], Data - ] = lidar_hd_pre_transform, - pre_filter: Optional[ - Callable[[Data], bool] - ] = pre_filter_below_n_points, + points_pre_transform: Callable[[ArrayLike], Data] = lidar_hd_pre_transform, + pre_filter: Optional[Callable[[Data], bool]] = pre_filter_below_n_points, transform: Optional[Callable[[Data], Data]] = None, tile_width: Number = 1000, subtile_width: Number = 50, @@ -57,7 +53,7 @@ def get_iterator(self): ): sample_data = self.points_pre_transform(sample_points) sample_data["x"] = 
torch.from_numpy(sample_data["x"]) - # sample_data["y"] = torch.from_numpy(sample_data["y"]) # No need in inference. + sample_data["y"] = torch.LongTensor(sample_data["y"]) # Need input classification for DropPointsByClass sample_data["pos"] = torch.from_numpy(sample_data["pos"]) # for final interpolation - should be kept as a np.ndarray to be batched as a list later. sample_data["idx_in_original_cloud"] = idx_in_original_cloud From 669ee4cd4f7343f447134f9fa66aa4ce696814bb Mon Sep 17 00:00:00 2001 From: CharlesGaydon Date: Mon, 16 Jan 2023 17:45:32 +0100 Subject: [PATCH 04/10] Do not map code for artefact to unclassified class in order to ignore it --- configs/dataset_description/20220607_151_dalles_proto.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/dataset_description/20220607_151_dalles_proto.yaml b/configs/dataset_description/20220607_151_dalles_proto.yaml index cddb1bc1..9bf378f7 100644 --- a/configs/dataset_description/20220607_151_dalles_proto.yaml +++ b/configs/dataset_description/20220607_151_dalles_proto.yaml @@ -12,7 +12,7 @@ classes_to_drop: [65] # Reduced classification dict: # classification_preprocessing_dict: {3: 5, 4: 5, 64:1, 65:1, 160: 64, 161: 64, 162: 64} # Complete classification dict since some trash classes are leftover. -classification_preprocessing_dict: {3: 5, 4: 5, 160: 64, 161: 64, 162: 64, 0: 1, 7: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 64: 1, 65: 1, 66: 1, 67: 1, 77: 1, 155: 1, 204: 1} +classification_preprocessing_dict: {3: 5, 4: 5, 160: 64, 161: 64, 162: 64, 0: 1, 7: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 64: 1, 66: 1, 67: 1, 77: 1, 155: 1, 204: 1} # classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order). 
classification_dict: {1: "unclassified", 2: "ground", 5: vegetation, 6: "building", 9: water, 17: bridge, 64: lasting_above} From 8591ff25194e8315e132c833cb833a49ded985ba Mon Sep 17 00:00:00 2001 From: CharlesGaydon Date: Tue, 31 Jan 2023 09:30:55 +0100 Subject: [PATCH 05/10] C=65 is default code to be ignored. Its class will remain an artefact --- .../preparations/fixed_num_points.yaml | 13 +++----- .../preparations/points_budget.yaml | 16 ++++----- ...0220204_BuildingValidation_and_Ground.yaml | 1 - .../20220607_151_dalles_proto.yaml | 9 ++--- configs/model/criterion/CrossEntropyLoss.yaml | 3 +- .../criterion/WeightedCrossEntropyLoss.yaml | 1 + docs/source/tutorials/make_predictions.md | 4 +-- myria3d/models/interpolation.py | 33 ++++++++++++------- myria3d/pctl/transforms/transforms.py | 24 ++++++-------- 9 files changed, 49 insertions(+), 55 deletions(-) diff --git a/configs/datamodule/transforms/preparations/fixed_num_points.yaml b/configs/datamodule/transforms/preparations/fixed_num_points.yaml index 410b372f..a7534fa2 100644 --- a/configs/datamodule/transforms/preparations/fixed_num_points.yaml +++ b/configs/datamodule/transforms/preparations/fixed_num_points.yaml @@ -10,7 +10,6 @@ train: DropPointsByClass: _target_: myria3d.pctl.transforms.transforms.DropPointsByClass - classes_to_drop: ${dataset_description.classes_to_drop} GridSampling: _target_: torch_geometric.transforms.GridSampling @@ -29,21 +28,20 @@ train: eval: + CopyFullPos: + _target_: myria3d.pctl.transforms.transforms.CopyFullPos + TargetTransform: _target_: myria3d.pctl.transforms.transforms.TargetTransform _args_: - ${dataset_description.classification_preprocessing_dict} - ${dataset_description.classification_dict} - DropPointsByClass: - _target_: myria3d.pctl.transforms.transforms.DropPointsByClass - classes_to_drop: ${dataset_description.classes_to_drop} - CopyFullPreparedTargets: _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets - CopyFullPos: - _target_: 
myria3d.pctl.transforms.transforms.CopyFullPos + DropPointsByClass: + _target_: myria3d.pctl.transforms.transforms.DropPointsByClass GridSampling: _target_: torch_geometric.transforms.GridSampling @@ -71,7 +69,6 @@ predict: DropPointsByClass: _target_: myria3d.pctl.transforms.transforms.DropPointsByClass - classes_to_drop: ${dataset_description.classes_to_drop} GridSampling: _target_: torch_geometric.transforms.GridSampling diff --git a/configs/datamodule/transforms/preparations/points_budget.yaml b/configs/datamodule/transforms/preparations/points_budget.yaml index e954aea1..f34b843d 100644 --- a/configs/datamodule/transforms/preparations/points_budget.yaml +++ b/configs/datamodule/transforms/preparations/points_budget.yaml @@ -10,7 +10,6 @@ train: DropPointsByClass: _target_: myria3d.pctl.transforms.transforms.DropPointsByClass - classes_to_drop: ${dataset_description.classes_to_drop} GridSampling: _target_: torch_geometric.transforms.GridSampling @@ -38,16 +37,15 @@ eval: - ${dataset_description.classification_preprocessing_dict} - ${dataset_description.classification_dict} - CopyFullPreparedTargets: - _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets - DropPointsByClass: _target_: myria3d.pctl.transforms.transforms.DropPointsByClass - classes_to_drop: ${dataset_description.classes_to_drop} CopyFullPos: _target_: myria3d.pctl.transforms.transforms.CopyFullPos + CopyFullPreparedTargets: + _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets + GridSampling: _target_: torch_geometric.transforms.GridSampling _args_: @@ -63,7 +61,6 @@ eval: _args_: - 40000 - # For interpolation CopySampledPos: _target_: myria3d.pctl.transforms.transforms.CopySampledPos @@ -72,12 +69,11 @@ eval: predict: - CopyFullPos: - _target_: myria3d.pctl.transforms.transforms.CopyFullPos - DropPointsByClass: _target_: myria3d.pctl.transforms.transforms.DropPointsByClass - classes_to_drop: ${dataset_description.classes_to_drop} + + CopyFullPos: + _target_: 
myria3d.pctl.transforms.transforms.CopyFullPos GridSampling: _target_: torch_geometric.transforms.GridSampling diff --git a/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml b/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml index 6d7a68b2..8f955119 100644 --- a/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml +++ b/configs/dataset_description/20220204_BuildingValidation_and_Ground.yaml @@ -2,7 +2,6 @@ _convert_: all # For omegaconf struct to be converted to python dictionnaries # classification_preprocessing_dict = {source_class_code_int: target_class_code_int}, classification_preprocessing_dict: {59: 6, 50: 1} # classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order). -classes_to_drop: [] classification_dict: {1: "unclassified", 2: "ground", 6: "building"} # Input and output dims of neural net are dataset dependant: diff --git a/configs/dataset_description/20220607_151_dalles_proto.yaml b/configs/dataset_description/20220607_151_dalles_proto.yaml index 9bf378f7..f1d1de24 100644 --- a/configs/dataset_description/20220607_151_dalles_proto.yaml +++ b/configs/dataset_description/20220607_151_dalles_proto.yaml @@ -5,13 +5,8 @@ _convert_: all # For omegaconf struct to be converted to python dictionnaries # 160: antenna -> lasting_above # 161: wind_turbines -> lasting_above # 162: pylon -> lasting_above - -# Noise points have class 65 - this will be the defaut for inference in production. -# Dropped points will be ignored in the inference process but still included in the final output cloud. -classes_to_drop: [65] -# Reduced classification dict: -# classification_preprocessing_dict: {3: 5, 4: 5, 64:1, 65:1, 160: 64, 161: 64, 162: 64} -# Complete classification dict since some trash classes are leftover. +# 65: noise --> kept as 65 (ignored in the inference process, but they will still be included in the final output cloud).
+# Some trash classes were left in this dataset. We do not drop them (i.e. map them to 65) to avoid unintended conflict in production. classification_preprocessing_dict: {3: 5, 4: 5, 160: 64, 161: 64, 162: 64, 0: 1, 7: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 64: 1, 66: 1, 67: 1, 77: 1, 155: 1, 204: 1} # classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order). diff --git a/configs/model/criterion/CrossEntropyLoss.yaml b/configs/model/criterion/CrossEntropyLoss.yaml index 5cd60a5b..63f22ffe 100644 --- a/configs/model/criterion/CrossEntropyLoss.yaml +++ b/configs/model/criterion/CrossEntropyLoss.yaml @@ -1,2 +1,3 @@ _target_: torch.nn.CrossEntropyLoss -label_smoothing: 0.0 \ No newline at end of file +label_smoothing: 0.0 +ignore_index: 65 # artefacts are mapped to 65 by convention \ No newline at end of file diff --git a/configs/model/criterion/WeightedCrossEntropyLoss.yaml b/configs/model/criterion/WeightedCrossEntropyLoss.yaml index 76d04362..893dc4bf 100644 --- a/configs/model/criterion/WeightedCrossEntropyLoss.yaml +++ b/configs/model/criterion/WeightedCrossEntropyLoss.yaml @@ -1,5 +1,6 @@ _target_: torch.nn.CrossEntropyLoss label_smoothing: 0.0 +ignore_index: 65 # artefacts are mapped to 65 by convention weight: _target_: torch.FloatTensor _args_: diff --git a/docs/source/tutorials/make_predictions.md b/docs/source/tutorials/make_predictions.md index 41abd538..e04926ea 100644 --- a/docs/source/tutorials/make_predictions.md +++ b/docs/source/tutorials/make_predictions.md @@ -79,6 +79,6 @@ This, however, comes with a large computation price. For instance, `predict.subt ### Ignoring artefacts points during inference -Lidar acquisition may have produced artefacts points. If these points were identified with one (or several) classification code(s), they can be ignored during inference.
These points will still be present in the output cloud, but will not negatively disturb model inference. Note that they will still have class probabilities, obtained from their non-artefacts closest neighboors. +Lidar acquisition may have produced artefacts points. If these points were identified with one (or several) classification code(s), they can be ignored during inference. These points will still be present in the output cloud, but will not negatively disturb model inference. They will keep their original class in the predicted classification dim. They will have null probas and entropy. -In the configuration, data transforms are used to drop these points according to the `dataset_description.classes_to_drop` parameter. By default, `dataset_description.classes_to_drop=[65]` where 65 is the defaut code used to flag artefact. Note: you may need to use quotes when overriding this parameter via CLI. +In the configuration, data transforms are used to drop points with a class 65. By convention, 65 will flag Lidar artefacts points. Additional classes may be mapped to 65 to be ignored during inference as well, via the `dataset_description.classification_preprocessing_dict` parameter. Note: you may need to use quotes when overriding this parameter via CLI. diff --git a/myria3d/models/interpolation.py b/myria3d/models/interpolation.py index b5c85fca..c2394ac5 100644 --- a/myria3d/models/interpolation.py +++ b/myria3d/models/interpolation.py @@ -59,21 +59,23 @@ def load_full_las_for_update(self, src_las: str) -> np.ndarray: Args: filepath (str): Path to LAS for which predictions are made. """ - # self.current_f = filepath + # We do not reset the dims we create channel. + # Slight risk of interaction with previous values, but it is expected that all non-artefacts values are updated. 
+ pipeline = get_pdal_reader(src_las) for proba_channel_to_create in self.probas_to_save: pipeline |= pdal.Filter.ferry(dimensions=f"=>{proba_channel_to_create}") pipeline |= pdal.Filter.assign(value=f"{proba_channel_to_create}=0") if self.predicted_classification_channel: - # Copy from Classification to preserve data type. + # Copy from Classification to preserve data type + # Also preserves values of artefacts. if self.predicted_classification_channel != "Classification": pipeline |= pdal.Filter.ferry(dimensions=f"Classification=>{self.predicted_classification_channel}") - # Reset channel. - pipeline |= pdal.Filter.assign(value=f"{self.predicted_classification_channel}=0") if self.entropy_channel: - pipeline |= pdal.Filter.ferry(dimensions=f"=>{self.entropy_channel}") | pdal.Filter.assign(value=f"{self.entropy_channel}=0") + pipeline |= pdal.Filter.ferry(dimensions=f"=>{self.entropy_channel}") + pipeline |= pdal.Filter.assign(value=f"{self.entropy_channel}=0") pipeline.execute() return pipeline.arrays[0] @@ -99,12 +101,14 @@ def reduce_predicted_logits(self, nb_points) -> torch.Tensor: del self.idx_in_full_cloud_list # We scatter_sum logits based on idx, in case there are multiple predictions for a point. - # scatter_sum reorders logitsbased on index,they therefore match las order. + # scatter_sum reorders logits based on index,they therefore match las order. reduced_logits = torch.zeros((nb_points, logits.size(1))) scatter_sum(logits, torch.from_numpy(idx_in_full_cloud), out=reduced_logits, dim=0) # reduced_logits contains logits ordered by their idx in original cloud ! - # Warning : some points may not contain any predictions if they were in small areas. - return reduced_logits + # We need to select the points for which we have a prediction via idx_in_full_cloud. + # NB1 : some points may not contain any predictions if they were in small areas. 
+ + return reduced_logits[idx_in_full_cloud], idx_in_full_cloud @torch.no_grad() def reduce_predictions_and_save(self, raw_path: str, output_dir: str) -> str: @@ -122,7 +126,7 @@ def reduce_predictions_and_save(self, raw_path: str, output_dir: str) -> str: basename = os.path.basename(raw_path) # Read number of points only from las metadata in order to minimize memory usage nb_points = get_pdal_info_metadata(raw_path)["count"] - logits = self.reduce_predicted_logits(nb_points) + logits, idx_in_full_cloud = self.reduce_predicted_logits(nb_points) probas = torch.nn.Softmax(dim=1)(logits) @@ -137,10 +141,12 @@ def reduce_predictions_and_save(self, raw_path: str, output_dir: str) -> str: for idx, class_name in enumerate(self.classification_dict.values()): if class_name in self.probas_to_save: - las[class_name] = probas[:, idx] + # NB: Values for which we do not have a prediction (i.e. artefacts) get null probabilities. + las[class_name][idx_in_full_cloud] = probas[:, idx] if self.predicted_classification_channel: - las[self.predicted_classification_channel] = preds + # NB: Values for which we do not have a prediction (i.e. artefacts) keep their original class. + las[self.predicted_classification_channel][idx_in_full_cloud] = preds log.info( f"Saving predicted classes to channel {self.predicted_classification_channel}." "Channel name can be changed by setting `predict.interpolator.predicted_classification_channel`." @@ -148,11 +154,14 @@ def reduce_predictions_and_save(self, raw_path: str, output_dir: str) -> str: del preds if self.entropy_channel: - las[self.entropy_channel] = Categorical(probs=probas).entropy() + # NB: Values for which we do not have a prediction (i.e. artefacts) get null entropy. + las[self.entropy_channel][idx_in_full_cloud] = Categorical(probs=probas).entropy() log.info( f"Saving Shannon entropy of probabilities to channel {self.entropy_channel}." 
"Channel name can be changed by setting `predict.interpolator.entropy_channel`" ) + del idx_in_full_cloud + os.makedirs(output_dir, exist_ok=True) out_f = os.path.join(output_dir, basename) out_f = os.path.abspath(out_f) diff --git a/myria3d/pctl/transforms/transforms.py b/myria3d/pctl/transforms/transforms.py index da7d565f..ad9f665c 100755 --- a/myria3d/pctl/transforms/transforms.py +++ b/myria3d/pctl/transforms/transforms.py @@ -11,6 +11,8 @@ log = utils.get_logger(__name__) +COMMON_CODE_FOR_ALL_ARTEFACTS = 65 + class ToTensor(BaseTransform): """Turn np.arrays specified by their keys into Tensor.""" @@ -214,24 +216,18 @@ def _set_preprocessing_mapper(self, classification_preprocessing_dict): def _set_mapper(self, classification_dict): """Set mapper from source classification code to consecutive integers.""" d = {class_code: class_index for class_index, class_code in enumerate(classification_dict.keys())} + d.update({65: 65}) # code -1 is for artefacts and is used in DropPointsByClass. self.mapper = np.vectorize(lambda class_code: d.get(class_code)) class DropPointsByClass(BaseTransform): - """Drop points""" - - def __init__(self, classes_to_drop=None): - self.classes_to_drop = classes_to_drop - if np.isscalar(self.classes_to_drop): - self.classes_to_drop = [self.classes_to_drop] - if self.classes_to_drop: - self.classes_to_drop = torch.Tensor(self.classes_to_drop) + """Drop points with class -1 (i.e. 
artefacts that would have been mapped to code -1)""" def __call__(self, data): - if self.classes_to_drop: - choice = torch.logical_not(torch.isin(data.y, self.classes_to_drop)) - data = subsample_data(data, num_nodes=data.num_nodes, choice=choice) + points_to_drop = torch.isin(data.y, COMMON_CODE_FOR_ALL_ARTEFACTS) + if points_to_drop.sum() > 0: + points_to_keep = torch.logical_not(points_to_drop) + data = subsample_data(data, num_nodes=data.num_nodes, choice=points_to_keep) + # Here we also subsample these idx since we do not need to interpolate these points back + data.idx_in_original_cloud = data.idx_in_original_cloud[points_to_keep] return data - - def __repr__(self): - return "{}()".format(self.__class__.__name__) From 4d3a0f28e9d98460e6523f959fc86863790f3e4e Mon Sep 17 00:00:00 2001 From: CharlesGaydon Date: Tue, 31 Jan 2023 09:37:43 +0100 Subject: [PATCH 06/10] Update default config --- .github/workflows/cicd.yaml | 2 +- docs/source/apidoc/default_config.yml | 48 ++++++++++++------- package_metadata.yaml | 2 +- run.py | 2 +- ..._Myria3DV3.1.0_predict_config_V3.3.0.yaml} | 13 +++-- 5 files changed, 42 insertions(+), 25 deletions(-) rename trained_model_assets/{proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.2.0.yaml => proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml} (95%) diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml index b00bbc76..d723cd97 100644 --- a/.github/workflows/cicd.yaml +++ b/.github/workflows/cicd.yaml @@ -49,7 +49,7 @@ jobs: myria3d python run.py --config-path /inputs/ - --config-name proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.2.0 + --config-name proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0 predict.ckpt_path=/inputs/proto151_V2.0_epoch_100_Myria3DV3.1.0.ckpt predict.src_las=/inputs/792000_6272000_subset_buildings.las predict.output_dir=/outputs/ diff --git a/docs/source/apidoc/default_config.yml b/docs/source/apidoc/default_config.yml index dad8a66b..acee234b 100644 
--- a/docs/source/apidoc/default_config.yml +++ b/docs/source/apidoc/default_config.yml @@ -25,16 +25,20 @@ datamodule: _args_: - ${dataset_description.classification_preprocessing_dict} - ${dataset_description.classification_dict} + DropPointsByClass: + _target_: myria3d.pctl.transforms.transforms.DropPointsByClass GridSampling: _target_: torch_geometric.transforms.GridSampling _args_: - 0.25 - FixedPoints: - _target_: torch_geometric.transforms.FixedPoints + MinimumNumNodes: + _target_: myria3d.pctl.transforms.transforms.MinimumNumNodes _args_: - - 12500 - replace: false - allow_duplicates: true + - 300 + MaximumNumNodes: + _target_: myria3d.pctl.transforms.transforms.MaximumNumNodes + _args_: + - 40000 Center: _target_: torch_geometric.transforms.Center eval: @@ -43,37 +47,45 @@ datamodule: _args_: - ${dataset_description.classification_preprocessing_dict} - ${dataset_description.classification_dict} - CopyFullPreparedTargets: - _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets + DropPointsByClass: + _target_: myria3d.pctl.transforms.transforms.DropPointsByClass CopyFullPos: _target_: myria3d.pctl.transforms.transforms.CopyFullPos + CopyFullPreparedTargets: + _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets GridSampling: _target_: torch_geometric.transforms.GridSampling _args_: - 0.25 - FixedPoints: - _target_: torch_geometric.transforms.FixedPoints + MinimumNumNodes: + _target_: myria3d.pctl.transforms.transforms.MinimumNumNodes + _args_: + - 300 + MaximumNumNodes: + _target_: myria3d.pctl.transforms.transforms.MaximumNumNodes _args_: - - 12500 - replace: false - allow_duplicates: true + - 40000 CopySampledPos: _target_: myria3d.pctl.transforms.transforms.CopySampledPos Center: _target_: torch_geometric.transforms.Center predict: + DropPointsByClass: + _target_: myria3d.pctl.transforms.transforms.DropPointsByClass CopyFullPos: _target_: myria3d.pctl.transforms.transforms.CopyFullPos GridSampling: _target_: 
torch_geometric.transforms.GridSampling _args_: - 0.25 - FixedPoints: - _target_: torch_geometric.transforms.FixedPoints + MinimumNumNodes: + _target_: myria3d.pctl.transforms.transforms.MinimumNumNodes + _args_: + - 300 + MaximumNumNodes: + _target_: myria3d.pctl.transforms.transforms.MaximumNumNodes _args_: - - 12500 - replace: false - allow_duplicates: true + - 40000 CopySampledPos: _target_: myria3d.pctl.transforms.transforms.CopySampledPos Center: @@ -137,7 +149,6 @@ dataset_description: 57: 1 58: 1 64: 1 - 65: 1 66: 1 67: 1 77: 1 @@ -208,6 +219,7 @@ model: criterion: _target_: torch.nn.CrossEntropyLoss label_smoothing: 0.0 + ignore_index: 65 _target_: myria3d.models.model.Model d_in: ${dataset_description.d_in} num_classes: ${dataset_description.num_classes} diff --git a/package_metadata.yaml b/package_metadata.yaml index c9074a07..ed4ff614 100644 --- a/package_metadata.yaml +++ b/package_metadata.yaml @@ -1,4 +1,4 @@ -__version__: "3.2.5" +__version__: "3.3.0" __name__: "myria3d" __url__: "https://github.com/IGNF/myria3d" __description__: "Multiclass Semantic Segmentation for Lidar Point Cloud" diff --git a/run.py b/run.py index 3a8791fc..af1bdaf3 100755 --- a/run.py +++ b/run.py @@ -20,7 +20,7 @@ TASK_NAME_DETECTION_STRING = "task.task_name=" DEFAULT_DIRECTORY = "trained_model_assets/" -DEFAULT_CONFIG_FILE = "proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.2.0.yaml" +DEFAULT_CONFIG_FILE = "proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml" DEFAULT_CHECKPOINT = "proto151_V2.0_epoch_100_Myria3DV3.1.0.ckpt" DEFAULT_ENV = "placeholder.env" diff --git a/trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.2.0.yaml b/trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml similarity index 95% rename from trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.2.0.yaml rename to trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml 
index fbc32d7a..e89f39b1 100644 --- a/trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.2.0.yaml +++ b/trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml @@ -28,6 +28,8 @@ datamodule: _args_: - ${dataset_description.classification_preprocessing_dict} - ${dataset_description.classification_dict} + DropPointsByClass: + _target_: myria3d.pctl.transforms.transforms.DropPointsByClass GridSampling: _target_: torch_geometric.transforms.GridSampling _args_: @@ -48,10 +50,12 @@ datamodule: _args_: - ${dataset_description.classification_preprocessing_dict} - ${dataset_description.classification_dict} - CopyFullPreparedTargets: - _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets + DropPointsByClass: + _target_: myria3d.pctl.transforms.transforms.DropPointsByClass CopyFullPos: _target_: myria3d.pctl.transforms.transforms.CopyFullPos + CopyFullPreparedTargets: + _target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets GridSampling: _target_: torch_geometric.transforms.GridSampling _args_: @@ -69,6 +73,8 @@ datamodule: Center: _target_: torch_geometric.transforms.Center predict: + DropPointsByClass: + _target_: myria3d.pctl.transforms.transforms.DropPointsByClass CopyFullPos: _target_: myria3d.pctl.transforms.transforms.CopyFullPos GridSampling: @@ -156,7 +162,6 @@ dataset_description: 57: 1 58: 1 64: 1 - 65: 1 66: 1 67: 1 77: 1 @@ -270,6 +275,6 @@ predict: _target_: myria3d.models.interpolation.Interpolator interpolation_k: 10 classification_dict: ${dataset_description.classification_dict} - probas_to_save: [building,ground,vegetation,unclassified] + probas_to_save: [building,ground] predicted_classification_channel: confidence entropy_channel: entropy From ed26538ae9f8c4dfc2c3cf1e0614adf913906afb Mon Sep 17 00:00:00 2001 From: CharlesGaydon Date: Tue, 31 Jan 2023 09:41:23 +0100 Subject: [PATCH 07/10] Update default config call in cicd --- .github/workflows/cicd.yaml | 29 
+++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml index d723cd97..a2b0e179 100644 --- a/.github/workflows/cicd.yaml +++ b/.github/workflows/cicd.yaml @@ -36,42 +36,39 @@ jobs: pytest -rA -v --ignore=actions-runner - - # Always run with --ipc=host and --shm-size=2gb (at least) to enable sufficient shared memory when predicting on large data - # predict.subtile_overlap specifies overlap between adjacent samples (in meters). - - name: Example inference run via Docker with inference-time subtiles overlap to smooth-out results. + # IMPORTANT: Always run images with --ipc=host and --shm-size=2gb (at least) to enable + # sufficient shared memory when predicting on large files. + - name: Example inference run via Docker with default config and checkpoint run: > docker run - -v /var/data/cicd/CICD_github_assets/myria3d_V3.2.0/inputs/:/inputs/ - -v /var/data/cicd/CICD_github_assets/myria3d_V3.2.0/outputs/:/outputs/ + -v /var/data/cicd/CICD_github_assets/myria3d_V3.3.0/inputs/:/inputs/ + -v /var/data/cicd/CICD_github_assets/myria3d_V3.3.0/outputs/:/outputs/ --ipc=host --shm-size=2gb myria3d python run.py - --config-path /inputs/ - --config-name proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0 - predict.ckpt_path=/inputs/proto151_V2.0_epoch_100_Myria3DV3.1.0.ckpt predict.src_las=/inputs/792000_6272000_subset_buildings.las predict.output_dir=/outputs/ - predict.subtile_overlap=25 - datamodule.batch_size=10 - predict.interpolator.probas_to_save=[building,unclassified] task.task_name=predict - - name: Example inference run via Docker with default config and checkpoint + # predict.subtile_overlap specifies overlap between adjacent samples (in meters). + - name: Example inference run via Docker with inference-time subtiles overlap to smooth-out results. 
run: > docker run - -v /var/data/cicd/CICD_github_assets/myria3d_V3.2.0/inputs/:/inputs/ - -v /var/data/cicd/CICD_github_assets/myria3d_V3.2.0/outputs/:/outputs/ + -v /var/data/cicd/CICD_github_assets/myria3d_V3.3.0/inputs/:/inputs/ + -v /var/data/cicd/CICD_github_assets/myria3d_V3.3.0/outputs/:/outputs/ --ipc=host --shm-size=2gb myria3d python run.py + --config-path /inputs/ + --config-name proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0 + predict.ckpt_path=/inputs/proto151_V2.0_epoch_100_Myria3DV3.1.0.ckpt predict.src_las=/inputs/792000_6272000_subset_buildings.las predict.output_dir=/outputs/ predict.subtile_overlap=25 datamodule.batch_size=10 - predict.interpolator.probas_to_save=[building,unclassified] + predict.interpolator.probas_to_save=[building,ground] task.task_name=predict - name: Check code neatness (linter) From 3bbc77b5a7bc246c0e20c34e80f666b8a2efc61d Mon Sep 17 00:00:00 2001 From: CharlesGaydon Date: Tue, 31 Jan 2023 09:44:51 +0100 Subject: [PATCH 08/10] Update unit test for DropPointsByClass --- myria3d/pctl/transforms/transforms.py | 3 ++- tests/myria3d/data/test_transforms.py | 12 +----------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/myria3d/pctl/transforms/transforms.py b/myria3d/pctl/transforms/transforms.py index ad9f665c..e3b6db70 100755 --- a/myria3d/pctl/transforms/transforms.py +++ b/myria3d/pctl/transforms/transforms.py @@ -229,5 +229,6 @@ def __call__(self, data): points_to_keep = torch.logical_not(points_to_drop) data = subsample_data(data, num_nodes=data.num_nodes, choice=points_to_keep) # Here we also subsample these idx since we do not need to interpolate these points back - data.idx_in_original_cloud = data.idx_in_original_cloud[points_to_keep] + if "idx_in_original_cloud" in data: + data.idx_in_original_cloud = data.idx_in_original_cloud[points_to_keep] return data diff --git a/tests/myria3d/data/test_transforms.py b/tests/myria3d/data/test_transforms.py index f3779907..988444a5 100644 --- 
a/tests/myria3d/data/test_transforms.py +++ b/tests/myria3d/data/test_transforms.py @@ -35,7 +35,7 @@ def test_DropPointsByClass(): y = torch.Tensor([1, 65, 65, 2, 65]) x = torch.rand((5, 3)) data = torch_geometric.data.Data(x=x, y=y) - drop_transforms = DropPointsByClass([65]) + drop_transforms = DropPointsByClass() transformed_data = drop_transforms(data) assert torch.equal(transformed_data.y, torch.Tensor([1, 2])) assert transformed_data.x.size(0) == 2 @@ -47,13 +47,3 @@ def test_DropPointsByClass(): transformed_data = drop_transforms(data) assert torch.equal(data.x, transformed_data.x) assert torch.equal(data.y, transformed_data.y) - - -def test_DropPointsByClass_creation(): - scalar = 42 - a = DropPointsByClass(scalar) - b = DropPointsByClass([scalar]) - assert torch.equal(a.classes_to_drop, b.classes_to_drop) - - c = DropPointsByClass(None) - assert c.classes_to_drop is None From 9d6fa0c2b45ff24e8b71cdeac00b7f63bca2267f Mon Sep 17 00:00:00 2001 From: CharlesGaydon Date: Tue, 7 Feb 2023 06:44:02 +0100 Subject: [PATCH 09/10] Change typing of reduce_predicted_logits --- myria3d/models/interpolation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/myria3d/models/interpolation.py b/myria3d/models/interpolation.py index c2394ac5..c526cf41 100644 --- a/myria3d/models/interpolation.py +++ b/myria3d/models/interpolation.py @@ -1,6 +1,6 @@ import logging import os -from typing import Dict, List, Literal, Optional, Union +from typing import Dict, List, Literal, Optional, Tuple, Union import numpy as np import pdal @@ -86,7 +86,7 @@ def store_predictions(self, logits, idx_in_original_cloud) -> None: self.idx_in_full_cloud_list += idx_in_original_cloud @torch.no_grad() - def reduce_predicted_logits(self, nb_points) -> torch.Tensor: + def reduce_predicted_logits(self, nb_points) -> Tuple[torch.Tensor, np.ndarray]: """Interpolate logits to points without predictions using an inverse-distance weightning scheme. 
Returns: From 51e5815441ebb17a04ccfac9378e2168e6236161 Mon Sep 17 00:00:00 2001 From: CharlesGaydon Date: Tue, 7 Feb 2023 07:06:24 +0100 Subject: [PATCH 10/10] Update documentation for making inference --- docs/source/guides/development.md | 15 ++++++++------- docs/source/tutorials/make_predictions.md | 8 ++++---- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/docs/source/guides/development.md b/docs/source/guides/development.md index 9b619659..022d730d 100644 --- a/docs/source/guides/development.md +++ b/docs/source/guides/development.md @@ -2,9 +2,9 @@ ## Code versionning -Package version follows semantic versionning conventions and is defined in `setup.py`. +Package version follows semantic versionning conventions and is defined in `package_metadata.yaml`. -Releases are generated when new high-level functionnality are implemented (e.g. a new step in the production process), with a documentation role. Production-ready code is fast-forwarded in the `prod` branch when needed to match the `main` branch. When updating the `prod` branch, one should move the tag `prod-release-tag` alongside to the [related release](https://github.com/IGNF/myria3d/releases/tag/prod-release-tag). +Releases are created when new high-level functionnality are implemented (e.g. a new step in the production process), with a documentation role. A `prod-release-tag` is created that tracks an _arbitrary_ commit, and serves as a mean to make a few models, model card, and config accessible via its associated [release](https://github.com/IGNF/myria3d/releases/tag/prod-release-tag). ## Tests @@ -17,15 +17,16 @@ python -m pytest -rA -v ## Continuous Integration (CI) -New features are developped in ad-hoc branches (e.g. `dev-Vx.y.z-Feature-Name`). +New features are developped in ad-hoc branches (e.g. `2023MMDD-Feature-Name`). -CI tests are run for pull request to merge on either the `main` branches, and on pushes to `main`, and `prod` branches. 
The CI workflow builds a docker image, runs linting, and tests the code. +CI tests are run on pushes and pull requests to the `main` branch. The workflow builds a docker image, runs linting, and tests the code. ## Continuous Delivery (CD) -When the event is a push and not a merge request, this means that there was either a direct push to `main`|`prod` or that a merge request was accepted. In this case, if the CI workflow passes (i.e. tests pass and code is PEP8 compliant), the created docker image is tagged with the branch name, resulting in e.g. a `myria3d:prod` image that is up to date with the branch content. -See [../tutorials/use.md] for how to leverage such image to run the app. +In case of push / accepted merge to the `main` branch, and if the CI workflow is successful (i.e. docker build is complete, tests pass, and code is PEP8 compliant), a docker image is pushed to an in-house Nexus image repository. -Additionnaly, pushes on the `main` branch build this library documentation, which is hosted on Github pages. +Additionally, images may be built for feature branches, for further testing / staging. Details are in workflow `cicd.yaml`. +See [../tutorials/use.md] for how to leverage such image to run the app. +Additionally, pushes on the `main` branch build this library documentation, which is hosted on Github pages. diff --git a/docs/source/tutorials/make_predictions.md b/docs/source/tutorials/make_predictions.md index e04926ea..8341fd5c 100644 --- a/docs/source/tutorials/make_predictions.md +++ b/docs/source/tutorials/make_predictions.md @@ -1,15 +1,15 @@ # Performing inference on new data -Refer to [this tutorial](./install_on_linux.md) for how to setup a virtual environment and install the library. +Refer to the tutorials ([Linux](./install_on_linux.md), [Windows](./install_on_wsl2.md)) for installation instructions. To run inference, you will need: - A source cloud point in LAS format on which to infer classes and probabilites.
Sample data from the French "Lidar HD" project can be downloaded at [this address](https://geoservices.ign.fr/lidarhd). - A checkpoint of a trained lightning module implementing model logic (class `myria3d.models.model.Model`) - A minimal yaml configuration specifying parameters. We use [hydra](https://hydra.cc/) to manage configurations, and this yaml results from the model training. The `datamodule` and `model` parameters groups must match dataset characteristics and model training settings. The `predict` parameters group specifies path to models and data as well as batch size (N=50 works well, the larger the faster) and use of gpu (optionnal). For hints on what to modify, see the `experiment/predict.yaml` file. -## Run inference from installed package +> **A default model and its configuration are embedded directly in code under folder `trained_model_assets`.** They are expected to always be compatible with the code base, and updated as needed in case of e.g. change of configuration format or model implementation. -From the package root, run `pip install -e .` to install the package locally and freeze its current version. +## Run inference from source Then, fill out the {missing parameters} below and run: @@ -56,7 +56,7 @@ docker run \ --ipc=host \ --gpus=all \ --shm-size=2gb \ -myria3d.predict {...config paths & options...} +python run.py {...config paths & options...} ``` ## Additional options for prediction