Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V2.3.0 eval time interpolation #28

Merged
merged 16 commits into from
Jun 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/cicd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
python -m
myria3d.predict
--config-path /inputs/
--config-name predict_config_V2.1.0.yaml
--config-name predict_config_V2.3.0.yaml
predict.src_las=/inputs/792000_6272000_subset_buildings.las
predict.output_dir=/outputs/
predict.ckpt_path=/inputs/RandLaNet_Buildings_B2V0.5_epoch_033.ckpt
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
_convert_: all # For omegaconf struct to be converted to python dictionaries
# classification_preprocessing_dict = {source_class_code_int: target_class_code_int},
# 3: medium vegetation -> vegetation
# 4: high vegetation -> vegetation
# 160: antenna -> lasting_above
# 161: wind_turbines -> lasting_above
# 162: pylon -> lasting_above

# Expected classification dict:
# classification_preprocessing_dict: {3: 5, 4: 5, 160: 64, 161: 64, 162: 64}

# Temporary classification dict to take care of abnormal classes that are to be corrected in the dataset.
classification_preprocessing_dict: {3: 5, 4: 5, 160: 64, 161: 64, 162: 64, 0: 1, 7: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 66: 1, 67: 1, 77: 1, 155: 1, 204: 1}
# classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order).
classification_dict: {1: "unclassified", 2: "ground", 5: vegetation, 6: "building", 9: water, 17: bridge, 64: lasting_above}

# Input and output dims of neural net are dataset dependant:
d_in: 12
num_classes: 7

# loading functions for data object prepared via myria3d/datamodule/data.py
# Nota: In this function, artefacts as well as synthetic points (65, 66) are filtered out.
load_las_func:
_target_: functools.partial
_args_:
- "${get_method:myria3d.data.loading.FrenchLidarDataLogic.load_las}"
2 changes: 1 addition & 1 deletion configs/datamodule/transforms/default.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
defaults:
- preparations: default.yaml
- preparations: train.yaml
- augmentations: default.yaml
- normalizations: default.yaml

Expand Down
23 changes: 23 additions & 0 deletions configs/datamodule/transforms/preparations/predict.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
ToTensor:
_target_: myria3d.data.transforms.ToTensor

CopyFullPos:
_target_: myria3d.data.transforms.CopyFullPos

GridSampling:
_target_: torch_geometric.transforms.GridSampling
_args_:
- 0.25

FixedPoints:
_target_: torch_geometric.transforms.FixedPoints
_args_:
- 12500
replace: False
allow_duplicates: True

CopySampledPos:
_target_: myria3d.data.transforms.CopySampledPos

Center:
_target_: torch_geometric.transforms.Center
33 changes: 33 additions & 0 deletions configs/datamodule/transforms/preparations/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
ToTensor:
_target_: myria3d.data.transforms.ToTensor

TargetTransform:
_target_: myria3d.data.transforms.TargetTransform
_args_:
- ${datamodule.dataset_description.classification_preprocessing_dict}
- ${datamodule.dataset_description.classification_dict}

CopyFullPreparedTargets:
_target_: myria3d.data.transforms.CopyFullPreparedTargets

CopyFullPos:
_target_: myria3d.data.transforms.CopyFullPos

GridSampling:
_target_: torch_geometric.transforms.GridSampling
_args_:
- 0.25

FixedPoints:
_target_: torch_geometric.transforms.FixedPoints
_args_:
- 12500
replace: False
allow_duplicates: True

# For interpolation
CopySampledPos:
_target_: myria3d.data.transforms.CopySampledPos

Center:
_target_: torch_geometric.transforms.Center
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
EmptySubtileFilter:
_target_: myria3d.data.transforms.EmptySubtileFilter

ToTensor:
_target_: myria3d.data.transforms.ToTensor

Expand All @@ -22,8 +19,5 @@ FixedPoints:
replace: False
allow_duplicates: True

CopySampledPos:
_target_: myria3d.data.transforms.CopySampledPos

Center:
_target_: torch_geometric.transforms.Center
5 changes: 1 addition & 4 deletions configs/experiment/RandLaNet_base_run_FR.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
defaults:
- override /datamodule/dataset_description: 20220504_proto23dalles

datamodule:
augment: true

logger:
comet:
experiment_name: "RandLaNet - FR Data"
Expand All @@ -15,7 +12,7 @@ trainer:
min_epochs: 30
max_epochs: 150
check_val_every_n_epoch: 1
val_check_interval: 0.25
val_check_interval: 1.0
# gpus: [1]

callbacks:
Expand Down
26 changes: 26 additions & 0 deletions configs/experiment/predict.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# @package _global_

# Those are the parameters you need to override from a training configuration in order to perform
# an inference.
# This file should not be used directly if the trained model's config is different than the default config.

defaults:
- override /datamodule/transforms/preparations: predict.yaml

seed: 12345

task:
task_name: "predict"

predict:
src_las: "/path/to/input.las" # Any glob pattern can be used to predict on multiple files.
output_dir: "/path/to/output_dir/" # Predictions are saved in a new file which shares src_las basename.
ckpt_path: "/path/to/lightning_model.ckpt" # Checkpoint of trained model.
# + other params in configs/predict/default.yaml

hydra:
verbose: false

datamodule:
# larger batch size for speed
batch_size: 50
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

# all parameters below will be merged with parameters from default configurations

defaults:
- override /datamodule/transforms/preparations: test.yaml

seed: 12345

task:
Expand All @@ -13,8 +16,10 @@ hydra:
verbose: false

datamodule:
# test_data_dir: "path/to_dir/with/annotated/files.las" # override
batch_size: 50
# override if needed
# test_data_dir: "path/to_dir/with/annotated/files.las"
# larger batch size for speed
batch_size: 50

model:
ckpt_path: "path/to/checkpoint" # override
Expand Down
2 changes: 1 addition & 1 deletion configs/predict/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ output_dir: "/path/to/output_dir/" # Predictions are saved in a new file which
ckpt_path: "/path/to/lightning_model.ckpt" # Checkpoint of trained model.
gpus: 0 # 0 for none, 1 for one, [gpu_id] to specify which gpu to use e.g [1]

# Speifying the ouptut:
# Specifying the output:
# A list of string matching class names to select specific probas to save
# OR keyword "all" to save all probabilities.
# In addition, these dimensions are always created:
Expand Down
45 changes: 43 additions & 2 deletions docs/source/apidoc/default_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ ignore_warnings: true
trainer:
_target_: pytorch_lightning.Trainer
gpus: 0
min_epochs: 30
max_epochs: 30
min_epochs: 40
max_epochs: 40
log_every_n_steps: 1
check_val_every_n_epoch: 1
weights_summary: null
Expand All @@ -31,10 +31,51 @@ datamodule:
_target_: functools.partial
_args_:
- ${get_method:myria3d.data.loading.FrenchLidarDataLogic.load_las}
transforms:
preparations:
ToTensor:
_target_: myria3d.data.transforms.ToTensor
TargetTransform:
_target_: myria3d.data.transforms.TargetTransform
_args_:
- ${datamodule.dataset_description.classification_preprocessing_dict}
- ${datamodule.dataset_description.classification_dict}
GridSampling:
_target_: torch_geometric.transforms.GridSampling
_args_:
- 0.25
FixedPoints:
_target_: torch_geometric.transforms.FixedPoints
_args_:
- 12500
replace: false
allow_duplicates: true
Center:
_target_: torch_geometric.transforms.Center
augmentations:
x_flip:
_target_: torch_geometric.transforms.RandomFlip
_args_:
- 0
p: 0.5
y_flip:
_target_: torch_geometric.transforms.RandomFlip
_args_:
- 1
p: 0.5
normalizations:
NormalizePos:
_target_: myria3d.data.transforms.NormalizePos
StandardizeFeatures:
_target_: myria3d.data.transforms.StandardizeFeatures
augmentations_list: '${oc.dict.values: datamodule.transforms.augmentations}'
preparations_list: '${oc.dict.values: datamodule.transforms.preparations}'
normalizations_list: '${oc.dict.values: datamodule.transforms.normalizations}'
_target_: myria3d.data.datamodule.DataModule
prepared_data_dir: ${oc.env:PREPARED_DATA_DIR}
test_data_dir: ${datamodule.prepared_data_dir}/test/
batch_size: 16
prefetch_factor: 2
num_workers: 1
subtile_width_meters: 50
subtile_overlap: ${predict.subtile_overlap}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ myria3d.models.modules
PointNet
-------------

.. autoclass:: myria3d.models.modules.point_net.PointNet
.. autoclass:: myria3d.models.modules.point_net2.PointNet2
:members:

RandLA-Net
Expand Down
7 changes: 0 additions & 7 deletions docs/source/background/data_optimization.md

This file was deleted.

32 changes: 32 additions & 0 deletions docs/source/background/general_design.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# General design of the package

Here are a few challenges relative to training 3D segmentation models for Aerial High Density Lidar, and the strategies we adopt in Myria3D in order to face them.


## Subsampling is important to improve point cloud structure

**Situation**:
- Point Cloud data, and aerial Lidar data, represent rich data with a level of detail that might hinder the detection of objects with generally simple structures such as buildings, ground, and trees. On the other hand, smaller, more varied objects might benefit from denser point clouds.
- Another point to consider is that some 3D semantic segmentation architectures - including the RandLa-Net architecture we leverage - need fixed-size point clouds. This means that either subsampling or padding is required. This kind of implementation reduces flexibility and is suboptimal, but to our knowledge there are no alternative RandLa-Net implementations in pytorch that can accept different-size point clouds within the same batch.

**Strategy**:
- We leverage torch_geometric [GridSampling](https://pytorch-geometric.readthedocs.io/en/latest/modules/transforms.html#torch_geometric.transforms.GridSampling) and [FixedPoints](https://pytorch-geometric.readthedocs.io/en/latest/modules/transforms.html#torch_geometric.transforms.FixedPoints) to (i) simplify local point structures with a 0.25m resolution, and (ii) get a fixed-size point cloud that can be fed to the model. Grid Sampling has the effect of reducing point cloud size by around a third, with most reductions expected to occur in vegetation.

## Speed is of the essence
**Situation**:

- Short training times allow for faster iterations, more frequent feedback on architecture design, and less time spent on doomed solutions. As a result, we want to be parsimonious in terms of the operations performed during a train forward pass of a model.

**Strategy**:
- During training and validation phases, we perform supervision and back-propagation on the subsampled point cloud directly. Our hypothesis is that the gain we may expect from interpolating predicted logits to the full point cloud (usually from N'=~12500 to N~30000 on average for a 50mx50m sample) is tiny compared to the computational cost of such an operation (time of a forward pass multiplied by x5 to x10 with a batch size of 32 on CPU).


## Evaluation is key to select the right approach

**Situation**:

- Evaluation of models must be reliable in order to compare solutions. For semantic segmentation models on point cloud, this means that performance metrics (i.e. mean and by-class Intersection-over-Union) should be computed based on a confusion matrix that is computed from all points in all point clouds in the test dataset.

**Strategy**:
- During test phase, we **do** interpolate logits back to each sample (point cloud) before computing performance metrics. Interestingly, this enables comparing different subsampling approaches and interpolation methods in a robust way. The interpolation step is triggered in `eval` mode only, and is also leveraged during inference.
- This differentiated approach between `train` and `eval` modes has the drawback of requiring full (non-subsampled) positions and targets as well as subsampled, non-normalized positions to be copied and saved at data preparation time to allow for the interpolation.
4 changes: 1 addition & 3 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,8 @@ Myria3D > Documentation
:caption: Background

background/interpolation
background/data_optimization
background/general_design

.. TODO: ensure that all docstrings are in third-person mode.
.. TODO: find a way to document hydra config ; perhaps by switching to a full dataclasses mode.

.. toctree::
:maxdepth: 1
Expand Down
2 changes: 1 addition & 1 deletion docs/source/tutorials/make_predictions.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Refer to [this tutorial](./setup_install.md) for how to setup a virtual environm
To run inference, you will need:
- A source point cloud in LAS format on which to infer classes and probabilities.
- A checkpoint of a trained lightning module implementing model logic (class `myria3d.models.model.Model`)
- A minimal yaml configuration specifying parameters. We use [hydra](https://hydra.cc/) to manage configurations, and this yaml results from the model training. The `datamodule` and `model` parameters groups must match datset characteristics and model training settings. The `predict` parameters group specifies path to models and data as well as batch size (N=50 works well, the larger the faster) and use of gpu (optionnal).
- A minimal yaml configuration specifying parameters. We use [hydra](https://hydra.cc/) to manage configurations, and this yaml results from the model training. The `datamodule` and `model` parameters groups must match dataset characteristics and model training settings. The `predict` parameters group specifies the path to models and data as well as batch size (N=50 works well, the larger the faster) and use of gpu (optional). For hints on what to modify, see the `experiment/predict.yaml` file.

## Run inference from installed package

Expand Down
4 changes: 2 additions & 2 deletions myria3d/callbacks/logging_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,9 @@ def on_test_batch_end(
batch_idx: int,
dataloader_idx: int,
):
"""Log IoU for each class. Loop in case of multiple files in a single batch."""
"""Log IoU for each class."""
logits = outputs["logits"]
targets = batch.y
targets = outputs["targets"]
self.log_iou(logits, targets, "test", self.test_iou_by_class_dict)

def log_iou(self, logits, targets, phase: str, iou_dict):
Expand Down
13 changes: 8 additions & 5 deletions myria3d/data/datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,25 @@ class DataModule(LightningDataModule):

def __init__(self, **kwargs):
super().__init__()
# TODO: try to use save_hyperparameters to lighten this code.
# paths
self.prepared_data_dir = kwargs.get("prepared_data_dir")
self.test_data_dir = kwargs.get("test_data_dir")

# compute
self.num_workers = kwargs.get("num_workers", 0)

self.prefetch_factor = kwargs.get("prefetch_factor", 2)
# data preparation
self.subtile_width_meters = kwargs.get("subtile_width_meters", 50)
self.subtile_overlap = kwargs.get("subtile_overlap", 0)
self.batch_size = kwargs.get("batch_size", 32)
self.prefetch_factor = kwargs.get("prefetch_factor", 2)
self.augmentation_transforms = kwargs.get("augmentation_transforms", [])

# segmentation task
self.dataset_description = kwargs.get("dataset_description")
self.classification_dict = self.dataset_description.get("classification_dict")
self.classification_preprocessing_dict = self.dataset_description.get(
"classification_preprocessing_dict"
)
self.load_las = self.dataset_description.get("load_las_func")
# transforms
t = kwargs.get("transforms")
self.preparation_transforms = t.get("preparations_list")
self.augmentation_transforms = t.get("augmentations_list")
Expand Down Expand Up @@ -233,6 +234,8 @@ def yield_transformed_subtile_data(self):
# TODO: change to process time function
for xy_min_corner in centers:
data = self.extract_subtile_from_tile_data(tile_data, xy_min_corner)
if len(data.pos) < 50:
continue
if self.transform:
data = self.transform(data)
if data and (len(data.pos) > 50):
Expand Down
3 changes: 2 additions & 1 deletion myria3d/data/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ def split_and_save(self, filepath: str, output_subdir_path: str) -> None:
# Ignore if empty
break
subtile_data = self._extract_by_y(data_x_band)
self._save(subtile_data, output_subdir_path, idx)
if subtile_data and subtile_data.pos.shape[0] > 50:
self._save(subtile_data, output_subdir_path, idx)
idx += 1

def _find_file_in_dir(self, input_data_dir: str, basename: str) -> str:
Expand Down
Loading