From 8df872823614cc05ab8e44a8a68fcb8a87018956 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Mon, 4 Apr 2022 18:43:42 +0200 Subject: [PATCH 1/8] Move interpolation script with models where it belong --- configs/callbacks/default.yaml | 2 +- lidar_multiclass/{datamodules => models}/interpolation.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename lidar_multiclass/{datamodules => models}/interpolation.py (100%) diff --git a/configs/callbacks/default.yaml b/configs/callbacks/default.yaml index a77d2b11..55fe156e 100755 --- a/configs/callbacks/default.yaml +++ b/configs/callbacks/default.yaml @@ -18,7 +18,7 @@ log_iou_by_class: _target_: lidar_multiclass.callbacks.logging_callbacks.LogIoUByClass classification_dict: ${datamodule.dataset_description.classification_dict} interpolator: # only used at test time - _target_: lidar_multiclass.datamodules.interpolation.Interpolator + _target_: lidar_multiclass.models.interpolation.Interpolator interpolation_k: ${predict.interpolation_k} classification_dict: ${datamodule.dataset_description.classification_dict} probas_to_save: ${predict.probas_to_save} # replace by a list of string of class names to select specific probas to save diff --git a/lidar_multiclass/datamodules/interpolation.py b/lidar_multiclass/models/interpolation.py similarity index 100% rename from lidar_multiclass/datamodules/interpolation.py rename to lidar_multiclass/models/interpolation.py From 8c86ee1e78f2570dce1aa4b148ff597a398fe84f Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Tue, 5 Apr 2022 10:57:23 +0200 Subject: [PATCH 2/8] Refactor name of data scripts and add some docu --- docs/requirements.txt | 3 ++- docs/source/apidoc/lidar_multiclass.data.rst | 20 ++++++++++++++ .../apidoc/lidar_multiclass.datamodules.rst | 27 ------------------- docs/source/apidoc/lidar_multiclass.model.rst | 2 +- docs/source/index.rst | 2 +- 5 files changed, 24 insertions(+), 30 deletions(-) create mode 100644 docs/source/apidoc/lidar_multiclass.data.rst delete mode 100644 docs/source/apidoc/lidar_multiclass.datamodules.rst diff --git a/docs/requirements.txt b/docs/requirements.txt index 67a6d0c1..220eea6b 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -10,8 +10,9 @@ myst_parser==0.17.* sphinx_paramlinks==0.5.* recommonmark==0.7.* sphinxnotes-mock==1.0.0b0 # still a beta - +sphinx-argparse==0.3.* # Using docutils==0.17 + hydra-core rich comet_ml diff --git a/docs/source/apidoc/lidar_multiclass.data.rst b/docs/source/apidoc/lidar_multiclass.data.rst new file mode 100644 index 00000000..51fc84cf --- /dev/null +++ b/docs/source/apidoc/lidar_multiclass.data.rst @@ -0,0 +1,20 @@ +lidar\_multiclass.data +===================================== + +lidar\_multiclass.datamodules.datamodule +----------------------------------------------- + +.. automodule:: lidar_multiclass.data.datamodule + :members: + +lidar\_multiclass.datamodules.loading +----------------------------------------- + +.. automodule:: lidar_multiclass.data.loading + :members: + +lidar\_multiclass.datamodules.transforms +----------------------------------------------- + +.. 
automodule:: lidar_multiclass.data.transforms + :members: diff --git a/docs/source/apidoc/lidar_multiclass.datamodules.rst b/docs/source/apidoc/lidar_multiclass.datamodules.rst deleted file mode 100644 index 61fe9f35..00000000 --- a/docs/source/apidoc/lidar_multiclass.datamodules.rst +++ /dev/null @@ -1,27 +0,0 @@ -lidar\_multiclass.datamodules -===================================== - - -lidar\_multiclass.datamodules.data ------------------------------------------ - -.. automodule:: lidar_multiclass.datamodules.data - :members: - -lidar\_multiclass.datamodules.datamodule ------------------------------------------------ - -.. automodule:: lidar_multiclass.datamodules.datamodule - :members: - -lidar\_multiclass.datamodules.interpolation --------------------------------------------------- - -.. automodule:: lidar_multiclass.datamodules.interpolation - :members: - -lidar\_multiclass.datamodules.transforms ------------------------------------------------ - -.. automodule:: lidar_multiclass.datamodules.transforms - :members: diff --git a/docs/source/apidoc/lidar_multiclass.model.rst b/docs/source/apidoc/lidar_multiclass.model.rst index 02fa70b7..014ec8da 100644 --- a/docs/source/apidoc/lidar_multiclass.model.rst +++ b/docs/source/apidoc/lidar_multiclass.model.rst @@ -4,5 +4,5 @@ lidar\_multiclass.models Model ------------------------------------- -.. autoclass:: lidar_multiclass.models.model.Model +.. automodule:: lidar_multiclass.models.model :members: \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index 9277913e..813a4241 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -35,7 +35,7 @@ Lidar-Deep-Segmentation > Documentation :caption: Package Reference apidoc/scripts - apidoc/lidar_multiclass.datamodules + apidoc/lidar_multiclass.data apidoc/lidar_multiclass.model apidoc/lidar_multiclass.models.modules apidoc/lidar_multiclass.callbacks From 28f98bcd86e081a4b6db9af80e5696ee80c2edb1 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Tue, 5 Apr 2022 11:00:03 +0200 Subject: [PATCH 3/8] Refactor name of data scripts and add some docu --- bash/setup_environment/requirements.txt | 7 +- configs/datamodule/datamodule.yaml | 2 +- ...0220204_BuildingValidation_and_Ground.yaml | 2 +- .../SwissSURFACE3D_Building_class.yaml | 2 +- .../SwissSURFACE3D_all_6_classes.yaml | 2 +- configs/datamodule/subsampler/fps.yaml | 2 +- configs/datamodule/subsampler/grid.yaml | 2 +- configs/datamodule/subsampler/random.yaml | 2 +- .../callbacks/logging_callbacks.py | 2 +- lidar_multiclass/datamodules/__init__.py | 0 lidar_multiclass/datamodules/data.py | 397 ------------------ lidar_multiclass/datamodules/datamodule.py | 313 -------------- lidar_multiclass/datamodules/transforms.py | 333 --------------- lidar_multiclass/models/interpolation.py | 5 +- lidar_multiclass/predict.py | 3 +- 15 files changed, 17 insertions(+), 1057 deletions(-) delete mode 100755 lidar_multiclass/datamodules/__init__.py delete mode 100755 lidar_multiclass/datamodules/data.py delete mode 100755 lidar_multiclass/datamodules/datamodule.py delete mode 100755 lidar_multiclass/datamodules/transforms.py diff --git a/bash/setup_environment/requirements.txt b/bash/setup_environment/requirements.txt index 985a5527..22ce3477 100755 --- a/bash/setup_environment/requirements.txt +++ b/bash/setup_environment/requirements.txt @@ -10,5 +10,8 @@ rstcheck==3.3.* sphinx==4.5.* sphinx_rtd_theme==1.0.* myst_parser==0.17.* -sphinx_paramlinks -recommonmark==0.7.* \ No newline at end of file 
+sphinx_paramlinks==0.5.* +recommonmark==0.7.* +sphinxnotes-mock==1.0.0b0 # still a beta +sphinx-argparse==0.3.* # Using +docutils==0.17 \ No newline at end of file diff --git a/configs/datamodule/datamodule.yaml b/configs/datamodule/datamodule.yaml index 9c4c70ce..c36cd255 100755 --- a/configs/datamodule/datamodule.yaml +++ b/configs/datamodule/datamodule.yaml @@ -1,4 +1,4 @@ -_target_: lidar_multiclass.datamodules.datamodule.DataModule +_target_: lidar_multiclass.data.datamodule.DataModule prepared_data_dir: ${oc.env:PREPARED_DATA_DIR} diff --git a/configs/datamodule/dataset_description/20220204_BuildingValidation_and_Ground.yaml b/configs/datamodule/dataset_description/20220204_BuildingValidation_and_Ground.yaml index be397f42..3ddf5601 100644 --- a/configs/datamodule/dataset_description/20220204_BuildingValidation_and_Ground.yaml +++ b/configs/datamodule/dataset_description/20220204_BuildingValidation_and_Ground.yaml @@ -12,4 +12,4 @@ num_classes: 3 load_las_func: _target_: functools.partial _args_: - - "${get_method:lidar_multiclass.datamodules.data.FrenchLidarDataLogic.load_las}" \ No newline at end of file + - "${get_method:lidar_multiclass.data.loading.FrenchLidarDataLogic.load_las}" \ No newline at end of file diff --git a/configs/datamodule/dataset_description/SwissSURFACE3D_Building_class.yaml b/configs/datamodule/dataset_description/SwissSURFACE3D_Building_class.yaml index ff538881..fb8a5c83 100644 --- a/configs/datamodule/dataset_description/SwissSURFACE3D_Building_class.yaml +++ b/configs/datamodule/dataset_description/SwissSURFACE3D_Building_class.yaml @@ -12,4 +12,4 @@ num_classes: 2 load_las_func: _target_: functools.partial _args_: - - "${get_method:lidar_multiclass.datamodules.data.SwissTopoLidarDataLogic.load_las}" \ No newline at end of file + - "${get_method:lidar_multiclass.data.loading.SwissTopoLidarDataLogic.load_las}" \ No newline at end of file diff --git a/configs/datamodule/dataset_description/SwissSURFACE3D_all_6_classes.yaml b/configs/datamodule/dataset_description/SwissSURFACE3D_all_6_classes.yaml index d2fe2ea5..d27390ea 100644 --- a/configs/datamodule/dataset_description/SwissSURFACE3D_all_6_classes.yaml +++ b/configs/datamodule/dataset_description/SwissSURFACE3D_all_6_classes.yaml @@ -12,4 +12,4 @@ num_classes: 6 load_las_func: _target_: functools.partial _args_: - - "${get_method:lidar_multiclass.datamodules.data.SwissTopoLidarDataLogic.load_las}" \ No newline at end of file + - "${get_method:lidar_multiclass.data.loading.SwissTopoLidarDataLogic.load_las}" \ No newline at end of file diff --git a/configs/datamodule/subsampler/fps.yaml b/configs/datamodule/subsampler/fps.yaml index b5d2eac8..b811ec5a 100644 --- a/configs/datamodule/subsampler/fps.yaml +++ b/configs/datamodule/subsampler/fps.yaml @@ -1,2 +1,2 @@ -_target_: lidar_multiclass.datamodules.transforms.FPSSampler +_target_: lidar_multiclass.data.transforms.FPSSampler subsample_size: 12500 diff --git a/configs/datamodule/subsampler/grid.yaml b/configs/datamodule/subsampler/grid.yaml index 35ab35cf..8e66f66e 100644 --- a/configs/datamodule/subsampler/grid.yaml +++ b/configs/datamodule/subsampler/grid.yaml @@ -1,3 +1,3 @@ -_target_: lidar_multiclass.datamodules.transforms.CustomGridSampler +_target_: lidar_multiclass.data.transforms.CustomGridSampler subsample_size: 12500 voxel_size: 0.25 diff --git a/configs/datamodule/subsampler/random.yaml b/configs/datamodule/subsampler/random.yaml index 8ed6dfa8..8dd4d7d4 100644 --- a/configs/datamodule/subsampler/random.yaml +++ 
b/configs/datamodule/subsampler/random.yaml @@ -1,2 +1,2 @@ -_target_: lidar_multiclass.datamodules.transforms.RandomSampler +_target_: lidar_multiclass.data.transforms.RandomSampler subsample_size: 12500 \ No newline at end of file diff --git a/lidar_multiclass/callbacks/logging_callbacks.py b/lidar_multiclass/callbacks/logging_callbacks.py index 92770065..3b9cd522 100644 --- a/lidar_multiclass/callbacks/logging_callbacks.py +++ b/lidar_multiclass/callbacks/logging_callbacks.py @@ -6,7 +6,7 @@ import torch from torchmetrics import JaccardIndex from torchmetrics.functional.classification.jaccard import _jaccard_from_confmat -from lidar_multiclass.datamodules.interpolation import Interpolator +from lidar_multiclass.models.interpolation import Interpolator from lidar_multiclass.utils import utils log = utils.get_logger(__name__) diff --git a/lidar_multiclass/datamodules/__init__.py b/lidar_multiclass/datamodules/__init__.py deleted file mode 100755 index e69de29b..00000000 diff --git a/lidar_multiclass/datamodules/data.py b/lidar_multiclass/datamodules/data.py deleted file mode 100755 index 1ee83a21..00000000 --- a/lidar_multiclass/datamodules/data.py +++ /dev/null @@ -1,397 +0,0 @@ -"""This module contains - -1) Data loading logics specific to each data format. - The "load_las" class method can be passed to the datamodule at inference time. -2) A data preparation script for deep learning training. - From a data directory containing point cloud in LAS format, and a scv specifying the dataset - train/val/test split for each file (columns: split, basename, example: "val","123_456.las"), - split the dataset, chunk the point cloud tiles into smaller subtiles, and prepare each sample - as a pytorch geometric Data object. - - Echo numbers and colors are scaled to be in 0-1 range. Intensity and average color - are not scaled b/c they are expected to be standardized later. - -To show help, run - cd lidar_multiclass/datamodules/ - python prepare_french_lidar.py -h - -""" - -from abc import ABC, abstractmethod -import argparse -import os, glob -import os.path as osp -from shutil import copyfile -from tqdm import tqdm -import laspy -import numpy as np -import pandas as pd -import torch -from torch_geometric.data import Data - - -class LidarDataLogic(ABC): - """Abstract class to load, chunk, and save a point cloud dataset according to a train/val/test split. - load_las and its needed parameters ares specified in child classes. - - """ - - split = ["val", "train", "test"] - input_tile_width_meters = 1000 - subtile_width_meters = 50 - return_num_normalization_max_value = 7 - - def __init__(self, **kwargs): - self.input_data_dir = kwargs.get("input_data_dir") - self.prepared_data_dir = kwargs.get("prepared_data_dir") - self.split_csv = kwargs.get("split_csv") - self.range_by_axis = np.arange( - self.input_tile_width_meters // self.subtile_width_meters + 1 - ) - - @abstractmethod - def load_las(self, las_filepath: str) -> None: - """Load a point cloud in LAS format to memory and turn it into torch-geometric Data object. - - Args: - las_filepath (str): path to the LAS file. - - Returns: - Data: the point cloud formatted for later deep learning training. - """ - raise NotImplementedError - - def prepare(self): - """Prepare a dataset for model training and model evaluation. - - Iterates through LAS files listed in a csv metadata file. A `split` column - specifies the train/val/test split of the dataset to be created. 
- Depending on the set, this method will: - - train/val: - Load LAS into memory as a Data object with selected features, - then iteratively extract 50m*50m subtiles by filtering along x - then y axis. Serialize the resulting Data object using torch.save. - - test: - Simply copy the LAS to the new test folder. - - """ - split_df = pd.read_csv(self.split_csv) - for phase in tqdm(self.split, desc="Phases"): - basenames = split_df[split_df.split == phase].basename.tolist() - print(f"Subset: {phase}") - print(" - ".join(basenames)) - for file_basename in tqdm(basenames, desc="Files"): - filepath = self._find_file_in_dir(self.input_data_dir, file_basename) - output_subdir_path = osp.join(self.prepared_data_dir, phase) - if phase == "test": - os.makedirs(output_subdir_path, exist_ok=True) - target_file = osp.join(output_subdir_path, file_basename) - copyfile(filepath, target_file) - elif phase in ["train", "val"]: - output_subdir_path = osp.join( - output_subdir_path, osp.basename(filepath) - ) - os.makedirs(output_subdir_path, exist_ok=True) - self.split_and_save(filepath, output_subdir_path) - else: - raise KeyError("Phase should be one of train/val/test.") - - def split_and_save(self, filepath: str, output_subdir_path: str) -> None: - """Parse a LAS, extract and save each subtile as a Data object. - - Args: - filepath (str): input LAS file - output_subdir_path (str): output directory to save splitted `.data` objects. - """ - data = self.load_las(filepath) - idx = 0 - for _ in tqdm(self.range_by_axis): - if len(data.pos) == 0: - break - data_x_band = self._extract_by_x(data) - for _ in self.range_by_axis: - if len(data_x_band.pos) == 0: - break - subtile_data = self._extract_by_y(data_x_band) - self._save(subtile_data, output_subdir_path, idx) - idx += 1 - - def _find_file_in_dir(self, input_data_dir: str, basename: str) -> str: - """Query files with .las extension in subfolder of input_data_dir. - - Args: - input_data_dir (str): data directory - - Returns: - [str]: first file path matching the query. - - """ - query = f"{input_data_dir}*{basename}" - files = glob.glob(query) - return files[0] - - def _extract_by_axis(self, data: Data, axis=0) -> Data: - """Filter a data object on a chosen axis, using a relative position . - Modifies the original data object so that extracted future filters are faster. - - Args: - data (Data): a pyg Data object with pos, x, and y attributes. - relative_pos (int): where the data to extract start on chosen axis (typically in range 0-1000) - axis (int, optional): 0 for x and 1 for y axis. Defaults to 0. - - Returns: - Data: the data that is at most subtile_width_meters above relative_pos on the chosen axis. 
- """ - sub_tile_data = data.clone() - pos_axis = sub_tile_data.pos[:, axis] - absolute_low = pos_axis.min(0) - absolute_high = absolute_low + self.subtile_width_meters - mask = (absolute_low <= pos_axis) & (pos_axis <= absolute_high) - - # select - sub_tile_data.pos = sub_tile_data.pos[mask] - sub_tile_data.x = sub_tile_data.x[mask] - sub_tile_data.y = sub_tile_data.y[mask] - - data.pos = data.pos[~mask] - data.x = data.x[~mask] - data.y = data.y[~mask] - return sub_tile_data - - def _extract_by_x(self, data: Data) -> Data: - """extract_by_axis applied on first axis x""" - return self._extract_by_axis(data, axis=0) - - def _extract_by_y(self, data: Data) -> Data: - """extract_by_axis applied on second axis y""" - return self._extract_by_axis(data, axis=1) - - def _save(self, subtile_data: Data, output_subdir_path: str, idx: int) -> None: - """Save the subtile data object with torch. - - Args: - subtile_data (Data): the object to save. - output_subdir_path (str): the subfolder to save it. - idx (int): an arbitrary but unique subtile identifier. - """ - subtile_save_path = osp.join(output_subdir_path, f"{str(idx).zfill(4)}.data") - torch.save(subtile_data, subtile_save_path) - - -class FrenchLidarDataLogic(LidarDataLogic): - - x_features_names = [ - "intensity", - "return_num", - "num_returns", - "red", - "green", - "blue", - "nir", - "rgb_avg", - "ndvi", - ] - colors_normalization_max_value = 255 * 256 - - @classmethod - def load_las(self, las_filepath: str): - """Load a point cloud in LAS format to memory and turn it into torch-geometric Data object. - Build a composite (average) color channel on the fly. - Calculate NDVI on the fly. - - Args: - las_filepath (str): path to the LAS file. - - Returns: - Data: the point cloud formatted for later deep learning training. 
- - """ - las = laspy.read(las_filepath) - pos = np.asarray([las.x, las.y, las.z], dtype=np.float32).transpose() - - x = np.asarray( - [ - las[x_name] - for x_name in [ - "intensity", - "return_num", - "num_returns", - "red", - "green", - "blue", - "nir", - ] - ], - dtype=np.float32, - ).transpose() - - return_num_idx = self.x_features_names.index("return_num") - occluded_points = x[:, return_num_idx] > 1 - - x[:, return_num_idx] = (x[:, return_num_idx]) / ( - self.return_num_normalization_max_value - ) - num_return_idx = self.x_features_names.index("num_returns") - x[:, num_return_idx] = (x[:, num_return_idx]) / ( - self.return_num_normalization_max_value - ) - - for idx, c in enumerate(self.x_features_names): - if c in ["red", "green", "blue", "nir"]: - assert x[:, idx].max() <= self.colors_normalization_max_value - x[:, idx] = x[:, idx] / self.colors_normalization_max_value - x[occluded_points, idx] = 0 - - red = x[:, self.x_features_names.index("red")] - green = x[:, self.x_features_names.index("green")] - blue = x[:, self.x_features_names.index("blue")] - - rgb_avg = np.asarray([red, green, blue], dtype=np.float32).mean(axis=0) - - nir = x[:, self.x_features_names.index("nir")] - ndvi = (nir - red) / (nir + red + 10**-6) - x = np.concatenate([x, rgb_avg[:, None], ndvi[:, None]], axis=1) - - try: - # for LAS format V1.2 - y = las.classification.array.astype(int) - except: - # for LAS format V1.4 - y = las.classification.astype(int) - - return Data( - pos=pos, - x=x, - y=y, - las_filepath=las_filepath, - x_features_names=self.x_features_names, - ) - - -class SwissTopoLidarDataLogic(LidarDataLogic): - x_features_names = [ - "intensity", - "return_num", - "num_returns", - "red", - "green", - "blue", - "rgb_avg", - ] - colors_normalization_max_value = 256 - - @classmethod - def load_las(self, las_filepath: str) -> Data: - """Load a point cloud in LAS format to memory and turn it into torch-geometric Data object. - Build a composite (average) color channel on the fly. - - Args: - las_filepath (str): path to the LAS file. - - Returns: - Data: the point cloud formatted for later deep learning training. 
- - """ - las = laspy.read(las_filepath) - pos = np.asarray([las.x, las.y, las.z], dtype=np.float32).transpose() - - x = np.asarray( - [ - las[x_name] - for x_name in [ - "intensity", - "return_num", - "num_returns", - "red", - "green", - "blue", - ] - ], - dtype=np.float32, - ).transpose() - - return_num_idx = self.x_features_names.index("return_num") - occluded_points = x[:, return_num_idx] > 1 - - x[:, return_num_idx] = (x[:, return_num_idx]) / ( - self.return_num_normalization_max_value - ) - num_return_idx = self.x_features_names.index("num_returns") - x[:, num_return_idx] = (x[:, num_return_idx]) / ( - self.return_num_normalization_max_value - ) - - for idx, c in enumerate(self.x_features_names): - if c in ["red", "green", "blue"]: - assert x[:, idx].max() <= self.colors_normalization_max_value - x[:, idx] = x[:, idx] / self.colors_normalization_max_value - x[occluded_points, idx] = 0 - - rgb_avg = ( - np.asarray( - [las[x_name] for x_name in ["red", "green", "blue"]], dtype=np.float32 - ) - .transpose() - .mean(axis=1, keepdims=True) - ) - - x = np.concatenate([x, rgb_avg], axis=1) - - try: - # for LAS format V1.2 - y = las.classification.array.astype(int) - except: - # for LAS format V1.4 - y = las.classification.astype(int) - - return Data( - pos=pos, - x=x, - y=y, - las_filepath=las_filepath, - x_features_names=self.x_features_names, - ) - - -def main(): - """Main logic to prepare a new set of Lidar tiles for model training.""" - - parser = argparse.ArgumentParser( - description="Prepare a Lidar dataset for deep learning." - ) - parser.add_argument( - "--split_csv", - type=str, - default="./split.csv", - help="Path to csv with a basename (e.g. '123_456.las') and split (train/val/test) columns specifying the dataset split.", - ) - parser.add_argument( - "--input_data_dir", - type=str, - default="./data/raw/", - help="Path to folder with las files stored in train/val/test subfolders.", - ) - parser.add_argument( - "--prepared_data_dir", - type=str, - default="./prepared/", - help="Path to folder to save Data object train/val/test subfolders.", - ) - parser.add_argument( - "--origin", - type=str, - default="FR", - ) - args = parser.parse_args() - if args.origin == "FR": - data_prepper = FrenchLidarDataLogic(**args.__dict__) - data_prepper.prepare() - if args.origin == "CH": - data_prepper = SwissTopoLidarDataLogic(**args.__dict__) - data_prepper.prepare() - - -if __name__ == "__main__": - main() diff --git a/lidar_multiclass/datamodules/datamodule.py b/lidar_multiclass/datamodules/datamodule.py deleted file mode 100755 index 3770989f..00000000 --- a/lidar_multiclass/datamodules/datamodule.py +++ /dev/null @@ -1,313 +0,0 @@ -import os.path as osp -import glob -import time -import numpy as np -from typing import Optional, List, AnyStr -from numbers import Number -from pytorch_lightning import LightningDataModule -from torch.utils.data import DataLoader, Dataset -from torch.utils.data.dataset import IterableDataset -from torch_geometric.transforms import RandomFlip -from torch_geometric.data.data import Data -from torch_geometric.transforms.center import Center -from lidar_multiclass.utils import utils -from lidar_multiclass.datamodules.transforms import * - -from lidar_multiclass.utils import utils - -log = utils.get_logger(__name__) - - -class DataModule(LightningDataModule): - """ - Datamdule to feed train and validation data to the model. - We use a custome sampler during training to consecutively load several - subtiles from a single tile, to reduce the I/O footprint. 
- """ - - def __init__(self, **kwargs): - super().__init__() - # TODO: try to use save_hyperparameters to lightne this code. - self.prepared_data_dir = kwargs.get("prepared_data_dir") - - self.num_workers = kwargs.get("num_workers", 0) - - self.subtile_width_meters = kwargs.get("subtile_width_meters", 50) - self.subtile_overlap = kwargs.get("subtile_overlap", 0) - self.batch_size = kwargs.get("batch_size", 32) - self.augment = kwargs.get("augment", True) - self.subsampler = kwargs.get("subsampler") - - self.dataset_description = kwargs.get("dataset_description") - self.classification_dict = self.dataset_description.get("classification_dict") - self.classification_preprocessing_dict = self.dataset_description.get( - "classification_preprocessing_dict" - ) - - self.train_data: Optional[Dataset] = None - self.val_data: Optional[Dataset] = None - self.test_data: Optional[Dataset] = None - self.predict_data: Optional[Dataset] = None - - self.load_las = self.dataset_description.get("load_las_func") - self._set_all_transforms() - - def setup(self, stage: Optional[str] = None): - """ - Load data. Set variables: self.data_train, self.data_val, self.data_test. - test_data = val data, because we only use all validation data after training. - Test data can be used but only after final model is chosen. - """ - if stage == "fit" or stage is None: - self._set_train_data() - self._set_val_data() - - if stage == "test" or stage is None: - self._set_test_data() - - def _set_train_data(self): - """Get the train dataset""" - files = glob.glob( - osp.join(self.prepared_data_dir, "train", "**", "*.data"), recursive=True - ) - self.train_data = LidarMapDataset( - files, - loading_function=torch.load, - transform=self._get_train_transforms(), - target_transform=TargetTransform( - self.classification_preprocessing_dict, - self.classification_dict, - ), - ) - - def _set_val_data(self): - """Get the validation dataset""" - files = glob.glob( - osp.join(self.prepared_data_dir, "val", "**", "*.data"), recursive=True - ) - log.info(f"Validation on {len(files)} subtiles.") - self.val_data = LidarMapDataset( - files, - loading_function=torch.load, - transform=self._get_val_transforms(), - target_transform=TargetTransform( - self.classification_preprocessing_dict, - self.classification_dict, - ), - ) - - def _set_test_data(self): - """Get the test dataset. 
User need to explicitely require the use of test set, which is kept out of experiment until the end.""" - files = glob.glob( - osp.join(self.prepared_data_dir, "test", "**", "*.las"), recursive=True - ) - self.test_data = LidarIterableDataset( - files, - loading_function=self.load_las, - transform=self._get_test_transforms(), - target_transform=TargetTransform( - self.classification_preprocessing_dict, self.classification_dict - ), - subtile_width_meters=self.subtile_width_meters, - subtile_overlap=self.subtile_overlap, - ) - - def _set_predict_data(self, files: List[str]): - """This is used in predict.py, with a single file in a list.""" - self.predict_data = LidarIterableDataset( - files, - loading_function=self.load_las, - transform=self._get_predict_transforms(), - target_transform=None, - subtile_width_meters=self.subtile_width_meters, - subtile_overlap=self.subtile_overlap, - ) - - def train_dataloader(self): - """Get train dataloader.""" - return DataLoader( - dataset=self.train_data, - batch_size=self.batch_size, - shuffle=True, - num_workers=self.num_workers, - collate_fn=collate_fn, - prefetch_factor=1, - ) - - def val_dataloader(self): - """Get val dataloader.""" - return DataLoader( - dataset=self.val_data, - batch_size=self.batch_size, - shuffle=False, - num_workers=self.num_workers, - collate_fn=collate_fn, - prefetch_factor=1, - ) - - def test_dataloader(self): - """Get test dataloader.""" - return DataLoader( - dataset=self.test_data, - batch_size=self.batch_size, - shuffle=False, - num_workers=1, - collate_fn=collate_fn, - prefetch_factor=1, - ) - - def predict_dataloader(self): - return DataLoader( - dataset=self.predict_data, - batch_size=self.batch_size, - shuffle=False, - num_workers=1, # b/c terable dataloader - collate_fn=collate_fn, - prefetch_factor=1, - ) - - def _set_all_transforms(self): - """ - Set transforms that are shared between train/val-test. - Called at initialization. 
- """ - - self.preparation = [ - EmptySubtileFilter(), - ToTensor(), - MakeCopyOfPosAndY(), - self.subsampler, - MakeCopyOfSampledPos(), - Center(), - ] - self.augmentation = [] - if self.augment: - self.augmentation = [RandomFlip(0, p=0.5), RandomFlip(1, p=0.5)] - self.normalization = [NormalizePos(), StandardizeFeatures()] - - def _get_train_transforms(self) -> CustomCompose: - """Create a transform composition for train phase.""" - return CustomCompose(self.preparation + self.augmentation + self.normalization) - - def _get_val_transforms(self) -> CustomCompose: - """Create a transform composition for val phase.""" - return CustomCompose(self.preparation + self.normalization) - - def _get_test_transforms(self) -> CustomCompose: - """Create a transform composition for test phase.""" - return self._get_val_transforms() - - def _get_predict_transforms(self) -> CustomCompose: - """Create a transform composition for predict phase.""" - return self._get_val_transforms() - - -class LidarMapDataset(Dataset): - def __init__( - self, - files: List[str], - loading_function=None, - transform=None, - target_transform=None, - ): - self.files = files - self.num_files = len(self.files) - - self.loading_function = loading_function - self.transform = transform - self.target_transform = target_transform - - def __getitem__(self, idx): - """Load a subtile and apply the transforms specified in datamodule.""" - filepath = self.files[idx] - - data = self.loading_function(filepath) - if self.transform: - data = self.transform(data) - if data is None: - return None - if self.target_transform: - data = self.target_transform(data) - - return data - - def __len__(self): - return self.num_files - - -class LidarIterableDataset(IterableDataset): - def __init__( - self, - files, - loading_function=None, - transform=None, - target_transform=None, - subtile_width_meters: Number = 50, - subtile_overlap: Number = 0, - ): - self.files = files - self.loading_function = loading_function - self.transform = transform - self.target_transform = target_transform - self.subtile_width_meters = subtile_width_meters - self.subtile_overlap = subtile_overlap - - def yield_transformed_subtile_data(self): - """Yield subtiles from all tiles in an exhaustive fashion.""" - - for idx, filepath in enumerate(self.files): - log.info(f"Parsing file {idx+1}/{len(self.files)} [{filepath}]") - tile_data = self.loading_function(filepath) - centers = self.get_all_subtiles_xy_min_corner(tile_data) - # TODO: change to process time function - ts = time.time() - for xy_min_corner in centers: - data = self.extract_subtile_from_tile_data(tile_data, xy_min_corner) - if self.transform: - data = self.transform(data) - if data is not None: - if self.target_transform: - data = self.target_transform(data) - yield data - - def __iter__(self): - return self.yield_transformed_subtile_data() - - def get_all_subtiles_xy_min_corner(self, data: Data): - """Get centers of square subtiles of specified width, assuming rectangular form of input cloud.""" - - low = data.pos[:, :2].min(0) - high = data.pos[:, :2].max(0) - xy_min_corners = [ - np.array([x, y]) - for x in np.arange( - start=low[0], - stop=high[0] + 1, - step=self.subtile_width_meters - self.subtile_overlap, - ) - for y in np.arange( - start=low[1], - stop=high[1] + 1, - step=self.subtile_width_meters - self.subtile_overlap, - ) - ] - # random.shuffle(centers) - return xy_min_corners - - def extract_subtile_from_tile_data(self, data: Data, low_xy): - """Extract the subset from xy_min_corner to xy_min_corner + 
self.subtile_width_meters - - Args: - tile_data (Data): The full tile data. - xy_min_corner (np.array): Coordonates of xy min corner of subtile to extract. - """ - high_xy = low_xy + self.subtile_width_meters - mask_x = (low_xy[0] <= data.pos[:, 0]) & (data.pos[:, 0] <= high_xy[0]) - mask_y = (low_xy[1] <= data.pos[:, 1]) & (data.pos[:, 1] <= high_xy[1]) - mask = mask_x & mask_y - - sub = data.clone() - sub.pos = sub.pos[mask] - sub.x = sub.x[mask] - sub.y = sub.y[mask] - return sub diff --git a/lidar_multiclass/datamodules/transforms.py b/lidar_multiclass/datamodules/transforms.py deleted file mode 100755 index 23fb1790..00000000 --- a/lidar_multiclass/datamodules/transforms.py +++ /dev/null @@ -1,333 +0,0 @@ -import math -from enum import Enum -from numbers import Number -from typing import Callable, Dict, List, Tuple - -import numpy as np -import torch -import torch_geometric -from torch_geometric.data import Batch, Data -from torch_geometric.transforms import BaseTransform -from torch_geometric.nn.pool import fps -from torch_scatter import scatter_add, scatter_mean -import torch.nn.functional as F -from lidar_multiclass.utils import utils - -log = utils.get_logger(__name__) - - -class ChannelNames(Enum): - """Names of custom additional LAS channel.""" - - PredictedClassification = "PredictedClassification" - ProbasEntropy = "entropy" - - -class CustomCompose(BaseTransform): - """ - Composes several transforms together. - Edited to bypass downstream transforms if None is returned by a transform. - - Args: - transforms (List[Callable]): List of transforms to compose. - - """ - - def __init__(self, transforms: List[Callable]): - self.transforms = transforms - - def __call__(self, data): - for transform in self.transforms: - if isinstance(data, (list, tuple)): - data = [transform(d) for d in data] - data = filter(lambda x: x is not None, data) - else: - data = transform(data) - if data is None: - return None - return data - - -class EmptySubtileFilter(BaseTransform): - """Filter out almost empty subtiles""" - - def __call__(self, data: Data, min_num_points_subtile: int = 50): - if len(data["x"]) < min_num_points_subtile: - return None - return data - - -class ToTensor(BaseTransform): - """Turn np.arrays specified by their keys into Tensor.""" - - def __init__(self, keys=["pos", "x", "y"]): - self.keys = keys - - def __call__(self, data: Data): - for key in data.keys: - if key in self.keys: - data[key] = torch.from_numpy(data[key]) - return data - - -class MakeCopyOfPosAndY(BaseTransform): - """Make a copy of the full cloud's positions and labels, for inference interpolation.""" - - def __call__(self, data: Data): - data["pos_copy"] = data["pos"].clone() - data["y_copy"] = data["y"].clone() - return data - - -class Subsampler(BaseTransform): - """Base class for custom cloud subsampler to inherit from. - - Subsampling to a unique size is needed for batching clouds with different initial size. 
- Subclasses are modified from https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/transforms/, - to preserve specific attributes of the data for inference interpolation - - """ - - sampling_keys: Tuple[str] = ("x", "pos", "y") - - def _call_(self, data: Data): - raise NotImplementedError("Use a non-abstract subsampler class instead.") - - -class RandomSampler(Subsampler): - """Samples a fixed number of points from a point cloud, randomly.""" - - def __init__(self, subsample_size: int = 12500): - self.subsample_size = subsample_size - - def __call__(self, data: Data): - num_nodes = data.num_nodes - choice = torch.cat( - [ - torch.randperm(num_nodes) - for _ in range(math.ceil(self.subsample_size / num_nodes)) - ], - dim=0, - )[: self.subsample_size] - - for key in self.sampling_keys: - data[key] = data[key][choice] - - return data - - -class FPSSampler(Subsampler): - """ - Samples a fixed number of points from a point cloud, using Fartest Point Sampling. - - In our experiments, FPS is slower by an order of magnitude than Random/Grid sampling, and yields worst results. - - See https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html?highlight=fps#torch_geometric.nn.pool.fps - - """ - - def __init__(self, subsample_size: int = 12500): - self.subsample_size = subsample_size - self.rs = RandomSampler(subsample_size=subsample_size) - - def __call__(self, data: Data): - num_nodes = data.num_nodes - # Random sampling if we are short in points - if num_nodes < self.subsample_size: - return self.rs(data) - - # Else, use Farthest Point Sampling - ratio = (self.subsample_size / num_nodes) + 0.01 - choice = fps(data.pos, ratio=ratio, random_start=False) - choice = choice[: self.subsample_size] - for key in self.sampling_keys: - data[key] = data[key][choice] - return data - - -class CustomGridSampler(Subsampler): - """Samples a point cloud, using a voxel grid. - - A final random sampling is then needed to have a fixed number of points. - See https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/transforms/grid_sampling.html#GridSampling - - """ - - def __init__(self, subsample_size: int = 12500, voxel_size: Number = 0.25): - self.subsample_size = subsample_size - self.rs = RandomSampler(subsample_size=subsample_size) - self.voxel_size = voxel_size - - def __call__(self, data: Data) -> Data: - num_nodes = data.num_nodes - - # Random sampling if we are short in points - if num_nodes < self.subsample_size: - return self.rs(data) - - batch = data.get("batch", None) - - c = torch_geometric.nn.voxel_grid(data.pos, self.voxel_size, batch, None, None) - c, perm = torch_geometric.nn.pool.consecutive.consecutive_cluster(c) - - for key in self.sampling_keys: - item = data[key] - if torch.is_tensor(item) and item.size(0) == num_nodes: - if key == "y": - item = F.one_hot(item) - item = scatter_add(item, c, dim=0) - data[key] = item.argmax(dim=-1) - elif key == "batch": - data[key] = item[perm] - else: - data[key] = scatter_mean(item, c, dim=0) - # Up or downsample to get to subsample_size - data = self.rs(data) - return data - - -class MakeCopyOfSampledPos(BaseTransform): - """Make a copy of the unormalized positions of subsampled points.""" - - def __call__(self, data: Data): - data["pos_copy_subsampled"] = data["pos"].clone() - return data - - -class StandardizeFeatures(BaseTransform): - """Scale features in 0-1 range. - Additionnaly : use reserved -0.75 value for occluded points colors(normal range is -0.5 to 0.5). 
- - """ - - def __call__(self, data: Data): - idx = data.x_features_names.index("intensity") - data.x[:, idx] = self._log(data.x[:, idx], shift=1) - data.x[:, idx] = self._standardize_channel(data.x[:, idx]) - idx = data.x_features_names.index("rgb_avg") - data.x[:, idx] = self._standardize_channel(data.x[:, idx]) - return data - - def _log(self, channel_data, shift: float = 0.0): - return torch.log(channel_data + shift) - - def _standardize_channel(self, channel_data: torch.Tensor, clamp_sigma: int = 3): - """Sample-wise standardization y* = (y-y_mean)/y_std""" - mean = channel_data.mean() - std = channel_data.std() + 10**-6 - standard = (channel_data - mean) / std - clamp = clamp_sigma * std - clamped = torch.clamp(input=standard, min=-clamp, max=clamp) - return clamped - - -class NormalizePos(BaseTransform): - """ - Normalizes positions: - - xy positions to be in the interval (-1, 1) - - z position to start at 0. - - preserve euclidian distances - - XYZ are expected to be centered already. - - """ - - def __call__(self, data): - xy_positive_amplitude = data.pos[:, :2].abs().max() - xy_scale = (1 / xy_positive_amplitude) * 0.999999 - data.pos[:, :2] = data.pos[:, :2] * xy_scale - data.pos[:, 2] = (data.pos[:, 2] - data.pos[:, 2].min()) * xy_scale - - return data - - def __repr__(self): - return "{}()".format(self.__class__.__name__) - - -class TargetTransform(BaseTransform): - """ - Make target vector based on input classification dictionnary. - - Example: - Source : y = [6,6,17,9,1] - Pre-processed: - - classification_preprocessing_dict = {17:1, 9:1} - - y' = [6,6,1,1,1] - Mapped to consecutive integers: - - classification_dict = {1:"unclassified", 6:"building"} - - y'' = [1,1,0,0,0] - - """ - - def __init__( - self, - classification_preprocessing_dict: Dict[int, int], - classification_dict: Dict[int, str], - ): - - self._set_preprocessing_mapper(classification_preprocessing_dict) - self._set_mapper(classification_dict) - - def __call__(self, data: Data): - data.y = self.transform(data.y) - data.y_copy = self.transform(data.y_copy) - return data - - def transform(self, y): - y = self.preprocessing_mapper(y) - y = self.mapper(y) - return torch.LongTensor(y) - - def _set_preprocessing_mapper(self, classification_preprocessing_dict): - """Set mapper from source classification code to another code.""" - d = {key: value for key, value in classification_preprocessing_dict.items()} - self.preprocessing_mapper = np.vectorize( - lambda class_code: d.get(class_code, class_code) - ) - - def _set_mapper(self, classification_dict): - """Set mapper from source classification code to consecutive integers.""" - d = { - class_code: class_index - for class_index, class_code in enumerate(classification_dict.keys()) - } - self.mapper = np.vectorize(lambda class_code: d.get(class_code)) - - -def collate_fn(data_list: List[Data]) -> Batch: - """ - Batch Data objects from a list, to be used in DataLoader. Modified from: - https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/loader/dense_data_loader.html?highlight=collate_fn - - """ - batch = Batch() - data_list = list(filter(lambda x: x is not None, data_list)) - - # 1: add everything as list of non-Tensor object to facilitate adding new attributes. - for key in data_list[0].keys: - batch[key] = [data[key] for data in data_list] - - # 2: define relevant Tensor in long PyG format. 
- keys_to_long_format = ["pos", "x", "y", "pos_copy", "pos_copy_subsampled", "y_copy"] - for key in keys_to_long_format: - batch[key] = torch.cat([data[key] for data in data_list]) - - # 3. Create a batch index - batch.batch_x = torch.from_numpy( - np.concatenate( - [ - np.full(shape=len(data["y"]), fill_value=i) - for i, data in enumerate(data_list) - ] - ) - ) - batch.batch_y = torch.from_numpy( - np.concatenate( - [ - np.full(shape=len(data["pos_copy"]), fill_value=i) - for i, data in enumerate(data_list) - ] - ) - ) - batch.batch_size = len(data_list) - return batch diff --git a/lidar_multiclass/models/interpolation.py b/lidar_multiclass/models/interpolation.py index 3e3efa3b..72107cd4 100644 --- a/lidar_multiclass/models/interpolation.py +++ b/lidar_multiclass/models/interpolation.py @@ -1,5 +1,6 @@ +"""How we turn from prediction made on a subsampled subset of a Las to a complete point cloud.""" + import os -from tokenize import Number from typing import Dict, List, Optional, Literal, Union import pdal @@ -11,7 +12,7 @@ from lidar_multiclass.utils import utils from torch.distributions import Categorical -from lidar_multiclass.datamodules.transforms import ChannelNames +from lidar_multiclass.data.transforms import ChannelNames log = utils.get_logger(__name__) diff --git a/lidar_multiclass/predict.py b/lidar_multiclass/predict.py index 63d21c01..92d6d16b 100644 --- a/lidar_multiclass/predict.py +++ b/lidar_multiclass/predict.py @@ -2,12 +2,11 @@ import hydra import torch from omegaconf import DictConfig, OmegaConf -from typing import Optional from pytorch_lightning import LightningDataModule, LightningModule from tqdm import tqdm from lidar_multiclass.utils import utils -from lidar_multiclass.datamodules.interpolation import Interpolator +from lidar_multiclass.models.interpolation import Interpolator log = utils.get_logger(__name__) From 614400f1ec26b22962dde0293ef457dae06f2ec8 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Tue, 5 Apr 2022 11:04:21 +0200 Subject: [PATCH 4/8] add todo --- docs/source/apidoc/lidar_multiclass.model.rst | 6 ++++++ docs/source/index.rst | 1 + 2 files changed, 7 insertions(+) diff --git a/docs/source/apidoc/lidar_multiclass.model.rst b/docs/source/apidoc/lidar_multiclass.model.rst index 014ec8da..67e1ffed 100644 --- a/docs/source/apidoc/lidar_multiclass.model.rst +++ b/docs/source/apidoc/lidar_multiclass.model.rst @@ -5,4 +5,10 @@ Model ------------------------------------- .. automodule:: lidar_multiclass.models.model + :members: + +Interpolation +------------------------------------- + +.. automodule:: lidar_multiclass.models.interpolation :members: \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index 813a4241..ace6711a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -29,6 +29,7 @@ Lidar-Deep-Segmentation > Documentation background/interpolation background/data_optimization +.. TODO: assure that all dosctrings are in third-personn mode. .. 
toctree:: :maxdepth: 1 From 60a0c8ee888c1797e216c3cde3e8e25ebe0fdd13 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Tue, 5 Apr 2022 11:23:20 +0200 Subject: [PATCH 5/8] Add try clause to comet imports --- docs/requirements.txt | 5 +++-- docs/source/apidoc/configs.rst | 10 ++++++++++ docs/source/conf.py | 1 + docs/source/index.rst | 1 + lidar_multiclass/callbacks/comet_callbacks.py | 9 ++++++++- lidar_multiclass/train.py | 9 ++++++++- 6 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 docs/source/apidoc/configs.rst diff --git a/docs/requirements.txt b/docs/requirements.txt index 220eea6b..2fd2aab1 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,6 +3,9 @@ setuptools numpy numpydoc +tqdm +hydra-core +rich sphinx==4.5.* sphinx_rtd_theme==1.0.* @@ -13,8 +16,6 @@ sphinxnotes-mock==1.0.0b0 # still a beta sphinx-argparse==0.3.* # Using docutils==0.17 -hydra-core -rich comet_ml torch==1.10.1 diff --git a/docs/source/apidoc/configs.rst b/docs/source/apidoc/configs.rst new file mode 100644 index 00000000..d4f35557 --- /dev/null +++ b/docs/source/apidoc/configs.rst @@ -0,0 +1,10 @@ +Configs +=============================== + +Configs are managed with `hydra`. Here, we show the default configuration at a glance. +Refer to source documentation files for more info on their definition. + +.. _hydra: https://hydra.cc/ +.. File apidoc/configs_concatenation.yml is created at documentation build. + +.. autoyaml:: apidoc/configs_concatenation.yml \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 95ae36d3..1f9edcbc 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -52,6 +52,7 @@ "myst_parser", # supports markdown syntax for doc pages "sphinx_paramlinks", # allow to reference params, which is done in pytorch_lightning "sphinxnotes.mock", # ignore third-parties directive suche as "testcode" - see "mock_directive" args below + "sphinxcontrib.autoyaml", # Autodocumentation of yaml files. ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/index.rst b/docs/source/index.rst index ace6711a..9b1fae95 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -41,6 +41,7 @@ Lidar-Deep-Segmentation > Documentation apidoc/lidar_multiclass.models.modules apidoc/lidar_multiclass.callbacks apidoc/lidar_multiclass.utils + apidoc/configs Indices and Tables diff --git a/lidar_multiclass/callbacks/comet_callbacks.py b/lidar_multiclass/callbacks/comet_callbacks.py index 061f5655..bc2d0ef0 100755 --- a/lidar_multiclass/callbacks/comet_callbacks.py +++ b/lidar_multiclass/callbacks/comet_callbacks.py @@ -1,5 +1,12 @@ +# It is safer to import comet before all other imports. +try: + import comet_ml +except: + print( + "Warning: package comet_ml not found. This may break things if you use a comet callback." + ) + import os -import comet_ml from pathlib import Path from pytorch_lightning import Callback, Trainer diff --git a/lidar_multiclass/train.py b/lidar_multiclass/train.py index 16b3f92b..63043ddd 100755 --- a/lidar_multiclass/train.py +++ b/lidar_multiclass/train.py @@ -1,6 +1,13 @@ +# It is safer to import comet before all other imports. +try: + import comet_ml +except: + print( + "Warning: package comet_ml not found. This may break things if you use a comet callback." 
+ ) + import copy import os -import comet_ml from typing import List, Optional import hydra From 436c20bdbc7a12bd4593b644a28b34f8ffa831d2 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Tue, 5 Apr 2022 11:23:39 +0200 Subject: [PATCH 6/8] Remove trash file --- =0.17 | 1 - 1 file changed, 1 deletion(-) delete mode 100644 =0.17 diff --git a/=0.17 b/=0.17 deleted file mode 100644 index d99133c1..00000000 --- a/=0.17 +++ /dev/null @@ -1 +0,0 @@ -Requirement already satisfied: docutils in /home/CGaydon/anaconda3/envs/sphinx_doc_req/lib/python3.9/site-packages (0.17) From cfc3815ef95e55ce6406b5ee0f70d111aa16b714 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Tue, 5 Apr 2022 16:40:41 +0200 Subject: [PATCH 7/8] Include default config to doc --- bash/setup_environment/requirements.txt | 1 - docs/requirements.txt | 3 +- docs/source/apidoc/configs.rst | 11 +- docs/source/apidoc/default_config.yml | 136 ++++++++++++++++++ docs/source/apidoc/lidar_multiclass.utils.rst | 8 -- docs/source/conf.py | 21 ++- docs/source/index.rst | 3 +- 7 files changed, 161 insertions(+), 22 deletions(-) create mode 100644 docs/source/apidoc/default_config.yml diff --git a/bash/setup_environment/requirements.txt b/bash/setup_environment/requirements.txt index 22ce3477..c2e651f6 100755 --- a/bash/setup_environment/requirements.txt +++ b/bash/setup_environment/requirements.txt @@ -1,7 +1,6 @@ # --------- hydra --------- # hydra-core==1.1.0 hydra-colorlog==1.1.0 -optuna>=2.5 # --------- RST Linter --------- # rstcheck==3.3.* diff --git a/docs/requirements.txt b/docs/requirements.txt index 2fd2aab1..f5d72c89 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -4,7 +4,8 @@ setuptools numpy numpydoc tqdm -hydra-core +hydra-core==1.1.0 +hydra-colorlog==1.1.* rich sphinx==4.5.* diff --git a/docs/source/apidoc/configs.rst b/docs/source/apidoc/configs.rst index d4f35557..ae8e215e 100644 --- a/docs/source/apidoc/configs.rst +++ b/docs/source/apidoc/configs.rst @@ -1,10 +1,11 @@ -Configs +Default configuration =============================== -Configs are managed with `hydra`. Here, we show the default configuration at a glance. -Refer to source documentation files for more info on their definition. +Configurations are managed with `hydra`. Here, we show the default configuration at a glance. + +Refer to source configurations files in folder `configs` for more information. .. _hydra: https://hydra.cc/ -.. File apidoc/configs_concatenation.yml is created at documentation build. -.. autoyaml:: apidoc/configs_concatenation.yml \ No newline at end of file +.. 
literalinclude:: default_config.yml + :language: yaml diff --git a/docs/source/apidoc/default_config.yml b/docs/source/apidoc/default_config.yml new file mode 100644 index 00000000..adce4220 --- /dev/null +++ b/docs/source/apidoc/default_config.yml @@ -0,0 +1,136 @@ +seed: 12345 +work_dir: ${hydra:runtime.cwd} +debug: false +print_config: true +ignore_warnings: true +trainer: + _target_: pytorch_lightning.Trainer + gpus: 0 + min_epochs: 1 + max_epochs: 30 + log_every_n_steps: 1 + check_val_every_n_epoch: 1 + weights_summary: null + progress_bar_refresh_rate: 1 + auto_lr_find: false + overfit_batches: 1 + num_sanity_val_steps: 0 +datamodule: + dataset_description: + _convert_: all + classification_preprocessing_dict: + 59: 6 + 50: 1 + classification_dict: + 1: unclassified + 2: ground + 6: building + d_in: 12 + num_classes: 3 + load_las_func: + _target_: functools.partial + _args_: + - ${get_method:lidar_multiclass.data.loading.FrenchLidarDataLogic.load_las} + subsampler: + _target_: lidar_multiclass.data.transforms.CustomGridSampler + subsample_size: 12500 + voxel_size: 0.25 + _target_: lidar_multiclass.data.datamodule.DataModule + prepared_data_dir: ${oc.env:PREPARED_DATA_DIR} + num_workers: 1 + batch_size: 16 + subtile_width_meters: 50 + subtile_overlap: ${predict.subtile_overlap} + augment: false + subsample_size: 12500 +callbacks: + log_code: + _target_: lidar_multiclass.callbacks.comet_callbacks.LogCode + code_dir: ${work_dir}/lidar_multiclass + log_logs_dir: + _target_: lidar_multiclass.callbacks.comet_callbacks.LogLogsPath + lr_monitor: + _target_: pytorch_lightning.callbacks.LearningRateMonitor + logging_interval: step + log_momentum: true + log_iou_by_class: + _target_: lidar_multiclass.callbacks.logging_callbacks.LogIoUByClass + classification_dict: ${datamodule.dataset_description.classification_dict} + interpolator: + _target_: lidar_multiclass.models.interpolation.Interpolator + interpolation_k: ${predict.interpolation_k} + classification_dict: ${datamodule.dataset_description.classification_dict} + probas_to_save: ${predict.probas_to_save} + output_dir: null + model_checkpoint: + _target_: pytorch_lightning.callbacks.ModelCheckpoint + monitor: val/loss_epoch + mode: min + save_top_k: 1 + save_last: true + verbose: true + dirpath: checkpoints/ + filename: epoch_{epoch:03d} + auto_insert_metric_name: false + early_stopping: + _target_: pytorch_lightning.callbacks.EarlyStopping + monitor: val/loss_epoch + mode: min + patience: 6 + min_delta: 0 +model: + optimizer: + _target_: functools.partial + _args_: + - ${get_method:torch.optim.Adam} + lr: ${model.lr} + lr_scheduler: + _target_: functools.partial + _args_: + - ${get_method:torch.optim.lr_scheduler.ReduceLROnPlateau} + mode: min + factor: 0.5 + patience: 5 + cooldown: 0 + _target_: lidar_multiclass.models.model.Model + d_in: ${datamodule.dataset_description.d_in} + num_classes: ${datamodule.dataset_description.num_classes} + ckpt_path: null + neural_net_class_name: RandLANet + neural_net_hparams: + num_classes: ${model.num_classes} + d_in: ${model.d_in} + num_neighbors: 16 + decimation: 4 + dropout: 0.5 + iou: + _target_: functools.partial + _args_: + - ${get_method:torchmetrics.JaccardIndex} + - ${model.num_classes} + absent_score: 1.0 + criterion: + _target_: torch.nn.CrossEntropyLoss + label_smoothing: 0.0 + lr: 0.004566395347136576 + momentum: 0.9 + monitor: val/loss_epoch +logger: + comet: + _target_: pytorch_lightning.loggers.comet.CometLogger + api_key: ${oc.env:COMET_API_TOKEN} + workspace: ${oc.env:COMET_WORKSPACE} + 
project_name: ${oc.env:COMET_PROJECT_NAME} + experiment_name: RandLaNetDebug + auto_log_co2: false + disabled: false +task: + task_name: fit +predict: + src_las: /path/to/input.las + output_dir: /path/to/output_dir/ + resume_from_checkpoint: /path/to/lightning_model.ckpt + gpus: 0 + probas_to_save: all + subtile_overlap: 25 + interpolation_k: 10 diff --git a/docs/source/apidoc/lidar_multiclass.utils.rst b/docs/source/apidoc/lidar_multiclass.utils.rst index 8b69aa99..c6528522 100644 --- a/docs/source/apidoc/lidar_multiclass.utils.rst +++ b/docs/source/apidoc/lidar_multiclass.utils.rst @@ -1,8 +1,6 @@ lidar\_multiclass.utils =============================== -Submodules ----------- lidar\_multiclass.utils.utils ------------------------------------ @@ -11,9 +9,3 @@ lidar\_multiclass.utils.utils :members: :show-inheritance: -Module contents ---------------- - -.. automodule:: lidar_multiclass.utils - :members: - :show-inheritance: diff --git a/docs/source/conf.py b/docs/source/conf.py index 1f9edcbc..69db4569 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -12,23 +12,33 @@ # import os import sys +import yaml from unittest import mock -root_path = os.path.abspath("./../../") -sys.path.insert(0, root_path) +from hydra.experimental import compose, initialize +from omegaconf import OmegaConf -import yaml -with open(os.path.join(root_path, "package_metadata.yaml"), "r") as f: - pm = yaml.safe_load(f) +rel_root_path = "./../../" +abs_root_path = os.path.abspath(rel_root_path) +sys.path.insert(0, abs_root_path) + # -- Project information ----------------------------------------------------- +with open(os.path.join(abs_root_path, "package_metadata.yaml"), "r") as f: + pm = yaml.safe_load(f) release = pm["__version__"] project = pm["__name__"] author = pm["__author__"] copyright = "2021, Institut National de l'Information Géographique et Forestière" +# -- YAML main to print the config into --------------------------------------------------- +# We need to concatenate configs into a single file using hydra +with initialize(config_path=os.path.join(rel_root_path, "configs/"), job_name="config"): + cfg = compose(config_name="config") + print(OmegaConf.to_yaml(cfg)) + OmegaConf.save(cfg, "./apidoc/default_config.yml", resolve=False) # -- General configuration --------------------------------------------------- @@ -52,7 +62,6 @@ "myst_parser", # supports markdown syntax for doc pages "sphinx_paramlinks", # allow to reference params, which is done in pytorch_lightning "sphinxnotes.mock", # ignore third-parties directive suche as "testcode" - see "mock_directive" args below - "sphinxcontrib.autoyaml", # Autodocumentation of yaml files. ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/index.rst b/docs/source/index.rst index 9b1fae95..f7998c0e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -30,18 +30,19 @@ Lidar-Deep-Segmentation > Documentation background/data_optimization .. TODO: assure that all dosctrings are in third-personn mode. +.. TODO: find a way to document hydra config ; perhaps by switching to a full dataclasses mode. .. 
toctree:: :maxdepth: 1 :caption: Package Reference apidoc/scripts + apidoc/configs apidoc/lidar_multiclass.data apidoc/lidar_multiclass.model apidoc/lidar_multiclass.models.modules apidoc/lidar_multiclass.callbacks apidoc/lidar_multiclass.utils - apidoc/configs Indices and Tables From f6aad7995188da349d582149043f1855527fb378 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Tue, 5 Apr 2022 16:42:31 +0200 Subject: [PATCH 8/8] Bump version to V1.7.0 --- package_metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package_metadata.yaml b/package_metadata.yaml index e2167c4b..46c9968c 100644 --- a/package_metadata.yaml +++ b/package_metadata.yaml @@ -1,4 +1,4 @@ -__version__: "1.6.13" +__version__: "1.7.0" __name__: "lidar_multiclass" __url__: "https://github.com/IGNF/lidar-deep-segmentation" __description__: "Multiclass Semantic Segmentation for Lidar Point Cloud"
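
After patches 1-3 of this series, the interpolation logic is importable from lidar_multiclass.models.interpolation, and the loading, datamodule, and transform code lives under the lidar_multiclass.data package. The sketch below is a minimal, hypothetical illustration of the post-refactor import paths. The class names and constructor keys mirror configs/callbacks/default.yaml, the subsampler configs, and the default Hydra config added in patch 7; it assumes the relocated modules keep the same constructors as the deleted lidar_multiclass/datamodules/ files, and the literal argument values are placeholders rather than a documented API.

```python
# Minimal sketch of the import paths after this series (patches 1-3).
# Assumptions: the relocated modules keep the constructors shown in the
# deleted lidar_multiclass/datamodules/ files; argument values simply mirror
# the default Hydra config from patch 7 and are placeholders, not a
# documented API.
from lidar_multiclass.models.interpolation import Interpolator
from lidar_multiclass.data.datamodule import DataModule
from lidar_multiclass.data.loading import FrenchLidarDataLogic
from lidar_multiclass.data.transforms import CustomGridSampler

classification_dict = {1: "unclassified", 2: "ground", 6: "building"}

# Instantiated by the LogIoUByClass callback at test/predict time
# (see configs/callbacks/default.yaml after patch 1).
interpolator = Interpolator(
    interpolation_k=10,
    classification_dict=classification_dict,
    probas_to_save="all",
    output_dir=None,
)

# The datamodule now resolves its LAS loader and subsampler from the
# lidar_multiclass.data package instead of lidar_multiclass.datamodules.
datamodule = DataModule(
    prepared_data_dir="/path/to/prepared_data/",
    dataset_description={
        "classification_dict": classification_dict,
        "classification_preprocessing_dict": {59: 6, 50: 1},
        "load_las_func": FrenchLidarDataLogic.load_las,
    },
    subsampler=CustomGridSampler(subsample_size=12500, voxel_size=0.25),
    batch_size=16,
    num_workers=1,
)
```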