From 8df872823614cc05ab8e44a8a68fcb8a87018956 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Mon, 4 Apr 2022 18:43:42 +0200 Subject: [PATCH 1/8] Move interpolation script with models where it belong --- configs/callbacks/default.yaml | 2 +- lidar_multiclass/{datamodules => models}/interpolation.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename lidar_multiclass/{datamodules => models}/interpolation.py (100%) diff --git a/configs/callbacks/default.yaml b/configs/callbacks/default.yaml index a77d2b11..55fe156e 100755 --- a/configs/callbacks/default.yaml +++ b/configs/callbacks/default.yaml @@ -18,7 +18,7 @@ log_iou_by_class: _target_: lidar_multiclass.callbacks.logging_callbacks.LogIoUByClass classification_dict: ${datamodule.dataset_description.classification_dict} interpolator: # only used at test time - _target_: lidar_multiclass.datamodules.interpolation.Interpolator + _target_: lidar_multiclass.models.interpolation.Interpolator interpolation_k: ${predict.interpolation_k} classification_dict: ${datamodule.dataset_description.classification_dict} probas_to_save: ${predict.probas_to_save} # replace by a list of string of class names to select specific probas to save diff --git a/lidar_multiclass/datamodules/interpolation.py b/lidar_multiclass/models/interpolation.py similarity index 100% rename from lidar_multiclass/datamodules/interpolation.py rename to lidar_multiclass/models/interpolation.py From 8c86ee1e78f2570dce1aa4b148ff597a398fe84f Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Tue, 5 Apr 2022 10:57:23 +0200 Subject: [PATCH 2/8] Refactor name of data scripts and add some docu --- docs/requirements.txt | 3 ++- docs/source/apidoc/lidar_multiclass.data.rst | 20 ++++++++++++++ .../apidoc/lidar_multiclass.datamodules.rst | 27 ------------------- docs/source/apidoc/lidar_multiclass.model.rst | 2 +- docs/source/index.rst | 2 +- 5 files changed, 24 insertions(+), 30 deletions(-) create mode 100644 docs/source/apidoc/lidar_multiclass.data.rst delete mode 100644 docs/source/apidoc/lidar_multiclass.datamodules.rst diff --git a/docs/requirements.txt b/docs/requirements.txt index 67a6d0c1..220eea6b 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -10,8 +10,9 @@ myst_parser==0.17.* sphinx_paramlinks==0.5.* recommonmark==0.7.* sphinxnotes-mock==1.0.0b0 # still a beta - +sphinx-argparse==0.3.* # Using docutils==0.17 + hydra-core rich comet_ml diff --git a/docs/source/apidoc/lidar_multiclass.data.rst b/docs/source/apidoc/lidar_multiclass.data.rst new file mode 100644 index 00000000..51fc84cf --- /dev/null +++ b/docs/source/apidoc/lidar_multiclass.data.rst @@ -0,0 +1,20 @@ +lidar\_multiclass.data +===================================== + +lidar\_multiclass.datamodules.datamodule +----------------------------------------------- + +.. automodule:: lidar_multiclass.data.datamodule + :members: + +lidar\_multiclass.datamodules.loading +----------------------------------------- + +.. automodule:: lidar_multiclass.data.loading + :members: + +lidar\_multiclass.datamodules.transforms +----------------------------------------------- + +.. 
automodule:: lidar_multiclass.data.transforms + :members: diff --git a/docs/source/apidoc/lidar_multiclass.datamodules.rst b/docs/source/apidoc/lidar_multiclass.datamodules.rst deleted file mode 100644 index 61fe9f35..00000000 --- a/docs/source/apidoc/lidar_multiclass.datamodules.rst +++ /dev/null @@ -1,27 +0,0 @@ -lidar\_multiclass.datamodules -===================================== - - -lidar\_multiclass.datamodules.data ------------------------------------------ - -.. automodule:: lidar_multiclass.datamodules.data - :members: - -lidar\_multiclass.datamodules.datamodule ------------------------------------------------ - -.. automodule:: lidar_multiclass.datamodules.datamodule - :members: - -lidar\_multiclass.datamodules.interpolation --------------------------------------------------- - -.. automodule:: lidar_multiclass.datamodules.interpolation - :members: - -lidar\_multiclass.datamodules.transforms ------------------------------------------------ - -.. automodule:: lidar_multiclass.datamodules.transforms - :members: diff --git a/docs/source/apidoc/lidar_multiclass.model.rst b/docs/source/apidoc/lidar_multiclass.model.rst index 02fa70b7..014ec8da 100644 --- a/docs/source/apidoc/lidar_multiclass.model.rst +++ b/docs/source/apidoc/lidar_multiclass.model.rst @@ -4,5 +4,5 @@ lidar\_multiclass.models Model ------------------------------------- -.. autoclass:: lidar_multiclass.models.model.Model +.. automodule:: lidar_multiclass.models.model :members: \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index 9277913e..813a4241 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -35,7 +35,7 @@ Lidar-Deep-Segmentation > Documentation :caption: Package Reference apidoc/scripts - apidoc/lidar_multiclass.datamodules + apidoc/lidar_multiclass.data apidoc/lidar_multiclass.model apidoc/lidar_multiclass.models.modules apidoc/lidar_multiclass.callbacks From 28f98bcd86e081a4b6db9af80e5696ee80c2edb1 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Tue, 5 Apr 2022 11:00:03 +0200 Subject: [PATCH 3/8] Refactor name of data scripts and add some docu --- bash/setup_environment/requirements.txt | 7 +- configs/datamodule/datamodule.yaml | 2 +- ...0220204_BuildingValidation_and_Ground.yaml | 2 +- .../SwissSURFACE3D_Building_class.yaml | 2 +- .../SwissSURFACE3D_all_6_classes.yaml | 2 +- configs/datamodule/subsampler/fps.yaml | 2 +- configs/datamodule/subsampler/grid.yaml | 2 +- configs/datamodule/subsampler/random.yaml | 2 +- .../callbacks/logging_callbacks.py | 2 +- lidar_multiclass/datamodules/__init__.py | 0 lidar_multiclass/datamodules/data.py | 397 ------------------ lidar_multiclass/datamodules/datamodule.py | 313 -------------- lidar_multiclass/datamodules/transforms.py | 333 --------------- lidar_multiclass/models/interpolation.py | 5 +- lidar_multiclass/predict.py | 3 +- 15 files changed, 17 insertions(+), 1057 deletions(-) delete mode 100755 lidar_multiclass/datamodules/__init__.py delete mode 100755 lidar_multiclass/datamodules/data.py delete mode 100755 lidar_multiclass/datamodules/datamodule.py delete mode 100755 lidar_multiclass/datamodules/transforms.py diff --git a/bash/setup_environment/requirements.txt b/bash/setup_environment/requirements.txt index 985a5527..22ce3477 100755 --- a/bash/setup_environment/requirements.txt +++ b/bash/setup_environment/requirements.txt @@ -10,5 +10,8 @@ rstcheck==3.3.* sphinx==4.5.* sphinx_rtd_theme==1.0.* myst_parser==0.17.* -sphinx_paramlinks -recommonmark==0.7.* \ No newline at end of file 
+sphinx_paramlinks==0.5.* +recommonmark==0.7.* +sphinxnotes-mock==1.0.0b0 # still a beta +sphinx-argparse==0.3.* # Using +docutils==0.17 \ No newline at end of file diff --git a/configs/datamodule/datamodule.yaml b/configs/datamodule/datamodule.yaml index 9c4c70ce..c36cd255 100755 --- a/configs/datamodule/datamodule.yaml +++ b/configs/datamodule/datamodule.yaml @@ -1,4 +1,4 @@ -_target_: lidar_multiclass.datamodules.datamodule.DataModule +_target_: lidar_multiclass.data.datamodule.DataModule prepared_data_dir: ${oc.env:PREPARED_DATA_DIR} diff --git a/configs/datamodule/dataset_description/20220204_BuildingValidation_and_Ground.yaml b/configs/datamodule/dataset_description/20220204_BuildingValidation_and_Ground.yaml index be397f42..3ddf5601 100644 --- a/configs/datamodule/dataset_description/20220204_BuildingValidation_and_Ground.yaml +++ b/configs/datamodule/dataset_description/20220204_BuildingValidation_and_Ground.yaml @@ -12,4 +12,4 @@ num_classes: 3 load_las_func: _target_: functools.partial _args_: - - "${get_method:lidar_multiclass.datamodules.data.FrenchLidarDataLogic.load_las}" \ No newline at end of file + - "${get_method:lidar_multiclass.data.loading.FrenchLidarDataLogic.load_las}" \ No newline at end of file diff --git a/configs/datamodule/dataset_description/SwissSURFACE3D_Building_class.yaml b/configs/datamodule/dataset_description/SwissSURFACE3D_Building_class.yaml index ff538881..fb8a5c83 100644 --- a/configs/datamodule/dataset_description/SwissSURFACE3D_Building_class.yaml +++ b/configs/datamodule/dataset_description/SwissSURFACE3D_Building_class.yaml @@ -12,4 +12,4 @@ num_classes: 2 load_las_func: _target_: functools.partial _args_: - - "${get_method:lidar_multiclass.datamodules.data.SwissTopoLidarDataLogic.load_las}" \ No newline at end of file + - "${get_method:lidar_multiclass.data.loading.SwissTopoLidarDataLogic.load_las}" \ No newline at end of file diff --git a/configs/datamodule/dataset_description/SwissSURFACE3D_all_6_classes.yaml b/configs/datamodule/dataset_description/SwissSURFACE3D_all_6_classes.yaml index d2fe2ea5..d27390ea 100644 --- a/configs/datamodule/dataset_description/SwissSURFACE3D_all_6_classes.yaml +++ b/configs/datamodule/dataset_description/SwissSURFACE3D_all_6_classes.yaml @@ -12,4 +12,4 @@ num_classes: 6 load_las_func: _target_: functools.partial _args_: - - "${get_method:lidar_multiclass.datamodules.data.SwissTopoLidarDataLogic.load_las}" \ No newline at end of file + - "${get_method:lidar_multiclass.data.loading.SwissTopoLidarDataLogic.load_las}" \ No newline at end of file diff --git a/configs/datamodule/subsampler/fps.yaml b/configs/datamodule/subsampler/fps.yaml index b5d2eac8..b811ec5a 100644 --- a/configs/datamodule/subsampler/fps.yaml +++ b/configs/datamodule/subsampler/fps.yaml @@ -1,2 +1,2 @@ -_target_: lidar_multiclass.datamodules.transforms.FPSSampler +_target_: lidar_multiclass.data.transforms.FPSSampler subsample_size: 12500 diff --git a/configs/datamodule/subsampler/grid.yaml b/configs/datamodule/subsampler/grid.yaml index 35ab35cf..8e66f66e 100644 --- a/configs/datamodule/subsampler/grid.yaml +++ b/configs/datamodule/subsampler/grid.yaml @@ -1,3 +1,3 @@ -_target_: lidar_multiclass.datamodules.transforms.CustomGridSampler +_target_: lidar_multiclass.data.transforms.CustomGridSampler subsample_size: 12500 voxel_size: 0.25 diff --git a/configs/datamodule/subsampler/random.yaml b/configs/datamodule/subsampler/random.yaml index 8ed6dfa8..8dd4d7d4 100644 --- a/configs/datamodule/subsampler/random.yaml +++ 
b/configs/datamodule/subsampler/random.yaml @@ -1,2 +1,2 @@ -_target_: lidar_multiclass.datamodules.transforms.RandomSampler +_target_: lidar_multiclass.data.transforms.RandomSampler subsample_size: 12500 \ No newline at end of file diff --git a/lidar_multiclass/callbacks/logging_callbacks.py b/lidar_multiclass/callbacks/logging_callbacks.py index 92770065..3b9cd522 100644 --- a/lidar_multiclass/callbacks/logging_callbacks.py +++ b/lidar_multiclass/callbacks/logging_callbacks.py @@ -6,7 +6,7 @@ import torch from torchmetrics import JaccardIndex from torchmetrics.functional.classification.jaccard import _jaccard_from_confmat -from lidar_multiclass.datamodules.interpolation import Interpolator +from lidar_multiclass.models.interpolation import Interpolator from lidar_multiclass.utils import utils log = utils.get_logger(__name__) diff --git a/lidar_multiclass/datamodules/__init__.py b/lidar_multiclass/datamodules/__init__.py deleted file mode 100755 index e69de29b..00000000 diff --git a/lidar_multiclass/datamodules/data.py b/lidar_multiclass/datamodules/data.py deleted file mode 100755 index 1ee83a21..00000000 --- a/lidar_multiclass/datamodules/data.py +++ /dev/null @@ -1,397 +0,0 @@ -"""This module contains - -1) Data loading logics specific to each data format. - The "load_las" class method can be passed to the datamodule at inference time. -2) A data preparation script for deep learning training. - From a data directory containing point cloud in LAS format, and a scv specifying the dataset - train/val/test split for each file (columns: split, basename, example: "val","123_456.las"), - split the dataset, chunk the point cloud tiles into smaller subtiles, and prepare each sample - as a pytorch geometric Data object. - - Echo numbers and colors are scaled to be in 0-1 range. Intensity and average color - are not scaled b/c they are expected to be standardized later. - -To show help, run - cd lidar_multiclass/datamodules/ - python prepare_french_lidar.py -h - -""" - -from abc import ABC, abstractmethod -import argparse -import os, glob -import os.path as osp -from shutil import copyfile -from tqdm import tqdm -import laspy -import numpy as np -import pandas as pd -import torch -from torch_geometric.data import Data - - -class LidarDataLogic(ABC): - """Abstract class to load, chunk, and save a point cloud dataset according to a train/val/test split. - load_las and its needed parameters ares specified in child classes. - - """ - - split = ["val", "train", "test"] - input_tile_width_meters = 1000 - subtile_width_meters = 50 - return_num_normalization_max_value = 7 - - def __init__(self, **kwargs): - self.input_data_dir = kwargs.get("input_data_dir") - self.prepared_data_dir = kwargs.get("prepared_data_dir") - self.split_csv = kwargs.get("split_csv") - self.range_by_axis = np.arange( - self.input_tile_width_meters // self.subtile_width_meters + 1 - ) - - @abstractmethod - def load_las(self, las_filepath: str) -> None: - """Load a point cloud in LAS format to memory and turn it into torch-geometric Data object. - - Args: - las_filepath (str): path to the LAS file. - - Returns: - Data: the point cloud formatted for later deep learning training. - """ - raise NotImplementedError - - def prepare(self): - """Prepare a dataset for model training and model evaluation. - - Iterates through LAS files listed in a csv metadata file. A `split` column - specifies the train/val/test split of the dataset to be created. 
- Depending on the set, this method will: - - train/val: - Load LAS into memory as a Data object with selected features, - then iteratively extract 50m*50m subtiles by filtering along x - then y axis. Serialize the resulting Data object using torch.save. - - test: - Simply copy the LAS to the new test folder. - - """ - split_df = pd.read_csv(self.split_csv) - for phase in tqdm(self.split, desc="Phases"): - basenames = split_df[split_df.split == phase].basename.tolist() - print(f"Subset: {phase}") - print(" - ".join(basenames)) - for file_basename in tqdm(basenames, desc="Files"): - filepath = self._find_file_in_dir(self.input_data_dir, file_basename) - output_subdir_path = osp.join(self.prepared_data_dir, phase) - if phase == "test": - os.makedirs(output_subdir_path, exist_ok=True) - target_file = osp.join(output_subdir_path, file_basename) - copyfile(filepath, target_file) - elif phase in ["train", "val"]: - output_subdir_path = osp.join( - output_subdir_path, osp.basename(filepath) - ) - os.makedirs(output_subdir_path, exist_ok=True) - self.split_and_save(filepath, output_subdir_path) - else: - raise KeyError("Phase should be one of train/val/test.") - - def split_and_save(self, filepath: str, output_subdir_path: str) -> None: - """Parse a LAS, extract and save each subtile as a Data object. - - Args: - filepath (str): input LAS file - output_subdir_path (str): output directory to save splitted `.data` objects. - """ - data = self.load_las(filepath) - idx = 0 - for _ in tqdm(self.range_by_axis): - if len(data.pos) == 0: - break - data_x_band = self._extract_by_x(data) - for _ in self.range_by_axis: - if len(data_x_band.pos) == 0: - break - subtile_data = self._extract_by_y(data_x_band) - self._save(subtile_data, output_subdir_path, idx) - idx += 1 - - def _find_file_in_dir(self, input_data_dir: str, basename: str) -> str: - """Query files with .las extension in subfolder of input_data_dir. - - Args: - input_data_dir (str): data directory - - Returns: - [str]: first file path matching the query. - - """ - query = f"{input_data_dir}*{basename}" - files = glob.glob(query) - return files[0] - - def _extract_by_axis(self, data: Data, axis=0) -> Data: - """Filter a data object on a chosen axis, using a relative position . - Modifies the original data object so that extracted future filters are faster. - - Args: - data (Data): a pyg Data object with pos, x, and y attributes. - relative_pos (int): where the data to extract start on chosen axis (typically in range 0-1000) - axis (int, optional): 0 for x and 1 for y axis. Defaults to 0. - - Returns: - Data: the data that is at most subtile_width_meters above relative_pos on the chosen axis. 
- """ - sub_tile_data = data.clone() - pos_axis = sub_tile_data.pos[:, axis] - absolute_low = pos_axis.min(0) - absolute_high = absolute_low + self.subtile_width_meters - mask = (absolute_low <= pos_axis) & (pos_axis <= absolute_high) - - # select - sub_tile_data.pos = sub_tile_data.pos[mask] - sub_tile_data.x = sub_tile_data.x[mask] - sub_tile_data.y = sub_tile_data.y[mask] - - data.pos = data.pos[~mask] - data.x = data.x[~mask] - data.y = data.y[~mask] - return sub_tile_data - - def _extract_by_x(self, data: Data) -> Data: - """extract_by_axis applied on first axis x""" - return self._extract_by_axis(data, axis=0) - - def _extract_by_y(self, data: Data) -> Data: - """extract_by_axis applied on second axis y""" - return self._extract_by_axis(data, axis=1) - - def _save(self, subtile_data: Data, output_subdir_path: str, idx: int) -> None: - """Save the subtile data object with torch. - - Args: - subtile_data (Data): the object to save. - output_subdir_path (str): the subfolder to save it. - idx (int): an arbitrary but unique subtile identifier. - """ - subtile_save_path = osp.join(output_subdir_path, f"{str(idx).zfill(4)}.data") - torch.save(subtile_data, subtile_save_path) - - -class FrenchLidarDataLogic(LidarDataLogic): - - x_features_names = [ - "intensity", - "return_num", - "num_returns", - "red", - "green", - "blue", - "nir", - "rgb_avg", - "ndvi", - ] - colors_normalization_max_value = 255 * 256 - - @classmethod - def load_las(self, las_filepath: str): - """Load a point cloud in LAS format to memory and turn it into torch-geometric Data object. - Build a composite (average) color channel on the fly. - Calculate NDVI on the fly. - - Args: - las_filepath (str): path to the LAS file. - - Returns: - Data: the point cloud formatted for later deep learning training. 
- - """ - las = laspy.read(las_filepath) - pos = np.asarray([las.x, las.y, las.z], dtype=np.float32).transpose() - - x = np.asarray( - [ - las[x_name] - for x_name in [ - "intensity", - "return_num", - "num_returns", - "red", - "green", - "blue", - "nir", - ] - ], - dtype=np.float32, - ).transpose() - - return_num_idx = self.x_features_names.index("return_num") - occluded_points = x[:, return_num_idx] > 1 - - x[:, return_num_idx] = (x[:, return_num_idx]) / ( - self.return_num_normalization_max_value - ) - num_return_idx = self.x_features_names.index("num_returns") - x[:, num_return_idx] = (x[:, num_return_idx]) / ( - self.return_num_normalization_max_value - ) - - for idx, c in enumerate(self.x_features_names): - if c in ["red", "green", "blue", "nir"]: - assert x[:, idx].max() <= self.colors_normalization_max_value - x[:, idx] = x[:, idx] / self.colors_normalization_max_value - x[occluded_points, idx] = 0 - - red = x[:, self.x_features_names.index("red")] - green = x[:, self.x_features_names.index("green")] - blue = x[:, self.x_features_names.index("blue")] - - rgb_avg = np.asarray([red, green, blue], dtype=np.float32).mean(axis=0) - - nir = x[:, self.x_features_names.index("nir")] - ndvi = (nir - red) / (nir + red + 10**-6) - x = np.concatenate([x, rgb_avg[:, None], ndvi[:, None]], axis=1) - - try: - # for LAS format V1.2 - y = las.classification.array.astype(int) - except: - # for LAS format V1.4 - y = las.classification.astype(int) - - return Data( - pos=pos, - x=x, - y=y, - las_filepath=las_filepath, - x_features_names=self.x_features_names, - ) - - -class SwissTopoLidarDataLogic(LidarDataLogic): - x_features_names = [ - "intensity", - "return_num", - "num_returns", - "red", - "green", - "blue", - "rgb_avg", - ] - colors_normalization_max_value = 256 - - @classmethod - def load_las(self, las_filepath: str) -> Data: - """Load a point cloud in LAS format to memory and turn it into torch-geometric Data object. - Build a composite (average) color channel on the fly. - - Args: - las_filepath (str): path to the LAS file. - - Returns: - Data: the point cloud formatted for later deep learning training. 
- - """ - las = laspy.read(las_filepath) - pos = np.asarray([las.x, las.y, las.z], dtype=np.float32).transpose() - - x = np.asarray( - [ - las[x_name] - for x_name in [ - "intensity", - "return_num", - "num_returns", - "red", - "green", - "blue", - ] - ], - dtype=np.float32, - ).transpose() - - return_num_idx = self.x_features_names.index("return_num") - occluded_points = x[:, return_num_idx] > 1 - - x[:, return_num_idx] = (x[:, return_num_idx]) / ( - self.return_num_normalization_max_value - ) - num_return_idx = self.x_features_names.index("num_returns") - x[:, num_return_idx] = (x[:, num_return_idx]) / ( - self.return_num_normalization_max_value - ) - - for idx, c in enumerate(self.x_features_names): - if c in ["red", "green", "blue"]: - assert x[:, idx].max() <= self.colors_normalization_max_value - x[:, idx] = x[:, idx] / self.colors_normalization_max_value - x[occluded_points, idx] = 0 - - rgb_avg = ( - np.asarray( - [las[x_name] for x_name in ["red", "green", "blue"]], dtype=np.float32 - ) - .transpose() - .mean(axis=1, keepdims=True) - ) - - x = np.concatenate([x, rgb_avg], axis=1) - - try: - # for LAS format V1.2 - y = las.classification.array.astype(int) - except: - # for LAS format V1.4 - y = las.classification.astype(int) - - return Data( - pos=pos, - x=x, - y=y, - las_filepath=las_filepath, - x_features_names=self.x_features_names, - ) - - -def main(): - """Main logic to prepare a new set of Lidar tiles for model training.""" - - parser = argparse.ArgumentParser( - description="Prepare a Lidar dataset for deep learning." - ) - parser.add_argument( - "--split_csv", - type=str, - default="./split.csv", - help="Path to csv with a basename (e.g. '123_456.las') and split (train/val/test) columns specifying the dataset split.", - ) - parser.add_argument( - "--input_data_dir", - type=str, - default="./data/raw/", - help="Path to folder with las files stored in train/val/test subfolders.", - ) - parser.add_argument( - "--prepared_data_dir", - type=str, - default="./prepared/", - help="Path to folder to save Data object train/val/test subfolders.", - ) - parser.add_argument( - "--origin", - type=str, - default="FR", - ) - args = parser.parse_args() - if args.origin == "FR": - data_prepper = FrenchLidarDataLogic(**args.__dict__) - data_prepper.prepare() - if args.origin == "CH": - data_prepper = SwissTopoLidarDataLogic(**args.__dict__) - data_prepper.prepare() - - -if __name__ == "__main__": - main() diff --git a/lidar_multiclass/datamodules/datamodule.py b/lidar_multiclass/datamodules/datamodule.py deleted file mode 100755 index 3770989f..00000000 --- a/lidar_multiclass/datamodules/datamodule.py +++ /dev/null @@ -1,313 +0,0 @@ -import os.path as osp -import glob -import time -import numpy as np -from typing import Optional, List, AnyStr -from numbers import Number -from pytorch_lightning import LightningDataModule -from torch.utils.data import DataLoader, Dataset -from torch.utils.data.dataset import IterableDataset -from torch_geometric.transforms import RandomFlip -from torch_geometric.data.data import Data -from torch_geometric.transforms.center import Center -from lidar_multiclass.utils import utils -from lidar_multiclass.datamodules.transforms import * - -from lidar_multiclass.utils import utils - -log = utils.get_logger(__name__) - - -class DataModule(LightningDataModule): - """ - Datamdule to feed train and validation data to the model. - We use a custome sampler during training to consecutively load several - subtiles from a single tile, to reduce the I/O footprint. 
- """ - - def __init__(self, **kwargs): - super().__init__() - # TODO: try to use save_hyperparameters to lightne this code. - self.prepared_data_dir = kwargs.get("prepared_data_dir") - - self.num_workers = kwargs.get("num_workers", 0) - - self.subtile_width_meters = kwargs.get("subtile_width_meters", 50) - self.subtile_overlap = kwargs.get("subtile_overlap", 0) - self.batch_size = kwargs.get("batch_size", 32) - self.augment = kwargs.get("augment", True) - self.subsampler = kwargs.get("subsampler") - - self.dataset_description = kwargs.get("dataset_description") - self.classification_dict = self.dataset_description.get("classification_dict") - self.classification_preprocessing_dict = self.dataset_description.get( - "classification_preprocessing_dict" - ) - - self.train_data: Optional[Dataset] = None - self.val_data: Optional[Dataset] = None - self.test_data: Optional[Dataset] = None - self.predict_data: Optional[Dataset] = None - - self.load_las = self.dataset_description.get("load_las_func") - self._set_all_transforms() - - def setup(self, stage: Optional[str] = None): - """ - Load data. Set variables: self.data_train, self.data_val, self.data_test. - test_data = val data, because we only use all validation data after training. - Test data can be used but only after final model is chosen. - """ - if stage == "fit" or stage is None: - self._set_train_data() - self._set_val_data() - - if stage == "test" or stage is None: - self._set_test_data() - - def _set_train_data(self): - """Get the train dataset""" - files = glob.glob( - osp.join(self.prepared_data_dir, "train", "**", "*.data"), recursive=True - ) - self.train_data = LidarMapDataset( - files, - loading_function=torch.load, - transform=self._get_train_transforms(), - target_transform=TargetTransform( - self.classification_preprocessing_dict, - self.classification_dict, - ), - ) - - def _set_val_data(self): - """Get the validation dataset""" - files = glob.glob( - osp.join(self.prepared_data_dir, "val", "**", "*.data"), recursive=True - ) - log.info(f"Validation on {len(files)} subtiles.") - self.val_data = LidarMapDataset( - files, - loading_function=torch.load, - transform=self._get_val_transforms(), - target_transform=TargetTransform( - self.classification_preprocessing_dict, - self.classification_dict, - ), - ) - - def _set_test_data(self): - """Get the test dataset. 
User need to explicitely require the use of test set, which is kept out of experiment until the end.""" - files = glob.glob( - osp.join(self.prepared_data_dir, "test", "**", "*.las"), recursive=True - ) - self.test_data = LidarIterableDataset( - files, - loading_function=self.load_las, - transform=self._get_test_transforms(), - target_transform=TargetTransform( - self.classification_preprocessing_dict, self.classification_dict - ), - subtile_width_meters=self.subtile_width_meters, - subtile_overlap=self.subtile_overlap, - ) - - def _set_predict_data(self, files: List[str]): - """This is used in predict.py, with a single file in a list.""" - self.predict_data = LidarIterableDataset( - files, - loading_function=self.load_las, - transform=self._get_predict_transforms(), - target_transform=None, - subtile_width_meters=self.subtile_width_meters, - subtile_overlap=self.subtile_overlap, - ) - - def train_dataloader(self): - """Get train dataloader.""" - return DataLoader( - dataset=self.train_data, - batch_size=self.batch_size, - shuffle=True, - num_workers=self.num_workers, - collate_fn=collate_fn, - prefetch_factor=1, - ) - - def val_dataloader(self): - """Get val dataloader.""" - return DataLoader( - dataset=self.val_data, - batch_size=self.batch_size, - shuffle=False, - num_workers=self.num_workers, - collate_fn=collate_fn, - prefetch_factor=1, - ) - - def test_dataloader(self): - """Get test dataloader.""" - return DataLoader( - dataset=self.test_data, - batch_size=self.batch_size, - shuffle=False, - num_workers=1, - collate_fn=collate_fn, - prefetch_factor=1, - ) - - def predict_dataloader(self): - return DataLoader( - dataset=self.predict_data, - batch_size=self.batch_size, - shuffle=False, - num_workers=1, # b/c terable dataloader - collate_fn=collate_fn, - prefetch_factor=1, - ) - - def _set_all_transforms(self): - """ - Set transforms that are shared between train/val-test. - Called at initialization. 
- """ - - self.preparation = [ - EmptySubtileFilter(), - ToTensor(), - MakeCopyOfPosAndY(), - self.subsampler, - MakeCopyOfSampledPos(), - Center(), - ] - self.augmentation = [] - if self.augment: - self.augmentation = [RandomFlip(0, p=0.5), RandomFlip(1, p=0.5)] - self.normalization = [NormalizePos(), StandardizeFeatures()] - - def _get_train_transforms(self) -> CustomCompose: - """Create a transform composition for train phase.""" - return CustomCompose(self.preparation + self.augmentation + self.normalization) - - def _get_val_transforms(self) -> CustomCompose: - """Create a transform composition for val phase.""" - return CustomCompose(self.preparation + self.normalization) - - def _get_test_transforms(self) -> CustomCompose: - """Create a transform composition for test phase.""" - return self._get_val_transforms() - - def _get_predict_transforms(self) -> CustomCompose: - """Create a transform composition for predict phase.""" - return self._get_val_transforms() - - -class LidarMapDataset(Dataset): - def __init__( - self, - files: List[str], - loading_function=None, - transform=None, - target_transform=None, - ): - self.files = files - self.num_files = len(self.files) - - self.loading_function = loading_function - self.transform = transform - self.target_transform = target_transform - - def __getitem__(self, idx): - """Load a subtile and apply the transforms specified in datamodule.""" - filepath = self.files[idx] - - data = self.loading_function(filepath) - if self.transform: - data = self.transform(data) - if data is None: - return None - if self.target_transform: - data = self.target_transform(data) - - return data - - def __len__(self): - return self.num_files - - -class LidarIterableDataset(IterableDataset): - def __init__( - self, - files, - loading_function=None, - transform=None, - target_transform=None, - subtile_width_meters: Number = 50, - subtile_overlap: Number = 0, - ): - self.files = files - self.loading_function = loading_function - self.transform = transform - self.target_transform = target_transform - self.subtile_width_meters = subtile_width_meters - self.subtile_overlap = subtile_overlap - - def yield_transformed_subtile_data(self): - """Yield subtiles from all tiles in an exhaustive fashion.""" - - for idx, filepath in enumerate(self.files): - log.info(f"Parsing file {idx+1}/{len(self.files)} [{filepath}]") - tile_data = self.loading_function(filepath) - centers = self.get_all_subtiles_xy_min_corner(tile_data) - # TODO: change to process time function - ts = time.time() - for xy_min_corner in centers: - data = self.extract_subtile_from_tile_data(tile_data, xy_min_corner) - if self.transform: - data = self.transform(data) - if data is not None: - if self.target_transform: - data = self.target_transform(data) - yield data - - def __iter__(self): - return self.yield_transformed_subtile_data() - - def get_all_subtiles_xy_min_corner(self, data: Data): - """Get centers of square subtiles of specified width, assuming rectangular form of input cloud.""" - - low = data.pos[:, :2].min(0) - high = data.pos[:, :2].max(0) - xy_min_corners = [ - np.array([x, y]) - for x in np.arange( - start=low[0], - stop=high[0] + 1, - step=self.subtile_width_meters - self.subtile_overlap, - ) - for y in np.arange( - start=low[1], - stop=high[1] + 1, - step=self.subtile_width_meters - self.subtile_overlap, - ) - ] - # random.shuffle(centers) - return xy_min_corners - - def extract_subtile_from_tile_data(self, data: Data, low_xy): - """Extract the subset from xy_min_corner to xy_min_corner + 
self.subtile_width_meters - - Args: - tile_data (Data): The full tile data. - xy_min_corner (np.array): Coordonates of xy min corner of subtile to extract. - """ - high_xy = low_xy + self.subtile_width_meters - mask_x = (low_xy[0] <= data.pos[:, 0]) & (data.pos[:, 0] <= high_xy[0]) - mask_y = (low_xy[1] <= data.pos[:, 1]) & (data.pos[:, 1] <= high_xy[1]) - mask = mask_x & mask_y - - sub = data.clone() - sub.pos = sub.pos[mask] - sub.x = sub.x[mask] - sub.y = sub.y[mask] - return sub diff --git a/lidar_multiclass/datamodules/transforms.py b/lidar_multiclass/datamodules/transforms.py deleted file mode 100755 index 23fb1790..00000000 --- a/lidar_multiclass/datamodules/transforms.py +++ /dev/null @@ -1,333 +0,0 @@ -import math -from enum import Enum -from numbers import Number -from typing import Callable, Dict, List, Tuple - -import numpy as np -import torch -import torch_geometric -from torch_geometric.data import Batch, Data -from torch_geometric.transforms import BaseTransform -from torch_geometric.nn.pool import fps -from torch_scatter import scatter_add, scatter_mean -import torch.nn.functional as F -from lidar_multiclass.utils import utils - -log = utils.get_logger(__name__) - - -class ChannelNames(Enum): - """Names of custom additional LAS channel.""" - - PredictedClassification = "PredictedClassification" - ProbasEntropy = "entropy" - - -class CustomCompose(BaseTransform): - """ - Composes several transforms together. - Edited to bypass downstream transforms if None is returned by a transform. - - Args: - transforms (List[Callable]): List of transforms to compose. - - """ - - def __init__(self, transforms: List[Callable]): - self.transforms = transforms - - def __call__(self, data): - for transform in self.transforms: - if isinstance(data, (list, tuple)): - data = [transform(d) for d in data] - data = filter(lambda x: x is not None, data) - else: - data = transform(data) - if data is None: - return None - return data - - -class EmptySubtileFilter(BaseTransform): - """Filter out almost empty subtiles""" - - def __call__(self, data: Data, min_num_points_subtile: int = 50): - if len(data["x"]) < min_num_points_subtile: - return None - return data - - -class ToTensor(BaseTransform): - """Turn np.arrays specified by their keys into Tensor.""" - - def __init__(self, keys=["pos", "x", "y"]): - self.keys = keys - - def __call__(self, data: Data): - for key in data.keys: - if key in self.keys: - data[key] = torch.from_numpy(data[key]) - return data - - -class MakeCopyOfPosAndY(BaseTransform): - """Make a copy of the full cloud's positions and labels, for inference interpolation.""" - - def __call__(self, data: Data): - data["pos_copy"] = data["pos"].clone() - data["y_copy"] = data["y"].clone() - return data - - -class Subsampler(BaseTransform): - """Base class for custom cloud subsampler to inherit from. - - Subsampling to a unique size is needed for batching clouds with different initial size. 
- Subclasses are modified from https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/transforms/, - to preserve specific attributes of the data for inference interpolation - - """ - - sampling_keys: Tuple[str] = ("x", "pos", "y") - - def _call_(self, data: Data): - raise NotImplementedError("Use a non-abstract subsampler class instead.") - - -class RandomSampler(Subsampler): - """Samples a fixed number of points from a point cloud, randomly.""" - - def __init__(self, subsample_size: int = 12500): - self.subsample_size = subsample_size - - def __call__(self, data: Data): - num_nodes = data.num_nodes - choice = torch.cat( - [ - torch.randperm(num_nodes) - for _ in range(math.ceil(self.subsample_size / num_nodes)) - ], - dim=0, - )[: self.subsample_size] - - for key in self.sampling_keys: - data[key] = data[key][choice] - - return data - - -class FPSSampler(Subsampler): - """ - Samples a fixed number of points from a point cloud, using Fartest Point Sampling. - - In our experiments, FPS is slower by an order of magnitude than Random/Grid sampling, and yields worst results. - - See https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html?highlight=fps#torch_geometric.nn.pool.fps - - """ - - def __init__(self, subsample_size: int = 12500): - self.subsample_size = subsample_size - self.rs = RandomSampler(subsample_size=subsample_size) - - def __call__(self, data: Data): - num_nodes = data.num_nodes - # Random sampling if we are short in points - if num_nodes < self.subsample_size: - return self.rs(data) - - # Else, use Farthest Point Sampling - ratio = (self.subsample_size / num_nodes) + 0.01 - choice = fps(data.pos, ratio=ratio, random_start=False) - choice = choice[: self.subsample_size] - for key in self.sampling_keys: - data[key] = data[key][choice] - return data - - -class CustomGridSampler(Subsampler): - """Samples a point cloud, using a voxel grid. - - A final random sampling is then needed to have a fixed number of points. - See https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/transforms/grid_sampling.html#GridSampling - - """ - - def __init__(self, subsample_size: int = 12500, voxel_size: Number = 0.25): - self.subsample_size = subsample_size - self.rs = RandomSampler(subsample_size=subsample_size) - self.voxel_size = voxel_size - - def __call__(self, data: Data) -> Data: - num_nodes = data.num_nodes - - # Random sampling if we are short in points - if num_nodes < self.subsample_size: - return self.rs(data) - - batch = data.get("batch", None) - - c = torch_geometric.nn.voxel_grid(data.pos, self.voxel_size, batch, None, None) - c, perm = torch_geometric.nn.pool.consecutive.consecutive_cluster(c) - - for key in self.sampling_keys: - item = data[key] - if torch.is_tensor(item) and item.size(0) == num_nodes: - if key == "y": - item = F.one_hot(item) - item = scatter_add(item, c, dim=0) - data[key] = item.argmax(dim=-1) - elif key == "batch": - data[key] = item[perm] - else: - data[key] = scatter_mean(item, c, dim=0) - # Up or downsample to get to subsample_size - data = self.rs(data) - return data - - -class MakeCopyOfSampledPos(BaseTransform): - """Make a copy of the unormalized positions of subsampled points.""" - - def __call__(self, data: Data): - data["pos_copy_subsampled"] = data["pos"].clone() - return data - - -class StandardizeFeatures(BaseTransform): - """Scale features in 0-1 range. - Additionnaly : use reserved -0.75 value for occluded points colors(normal range is -0.5 to 0.5). 
- - """ - - def __call__(self, data: Data): - idx = data.x_features_names.index("intensity") - data.x[:, idx] = self._log(data.x[:, idx], shift=1) - data.x[:, idx] = self._standardize_channel(data.x[:, idx]) - idx = data.x_features_names.index("rgb_avg") - data.x[:, idx] = self._standardize_channel(data.x[:, idx]) - return data - - def _log(self, channel_data, shift: float = 0.0): - return torch.log(channel_data + shift) - - def _standardize_channel(self, channel_data: torch.Tensor, clamp_sigma: int = 3): - """Sample-wise standardization y* = (y-y_mean)/y_std""" - mean = channel_data.mean() - std = channel_data.std() + 10**-6 - standard = (channel_data - mean) / std - clamp = clamp_sigma * std - clamped = torch.clamp(input=standard, min=-clamp, max=clamp) - return clamped - - -class NormalizePos(BaseTransform): - """ - Normalizes positions: - - xy positions to be in the interval (-1, 1) - - z position to start at 0. - - preserve euclidian distances - - XYZ are expected to be centered already. - - """ - - def __call__(self, data): - xy_positive_amplitude = data.pos[:, :2].abs().max() - xy_scale = (1 / xy_positive_amplitude) * 0.999999 - data.pos[:, :2] = data.pos[:, :2] * xy_scale - data.pos[:, 2] = (data.pos[:, 2] - data.pos[:, 2].min()) * xy_scale - - return data - - def __repr__(self): - return "{}()".format(self.__class__.__name__) - - -class TargetTransform(BaseTransform): - """ - Make target vector based on input classification dictionnary. - - Example: - Source : y = [6,6,17,9,1] - Pre-processed: - - classification_preprocessing_dict = {17:1, 9:1} - - y' = [6,6,1,1,1] - Mapped to consecutive integers: - - classification_dict = {1:"unclassified", 6:"building"} - - y'' = [1,1,0,0,0] - - """ - - def __init__( - self, - classification_preprocessing_dict: Dict[int, int], - classification_dict: Dict[int, str], - ): - - self._set_preprocessing_mapper(classification_preprocessing_dict) - self._set_mapper(classification_dict) - - def __call__(self, data: Data): - data.y = self.transform(data.y) - data.y_copy = self.transform(data.y_copy) - return data - - def transform(self, y): - y = self.preprocessing_mapper(y) - y = self.mapper(y) - return torch.LongTensor(y) - - def _set_preprocessing_mapper(self, classification_preprocessing_dict): - """Set mapper from source classification code to another code.""" - d = {key: value for key, value in classification_preprocessing_dict.items()} - self.preprocessing_mapper = np.vectorize( - lambda class_code: d.get(class_code, class_code) - ) - - def _set_mapper(self, classification_dict): - """Set mapper from source classification code to consecutive integers.""" - d = { - class_code: class_index - for class_index, class_code in enumerate(classification_dict.keys()) - } - self.mapper = np.vectorize(lambda class_code: d.get(class_code)) - - -def collate_fn(data_list: List[Data]) -> Batch: - """ - Batch Data objects from a list, to be used in DataLoader. Modified from: - https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/loader/dense_data_loader.html?highlight=collate_fn - - """ - batch = Batch() - data_list = list(filter(lambda x: x is not None, data_list)) - - # 1: add everything as list of non-Tensor object to facilitate adding new attributes. - for key in data_list[0].keys: - batch[key] = [data[key] for data in data_list] - - # 2: define relevant Tensor in long PyG format. 
- keys_to_long_format = ["pos", "x", "y", "pos_copy", "pos_copy_subsampled", "y_copy"] - for key in keys_to_long_format: - batch[key] = torch.cat([data[key] for data in data_list]) - - # 3. Create a batch index - batch.batch_x = torch.from_numpy( - np.concatenate( - [ - np.full(shape=len(data["y"]), fill_value=i) - for i, data in enumerate(data_list) - ] - ) - ) - batch.batch_y = torch.from_numpy( - np.concatenate( - [ - np.full(shape=len(data["pos_copy"]), fill_value=i) - for i, data in enumerate(data_list) - ] - ) - ) - batch.batch_size = len(data_list) - return batch diff --git a/lidar_multiclass/models/interpolation.py b/lidar_multiclass/models/interpolation.py index 3e3efa3b..72107cd4 100644 --- a/lidar_multiclass/models/interpolation.py +++ b/lidar_multiclass/models/interpolation.py @@ -1,5 +1,6 @@ +"""How we turn from prediction made on a subsampled subset of a Las to a complete point cloud.""" + import os -from tokenize import Number from typing import Dict, List, Optional, Literal, Union import pdal @@ -11,7 +12,7 @@ from lidar_multiclass.utils import utils from torch.distributions import Categorical -from lidar_multiclass.datamodules.transforms import ChannelNames +from lidar_multiclass.data.transforms import ChannelNames log = utils.get_logger(__name__) diff --git a/lidar_multiclass/predict.py b/lidar_multiclass/predict.py index 63d21c01..92d6d16b 100644 --- a/lidar_multiclass/predict.py +++ b/lidar_multiclass/predict.py @@ -2,12 +2,11 @@ import hydra import torch from omegaconf import DictConfig, OmegaConf -from typing import Optional from pytorch_lightning import LightningDataModule, LightningModule from tqdm import tqdm from lidar_multiclass.utils import utils -from lidar_multiclass.datamodules.interpolation import Interpolator +from lidar_multiclass.models.interpolation import Interpolator log = utils.get_logger(__name__) From 614400f1ec26b22962dde0293ef457dae06f2ec8 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Tue, 5 Apr 2022 11:04:21 +0200 Subject: [PATCH 4/8] add todo --- docs/source/apidoc/lidar_multiclass.model.rst | 6 ++++++ docs/source/index.rst | 1 + 2 files changed, 7 insertions(+) diff --git a/docs/source/apidoc/lidar_multiclass.model.rst b/docs/source/apidoc/lidar_multiclass.model.rst index 014ec8da..67e1ffed 100644 --- a/docs/source/apidoc/lidar_multiclass.model.rst +++ b/docs/source/apidoc/lidar_multiclass.model.rst @@ -5,4 +5,10 @@ Model ------------------------------------- .. automodule:: lidar_multiclass.models.model + :members: + +Interpolation +------------------------------------- + +.. automodule:: lidar_multiclass.models.interpolation :members: \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index 813a4241..ace6711a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -29,6 +29,7 @@ Lidar-Deep-Segmentation > Documentation background/interpolation background/data_optimization +.. TODO: assure that all dosctrings are in third-personn mode. .. 
toctree:: :maxdepth: 1 From 60a0c8ee888c1797e216c3cde3e8e25ebe0fdd13 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Tue, 5 Apr 2022 11:23:20 +0200 Subject: [PATCH 5/8] Add try clause to comet imports --- docs/requirements.txt | 5 +++-- docs/source/apidoc/configs.rst | 10 ++++++++++ docs/source/conf.py | 1 + docs/source/index.rst | 1 + lidar_multiclass/callbacks/comet_callbacks.py | 9 ++++++++- lidar_multiclass/train.py | 9 ++++++++- 6 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 docs/source/apidoc/configs.rst diff --git a/docs/requirements.txt b/docs/requirements.txt index 220eea6b..2fd2aab1 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,6 +3,9 @@ setuptools numpy numpydoc +tqdm +hydra-core +rich sphinx==4.5.* sphinx_rtd_theme==1.0.* @@ -13,8 +16,6 @@ sphinxnotes-mock==1.0.0b0 # still a beta sphinx-argparse==0.3.* # Using docutils==0.17 -hydra-core -rich comet_ml torch==1.10.1 diff --git a/docs/source/apidoc/configs.rst b/docs/source/apidoc/configs.rst new file mode 100644 index 00000000..d4f35557 --- /dev/null +++ b/docs/source/apidoc/configs.rst @@ -0,0 +1,10 @@ +Configs +=============================== + +Configs are managed with `hydra`. Here, we show the default configuration at a glance. +Refer to source documentation files for more info on their definition. + +.. _hydra: https://hydra.cc/ +.. File apidoc/configs_concatenation.yml is created at documentation build. + +.. autoyaml:: apidoc/configs_concatenation.yml \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 95ae36d3..1f9edcbc 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -52,6 +52,7 @@ "myst_parser", # supports markdown syntax for doc pages "sphinx_paramlinks", # allow to reference params, which is done in pytorch_lightning "sphinxnotes.mock", # ignore third-parties directive suche as "testcode" - see "mock_directive" args below + "sphinxcontrib.autoyaml", # Autodocumentation of yaml files. ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/index.rst b/docs/source/index.rst index ace6711a..9b1fae95 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -41,6 +41,7 @@ Lidar-Deep-Segmentation > Documentation apidoc/lidar_multiclass.models.modules apidoc/lidar_multiclass.callbacks apidoc/lidar_multiclass.utils + apidoc/configs Indices and Tables diff --git a/lidar_multiclass/callbacks/comet_callbacks.py b/lidar_multiclass/callbacks/comet_callbacks.py index 061f5655..bc2d0ef0 100755 --- a/lidar_multiclass/callbacks/comet_callbacks.py +++ b/lidar_multiclass/callbacks/comet_callbacks.py @@ -1,5 +1,12 @@ +# It is safer to import comet before all other imports. +try: + import comet_ml +except: + print( + "Warning: package comet_ml not found. This may break things if you use a comet callback." + ) + import os -import comet_ml from pathlib import Path from pytorch_lightning import Callback, Trainer diff --git a/lidar_multiclass/train.py b/lidar_multiclass/train.py index 16b3f92b..63043ddd 100755 --- a/lidar_multiclass/train.py +++ b/lidar_multiclass/train.py @@ -1,6 +1,13 @@ +# It is safer to import comet before all other imports. +try: + import comet_ml +except: + print( + "Warning: package comet_ml not found. This may break things if you use a comet callback." 
+ ) + import copy import os -import comet_ml from typing import List, Optional import hydra From 436c20bdbc7a12bd4593b644a28b34f8ffa831d2 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Tue, 5 Apr 2022 11:23:39 +0200 Subject: [PATCH 6/8] Remove trash file --- =0.17 | 1 - 1 file changed, 1 deletion(-) delete mode 100644 =0.17 diff --git a/=0.17 b/=0.17 deleted file mode 100644 index d99133c1..00000000 --- a/=0.17 +++ /dev/null @@ -1 +0,0 @@ -Requirement already satisfied: docutils in /home/CGaydon/anaconda3/envs/sphinx_doc_req/lib/python3.9/site-packages (0.17) From cfc3815ef95e55ce6406b5ee0f70d111aa16b714 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Tue, 5 Apr 2022 16:40:41 +0200 Subject: [PATCH 7/8] Include default config to doc --- bash/setup_environment/requirements.txt | 1 - docs/requirements.txt | 3 +- docs/source/apidoc/configs.rst | 11 +- docs/source/apidoc/default_config.yml | 136 ++++++++++++++++++ docs/source/apidoc/lidar_multiclass.utils.rst | 8 -- docs/source/conf.py | 21 ++- docs/source/index.rst | 3 +- 7 files changed, 161 insertions(+), 22 deletions(-) create mode 100644 docs/source/apidoc/default_config.yml diff --git a/bash/setup_environment/requirements.txt b/bash/setup_environment/requirements.txt index 22ce3477..c2e651f6 100755 --- a/bash/setup_environment/requirements.txt +++ b/bash/setup_environment/requirements.txt @@ -1,7 +1,6 @@ # --------- hydra --------- # hydra-core==1.1.0 hydra-colorlog==1.1.0 -optuna>=2.5 # --------- RST Linter --------- # rstcheck==3.3.* diff --git a/docs/requirements.txt b/docs/requirements.txt index 2fd2aab1..f5d72c89 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -4,7 +4,8 @@ setuptools numpy numpydoc tqdm -hydra-core +hydra-core==1.1.0 +hydra-colorlog==1.1.* rich sphinx==4.5.* diff --git a/docs/source/apidoc/configs.rst b/docs/source/apidoc/configs.rst index d4f35557..ae8e215e 100644 --- a/docs/source/apidoc/configs.rst +++ b/docs/source/apidoc/configs.rst @@ -1,10 +1,11 @@ -Configs +Default configuration =============================== -Configs are managed with `hydra`. Here, we show the default configuration at a glance. -Refer to source documentation files for more info on their definition. +Configurations are managed with `hydra`. Here, we show the default configuration at a glance. + +Refer to source configurations files in folder `configs` for more information. .. _hydra: https://hydra.cc/ -.. File apidoc/configs_concatenation.yml is created at documentation build. -.. autoyaml:: apidoc/configs_concatenation.yml \ No newline at end of file +.. 
literalinclude:: default_config.yml + :language: yaml diff --git a/docs/source/apidoc/default_config.yml b/docs/source/apidoc/default_config.yml new file mode 100644 index 00000000..adce4220 --- /dev/null +++ b/docs/source/apidoc/default_config.yml @@ -0,0 +1,136 @@ +seed: 12345 +work_dir: ${hydra:runtime.cwd} +debug: false +print_config: true +ignore_warnings: true +trainer: + _target_: pytorch_lightning.Trainer + gpus: 0 + min_epochs: 1 + max_epochs: 30 + log_every_n_steps: 1 + check_val_every_n_epoch: 1 + weights_summary: null + progress_bar_refresh_rate: 1 + auto_lr_find: false + overfit_batches: 1 + num_sanity_val_steps: 0 +datamodule: + dataset_description: + _convert_: all + classification_preprocessing_dict: + 59: 6 + 50: 1 + classification_dict: + 1: unclassified + 2: ground + 6: building + d_in: 12 + num_classes: 3 + load_las_func: + _target_: functools.partial + _args_: + - ${get_method:lidar_multiclass.data.loading.FrenchLidarDataLogic.load_las} + subsampler: + _target_: lidar_multiclass.data.transforms.CustomGridSampler + subsample_size: 12500 + voxel_size: 0.25 + _target_: lidar_multiclass.data.datamodule.DataModule + prepared_data_dir: ${oc.env:PREPARED_DATA_DIR} + num_workers: 1 + batch_size: 16 + subtile_width_meters: 50 + subtile_overlap: ${predict.subtile_overlap} + augment: false + subsample_size: 12500 +callbacks: + log_code: + _target_: lidar_multiclass.callbacks.comet_callbacks.LogCode + code_dir: ${work_dir}/lidar_multiclass + log_logs_dir: + _target_: lidar_multiclass.callbacks.comet_callbacks.LogLogsPath + lr_monitor: + _target_: pytorch_lightning.callbacks.LearningRateMonitor + logging_interval: step + log_momentum: true + log_iou_by_class: + _target_: lidar_multiclass.callbacks.logging_callbacks.LogIoUByClass + classification_dict: ${datamodule.dataset_description.classification_dict} + interpolator: + _target_: lidar_multiclass.models.interpolation.Interpolator + interpolation_k: ${predict.interpolation_k} + classification_dict: ${datamodule.dataset_description.classification_dict} + probas_to_save: ${predict.probas_to_save} + output_dir: null + model_checkpoint: + _target_: pytorch_lightning.callbacks.ModelCheckpoint + monitor: val/loss_epoch + mode: min + save_top_k: 1 + save_last: true + verbose: true + dirpath: checkpoints/ + filename: epoch_{epoch:03d} + auto_insert_metric_name: false + early_stopping: + _target_: pytorch_lightning.callbacks.EarlyStopping + monitor: val/loss_epoch + mode: min + patience: 6 + min_delta: 0 +model: + optimizer: + _target_: functools.partial + _args_: + - ${get_method:torch.optim.Adam} + lr: ${model.lr} + lr_scheduler: + _target_: functools.partial + _args_: + - ${get_method:torch.optim.lr_scheduler.ReduceLROnPlateau} + mode: min + factor: 0.5 + patience: 5 + cooldown: 0 + _target_: lidar_multiclass.models.model.Model + d_in: ${datamodule.dataset_description.d_in} + num_classes: ${datamodule.dataset_description.num_classes} + ckpt_path: null + neural_net_class_name: RandLANet + neural_net_hparams: + num_classes: ${model.num_classes} + d_in: ${model.d_in} + num_neighbors: 16 + decimation: 4 + dropout: 0.5 + iou: + _target_: functools.partial + _args_: + - ${get_method:torchmetrics.JaccardIndex} + - ${model.num_classes} + absent_score: 1.0 + criterion: + _target_: torch.nn.CrossEntropyLoss + label_smoothing: 0.0 + lr: 0.004566395347136576 + momentum: 0.9 + monitor: val/loss_epoch +logger: + comet: + _target_: pytorch_lightning.loggers.comet.CometLogger + api_key: ${oc.env:COMET_API_TOKEN} + workspace: ${oc.env:COMET_WORKSPACE} + 
project_name: ${oc.env:COMET_PROJECT_NAME} + experiment_name: RandLaNetDebug + auto_log_co2: false + disabled: false +task: + task_name: fit +predict: + src_las: /path/to/input.las + output_dir: /path/to/output_dir/ + resume_from_checkpoint: /path/to/lightning_model.ckpt + gpus: 0 + probas_to_save: all + subtile_overlap: 25 + interpolation_k: 10 diff --git a/docs/source/apidoc/lidar_multiclass.utils.rst b/docs/source/apidoc/lidar_multiclass.utils.rst index 8b69aa99..c6528522 100644 --- a/docs/source/apidoc/lidar_multiclass.utils.rst +++ b/docs/source/apidoc/lidar_multiclass.utils.rst @@ -1,8 +1,6 @@ lidar\_multiclass.utils =============================== -Submodules ----------- lidar\_multiclass.utils.utils ------------------------------------ @@ -11,9 +9,3 @@ lidar\_multiclass.utils.utils :members: :show-inheritance: -Module contents ---------------- - -.. automodule:: lidar_multiclass.utils - :members: - :show-inheritance: diff --git a/docs/source/conf.py b/docs/source/conf.py index 1f9edcbc..69db4569 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -12,23 +12,33 @@ # import os import sys +import yaml from unittest import mock -root_path = os.path.abspath("./../../") -sys.path.insert(0, root_path) +from hydra.experimental import compose, initialize +from omegaconf import OmegaConf -import yaml -with open(os.path.join(root_path, "package_metadata.yaml"), "r") as f: - pm = yaml.safe_load(f) +rel_root_path = "./../../" +abs_root_path = os.path.abspath(rel_root_path) +sys.path.insert(0, abs_root_path) + # -- Project information ----------------------------------------------------- +with open(os.path.join(abs_root_path, "package_metadata.yaml"), "r") as f: + pm = yaml.safe_load(f) release = pm["__version__"] project = pm["__name__"] author = pm["__author__"] copyright = "2021, Institut National de l'Information Géographique et Forestière" +# -- YAML main to print the config into --------------------------------------------------- +# We need to concatenate configs into a single file using hydra +with initialize(config_path=os.path.join(rel_root_path, "configs/"), job_name="config"): + cfg = compose(config_name="config") + print(OmegaConf.to_yaml(cfg)) + OmegaConf.save(cfg, "./apidoc/default_config.yml", resolve=False) # -- General configuration --------------------------------------------------- @@ -52,7 +62,6 @@ "myst_parser", # supports markdown syntax for doc pages "sphinx_paramlinks", # allow to reference params, which is done in pytorch_lightning "sphinxnotes.mock", # ignore third-parties directive suche as "testcode" - see "mock_directive" args below - "sphinxcontrib.autoyaml", # Autodocumentation of yaml files. ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/index.rst b/docs/source/index.rst index 9b1fae95..f7998c0e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -30,18 +30,19 @@ Lidar-Deep-Segmentation > Documentation background/data_optimization .. TODO: assure that all dosctrings are in third-personn mode. +.. TODO: find a way to document hydra config ; perhaps by switching to a full dataclasses mode. .. 
toctree:: :maxdepth: 1 :caption: Package Reference apidoc/scripts + apidoc/configs apidoc/lidar_multiclass.data apidoc/lidar_multiclass.model apidoc/lidar_multiclass.models.modules apidoc/lidar_multiclass.callbacks apidoc/lidar_multiclass.utils - apidoc/configs Indices and Tables From f6aad7995188da349d582149043f1855527fb378 Mon Sep 17 00:00:00 2001 From: Charles GAYDON Date: Tue, 5 Apr 2022 16:42:31 +0200 Subject: [PATCH 8/8] Bump version to V1.7.0 --- package_metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package_metadata.yaml b/package_metadata.yaml index e2167c4b..46c9968c 100644 --- a/package_metadata.yaml +++ b/package_metadata.yaml @@ -1,4 +1,4 @@ -__version__: "1.6.13" +__version__: "1.7.0" __name__: "lidar_multiclass" __url__: "https://github.com/IGNF/lidar-deep-segmentation" __description__: "Multiclass Semantic Segmentation for Lidar Point Cloud"
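
After patches 1-3 of this series, the interpolation logic is importable from lidar_multiclass.models.interpolation, and the loading, datamodule, and transform code lives under the lidar_multiclass.data package. The sketch below is a minimal, hypothetical illustration of the post-refactor import paths. The class names and constructor keys mirror configs/callbacks/default.yaml, the subsampler configs, and the default Hydra config added in patch 7; it assumes the relocated modules keep the same constructors as the deleted lidar_multiclass/datamodules/ files, and the literal argument values are placeholders rather than a documented API.

```python
# Minimal sketch of the import paths after this series (patches 1-3).
# Assumptions: the relocated modules keep the constructors shown in the
# deleted lidar_multiclass/datamodules/ files; argument values simply mirror
# the default Hydra config from patch 7 and are placeholders, not a
# documented API.
from lidar_multiclass.models.interpolation import Interpolator
from lidar_multiclass.data.datamodule import DataModule
from lidar_multiclass.data.loading import FrenchLidarDataLogic
from lidar_multiclass.data.transforms import CustomGridSampler

classification_dict = {1: "unclassified", 2: "ground", 6: "building"}

# Instantiated by the LogIoUByClass callback at test/predict time
# (see configs/callbacks/default.yaml after patch 1).
interpolator = Interpolator(
    interpolation_k=10,
    classification_dict=classification_dict,
    probas_to_save="all",
    output_dir=None,
)

# The datamodule now resolves its LAS loader and subsampler from the
# lidar_multiclass.data package instead of lidar_multiclass.datamodules.
datamodule = DataModule(
    prepared_data_dir="/path/to/prepared_data/",
    dataset_description={
        "classification_dict": classification_dict,
        "classification_preprocessing_dict": {59: 6, 50: 1},
        "load_las_func": FrenchLidarDataLogic.load_las,
    },
    subsampler=CustomGridSampler(subsample_size=12500, voxel_size=0.25),
    batch_size=16,
    num_workers=1,
)
```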