Skip to content

Commit

Permalink
WIP V3.*.* with torch-geometric RandLA-Net implementation (#39)
Browse files Browse the repository at this point in the history
Development of PyG-RandLA-Net

Co-authored-by: Michel Daab <[email protected]>
  • Loading branch information
CharlesGaydon and MichelDaab committed Nov 28, 2022
1 parent 8a731ad commit 9aebf83
Show file tree
Hide file tree
Showing 56 changed files with 1,298 additions and 506 deletions.
18 changes: 9 additions & 9 deletions .github/workflows/cicd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,13 @@ jobs:
- name: Build docker image
run: docker build -t myria3d .

- name: Fast unit tests first
- name: Run pytest
run: >
docker run
-v /var/data/cicd/CICD_github_assets/myria3d_B2V0.5/inputs/:/myria3d/tests/data/large/
--ipc=host
myria3d
python -m
pytest -rA -v -m "not slow"
pytest -rA -v
--ignore=actions-runner
Expand All @@ -42,19 +41,20 @@ jobs:
- name: Example inference run via Docker with inference-time subtiles overlap to smooth-out results.
run: >
docker run
-v /var/data/cicd/CICD_github_assets/myria3d_B2V0.5/inputs/:/inputs/
-v /var/data/cicd/CICD_github_assets/myria3d_B2V0.5/outputs/:/outputs/
-v /var/data/cicd/CICD_github_assets/myria3d_V3.0.2/inputs/:/inputs/
-v /var/data/cicd/CICD_github_assets/myria3d_V3.0.2/outputs/:/outputs/
--ipc=host
--shm-size=2gb
myria3d
python /myria3d/myria3d/predict.py
experiment=predict
predict.ckpt_path=/inputs/proto151_V0.0_epoch_056_Myria3DV2.3.0.ckpt
python /app/myria3d/predict.py
--config-path /inputs/
--config-name proto151_V0.0_epoch_80_Myria3DV3.0.0_predict_config_Myria3DV3.0.0.yaml
predict.ckpt_path=/inputs/proto151_V0.0_epoch_80_Myria3DV3.0.0.ckpt
predict.src_las=/inputs/792000_6272000_subset_buildings.las
predict.output_dir=/outputs/
predict.subtile_overlap=25
datamodule.batch_size=20
hydra.run.dir=/myria3d
hydra.run.dir=/app
- name: Check code neatness (linter)
run: docker run myria3d flake8
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/gh-pages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,14 @@ jobs:
uses: actions/cache@v2
with:
path: ${{ env.CONDA }}/envs
key: conda-${{ runner.os }}--${{ runner.arch }}--${{ steps.get-date.outputs.today }}-${{ hashFiles('setup_env/requirements.yml') }}-${{ hashFiles('setup_env/requirements.txt') }}
key: conda-${{ runner.os }}--${{ runner.arch }}--${{ steps.get-date.outputs.today }}-${{ hashFiles('environment.yml') }}
env:
# Increase this value to reset cache if etc/example-environment.yml has not changed
# Increase this value to reset cache if env has not changed.
CACHE_NUMBER: 0
id: cache

- name: Update environment if there was no cached env.
run: mamba env update -n myria3d -f setup_env/requirements.yml
run: mamba env update -n myria3d -f environment.yml
if: steps.cache.outputs.cache-hit != 'true'

- name: List installed packages
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@ inputs/
outputs/
checkpoints/
runs/
data/

# large files that are not tracked
tests/data/large
tests/data/toy_dataset.hdf5


# Byte-compiled / optimized / DLL files
Expand Down
89 changes: 40 additions & 49 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,57 +1,48 @@
FROM nvidia/cuda:10.2-devel-ubuntu18.04
# An nvidia image seems to be necessary for torch-points-kernel.
# Also, a "devel" image seems required for the same library
FROM nvidia/cuda:11.3.1-base-ubuntu20.04

# set the IGN proxy, otherwise apt-get and other applications don't work
# Should be commented out outside of IGN
ENV http_proxy 'http://192.168.4.9:3128/'
ENV https_proxy 'http://192.168.4.9:3128/'

# set the timezone, otherwise it asks for it... and freezes
ENV TZ=Europe/Paris
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# Needed to use apt-get afterwards due to CUDA changes described here since April 27, 2022:
# https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212772
# Not the recommended method, but otherwise we would need wget installed beforehand.
# We changed to 10.2-devel-ubuntu18.04 so that might not be needed.
RUN apt-get update && apt-get upgrade -y && apt-get install -y wget
RUN apt-key del 7fa2af80
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub

# all the apt-get installs
RUN apt-get update && apt-get upgrade -y && apt-get install -y \
software-properties-common \
wget \
git \
libgl1-mesa-glx libegl1-mesa libxrandr2 libxrandr2 libxss1 libxcursor1 libxcomposite1 libasound2 libxi6 libxtst6 # package needed for anaconda

RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh \
&& /bin/bash ~/miniconda.sh -b -p /opt/conda \
&& rm ~/miniconda.sh

ENV PATH /opt/conda/bin:$PATH

# Only copy necessary files to set up the environment,
# to use docker caching if requirements files were not updated.
WORKDIR /setup_env
COPY ./setup_env/ .

# install mamba to setup the env faster
RUN conda install -y mamba -n base -c conda-forge
# Build the environment
RUN mamba env create -f requirements.yml

# Copy the repository content in /myria3d
WORKDIR /myria3d
# Remove any third-party apt sources to avoid issues with expiring keys.
RUN rm -f /etc/apt/sources.list.d/*.list

# Install some basic utilities
RUN apt-get update && apt-get install -y \
curl \
ca-certificates \
sudo \
git \
bzip2 \
libx11-6 \
&& rm -rf /var/lib/apt/lists/*

# Create a working directory
RUN mkdir /app

# Set up the Conda environment and make python accessible via PATH.
ENV CONDA_AUTO_UPDATE_CONDA=false
ENV PATH=/miniconda:/miniconda/bin:$PATH
COPY environment.yml /app/environment.yml
RUN curl -sLo /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-py39_4.10.3-Linux-x86_64.sh \
&& chmod +x /miniconda.sh \
&& /miniconda.sh -b -p /miniconda \
&& rm /miniconda.sh \
&& /miniconda/bin/conda env update -n base -f /app/environment.yml \
&& rm /app/environment.yml \
&& /miniconda/bin/conda clean -ya

# Need to export this for torch_geometric to find where cuda is.
# See https://github.com/pyg-team/pytorch_geometric/issues/2040#issuecomment-766610625
ENV LD_LIBRARY_PATH="/miniconda/lib/:$LD_LIBRARY_PATH"

# Check success of environment creation.
RUN python -c "import torch_geometric;"

# Copy the repository content in /app
WORKDIR /app
COPY . .

# Make RUN commands use the new environment:
SHELL ["conda", "run", "-n", "myria3d", "/bin/bash", "-c"]

# The entrypoint guarantees that all commands will be run in the conda environment.
ENTRYPOINT ["conda", \
"run", \
"-n", \
"myria3d"]

# Set the default command to bash for image inspection.
CMD ["bash"]
5 changes: 0 additions & 5 deletions configs/callbacks/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,6 @@ lr_monitor:
log_iou_by_class:
_target_: myria3d.callbacks.logging_callbacks.LogIoUByClass
classification_dict: ${dataset_description.classification_dict}
interpolator: # only used at test time
_target_: myria3d.models.interpolation.Interpolator
interpolation_k: ${predict.interpolation_k}
classification_dict: ${dataset_description.classification_dict}
probas_to_save: ${predict.probas_to_save} # replace by a list of string of class names to select specific probas to save

model_checkpoint:
_target_: pytorch_lightning.callbacks.ModelCheckpoint
Expand Down
2 changes: 1 addition & 1 deletion configs/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ defaults:
- datamodule: hdf5_datamodule.yaml
- dataset_description: 20220607_151_dalles_proto.yaml # describes input features and classes
- callbacks: default.yaml # set this to null if you don't want to use callbacks
- model: randla_net_model.yaml
- model: randla_net_model.yaml # other option is pyg_randla_net_model

- logger: comet # set logger here or use command line (e.g. `python run.py logger=wandb`)
- task: default.yaml # set logger here or use command line (e.g. `python run.py logger=wandb`)
Expand Down
6 changes: 3 additions & 3 deletions configs/datamodule/hdf5_datamodule.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ subtile_shape: "square" # "square" or "disk"
subtile_overlap_train: 0
subtile_overlap_predict: "${predict.subtile_overlap}"

batch_size: 12
num_workers: 2
prefetch_factor: 2
batch_size: 32
num_workers: 3
prefetch_factor: 3

defaults:
- transforms: default.yaml
Empty file.
2 changes: 1 addition & 1 deletion configs/datamodule/transforms/default.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
defaults:
- preparations: default.yaml
- augmentations: light.yaml
- augmentations: none.yaml
- normalizations: default.yaml

# turn the config dict into ListConfig that will be fed directly to a Compose object
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# default preparations with grid sampling and random sampling.

train:
TargetTransform:
_target_: myria3d.pctl.transforms.transforms.TargetTransform
_args_:
- ${dataset_description.classification_preprocessing_dict}
- ${dataset_description.classification_dict}

GridSampling:
_target_: torch_geometric.transforms.GridSampling
_args_:
- 0.25

MinimumNumNodes:
_target_: myria3d.pctl.transforms.transforms.MinimumNumNodes
_args_:
- 300

Center:
_target_: torch_geometric.transforms.Center

eval:
TargetTransform:
_target_: myria3d.pctl.transforms.transforms.TargetTransform
_args_:
- ${dataset_description.classification_preprocessing_dict}
- ${dataset_description.classification_dict}

CopyFullPreparedTargets:
_target_: myria3d.pctl.transforms.transforms.CopyFullPreparedTargets

CopyFullPos:
_target_: myria3d.pctl.transforms.transforms.CopyFullPos

GridSampling:
_target_: torch_geometric.transforms.GridSampling
_args_:
- 0.25

MinimumNumNodes:
_target_: myria3d.pctl.transforms.transforms.MinimumNumNodes
_args_:
- 300

# For interpolation
CopySampledPos:
_target_: myria3d.pctl.transforms.transforms.CopySampledPos

Center:
_target_: torch_geometric.transforms.Center

predict:
CopyFullPos:
_target_: myria3d.pctl.transforms.transforms.CopyFullPos

GridSampling:
_target_: torch_geometric.transforms.GridSampling
_args_:
- 0.25

MinimumNumNodes:
_target_: myria3d.pctl.transforms.transforms.MinimumNumNodes
_args_:
- 300

CopySampledPos:
_target_: myria3d.pctl.transforms.transforms.CopySampledPos

Center:
_target_: torch_geometric.transforms.Center
25 changes: 25 additions & 0 deletions configs/experiment/RandLaNet-Legacy-Overfit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# @package _global_

# Note: call "python myria3d/pctl/dataset/toy_dataset.py" to create a toy dataset before running this.
defaults:
- override /datamodule/transforms/augmentations: none.yaml


logger:
comet:
experiment_name: "RandLaNetOverfit"

trainer:
min_epochs: 100
max_epochs: 100
overfit_batches: 1
num_sanity_val_steps: 0

datamodule:
batch_size: 6
num_workers: 2
# runtime.cwd is where application is run from e.g. where run.py is.
data_dir: "${hydra:runtime.cwd}/tests/data/"
split_csv_path: "${hydra:runtime.cwd}/tests/data/toy_dataset_src/toy_dataset_split.csv"
hdf5_file_path: "${hydra:runtime.cwd}/tests/data/toy_dataset.hdf5"

25 changes: 25 additions & 0 deletions configs/experiment/RandLaNet-PyG-Overfit-NoRS.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# @package _global_

# Note: call "python myria3d/pctl/dataset/toy_dataset.py" to create a toy dataset before running this.
defaults:
- override /model: pyg_randla_net_model.yaml
- override /datamodule/transforms/preparations: no_random_subsampling.yaml


logger:
comet:
experiment_name: "RandLaNetOverfit"

trainer:
min_epochs: 100
max_epochs: 100
overfit_batches: 1
num_sanity_val_steps: 0

datamodule:
batch_size: 12
num_workers: 2
# runtime.cwd is where application is run from e.g. where run.py is.
data_dir: "${hydra:runtime.cwd}/tests/data/"
split_csv_path: "${hydra:runtime.cwd}/tests/data/toy_dataset_src/toy_dataset_split.csv"
hdf5_file_path: "${hydra:runtime.cwd}/tests/data/toy_dataset.hdf5"
25 changes: 25 additions & 0 deletions configs/experiment/RandLaNet-PyG-Overfit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# @package _global_

# Note: call "python myria3d/pctl/dataset/toy_dataset.py" to create a toy dataset before running this.
defaults:
- override /model: pyg_randla_net_model.yaml
- override /datamodule/transforms/augmentations: none.yaml


logger:
comet:
experiment_name: "RandLaNetOverfit"

trainer:
min_epochs: 100
max_epochs: 100
overfit_batches: 1
num_sanity_val_steps: 0

datamodule:
batch_size: 12
num_workers: 2
# runtime.cwd is where application is run from e.g. where run.py is.
data_dir: "${hydra:runtime.cwd}/tests/data/"
split_csv_path: "${hydra:runtime.cwd}/tests/data/toy_dataset_src/toy_dataset_split.csv"
hdf5_file_path: "${hydra:runtime.cwd}/tests/data/toy_dataset.hdf5"
2 changes: 1 addition & 1 deletion configs/experiment/RandLaNet_base_run_FR_legacy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ datamodule:
batch_size: 32

trainer:
num_sanity_val_steps: 0
num_sanity_val_steps: 2
min_epochs: 100
max_epochs: 150
# gpus: [1]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# @package _global_
defaults:
- RandLaNet_base_run_FR_pyg_randla_net.yaml

logger:
comet:
experiment_name: "Pyg RandLaNet - FR Data - 2xGPUs"

trainer:
strategy: ddp_find_unused_parameters_false
# Replace by gpu to simulate multi-gpus training.
accelerator: gpu
num_processes: 2
gpus: 2

datamodule:
# You may want to divide batch size by number of gpus to keep number of steps by epoch unchanged.
batch_size: 16
Loading

0 comments on commit 9aebf83

Please sign in to comment.