Module d'identification de la végétation et des non-classés (#62)

Bump version to V1.7.0 Add vegetation and unclassified detections Co-authored-by: Charles GAYDON <[email protected]>
IGNF · Sep 13, 2022 · 7be3359 · 7be3359
1 parent 00d8b32
commit 7be3359
Show file tree

Hide file tree

Showing 41 changed files with 1,137 additions and 241 deletions.
diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml
@@ -44,18 +44,32 @@ jobs:
         python -m
         pytest -rA -v -m "slow" --ignore=actions-runner --no-cov
 
-    - name: Test run from CLI on a LAS subset.
+    - name: Test detection of building.
       run: >
         docker run 
         -v /var/data/cicd/CICD_github_assets/M8.4/inputs/:/inputs/
         -v /var/data/cicd/CICD_github_assets/M8.4/outputs/:/outputs/
         lidar_prod_im
         python 
         lidar_prod/run.py
-        print_config=true
+        data_format=legacy.yaml
+        +task=apply_on_building
         paths.src_las=/inputs/730000_6360000.subset.prototype_format202.las
         paths.output_dir=/outputs/
 
+    - name: Test vegetation/unclassified detection from CLI on a LAS subset.
+      run: >
+        docker run 
+        -v /var/data/cicd/CICD_github_assets/M8.4/inputs/:/inputs/
+        -v /var/data/cicd/CICD_github_assets/M8.4/outputs/:/outputs/
+        lidar_prod_im
+        python 
+        lidar_prod/run.py
+        +task=identify_vegetation_unclassified
+        data_format=vegetation_unclassified.yaml
+        paths.src_las=/inputs/888000_6614000.subset.las
+        paths.output_dir=/outputs/
+
     # Everything ran so we tag the valid docker image to keep it
     # This happens for push events, which are in particular 
     # triggered when a pull request is merged.

diff --git a/__init__.py b/__init__.py
diff --git a/configs/basic_identification/default.yaml b/configs/basic_identification/default.yaml
@@ -0,0 +1,6 @@
+# parameters
+vegetation_threshold: 0.3923
+unclassified_threshold: 0.2455
+
+vegetation_nb_trials: 100
+unclassified_nb_trials: 100
diff --git a/configs/basic_identification/for_testing.yaml b/configs/basic_identification/for_testing.yaml
@@ -0,0 +1,6 @@
+# parameters
+vegetation_threshold: 0.5
+unclassified_threshold: 0.5
+
+vegetation_nb_trials: 3
+unclassified_nb_trials: 3
diff --git a/configs/config.yaml b/configs/config.yaml
@@ -9,4 +9,5 @@ defaults:
   - building_validation: default.yaml
   - building_identification: default.yaml
   - building_completion: default.yaml
+  - basic_identification: default.yaml
   - _self_  # needed by pdal for legacy reasons
diff --git a/configs/data_format/default.yaml b/configs/data_format/default.yaml
@@ -1,32 +1,42 @@
 # Those names connect the logics between successive tasks
 las_dimensions:
   # input
-  classification: classification  # las format
+  classification: Classification  # las format
 
   # Extra dims
   # ATTENTION: If extra dimensions are added, you may want to add them in cleaning.in parameter as well.
+  ai_vegetation_proba: vegetation
+  ai_unclassified_proba: unclassified
   ai_building_proba: building  # user-defined - output by deep learning model
   entropy: entropy # user-defined - output by deep learning model
 
+
   # Intermediary channels
   cluster_id: ClusterID  # pdal-defined -> created by clustering operations
   uni_db_overlay: BDTopoOverlay  # user-defined -> a 0/1 flag for presence of a BDUni vector
   candidate_buildings_flag: F_CandidateB # -> a 0/1 flag identifying candidate buildings found by rules-based classification  
-  ClusterID_candidate_building: CID_CandidateB  # -> Cluster index from BuildingValidator, 0 if no cluster, 1-n elsewise
-  ClusterID_isolated_plus_confirmed: CID_IsolatedOrConfirmed  # -> Cluster index from BuildingCompletor, 0 if no cluster, 1-n elsewise
+  ClusterID_candidate_building: CID_CandidateB  # -> Cluster index from BuildingValidator, 0 if no cluster, 1-n otherwise
+  ClusterID_isolated_plus_confirmed: CID_IsolatedOrConfirmed  # -> Cluster index from BuildingCompletor, 0 if no cluster, 1-n otherwise
 
   # Additionnal output channel
   ai_building_identified: Group
+  ai_vegetation_unclassified_groups: Group
 
 codes:
+  unclassified: 1
+  vegetation: 3
+  vegetation_target:
+      vegetation_low: 3
+      vegetation_medium: 4
+      vegetation_high: 5
   building:
     candidates: [202]  # found by rules-based classification (TerraScan)
     detailed:  # used for detailed output when doing threshold optimization
       unsure_by_entropy: 200 # unsure (based on entropy)
       unclustered: 202  # refuted
       ia_refuted: 110  # refuted
       ia_refuted_but_under_db_uni: 111  # unsure
-      both_unsure: 112  # unsure (elsewise)
+      both_unsure: 112  # unsure (otherwise)
       ia_confirmed_only: 113  # confirmed
       db_overlayed_only: 114  # confirmed
       both_confirmed: 115  # confirmed
@@ -47,16 +57,33 @@ codes:
 
 cleaning:
     # Extra dims that are kept when application starts. Others are removed to lighten the LAS.
-  input:
+  input_building:
     _target_: lidar_prod.tasks.cleaning.Cleaner
     extra_dims:
       - "${data_format.las_dimensions.ai_building_proba}=float"
       - "${data_format.las_dimensions.entropy}=float"
-  output:
+      # - "${data_format.las_dimensions.ai_vegetation_proba}=float"
+      # - "${data_format.las_dimensions.ai_unclassified_proba}=float"
+  output_building:
+    # Extra dims that are kept before final saving.
+    # You can override with "all" to keep all extra dimensions at development time.
+    _target_: lidar_prod.tasks.cleaning.Cleaner
+    extra_dims:
+      # - "${data_format.las_dimensions.ai_building_proba}=float"
+      # - "${data_format.las_dimensions.ai_vegetation_proba}=float"
+      - "${data_format.las_dimensions.entropy}=float"
+      - "${data_format.las_dimensions.ai_building_identified}=uint"
+      # - "${data_format.las_dimensions.ai_vegetation_unclassified_groups}=uint"
+  input_vegetation_unclassified:
+  # Extra dims added for storing the result of the vegetation/unclassified detection.
+    _target_: lidar_prod.tasks.cleaning.Cleaner
+    extra_dims:
+      - "${data_format.las_dimensions.ai_vegetation_unclassified_groups}=uint32"
+  output_vegetation_unclassified:
     # Extra dims that are kept before final saving.
     # You can override with "all" to keep all extra dimensions at development time.
     _target_: lidar_prod.tasks.cleaning.Cleaner
     extra_dims:
-      - "${data_format.las_dimensions.ai_building_proba}=float"
       - "${data_format.las_dimensions.entropy}=float"
-      - "${data_format.las_dimensions.ai_building_identified}=uint"
+      - "${data_format.las_dimensions.ai_building_proba}=uint"
+      - "${data_format.las_dimensions.ai_vegetation_unclassified_groups}=uint"
diff --git a/configs/data_format/legacy.yaml b/configs/data_format/legacy.yaml
@@ -0,0 +1,62 @@
+# Those names connect the logics between successive tasks
+las_dimensions:
+  # input
+  classification: Classification  # las format
+
+  # Extra dims
+  # ATTENTION: If extra dimensions are added, you may want to add them in cleaning.in parameter as well.
+  ai_building_proba: building  # user-defined - output by deep learning model
+  entropy: entropy # user-defined - output by deep learning model
+
+  # Intermediary channels
+  cluster_id: ClusterID  # pdal-defined -> created by clustering operations
+  uni_db_overlay: BDTopoOverlay  # user-defined -> a 0/1 flag for presence of a BDUni vector
+  candidate_buildings_flag: F_CandidateB # -> a 0/1 flag identifying candidate buildings found by rules-based classification  
+  ClusterID_candidate_building: CID_CandidateB  # -> Cluster index from BuildingValidator, 0 if no cluster, 1-n otherwise
+  ClusterID_isolated_plus_confirmed: CID_IsolatedOrConfirmed  # -> Cluster index from BuildingCompletor, 0 if no cluster, 1-n otherwise
+
+  # Additional output channel
+  ai_building_identified: Group
+
+codes:
+  building:
+    candidates: [202]  # found by rules-based classification (TerraScan)
+    detailed:  # used for detailed output when doing threshold optimization
+      unsure_by_entropy: 200 # unsure (based on entropy)
+      unclustered: 202  # refuted
+      ia_refuted: 110  # refuted
+      ia_refuted_but_under_db_uni: 111  # unsure
+      both_unsure: 112  # unsure (otherwise)
+      ia_confirmed_only: 113  # confirmed
+      db_overlayed_only: 114  # confirmed
+      both_confirmed: 115  # confirmed
+    final:  # used at the end of the building process
+      unsure: 214  # unsure
+      not_building: 208  # refuted
+      building: 6  # confirmed
+    detailed_to_final:
+      - ["${data_format.codes.building.detailed.unclustered}","${data_format.codes.building.final.not_building}"]
+      - ["${data_format.codes.building.detailed.ia_refuted}","${data_format.codes.building.final.not_building}"]
+      - ["${data_format.codes.building.detailed.ia_refuted_but_under_db_uni}","${data_format.codes.building.final.unsure}"]
+      - ["${data_format.codes.building.detailed.both_unsure}","${data_format.codes.building.final.unsure}"]
+      - ["${data_format.codes.building.detailed.unsure_by_entropy}","${data_format.codes.building.final.unsure}"]
+      - ["${data_format.codes.building.detailed.ia_confirmed_only}","${data_format.codes.building.final.building}"]
+      - ["${data_format.codes.building.detailed.db_overlayed_only}","${data_format.codes.building.final.building}"]
+      - ["${data_format.codes.building.detailed.both_confirmed}","${data_format.codes.building.final.building}"]
+
+
+cleaning:
+    # Extra dims that are kept when application starts. Others are removed to lighten the LAS.
+  input_building:
+    _target_: lidar_prod.tasks.cleaning.Cleaner
+    extra_dims:
+      - "${data_format.las_dimensions.ai_building_proba}=float"
+      - "${data_format.las_dimensions.entropy}=float"
+  output_building:
+    # Extra dims that are kept before final saving.
+    # You can override with "all" to keep all extra dimensions at development time.
+    _target_: lidar_prod.tasks.cleaning.Cleaner
+    extra_dims:
+      - "${data_format.las_dimensions.ai_building_proba}=float"
+      - "${data_format.las_dimensions.entropy}=float"
+      - "${data_format.las_dimensions.ai_building_identified}=uint"
diff --git a/configs/data_format/vegetation_unclassified.yaml b/configs/data_format/vegetation_unclassified.yaml
@@ -0,0 +1,89 @@
+# Those names connect the logics between successive tasks
+las_dimensions:
+  # input
+  classification: classification  # las format
+
+  # Extra dims
+  # ATTENTION: If extra dimensions are added, you may want to add them in cleaning.in parameter as well.
+  ai_vegetation_proba: vegetation
+  ai_unclassified_proba: unclassified
+  ai_building_proba: building  # user-defined - output by deep learning model
+  entropy: entropy # user-defined - output by deep learning model
+
+
+  # Intermediary channels
+  cluster_id: ClusterID  # pdal-defined -> created by clustering operations
+  uni_db_overlay: BDTopoOverlay  # user-defined -> a 0/1 flag for presence of a BDUni vector
+  candidate_buildings_flag: F_CandidateB # -> a 0/1 flag identifying candidate buildings found by rules-based classification  
+  ClusterID_candidate_building: CID_CandidateB  # -> Cluster index from BuildingValidator, 0 if no cluster, 1-n otherwise
+  ClusterID_isolated_plus_confirmed: CID_IsolatedOrConfirmed  # -> Cluster index from BuildingCompletor, 0 if no cluster, 1-n otherwise
+
+  # Additional output channel
+  ai_building_identified: Group
+  ai_vegetation_unclassified_groups: Group
+
+codes:
+  vegetation: 3
+  vegetation_target:
+      vegetation_low: 3
+      vegetation_medium: 4
+      vegetation_high: 5
+  unclassified: 1
+  building:
+    candidates: [202]  # found by rules-based classification (TerraScan)
+    detailed:  # used for detailed output when doing threshold optimization
+      unsure_by_entropy: 200 # unsure (based on entropy)
+      unclustered: 202  # refuted
+      ia_refuted: 110  # refuted
+      ia_refuted_but_under_db_uni: 111  # unsure
+      both_unsure: 112  # unsure (otherwise)
+      ia_confirmed_only: 113  # confirmed
+      db_overlayed_only: 114  # confirmed
+      both_confirmed: 115  # confirmed
+    final:  # used at the end of the building process
+      unsure: 214  # unsure
+      not_building: 208  # refuted
+      building: 6  # confirmed
+    detailed_to_final:
+      - ["${data_format.codes.building.detailed.unclustered}","${data_format.codes.building.final.not_building}"]
+      - ["${data_format.codes.building.detailed.ia_refuted}","${data_format.codes.building.final.not_building}"]
+      - ["${data_format.codes.building.detailed.ia_refuted_but_under_db_uni}","${data_format.codes.building.final.unsure}"]
+      - ["${data_format.codes.building.detailed.both_unsure}","${data_format.codes.building.final.unsure}"]
+      - ["${data_format.codes.building.detailed.unsure_by_entropy}","${data_format.codes.building.final.unsure}"]
+      - ["${data_format.codes.building.detailed.ia_confirmed_only}","${data_format.codes.building.final.building}"]
+      - ["${data_format.codes.building.detailed.db_overlayed_only}","${data_format.codes.building.final.building}"]
+      - ["${data_format.codes.building.detailed.both_confirmed}","${data_format.codes.building.final.building}"]
+
+
+cleaning:
+    # Extra dims that are kept when application starts. Others are removed to lighten the LAS.
+  input:
+    _target_: lidar_prod.tasks.cleaning.Cleaner
+    extra_dims:
+      # - "${data_format.las_dimensions.ai_building_proba}=float"
+      - "${data_format.las_dimensions.entropy}=float"
+      - "${data_format.las_dimensions.ai_vegetation_proba}=float"
+      - "${data_format.las_dimensions.ai_unclassified_proba}=float"
+  output:
+    # Extra dims that are kept before final saving.
+    # You can override with "all" to keep all extra dimensions at development time.
+    _target_: lidar_prod.tasks.cleaning.Cleaner
+    extra_dims:
+      # - "${data_format.las_dimensions.ai_building_proba}=float"
+      # - "${data_format.las_dimensions.ai_vegetation_proba}=float"
+      - "${data_format.las_dimensions.entropy}=float"
+      # - "${data_format.las_dimensions.ai_building_identified}=uint"
+      - "${data_format.las_dimensions.ai_vegetation_unclassified_groups}=uint"
+  input_vegetation_unclassified:
+  # Extra dims added for storing the result of the vegetation/unclassified detection.
+    _target_: lidar_prod.tasks.cleaning.Cleaner
+    extra_dims:
+      - "${data_format.las_dimensions.ai_vegetation_unclassified_groups}=uint32"
+  output_vegetation_unclassified:
+    # Extra dims that are kept before final saving.
+    # You can override with "all" to keep all extra dimensions at development time.
+    _target_: lidar_prod.tasks.cleaning.Cleaner
+    extra_dims:
+      - "${data_format.las_dimensions.entropy}=float"
+      - "${data_format.las_dimensions.ai_building_identified}=uint"
+      - "${data_format.las_dimensions.ai_vegetation_unclassified_groups}=uint"
diff --git a/docs/source/background/overview.md b/docs/source/background/overview.md
@@ -0,0 +1,34 @@
+# Overview of the process
+
+A las file goes through several steps, which alter its content. 
+At first we have a `Raw` las file, sent to another process (`Myria3D`) that infers the probabilities of various classes.
+From there, the las file is used a first time by this module to decide if the points are vegetation, unclassified or something else, and sent to an external process, a `rule-based segmentation`.
+The las file is used a second time by this module to decide if the points are buildings or not. 
+
+# Schema of the overall process
+The arrows represent dimensions in the las file, where they come from and where they are consumed.
+
+.. mermaid::
+	sequenceDiagram
+		participant Raw
+		participant AI inference (Myria3D)
+		participant vegetation and unclassified detection
+		participant Rule based segmentation
+		participant Building module
+		actor Human inspection
+		Raw->>AI inference (Myria3D): Intensity
+		Raw->>AI inference (Myria3D): ReturnNumber
+		Raw->>AI inference (Myria3D): NumberOfReturns
+		Raw->>AI inference (Myria3D): Red, Green, Blue
+		Raw->>AI inference (Myria3D): Infrared
+		AI inference (Myria3D)->>vegetation and unclassified detection: vegetation
+		AI inference (Myria3D)->>vegetation and unclassified detection: unclassified
+		vegetation and unclassified detection->>Rule based segmentation: Group (1: vegetation, 3: unclassified)
+		note right of Rule based segmentation: uses vegetation and unclassified
+		AI inference (Myria3D)->>Building module: building
+		AI inference (Myria3D)->>Building module: entropy
+		Building module->>Human inspection: Group
+		Raw-->>vegetation and unclassified detection: Classification
+		Raw-->>Building module: Classification
+		note right of Building module: Building module updates Classification
+		Building module->>Human inspection: Classification
diff --git a/docs/source/background/production_process.md b/docs/source/background/production_process.md
@@ -2,9 +2,19 @@
 
 The end goal of the tool is to edit the input (rules-based) classification as much as we confidently can, and to highlight remaining areas of uncertainty for human inspection.
 
-**Input**: point cloud that went through a first geometric algorithm that identified `candidates building points` based on geometric rules (e.g. plane surfaces, above 1.5m of the ground, etc.), and for which a semantic segmentation model produced a point-level probability of being a building. The default name for this extra dimension is `building`. You can leverage this [package for aerial lidar deep learning segmentation](https://github.com/IGNF/lidar-deep-segmentation).
+**Input**: point cloud that went through a first geometric algorithm that identified `candidates building points` based on geometric rules (e.g. plane surfaces, above 1.5m of the ground, etc.), and for which a semantic segmentation model produced point-level probabilities of being a building, vegetation and/or unclassified, and calculated the associated entropy. The default names for those extra dimensions are `building`, `vegetation`, `unclassified` and `entropy` respectively. You can leverage this [package for aerial lidar deep learning segmentation](https://github.com/IGNF/lidar-deep-segmentation).
 
-## A) Building Validation
+## A.1) Vegetation detection
+**Goal**: Confirm or refute points as vegetation.
+
+The identification is done by comparing the vegetation probability of a point against a `threshold`. That threshold has been previously established as the best on a test set of las files.
+
+## A.2) Unclassified detection
+**Goal**: Confirm or refute points as unclassified.
+
+Exactly as with vegetation detection, the identification is done by comparing the unclassified probability of a point against a `threshold`. That threshold has been previously established as the best on a test set of las files.
+
+## B.1) Building Validation
 
 **Goal**: Confirm or refute groups of candidate building points when possible, mark them as unsure elsewise.
 
@@ -32,7 +42,7 @@ Current performances on a 15km² validation dataset, expressed as percentages of
 
 ![](/img/LidarBati-BuildingValidationM7.1V2.0.png)
 
-## B) Building Completion
+## B.2) Building Completion
 
 **Goal**: Confirm points that were too isolated to make up a group but have high-enough probability nevertheless (e.g. walls)
 
@@ -44,7 +54,7 @@ therefore confirmed as well.
 ![](/img/LidarBati-BuildingCompletion.png)
 
 
-## C) Building Identification
+## B.3) Building Identification
 
 **Goal**: Highlight potential buildings that were missed by the rule-based algorithm, for human inspection. 
 

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -11,6 +11,7 @@ Lidar-Prod > Documentation
    :maxdepth: 1
    :caption: Background
 
+   background/overview
    background/production_process
    background/thresholds_optimization_process
 

diff --git a/docs/source/tutorials/use.md b/docs/source/tutorials/use.md
@@ -41,3 +41,17 @@ For developments and debugging, you can run the package directly from python sou
 conda activate lidar_prod
 python lidar_prod/run.py paths.src_las=[/path/to/file.las]
 ```
+## Run Different tasks
+
+Different tasks can be run from the application. Currently, they are:
+- `apply_on_building` to identify buildings
+- `identify_vegetation_unclassified` to identify unclassified and vegetation
+- `optimize_building` to evaluate the best parameters for building detection
+- `optimize_veg_id` to evaluate the best parameters for vegetation detection
+- `optimize_unc_id` to evaluate the best parameters for unclassified detection
+- `cleaning` to prepare las file with the correct dimension (mostly useful in development)
+
+To use one of those tasks, simply add "+task=[task_name]" to the options list, like this:
+```bash
+python lidar_prod/run.py +task=apply_on_building ...
+```