IGNF · CharlesGaydon · Nov 3, 2022 · Oct 24, 2022 · Oct 24, 2022 · Oct 24, 2022
diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml
@@ -46,7 +46,7 @@ jobs:
         python -m
         pytest -rA -v -m "slow" --ignore=actions-runner --no-cov
 
-    - name: Test detection of building.
+    - name: Test building module from CLI on a LAS subset.
       run: >
         docker run --network host
         -v /var/data/cicd/CICD_github_assets/M8.4/inputs/:/inputs/

diff --git a/configs/building_validation/optimization/default.yaml b/configs/building_validation/optimization/default.yaml
@@ -48,7 +48,7 @@ study:
         - "${get_method:lidar_prod.tasks.building_validation_optimization.constraints_func}"
 
 design:
-  n_trials: 300
+  n_trials: 400
   constraints:
     min_precision_constraint: 0.98
     min_recall_constraint: 0.98

diff --git a/lidar_prod/tasks/building_validation.py b/lidar_prod/tasks/building_validation.py
@@ -17,7 +17,7 @@
     get_pdal_writer,
     split_idx_by_dim,
     get_pipeline,
-    request_bd_uni_for_building_shapefile
+    request_bd_uni_for_building_shapefile,
 )
 
 log = logging.getLogger(__name__)
@@ -99,7 +99,9 @@ def run(
                 "Preparation : Clustering of candidates buildings & Requesting BDUni"
             )
             if type(input_values) == str:
-                log.info(f"Applying Building Validation to file \n{input_values}")
+                log.info(
+                    f"Applying Building Validation to file \n{input_values}"
+                )
                 temp_f = osp.join(td, osp.basename(input_values))
             else:
                 temp_f = ""
@@ -108,7 +110,12 @@ def run(
             self.update()
         return target_las_path
 
-    def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las_path: str, save_result: bool = False) -> None:
+    def prepare(
+        self,
+        input_values: Union[str, pdal.pipeline.Pipeline],
+        prepared_las_path: str,
+        save_result: bool = False,
+    ) -> None:
         f"""
         Prepare las for later decision process. .
         1. Cluster candidates points, in a new `{self.data_format.las_dimensions.ClusterID_candidate_building}`
@@ -130,7 +137,9 @@ def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las
 
         """
 
-        dim_candidate_flag = self.data_format.las_dimensions.candidate_buildings_flag
+        dim_candidate_flag = (
+            self.data_format.las_dimensions.candidate_buildings_flag
+        )
         dim_cluster_id_pdal = self.data_format.las_dimensions.cluster_id
         dim_cluster_id_candidates = (
             self.data_format.las_dimensions.ClusterID_candidate_building
@@ -139,7 +148,9 @@ def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las
 
         self.pipeline = get_pipeline(input_values)
         # Identify candidates buildings points with a boolean flag
-        self.pipeline |= pdal.Filter.ferry(dimensions=f"=>{dim_candidate_flag}")
+        self.pipeline |= pdal.Filter.ferry(
+            dimensions=f"=>{dim_candidate_flag}"
+        )
         _is_candidate_building = (
             "("
             + " || ".join(
@@ -165,15 +176,19 @@ def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las
         )
         self.pipeline |= pdal.Filter.assign(value=f"{dim_cluster_id_pdal} = 0")
         self.pipeline.execute()
-        bbox = get_integer_bbox(self.pipeline, buffer=self.bd_uni_request.buffer)
+        bbox = get_integer_bbox(
+            self.pipeline, buffer=self.bd_uni_request.buffer
+        )
 
         self.pipeline |= pdal.Filter.ferry(dimensions=f"=>{dim_overlay}")
 
         if self.shp_path:
-            temp_dirpath = None     # no need for a temporay directory to add the shapefile in it, we already have the shapefile
+            temp_dirpath = None  # no need for a temporary directory to store the shapefile, since we already have it
             _shp_p = self.shp_path
             gdf = geopandas.read_file(_shp_p)
-            buildings_in_bd_topo = not len(gdf) == 0    # check if there are buildings in the shp
+            buildings_in_bd_topo = (
+                not len(gdf) == 0
+            )  # check if there are buildings in the shp
 
         else:
             temp_dirpath = mkdtemp()
@@ -201,7 +216,9 @@ def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las
         if temp_dirpath:
             shutil.rmtree(temp_dirpath)
 
-    def update(self, src_las_path: str = None, target_las_path: str = None) -> None:
+    def update(
+        self, src_las_path: str = None, target_las_path: str = None
+    ) -> None:
         """Updates point cloud classification channel."""
         if src_las_path:
             self.pipeline = pdal.Pipeline()
@@ -316,7 +333,8 @@ def _make_detailed_group_decision(
         p_heq_relaxed_threshold = infos.probabilities >= relaxed_threshold
 
         ia_confirmed_flag = np.logical_or(
-            p_heq_threshold, np.logical_and(infos.overlays, p_heq_relaxed_threshold)
+            p_heq_threshold,
+            np.logical_and(infos.overlays, p_heq_relaxed_threshold),
         )
 
         ia_confirmed = (
@@ -326,26 +344,32 @@ def _make_detailed_group_decision(
         # REFUTATION
         ia_refuted = (
             np.mean(
-                (1 - infos.probabilities) >= self.thresholds.min_confidence_refutation
+                (1 - infos.probabilities)
+                >= self.thresholds.min_confidence_refutation
             )
             >= self.thresholds.min_frac_refutation
         )
         uni_overlayed = (
             np.mean(infos.overlays) >= self.thresholds.min_uni_db_overlay_frac
         )
+        # If low entropy, we may trust AI to confirm/refute
+        if not high_entropy:
+            if ia_refuted:
+                if uni_overlayed:
+                    return self.codes.detailed.ia_refuted_but_under_db_uni
+                return self.codes.detailed.ia_refuted
+            if ia_confirmed:
+                if uni_overlayed:
+                    return self.codes.detailed.both_confirmed
+                return self.codes.detailed.ia_confirmed_only
+        # Else, we may still use BDUni information
+        if uni_overlayed:
+            return self.codes.detailed.db_overlayed_only
 
+        # Else: we are uncertain, and we specify whether high entropy was
+        # what led to the uncertainty.
         if high_entropy:
             return self.codes.detailed.unsure_by_entropy
-        if ia_refuted:
-            if uni_overlayed:
-                return self.codes.detailed.ia_refuted_but_under_db_uni
-            return self.codes.detailed.ia_refuted
-        if ia_confirmed:
-            if uni_overlayed:
-                return self.codes.detailed.both_confirmed
-            return self.codes.detailed.ia_confirmed_only
-        if uni_overlayed:
-            return self.codes.detailed.db_overlayed_only
         return self.codes.detailed.both_unsure
 
 

diff --git a/lidar_prod/tasks/building_validation_optimization.py b/lidar_prod/tasks/building_validation_optimization.py
@@ -11,6 +11,7 @@
 from tqdm import tqdm
 import os.path as osp
 import pdal
+import math
 
 from lidar_prod.tasks.building_validation import (
     BuildingValidator,
@@ -94,26 +95,27 @@ def setup(self):
         codes to adapt to those of the optimization dataset.
 
         """
-        las_paths = glob(osp.join(self.paths.input_las_dir, "*.las"))
-        laz_paths = glob(osp.join(self.paths.input_las_dir, "*.laz"))
-        self.las_filepaths = sorted(las_paths + laz_paths)
-        if not self.las_filepaths:
-            raise ValueError(
-                "No LAS/LAZ found in {self.paths.input_las_dir} (i.e. input_las_dir) while"
-                "globbing *las and *laz extensions (lowercase)."
-            )
-        if self.debug:
-            self.las_filepaths = self.las_filepaths[:1]
-        os.makedirs(self.paths.prepared_las_dir, exist_ok=True)
-        self.prepared_las_filepaths = [
-            osp.join(self.paths.prepared_las_dir, osp.basename(f))
-            for f in self.las_filepaths
-        ]
-        os.makedirs(self.paths.updated_las_dir, exist_ok=True)
-        self.out_las_filepaths = [
-            osp.join(self.paths.updated_las_dir, osp.basename(f))
-            for f in self.las_filepaths
-        ]
+        if "prepare" in self.todo or "update" in self.todo:
+            las_paths = glob(osp.join(self.paths.input_las_dir, "*.las"))
+            laz_paths = glob(osp.join(self.paths.input_las_dir, "*.laz"))
+            self.las_filepaths = sorted(las_paths + laz_paths)
+            if not self.las_filepaths:
+                raise ValueError(
+                    "No LAS/LAZ found in {self.paths.input_las_dir} (i.e. input_las_dir) while"
+                    "globbing *las and *laz extensions (lowercase)."
+                )
+            if self.debug:
+                self.las_filepaths = self.las_filepaths[:1]
+            os.makedirs(self.paths.prepared_las_dir, exist_ok=True)
+            self.prepared_las_filepaths = [
+                osp.join(self.paths.prepared_las_dir, osp.basename(f))
+                for f in self.las_filepaths
+            ]
+            os.makedirs(self.paths.updated_las_dir, exist_ok=True)
+            self.out_las_filepaths = [
+                osp.join(self.paths.updated_las_dir, osp.basename(f))
+                for f in self.las_filepaths
+            ]
 
         # We must adapt BuildingValidator to corrected data by specifying the codes to use as candidates
         self.bv.candidate_buildings_codes = (
@@ -332,8 +334,11 @@ def _objective(
             "min_frac_confirmation_factor_if_bd_uni_overlay": trial.suggest_float(
                 "min_frac_confirmation_factor_if_bd_uni_overlay", 0.5, 1.0
             ),
+            # Upper bound of the search range: for 7 classes, the maximal Shannon entropy is
+            # log2(7). When looking at prediction's entropy, the observed maximal value is equal
+            # to half of it, so log2(7) / 2 is what we consider as the max for the min entropy for uncertainty.
             "min_entropy_uncertainty": trial.suggest_float(
-                "min_entropy_uncertainty", 0.5, 1.0
+                "min_entropy_uncertainty", 0.0, -math.log2(1 / 7) / 2.0
             ),
             "min_frac_entropy_uncertain": trial.suggest_float(
                 "min_frac_entropy_uncertain", 0.33, 1.0

diff --git a/package_metadata.yaml b/package_metadata.yaml
@@ -1,4 +1,4 @@
-__version__: "1.7.4"
+__version__: "1.8.0"
 __name__: "lidar_prod"
 __url__: "https://github.com/IGNF/lidar-prod-quality-control"
 __description__: "A 3D semantic segmentation production tool to augment rules-based Lidar classification with AI and databases."