diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml
index f3ba1a54..36962005 100644
--- a/.github/workflows/cicd.yaml
+++ b/.github/workflows/cicd.yaml
@@ -46,7 +46,7 @@ jobs:
           python -m pytest -rA -v -m "slow" --ignore=actions-runner --no-cov
 
-      - name: Test detection of building.
+      - name: Test building module from CLI on a LAS subset.
        run: >
          docker run
          --network host
          -v /var/data/cicd/CICD_github_assets/M8.4/inputs/:/inputs/
diff --git a/configs/building_validation/optimization/default.yaml b/configs/building_validation/optimization/default.yaml
index 2084c473..24631dfb 100644
--- a/configs/building_validation/optimization/default.yaml
+++ b/configs/building_validation/optimization/default.yaml
@@ -48,7 +48,7 @@ study:
     - "${get_method:lidar_prod.tasks.building_validation_optimization.constraints_func}"
 
 design:
-  n_trials: 300
+  n_trials: 400
   constraints:
     min_precision_constraint: 0.98
     min_recall_constraint: 0.98
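For context, `design.n_trials` caps the number of Optuna trials, while `constraints_func` marks trials that miss the precision/recall floors as infeasible. Below is a minimal sketch of how these pieces fit together, assuming Optuna's sampler-level `constraints_func` support; the objective, metrics, and parameter name are stand-ins, not the project's actual optimization code:

```python
# Minimal sketch of design.n_trials + constraints_func wiring (stand-in
# objective and metrics; not the project's actual optimization code).
import optuna


def constraints_func(trial: optuna.trial.FrozenTrial):
    # Optuna treats values <= 0 as feasible: mirror the 0.98 floors above.
    return [
        0.98 - trial.user_attrs["precision"],
        0.98 - trial.user_attrs["recall"],
    ]


def objective(trial: optuna.Trial) -> float:
    threshold = trial.suggest_float("some_threshold", 0.0, 1.0)  # hypothetical param
    precision, recall = threshold, 1.0 - threshold  # placeholder metrics
    trial.set_user_attr("precision", precision)
    trial.set_user_attr("recall", recall)
    return precision * recall


study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(constraints_func=constraints_func),
)
study.optimize(objective, n_trials=400)  # design.n_trials
```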
diff --git a/lidar_prod/tasks/building_validation.py b/lidar_prod/tasks/building_validation.py
index 8f11a64a..58c6c388 100644
--- a/lidar_prod/tasks/building_validation.py
+++ b/lidar_prod/tasks/building_validation.py
@@ -17,7 +17,7 @@
     get_pdal_writer,
     split_idx_by_dim,
     get_pipeline,
-    request_bd_uni_for_building_shapefile
+    request_bd_uni_for_building_shapefile,
 )
 
 log = logging.getLogger(__name__)
@@ -99,7 +99,9 @@ def run(
             "Preparation : Clustering of candidates buildings & Requesting BDUni"
         )
         if type(input_values) == str:
-            log.info(f"Applying Building Validation to file \n{input_values}")
+            log.info(
+                f"Applying Building Validation to file \n{input_values}"
+            )
             temp_f = osp.join(td, osp.basename(input_values))
         else:
             temp_f = ""
@@ -108,7 +110,12 @@
             self.update()
         return target_las_path
 
-    def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las_path: str, save_result: bool = False) -> None:
+    def prepare(
+        self,
+        input_values: Union[str, pdal.pipeline.Pipeline],
+        prepared_las_path: str,
+        save_result: bool = False,
+    ) -> None:
         f"""Prepare las for later decision process.
 
         1. Cluster candidates points, in a new `{self.data_format.las_dimensions.ClusterID_candidate_building}`
@@ -130,7 +137,9 @@ def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las
         """
 
-        dim_candidate_flag = self.data_format.las_dimensions.candidate_buildings_flag
+        dim_candidate_flag = (
+            self.data_format.las_dimensions.candidate_buildings_flag
+        )
         dim_cluster_id_pdal = self.data_format.las_dimensions.cluster_id
         dim_cluster_id_candidates = (
             self.data_format.las_dimensions.ClusterID_candidate_building
@@ -139,7 +148,9 @@ def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las
         self.pipeline = get_pipeline(input_values)
 
         # Identify candidates buildings points with a boolean flag
-        self.pipeline |= pdal.Filter.ferry(dimensions=f"=>{dim_candidate_flag}")
+        self.pipeline |= pdal.Filter.ferry(
+            dimensions=f"=>{dim_candidate_flag}"
+        )
         _is_candidate_building = (
             "("
             + " || ".join(
@@ -165,15 +176,19 @@ def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las
         )
         self.pipeline |= pdal.Filter.assign(value=f"{dim_cluster_id_pdal} = 0")
         self.pipeline.execute()
-        bbox = get_integer_bbox(self.pipeline, buffer=self.bd_uni_request.buffer)
+        bbox = get_integer_bbox(
+            self.pipeline, buffer=self.bd_uni_request.buffer
+        )
 
         self.pipeline |= pdal.Filter.ferry(dimensions=f"=>{dim_overlay}")
 
         if self.shp_path:
-            temp_dirpath = None # no need for a temporay directory to add the shapefile in it, we already have the shapefile
+            temp_dirpath = None  # no need for a temporary directory to put the shapefile in, we already have one
             _shp_p = self.shp_path
             gdf = geopandas.read_file(_shp_p)
-            buildings_in_bd_topo = not len(gdf) == 0 # check if there are buildings in the shp
+            buildings_in_bd_topo = (
+                not len(gdf) == 0
+            )  # check if there are buildings in the shp
         else:
             temp_dirpath = mkdtemp()
 
@@ -201,7 +216,9 @@ def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las
         if temp_dirpath:
             shutil.rmtree(temp_dirpath)
 
-    def update(self, src_las_path: str = None, target_las_path: str = None) -> None:
+    def update(
+        self, src_las_path: str = None, target_las_path: str = None
+    ) -> None:
         """Updates point cloud classification channel."""
         if src_las_path:
             self.pipeline = pdal.Pipeline()
@@ -316,7 +333,8 @@ def _make_detailed_group_decision(
         p_heq_relaxed_threshold = infos.probabilities >= relaxed_threshold
 
         ia_confirmed_flag = np.logical_or(
-            p_heq_threshold, np.logical_and(infos.overlays, p_heq_relaxed_threshold)
+            p_heq_threshold,
+            np.logical_and(infos.overlays, p_heq_relaxed_threshold),
         )
 
         ia_confirmed = (
@@ -326,26 +344,32 @@
         # REFUTATION
         ia_refuted = (
             np.mean(
-                (1 - infos.probabilities) >= self.thresholds.min_confidence_refutation
+                (1 - infos.probabilities)
+                >= self.thresholds.min_confidence_refutation
             )
             >= self.thresholds.min_frac_refutation
         )
 
         uni_overlayed = (
             np.mean(infos.overlays) >= self.thresholds.min_uni_db_overlay_frac
         )
 
+        # If entropy is low, we may trust the AI to confirm/refute
+        if not high_entropy:
+            if ia_refuted:
+                if uni_overlayed:
+                    return self.codes.detailed.ia_refuted_but_under_db_uni
+                return self.codes.detailed.ia_refuted
+            if ia_confirmed:
+                if uni_overlayed:
+                    return self.codes.detailed.both_confirmed
+                return self.codes.detailed.ia_confirmed_only
+        # Else, we may still use BDUni information
+        if uni_overlayed:
+            return self.codes.detailed.db_overlayed_only
+        # Else: we are uncertain, and we specify whether entropy was
+        # involved in concluding to uncertainty.
         if high_entropy:
             return self.codes.detailed.unsure_by_entropy
-        if ia_refuted:
-            if uni_overlayed:
-                return self.codes.detailed.ia_refuted_but_under_db_uni
-            return self.codes.detailed.ia_refuted
-        if ia_confirmed:
-            if uni_overlayed:
-                return self.codes.detailed.both_confirmed
-            return self.codes.detailed.ia_confirmed_only
-        if uni_overlayed:
-            return self.codes.detailed.db_overlayed_only
 
         return self.codes.detailed.both_unsure
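The reordered `_make_detailed_group_decision` cascade changes one outcome: a high-entropy group that is nonetheless overlaid by BDUni now falls through to `db_overlayed_only` instead of short-circuiting to `unsure_by_entropy`. A standalone sketch of the new control flow, with plain strings standing in for the real `codes.detailed` values:

```python
# Standalone sketch of the reordered cascade; plain strings stand in for
# the real codes.detailed values.
def detailed_decision(
    high_entropy: bool, ia_refuted: bool, ia_confirmed: bool, uni_overlayed: bool
) -> str:
    # When entropy is low, trust the AI to confirm/refute first...
    if not high_entropy:
        if ia_refuted:
            return "ia_refuted_but_under_db_uni" if uni_overlayed else "ia_refuted"
        if ia_confirmed:
            return "both_confirmed" if uni_overlayed else "ia_confirmed_only"
    # ...otherwise (or if the AI is inconclusive), fall back on BDUni...
    if uni_overlayed:
        return "db_overlayed_only"
    # ...and only then report uncertainty, flagging entropy when relevant.
    return "unsure_by_entropy" if high_entropy else "both_unsure"


# High entropy no longer short-circuits a BDUni overlay:
assert detailed_decision(True, False, False, True) == "db_overlayed_only"
```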
diff --git a/lidar_prod/tasks/building_validation_optimization.py b/lidar_prod/tasks/building_validation_optimization.py
index 8d4637ab..593b5aa4 100644
--- a/lidar_prod/tasks/building_validation_optimization.py
+++ b/lidar_prod/tasks/building_validation_optimization.py
@@ -11,6 +11,7 @@
 from tqdm import tqdm
 import os.path as osp
 import pdal
+import math
 
 from lidar_prod.tasks.building_validation import (
     BuildingValidator,
@@ -94,26 +95,27 @@ def setup(self):
         codes to adapt to those of the optimization dataset.
 
         """
-        las_paths = glob(osp.join(self.paths.input_las_dir, "*.las"))
-        laz_paths = glob(osp.join(self.paths.input_las_dir, "*.laz"))
-        self.las_filepaths = sorted(las_paths + laz_paths)
-        if not self.las_filepaths:
-            raise ValueError(
-                "No LAS/LAZ found in {self.paths.input_las_dir} (i.e. input_las_dir) while"
-                "globbing *las and *laz extensions (lowercase)."
-            )
-        if self.debug:
-            self.las_filepaths = self.las_filepaths[:1]
-        os.makedirs(self.paths.prepared_las_dir, exist_ok=True)
-        self.prepared_las_filepaths = [
-            osp.join(self.paths.prepared_las_dir, osp.basename(f))
-            for f in self.las_filepaths
-        ]
-        os.makedirs(self.paths.updated_las_dir, exist_ok=True)
-        self.out_las_filepaths = [
-            osp.join(self.paths.updated_las_dir, osp.basename(f))
-            for f in self.las_filepaths
-        ]
+        if "prepare" in self.todo or "update" in self.todo:
+            las_paths = glob(osp.join(self.paths.input_las_dir, "*.las"))
+            laz_paths = glob(osp.join(self.paths.input_las_dir, "*.laz"))
+            self.las_filepaths = sorted(las_paths + laz_paths)
+            if not self.las_filepaths:
+                raise ValueError(
+                    f"No LAS/LAZ found in {self.paths.input_las_dir} (i.e. input_las_dir) while "
+                    "globbing *las and *laz extensions (lowercase)."
+                )
+            if self.debug:
+                self.las_filepaths = self.las_filepaths[:1]
+            os.makedirs(self.paths.prepared_las_dir, exist_ok=True)
+            self.prepared_las_filepaths = [
+                osp.join(self.paths.prepared_las_dir, osp.basename(f))
+                for f in self.las_filepaths
+            ]
+            os.makedirs(self.paths.updated_las_dir, exist_ok=True)
+            self.out_las_filepaths = [
+                osp.join(self.paths.updated_las_dir, osp.basename(f))
+                for f in self.las_filepaths
+            ]
 
         # We must adapt BuildingValidator to corrected data by specifying the codes to use as candidates
         self.bv.candidate_buildings_codes = (
@@ -332,8 +334,11 @@ def _objective(
             "min_frac_confirmation_factor_if_bd_uni_overlay": trial.suggest_float(
                 "min_frac_confirmation_factor_if_bd_uni_overlay", 0.5, 1.0
             ),
+            # Max entropy for 7 classes is -log2(1/7). Empirically, the maximal
+            # observed prediction entropy is about half of that, so we use it as
+            # the upper bound when searching min_entropy_uncertainty.
             "min_entropy_uncertainty": trial.suggest_float(
-                "min_entropy_uncertainty", 0.5, 1.0
+                "min_entropy_uncertainty", 0.0, -math.log2(1 / 7) / 2.0
             ),
             "min_frac_entropy_uncertain": trial.suggest_float(
                 "min_frac_entropy_uncertain", 0.33, 1.0
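The new ceiling for `min_entropy_uncertainty` can be sanity-checked directly: for 7 classes the uniform distribution maximizes Shannon entropy at `-log2(1/7) = log2(7) ≈ 2.807` bits, and half of that, `≈ 1.404`, replaces the previous hard-coded upper bound of `1.0`:

```python
# Sanity check of the new search ceiling for min_entropy_uncertainty.
import math

max_entropy_7_classes = -math.log2(1 / 7)  # log2(7) ≈ 2.807 bits
ceiling = max_entropy_7_classes / 2.0      # ≈ 1.404, up from the old 1.0
print(f"max={max_entropy_7_classes:.3f} bits, ceiling={ceiling:.3f}")
```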
"min_entropy_uncertainty": trial.suggest_float( - "min_entropy_uncertainty", 0.5, 1.0 + "min_entropy_uncertainty", 0.0, -math.log2(1 / 7) / 2.0 ), "min_frac_entropy_uncertain": trial.suggest_float( "min_frac_entropy_uncertain", 0.33, 1.0 diff --git a/package_metadata.yaml b/package_metadata.yaml index 204d4fb9..c8bd6804 100644 --- a/package_metadata.yaml +++ b/package_metadata.yaml @@ -1,4 +1,4 @@ -__version__: "1.7.4" +__version__: "1.8.0" __name__: "lidar_prod" __url__: "https://github.com/IGNF/lidar-prod-quality-control" __description__: "A 3D semantic segmentation production tool to augment rules-based Lidar classification with AI and databases."