diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml
index b6ba5d44..9337bec8 100644
--- a/.github/workflows/cicd.yaml
+++ b/.github/workflows/cicd.yaml
@@ -13,7 +13,7 @@ jobs:
     steps:
       - name: Checkout branch
-        uses: actions/checkout@v1
+        uses: actions/checkout@v2
 
       - name: build docker image
         run: docker build -t lidar_prod_im .
@@ -35,7 +35,7 @@ jobs:
          building_validation.optimization.todo='prepare+evaluate+update'
          building_validation.optimization.paths.input_las_dir=/CICD_github_assets/M8.0/20220204_building_val_V0.0_model/20211001_buiding_val_val/
          building_validation.optimization.paths.results_output_dir=/CICD_github_assets/opti/
-          building_validation.optimization.paths.building_validation_thresholds_pickle=/CICD_github_assets/M8.0/20220204_building_val_V0.0_model/M8.0B2V0.0_buildingvalidation_thresholds.pickle
+          building_validation.optimization.paths.building_validation_thresholds_pickle=/CICD_github_assets/M8.3B2V0.0/optimized_thresholds.pickle
 
      - name: clean the server for further uses
        if: always()  # always do it, even if something failed
diff --git a/configs/building_identification/default.yaml b/configs/building_identification/default.yaml
index 72723cdb..f09041aa 100644
--- a/configs/building_identification/default.yaml
+++ b/configs/building_identification/default.yaml
@@ -2,8 +2,8 @@ _target_: lidar_prod.tasks.building_identification.BuildingIdentifier
 
 data_format: ${data_format}
 
-min_building_proba: ${building_validation.application.rules.min_confidence_confirmation}
-min_building_proba_relaxation_if_bd_uni_overlay: 1.0
+min_building_proba: ${building_validation.application.thresholds.min_confidence_confirmation}
+min_frac_confirmation_factor_if_bd_uni_overlay: ${building_validation.application.thresholds.min_frac_confirmation_factor_if_bd_uni_overlay}
 
 cluster:
   min_points: 200  # Large so that small artefacts are ignored
diff --git a/configs/building_validation/application/default.yaml b/configs/building_validation/application/default.yaml
index e5d27d01..7cf50e8b 100644
--- a/configs/building_validation/application/default.yaml
+++ b/configs/building_validation/application/default.yaml
@@ -20,9 +20,11 @@ cluster:
 bd_uni_request:
   buffer: 50
 
-rules:
-  min_confidence_confirmation: 0.768
-  min_frac_confirmation: 0.732
+# TODO: update min_frac_confirmation_factor_if_bd_uni_overlay and others after optimization...
+thresholds:
+  min_confidence_confirmation: 0.697
+  min_frac_confirmation: 0.384
+  min_frac_confirmation_factor_if_bd_uni_overlay: 0.808
   min_uni_db_overlay_frac: 0.508
-  min_confidence_refutation: 0.872
-  min_frac_refutation: 0.964
+  min_confidence_refutation: 0.973
+  min_frac_refutation: 0.285
diff --git a/dockerfile b/dockerfile
index 73587958..ae4591dc 100644
--- a/dockerfile
+++ b/dockerfile
@@ -51,4 +51,4 @@ CMD ["python", \
     "paths.src_las=/CICD_github_assets/M8.0/20220204_building_val_V0.0_model/subsets/871000_6617000_subset_with_probas.las", \
     "paths.output_dir=/CICD_github_assets/app/", \
     "data_format.codes.building.candidates=[202]", \
-    "building_validation.application.building_validation_thresholds_pickle=/CICD_github_assets/M8.0/20220204_building_val_V0.0_model/M8.0B2V0.0_buildingvalidation_thresholds.pickle"]
+    "building_validation.application.building_validation_thresholds_pickle=/CICD_github_assets/M8.3B2V0.0/optimized_thresholds.pickle"]
diff --git a/lidar_prod/tasks/building_identification.py b/lidar_prod/tasks/building_identification.py
index 1d13e4ce..ec3167f9 100644
--- a/lidar_prod/tasks/building_identification.py
+++ b/lidar_prod/tasks/building_identification.py
@@ -16,21 +16,21 @@ class BuildingIdentifier:
     High enough probability means:
     - p>=min_building_proba OR, IF the point falls in a building vector from the BDUni:
-      - p>=(min_building_proba*min_building_proba_relaxation_if_bd_uni_overlay).
+      - p>=(min_building_proba*min_frac_confirmation_factor_if_bd_uni_overlay).
 
     """
 
     def __init__(
         self,
         min_building_proba: float = 0.75,
-        min_building_proba_relaxation_if_bd_uni_overlay: float = 1.0,
+        min_frac_confirmation_factor_if_bd_uni_overlay: float = 1.0,
         cluster=None,
         data_format=None,
     ):
         self.cluster = cluster
         self.data_format = data_format
         self.min_building_proba = min_building_proba
-        self.min_building_proba_relaxation_if_bd_uni_overlay = (
-            min_building_proba_relaxation_if_bd_uni_overlay
+        self.min_frac_confirmation_factor_if_bd_uni_overlay = (
+            min_frac_confirmation_factor_if_bd_uni_overlay
         )
 
     def run(self, in_f: str, out_f: str) -> str:
@@ -67,12 +67,10 @@ def prepare(self, in_f: str, out_f: str) -> None:
             f"({self.data_format.las_dimensions.candidate_buildings_flag} == 0)"
         )
         p_heq_threshold = f"(building>={self.min_building_proba})"
-        A = f"(building>={self.min_building_proba * self.min_building_proba_relaxation_if_bd_uni_overlay})"
+        A = f"(building>={self.min_building_proba * self.min_frac_confirmation_factor_if_bd_uni_overlay})"
         B = f"({self.data_format.las_dimensions.uni_db_overlay} > 0)"
-        p_heq_threshold_under_bd_uni = f"({A} && {B})"
-        where = (
-            f"{non_candidates} && ({p_heq_threshold} || {p_heq_threshold_under_bd_uni})"
-        )
+        p_heq_modified_threshold_under_bd_uni = f"({A} && {B})"
+        where = f"{non_candidates} && ({p_heq_threshold} || {p_heq_modified_threshold_under_bd_uni})"
         pipeline |= pdal.Filter.cluster(
             min_points=self.cluster.min_points,
             tolerance=self.cluster.tolerance,
diff --git a/lidar_prod/tasks/building_validation.py b/lidar_prod/tasks/building_validation.py
index a809eb1f..670381a8 100644
--- a/lidar_prod/tasks/building_validation.py
+++ b/lidar_prod/tasks/building_validation.py
@@ -31,8 +31,8 @@ def __init__(
         bd_uni_connection_params=None,
         cluster=None,
         bd_uni_request=None,
-        rules=None,
         data_format=None,
+        thresholds=None,
         building_validation_thresholds_pickle: str = None,
         use_final_classification_codes: bool = True,
     ):
         self.cluster = cluster
         self.bd_uni_request = bd_uni_request
         self.use_final_classification_codes = use_final_classification_codes
-        self.rules = rules
-
+        self.thresholds = thresholds  # default values
         self.data_format = data_format
         # For easier access
         self.codes = data_format.codes.building
@@ -52,8 +51,10 @@ def __init__(
 
     def setup(self, building_validation_thresholds_pickle):
         """Setup, loading optimized thresholds if available."""
         if osp.exists(building_validation_thresholds_pickle):
-            self._set_rules_from_pickle(building_validation_thresholds_pickle)
-            log.info(f"Using best trial from: {building_validation_thresholds_pickle}")
+            self._set_thresholds_from_pickle(building_validation_thresholds_pickle)
+            log.info(
+                f"Using optimized thresholds from: {building_validation_thresholds_pickle}"
+            )
         else:
             log.warning(
                 "Using default decision thresholds - specify "
@@ -220,22 +221,35 @@ def _make_group_decision(self, *args, **kwargs) -> int:
         detailed_code = self._make_detailed_group_decision(*args, **kwargs)
         return self.detailed_to_final[detailed_code]
 
-    def _make_detailed_group_decision(self, probas_arr, overlay_bools_arr):
+    def _make_detailed_group_decision(self, probas, bduni_flag):
         """Decision process at the cluster level.
 
         Confirm or refute candidate building groups based on fraction of
         confirmed/refuted points and on fraction of points overlayed by a
         building vector in BDUni.
 
+        See README for details of this group-level decision process.
+
         """
+        p_heq_threshold = probas >= self.thresholds.min_confidence_confirmation
+
+        relaxed_threshold = (
+            self.thresholds.min_confidence_confirmation
+            * self.thresholds.min_frac_confirmation_factor_if_bd_uni_overlay
+        )
+        p_heq_relaxed_threshold = probas >= relaxed_threshold
+
+        ia_confirmed_flag = np.logical_or(
+            p_heq_threshold, np.logical_and(bduni_flag, p_heq_relaxed_threshold)
+        )
+
         ia_confirmed = (
-            np.mean(probas_arr >= self.rules.min_confidence_confirmation)
-            >= self.rules.min_frac_confirmation
+            np.mean(ia_confirmed_flag) >= self.thresholds.min_frac_confirmation
         )
         ia_refuted = (
-            np.mean((1 - probas_arr) >= self.rules.min_confidence_refutation)
-            >= self.rules.min_frac_refutation
+            np.mean((1 - probas) >= self.thresholds.min_confidence_refutation)
+            >= self.thresholds.min_frac_refutation
         )
-        uni_overlayed = np.mean(overlay_bools_arr) >= self.rules.min_uni_db_overlay_frac
+        uni_overlayed = np.mean(bduni_flag) >= self.thresholds.min_uni_db_overlay_frac
 
         if ia_refuted:
             if uni_overlayed:
@@ -249,7 +263,7 @@ def _make_detailed_group_decision(self, probas_arr, overlay_bools_arr):
             return self.codes.detailed.db_overlayed_only
         return self.codes.detailed.both_unsure
 
-    def _set_rules_from_pickle(self, building_validation_thresholds_pickle: str):
+    def _set_thresholds_from_pickle(self, building_validation_thresholds_pickle: str):
         """Specify all thresholds from a serialized thresholds object.
 
         This is used in thresholds optimization.
@@ -257,7 +271,7 @@ def _set_rules_from_pickle(self, building_validation_thresholds_pickle: str):
             building_validation_thresholds_pickle (str): path to the pickled decision thresholds
         """
         with open(building_validation_thresholds_pickle, "rb") as f:
-            self.rules: rules = pickle.load(f)
+            self.thresholds: thresholds = pickle.load(f)
 
 
 def request_bd_uni_for_building_shapefile(
@@ -309,11 +323,12 @@ def request_bd_uni_for_building_shapefile(
 
 
 @dataclass
-class rules:
+class thresholds:
     """The decision thresholds for cluster-level decisions."""
 
     min_confidence_confirmation: float
     min_frac_confirmation: float
+    min_frac_confirmation_factor_if_bd_uni_overlay: float
     min_uni_db_overlay_frac: float
     min_confidence_refutation: float
     min_frac_refutation: float
diff --git a/lidar_prod/tasks/building_validation_optimization.py b/lidar_prod/tasks/building_validation_optimization.py
index 2576ebfc..1b721335 100644
--- a/lidar_prod/tasks/building_validation_optimization.py
+++ b/lidar_prod/tasks/building_validation_optimization.py
@@ -6,13 +6,13 @@ import pickle
 
 import numpy as np
 from sklearn.metrics import confusion_matrix
-from typing import Any, Dict
+from typing import Any, Dict, List
 import optuna
 from tqdm import tqdm
 import os.path as osp
 import laspy
 
-from lidar_prod.tasks.building_validation import BuildingValidator, rules
+from lidar_prod.tasks.building_validation import BuildingValidator, thresholds
 from lidar_prod.tasks.utils import split_idx_by_dim
 
 log = logging.getLogger(__name__)
@@ -123,7 +123,7 @@ def optimize(self):
         objective = functools.partial(self._objective, clusters=clusters)
         self.study.optimize(objective, n_trials=self.design.n_trials)
         best_rules = self._select_best_rules(self.study)
-        log.info(f"Best_trial rules: \n{best_rules}")
+        log.info(f"Best trial thresholds: \n{best_rules}")
         self.__dump_best_rules(best_rules)
 
     def evaluate(self):
@@ -134,7 +134,9 @@
         """
         clusters = self.__load_clusters()
-        self.bv._set_rules_from_pickle(self.paths.building_validation_thresholds_pickle)
+        self.bv._set_thresholds_from_pickle(
+            self.paths.building_validation_thresholds_pickle
+        )
         decisions = np.array(
             [
                 self.bv._make_group_decision(c.probabilities, c.overlays)
@@ -153,7 +155,9 @@ def update(self):
         """
         log.info(f"Updated las will be saved in {self.paths.results_output_dir}")
-        self.bv._set_rules_from_pickle(self.paths.building_validation_thresholds_pickle)
+        self.bv._set_thresholds_from_pickle(
+            self.paths.building_validation_thresholds_pickle
+        )
         for prep_f, out_f in tqdm(
             zip(self.prepared_las_filepaths, self.out_las_filepaths),
             total=len(self.prepared_las_filepaths),
@@ -214,9 +218,17 @@ def __compute_penalty(self, auto, precision, recall):
             penalty += self.design.constraints.min_automation_constraint - auto
         return [penalty]
 
-    def _objective(self, trial, clusters=None):
-        """Objective function for optuna optimization. Inner definition to access list of array of probas and list of targets."""
-        # TODO: incude as configurable parameters
+    def _objective(self, trial, clusters: List[BuildingValidationClusterInfo] = None):
+        """Objective function for optuna optimization.
+        Uses the prepared list to access group-level probas and targets.
+
+        Args:
+            trial: optuna trial
+            clusters (List[BuildingValidationClusterInfo], optional): cluster-level probas, overlays and targets. Defaults to None.
+
+        Returns:
+            float, float, float: automation, precision, recall
+        """
         params = {
             "min_confidence_confirmation": trial.suggest_float(
                 "min_confidence_confirmation", 0.0, 1.0
@@ -229,10 +241,13 @@
             ),
             "min_frac_refutation": trial.suggest_float("min_frac_refutation", 0.0, 1.0),
             "min_uni_db_overlay_frac": trial.suggest_float(
-                "min_uni_db_overlay_frac", 0.50, 1.0
+                "min_uni_db_overlay_frac", 0.5, 1.0
+            ),
+            "min_frac_confirmation_factor_if_bd_uni_overlay": trial.suggest_float(
+                "min_frac_confirmation_factor_if_bd_uni_overlay", 0.5, 1.0
             ),
         }
-        self.bv.rules = rules(**params)
+        self.bv.thresholds = thresholds(**params)
         decisions = np.array(
             [
                 self.bv._make_group_decision(c.probabilities, c.overlays)
@@ -276,7 +291,7 @@ def _select_best_rules(self, study):
             study.best_trials, key=lambda x: np.product(x.values), reverse=True
         )
         best = trials[0]
-        best_rules = rules(**best.params)
+        best_rules = thresholds(**best.params)
         return best_rules
 
     def __dump_best_rules(self, best_trial_params):
diff --git a/setup.py b/setup.py
index 2decb6c5..66960921 100755
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 setup(
     name="lidar_prod",
-    version="1.2.5",
+    version="1.3.0",
     description="A 3D semantic segmentation production tool to augment rules-based Lidar classification with AI and databases.",
     author="Charles GAYDON",
     author_email="charles.gaydon@gmail.com",
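
For reviewers: below is a standalone sketch of the relaxed-threshold confirmation rule this diff introduces in `_make_detailed_group_decision` (and mirrors in `BuildingIdentifier`). The `Thresholds` dataclass and the sample arrays are illustrative stand-ins, not the library's own objects; the threshold values are copied from the updated config.

```python
# Minimal sketch of the new group-level confirmation rule (illustrative only;
# `Thresholds` stands in for the lowercase `thresholds` dataclass, and the
# sample probabilities and BDUni flags below are made up).
from dataclasses import dataclass

import numpy as np


@dataclass
class Thresholds:
    min_confidence_confirmation: float
    min_frac_confirmation: float
    min_frac_confirmation_factor_if_bd_uni_overlay: float


t = Thresholds(
    min_confidence_confirmation=0.697,
    min_frac_confirmation=0.384,
    min_frac_confirmation_factor_if_bd_uni_overlay=0.808,
)

# One candidate building cluster: per-point AI probabilities and BDUni overlay flags.
probas = np.array([0.9, 0.6, 0.58, 0.3, 0.75])
bduni_flag = np.array([True, True, False, False, True])

# A point counts as "confirmed" if it clears the threshold outright, OR if it
# clears the relaxed threshold while lying under a BDUni building vector.
relaxed = t.min_confidence_confirmation * t.min_frac_confirmation_factor_if_bd_uni_overlay
confirmed_points = (probas >= t.min_confidence_confirmation) | (
    bduni_flag & (probas >= relaxed)
)

# The cluster is confirmed when a large enough fraction of its points are confirmed.
cluster_confirmed = confirmed_points.mean() >= t.min_frac_confirmation
print(relaxed, confirmed_points, cluster_confirmed)  # 0.563..., [T T F F T], True
```

Note how the factor only ever lowers the per-point bar under a BDUni overlay (factor <= 1.0), which is why the optimizer searches it in [0.5, 1.0].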
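The factor also becomes a seventh parameter in the optuna search space. Here is a minimal sketch of a multi-objective study of the same shape, assuming optuna is installed; the objective returns placeholder automation/precision/recall values instead of re-scoring real clusters, and only three of the seven suggested parameters are shown.

```python
# Sketch of the enlarged optuna search space (placeholder objective; the real
# one builds a `thresholds` object from params and re-scores group decisions).
import optuna


def objective(trial: optuna.Trial):
    params = {
        "min_confidence_confirmation": trial.suggest_float(
            "min_confidence_confirmation", 0.0, 1.0
        ),
        "min_frac_confirmation": trial.suggest_float("min_frac_confirmation", 0.0, 1.0),
        "min_frac_confirmation_factor_if_bd_uni_overlay": trial.suggest_float(
            "min_frac_confirmation_factor_if_bd_uni_overlay", 0.5, 1.0
        ),
    }
    # Placeholder metrics; in the real task these come from confusion_matrix.
    automation, precision, recall = 0.5, 0.9, 0.9
    return automation, precision, recall


# Three maximized objectives, as in the real optimizer; best_trials is the Pareto front.
study = optuna.create_study(directions=["maximize", "maximize", "maximize"])
study.optimize(objective, n_trials=10)
print(len(study.best_trials))
```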