Skip to content

Commit

Permalink
Merge M8.3
Browse files Browse the repository at this point in the history
M8.3 - Optimization of relaxation factor for decision threshold under BDUni
  • Loading branch information
CharlesGaydon authored Mar 24, 2022
2 parents 8a52aaa + 0110cda commit 27d048d
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 45 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/cicd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
steps:

- name: Checkout branch
uses: actions/checkout@v1
uses: actions/checkout@v2

- name: build docker image
run: docker build -t lidar_prod_im .
Expand All @@ -35,7 +35,7 @@ jobs:
building_validation.optimization.todo='prepare+evaluate+update'
building_validation.optimization.paths.input_las_dir=/CICD_github_assets/M8.0/20220204_building_val_V0.0_model/20211001_buiding_val_val/
building_validation.optimization.paths.results_output_dir=/CICD_github_assets/opti/
building_validation.optimization.paths.building_validation_thresholds_pickle=/CICD_github_assets/M8.0/20220204_building_val_V0.0_model/M8.0B2V0.0_buildingvalidation_thresholds.pickle
building_validation.optimization.paths.building_validation_thresholds_pickle=/CICD_github_assets/M8.3B2V0.0/optimized_thresholds.pickle
- name: clean the server for further uses
if: always() # always do it, even if something failed
Expand Down
4 changes: 2 additions & 2 deletions configs/building_identification/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ _target_: lidar_prod.tasks.building_identification.BuildingIdentifier

data_format: ${data_format}

min_building_proba: ${building_validation.application.rules.min_confidence_confirmation}
min_building_proba_relaxation_if_bd_uni_overlay: 1.0
min_building_proba: ${building_validation.application.thresholds.min_confidence_confirmation}
min_frac_confirmation_factor_if_bd_uni_overlay: ${building_validation.application.thresholds.min_frac_confirmation_factor_if_bd_uni_overlay}

cluster:
min_points: 200 # Large so that small artefacts are ignored
Expand Down
12 changes: 7 additions & 5 deletions configs/building_validation/application/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ cluster:
bd_uni_request:
buffer: 50

rules:
min_confidence_confirmation: 0.768
min_frac_confirmation: 0.732
# TODO: update min_frac_confirmation_factor_if_bd_uni_overlay and others after optimization...
thresholds:
min_confidence_confirmation: 0.697
min_frac_confirmation: 0.384
min_frac_confirmation_factor_if_bd_uni_overlay: 0.808
min_uni_db_overlay_frac: 0.508
min_confidence_refutation: 0.872
min_frac_refutation: 0.964
min_confidence_refutation: 0.973
min_frac_refutation: 0.285
2 changes: 1 addition & 1 deletion dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,4 @@ CMD ["python", \
"paths.src_las=/CICD_github_assets/M8.0/20220204_building_val_V0.0_model/subsets/871000_6617000_subset_with_probas.las", \
"paths.output_dir=/CICD_github_assets/app/", \
"data_format.codes.building.candidates=[202]", \
"building_validation.application.building_validation_thresholds_pickle=/CICD_github_assets/M8.0/20220204_building_val_V0.0_model/M8.0B2V0.0_buildingvalidation_thresholds.pickle"]
"building_validation.application.building_validation_thresholds_pickle=/CICD_github_assets/M8.3B2V0.0/optimized_thresholds.pickle"]
16 changes: 7 additions & 9 deletions lidar_prod/tasks/building_identification.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,21 @@ class BuildingIdentifier:
High enough probability means :
- p>=min_building_proba
OR, IF the point falls within a building vector from the BDUni:
- p>=(min_building_proba*min_building_proba_relaxation_if_bd_uni_overlay).
- p>=(min_building_proba*min_frac_confirmation_factor_if_bd_uni_overlay).
"""

def __init__(
self,
min_building_proba: float = 0.75,
min_building_proba_relaxation_if_bd_uni_overlay: float = 1.0,
min_frac_confirmation_factor_if_bd_uni_overlay: float = 1.0,
cluster=None,
data_format=None,
):
self.cluster = cluster
self.data_format = data_format
self.min_building_proba = min_building_proba
self.min_building_proba_relaxation_if_bd_uni_overlay = (
min_building_proba_relaxation_if_bd_uni_overlay
self.min_frac_confirmation_factor_if_bd_uni_overlay = (
min_frac_confirmation_factor_if_bd_uni_overlay
)

def run(self, in_f: str, out_f: str) -> str:
Expand Down Expand Up @@ -67,12 +67,10 @@ def prepare(self, in_f: str, out_f: str) -> None:
f"({self.data_format.las_dimensions.candidate_buildings_flag} == 0)"
)
p_heq_threshold = f"(building>={self.min_building_proba})"
A = f"(building>={self.min_building_proba * self.min_building_proba_relaxation_if_bd_uni_overlay})"
A = f"(building>={self.min_building_proba * self.min_frac_confirmation_factor_if_bd_uni_overlay})"
B = f"({self.data_format.las_dimensions.uni_db_overlay} > 0)"
p_heq_threshold_under_bd_uni = f"({A} && {B})"
where = (
f"{non_candidates} && ({p_heq_threshold} || {p_heq_threshold_under_bd_uni})"
)
p_heq_modified_threshold_under_bd_uni = f"({A} && {B})"
where = f"{non_candidates} && ({p_heq_threshold} || {p_heq_modified_threshold_under_bd_uni})"
pipeline |= pdal.Filter.cluster(
min_points=self.cluster.min_points,
tolerance=self.cluster.tolerance,
Expand Down
43 changes: 29 additions & 14 deletions lidar_prod/tasks/building_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,16 @@ def __init__(
bd_uni_connection_params=None,
cluster=None,
bd_uni_request=None,
rules=None,
data_format=None,
thresholds=None,
building_validation_thresholds_pickle: str = None,
use_final_classification_codes: bool = True,
):
self.bd_uni_connection_params = bd_uni_connection_params
self.cluster = cluster
self.bd_uni_request = bd_uni_request
self.use_final_classification_codes = use_final_classification_codes
self.rules = rules

self.thresholds = thresholds # default values
self.data_format = data_format
# For easier access
self.codes = data_format.codes.building
Expand All @@ -52,8 +51,10 @@ def __init__(
def setup(self, building_validation_thresholds_pickle):
"""Setup, loading optimized thresholds if available."""
if osp.exists(building_validation_thresholds_pickle):
self._set_rules_from_pickle(building_validation_thresholds_pickle)
log.info(f"Using best trial from: {building_validation_thresholds_pickle}")
self._set_thresholds_from_pickle(building_validation_thresholds_pickle)
log.info(
f"Using optimized thresholds from: {building_validation_thresholds_pickle}"
)
else:
log.warning(
"Using default decision thresholds - specify "
Expand Down Expand Up @@ -220,22 +221,35 @@ def _make_group_decision(self, *args, **kwargs) -> int:
detailed_code = self._make_detailed_group_decision(*args, **kwargs)
return self.detailed_to_final[detailed_code]

def _make_detailed_group_decision(self, probas_arr, overlay_bools_arr):
def _make_detailed_group_decision(self, probas, bduni_flag):
"""Decision process at the cluster level.
Confirm or refute candidate building groups based on the fraction of confirmed/refuted points and
on the fraction of points overlaid by a building vector in BDUni.
See Readme for details of this group-level decision process.
"""
p_heq_threshold = probas >= self.thresholds.min_confidence_confirmation

relaxed_threshold = (
self.thresholds.min_confidence_confirmation
* self.thresholds.min_frac_confirmation_factor_if_bd_uni_overlay
)
p_heq_relaxed_threshold = probas >= relaxed_threshold

ia_confirmed_flag = np.logical_or(
p_heq_threshold, np.logical_and(bduni_flag, p_heq_relaxed_threshold)
)

ia_confirmed = (
np.mean(probas_arr >= self.rules.min_confidence_confirmation)
>= self.rules.min_frac_confirmation
np.mean(ia_confirmed_flag) >= self.thresholds.min_frac_confirmation
)
ia_refuted = (
np.mean((1 - probas_arr) >= self.rules.min_confidence_refutation)
>= self.rules.min_frac_refutation
np.mean((1 - probas) >= self.thresholds.min_confidence_refutation)
>= self.thresholds.min_frac_refutation
)
uni_overlayed = np.mean(overlay_bools_arr) >= self.rules.min_uni_db_overlay_frac
uni_overlayed = np.mean(bduni_flag) >= self.thresholds.min_uni_db_overlay_frac

if ia_refuted:
if uni_overlayed:
Expand All @@ -249,15 +263,15 @@ def _make_detailed_group_decision(self, probas_arr, overlay_bools_arr):
return self.codes.detailed.db_overlayed_only
return self.codes.detailed.both_unsure

def _set_rules_from_pickle(self, building_validation_thresholds_pickle: str):
def _set_thresholds_from_pickle(self, building_validation_thresholds_pickle: str):
"""Specify all thresholds from a serialized (pickled) thresholds object.
This is used in thresholds optimization.
Args:
building_validation_thresholds_pickle (str): path to the pickle file holding the serialized thresholds.
"""
with open(building_validation_thresholds_pickle, "rb") as f:
self.rules: rules = pickle.load(f)
self.thresholds: thresholds = pickle.load(f)


def request_bd_uni_for_building_shapefile(
Expand Down Expand Up @@ -309,11 +323,12 @@ def request_bd_uni_for_building_shapefile(


@dataclass
class rules:
class thresholds:
"""The decision thresholds for cluster-level decisions."""

min_confidence_confirmation: float
min_frac_confirmation: float
min_frac_confirmation_factor_if_bd_uni_overlay: float
min_uni_db_overlay_frac: float
min_confidence_refutation: float
min_frac_refutation: float
37 changes: 26 additions & 11 deletions lidar_prod/tasks/building_validation_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
import pickle
import numpy as np
from sklearn.metrics import confusion_matrix
from typing import Any, Dict
from typing import Any, Dict, List
import optuna
from tqdm import tqdm
import os.path as osp
import laspy

from lidar_prod.tasks.building_validation import BuildingValidator, rules
from lidar_prod.tasks.building_validation import BuildingValidator, thresholds
from lidar_prod.tasks.utils import split_idx_by_dim

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -123,7 +123,7 @@ def optimize(self):
objective = functools.partial(self._objective, clusters=clusters)
self.study.optimize(objective, n_trials=self.design.n_trials)
best_rules = self._select_best_rules(self.study)
log.info(f"Best_trial rules: \n{best_rules}")
log.info(f"Best_trial thresholds: \n{best_rules}")
self.__dump_best_rules(best_rules)

def evaluate(self):
Expand All @@ -134,7 +134,9 @@ def evaluate(self):
"""
clusters = self.__load_clusters()
self.bv._set_rules_from_pickle(self.paths.building_validation_thresholds_pickle)
self.bv._set_thresholds_from_pickle(
self.paths.building_validation_thresholds_pickle
)
decisions = np.array(
[
self.bv._make_group_decision(c.probabilities, c.overlays)
Expand All @@ -153,7 +155,9 @@ def update(self):
"""
log.info(f"Updated las will be saved in {self.paths.results_output_dir}")
self.bv._set_rules_from_pickle(self.paths.building_validation_thresholds_pickle)
self.bv._set_thresholds_from_pickle(
self.paths.building_validation_thresholds_pickle
)
for prep_f, out_f in tqdm(
zip(self.prepared_las_filepaths, self.out_las_filepaths),
total=len(self.prepared_las_filepaths),
Expand Down Expand Up @@ -214,9 +218,17 @@ def __compute_penalty(self, auto, precision, recall):
penalty += self.design.constraints.min_automation_constraint - auto
return [penalty]

def _objective(self, trial, clusters=None):
"""Objective function for optuna optimization. Inner definition to access list of array of probas and list of targets."""
# TODO: include as configurable parameters
def _objective(self, trial, clusters: List[BuildngValidationClusterInfo] = None):
"""Objective function for optuna optimization.
Use prepared list to access group-level probas and targets.
Args:
trial: optuna trial
clusters (List[BuildngValidationClusterInfo], optional): prepared clusters, each providing `probabilities` and `overlays` for the group-level decision. Defaults to None.
Returns:
float, float, float: automatisation, precision, recall
"""
params = {
"min_confidence_confirmation": trial.suggest_float(
"min_confidence_confirmation", 0.0, 1.0
Expand All @@ -229,10 +241,13 @@ def _objective(self, trial, clusters=None):
),
"min_frac_refutation": trial.suggest_float("min_frac_refutation", 0.0, 1.0),
"min_uni_db_overlay_frac": trial.suggest_float(
"min_uni_db_overlay_frac", 0.50, 1.0
"min_uni_db_overlay_frac", 0.5, 1.0
),
"min_frac_confirmation_factor_if_bd_uni_overlay": trial.suggest_float(
"min_frac_confirmation_factor_if_bd_uni_overlay", 0.5, 1.0
),
}
self.bv.rules = rules(**params)
self.bv.thresholds = thresholds(**params)
decisions = np.array(
[
self.bv._make_group_decision(c.probabilities, c.overlays)
Expand Down Expand Up @@ -276,7 +291,7 @@ def _select_best_rules(self, study):
study.best_trials, key=lambda x: np.product(x.values), reverse=True
)
best = trials[0]
best_rules = rules(**best.params)
best_rules = thresholds(**best.params)
return best_rules

def __dump_best_rules(self, best_trial_params):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="lidar_prod",
version="1.2.5",
version="1.3.0",
description="A 3D semantic segmentation production tool to augment rules-based Lidar classification with AI and databases.",
author="Charles GAYDON",
author_email="[email protected]",
Expand Down

0 comments on commit 27d048d

Please sign in to comment.