Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

M10.0 v1.7.5 reorder entropy for uncertainty #75

Merged
merged 8 commits into from
Nov 3, 2022
2 changes: 1 addition & 1 deletion .github/workflows/cicd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
python -m
pytest -rA -v -m "slow" --ignore=actions-runner --no-cov

- name: Test detection of building.
- name: Test building module from CLI on a LAS subset.
run: >
docker run --network host
-v /var/data/cicd/CICD_github_assets/M8.4/inputs/:/inputs/
Expand Down
2 changes: 1 addition & 1 deletion configs/building_validation/optimization/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ study:
- "${get_method:lidar_prod.tasks.building_validation_optimization.constraints_func}"

design:
n_trials: 300
n_trials: 400
constraints:
min_precision_constraint: 0.98
min_recall_constraint: 0.98
Expand Down
66 changes: 45 additions & 21 deletions lidar_prod/tasks/building_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
get_pdal_writer,
split_idx_by_dim,
get_pipeline,
request_bd_uni_for_building_shapefile
request_bd_uni_for_building_shapefile,
)

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -99,7 +99,9 @@ def run(
"Preparation : Clustering of candidates buildings & Requesting BDUni"
)
if type(input_values) == str:
log.info(f"Applying Building Validation to file \n{input_values}")
CharlesGaydon marked this conversation as resolved.
Show resolved Hide resolved
log.info(
f"Applying Building Validation to file \n{input_values}"
)
temp_f = osp.join(td, osp.basename(input_values))
else:
temp_f = ""
Expand All @@ -108,7 +110,12 @@ def run(
self.update()
return target_las_path

def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las_path: str, save_result: bool = False) -> None:
def prepare(
self,
input_values: Union[str, pdal.pipeline.Pipeline],
prepared_las_path: str,
save_result: bool = False,
) -> None:
f"""
Prepare las for later decision process. .
1. Cluster candidates points, in a new `{self.data_format.las_dimensions.ClusterID_candidate_building}`
Expand All @@ -130,7 +137,9 @@ def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las

"""

dim_candidate_flag = self.data_format.las_dimensions.candidate_buildings_flag
dim_candidate_flag = (
CharlesGaydon marked this conversation as resolved.
Show resolved Hide resolved
self.data_format.las_dimensions.candidate_buildings_flag
)
dim_cluster_id_pdal = self.data_format.las_dimensions.cluster_id
dim_cluster_id_candidates = (
self.data_format.las_dimensions.ClusterID_candidate_building
Expand All @@ -139,7 +148,9 @@ def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las

self.pipeline = get_pipeline(input_values)
# Identify candidates buildings points with a boolean flag
self.pipeline |= pdal.Filter.ferry(dimensions=f"=>{dim_candidate_flag}")
self.pipeline |= pdal.Filter.ferry(
dimensions=f"=>{dim_candidate_flag}"
)
_is_candidate_building = (
"("
+ " || ".join(
Expand All @@ -165,15 +176,19 @@ def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las
)
self.pipeline |= pdal.Filter.assign(value=f"{dim_cluster_id_pdal} = 0")
self.pipeline.execute()
bbox = get_integer_bbox(self.pipeline, buffer=self.bd_uni_request.buffer)
bbox = get_integer_bbox(
self.pipeline, buffer=self.bd_uni_request.buffer
)

self.pipeline |= pdal.Filter.ferry(dimensions=f"=>{dim_overlay}")

if self.shp_path:
temp_dirpath = None # no need for a temporay directory to add the shapefile in it, we already have the shapefile
temp_dirpath = None # no need for a temporay directory to add the shapefile in it, we already have the shapefile
_shp_p = self.shp_path
gdf = geopandas.read_file(_shp_p)
buildings_in_bd_topo = not len(gdf) == 0 # check if there are buildings in the shp
buildings_in_bd_topo = (
not len(gdf) == 0
) # check if there are buildings in the shp

else:
temp_dirpath = mkdtemp()
Expand Down Expand Up @@ -201,7 +216,9 @@ def prepare(self, input_values: Union[str, pdal.pipeline.Pipeline], prepared_las
if temp_dirpath:
shutil.rmtree(temp_dirpath)

def update(self, src_las_path: str = None, target_las_path: str = None) -> None:
def update(
self, src_las_path: str = None, target_las_path: str = None
) -> None:
"""Updates point cloud classification channel."""
if src_las_path:
self.pipeline = pdal.Pipeline()
Expand Down Expand Up @@ -316,7 +333,8 @@ def _make_detailed_group_decision(
p_heq_relaxed_threshold = infos.probabilities >= relaxed_threshold

ia_confirmed_flag = np.logical_or(
p_heq_threshold, np.logical_and(infos.overlays, p_heq_relaxed_threshold)
p_heq_threshold,
np.logical_and(infos.overlays, p_heq_relaxed_threshold),
)

ia_confirmed = (
Expand All @@ -326,26 +344,32 @@ def _make_detailed_group_decision(
# REFUTATION
ia_refuted = (
np.mean(
(1 - infos.probabilities) >= self.thresholds.min_confidence_refutation
(1 - infos.probabilities)
>= self.thresholds.min_confidence_refutation
)
>= self.thresholds.min_frac_refutation
)
uni_overlayed = (
np.mean(infos.overlays) >= self.thresholds.min_uni_db_overlay_frac
)
# If low entropy, we may trust AI to confirm/refute
if not high_entropy:
if ia_refuted:
if uni_overlayed:
return self.codes.detailed.ia_refuted_but_under_db_uni
return self.codes.detailed.ia_refuted
if ia_confirmed:
if uni_overlayed:
return self.codes.detailed.both_confirmed
return self.codes.detailed.ia_confirmed_only
# Else, we may still use BDUni information
if uni_overlayed:
return self.codes.detailed.db_overlayed_only

# Else: we are uncertain, and we specify whether high entropy was
# what led us to conclude to uncertainty.
if high_entropy:
return self.codes.detailed.unsure_by_entropy
if ia_refuted:
if uni_overlayed:
return self.codes.detailed.ia_refuted_but_under_db_uni
return self.codes.detailed.ia_refuted
if ia_confirmed:
if uni_overlayed:
return self.codes.detailed.both_confirmed
return self.codes.detailed.ia_confirmed_only
if uni_overlayed:
return self.codes.detailed.db_overlayed_only
return self.codes.detailed.both_unsure
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On ne passe dans cette ligne (return self.codes.detailed.both_unsure) que si on est à "not high_entropy", je suggère de la passer dans le bloc "if" correspondant, en ligne 365.

Éventuellement en plus, en traitant le cas "high entropy" en premier et en finissant sur le bloc "if not high_entropy", il est possible de supprimer la ligne du "if", puisqu'on est sorti de la méthode en cas de "high entropy" juste avant. Cela fait une indentation de moins dans un bloc qui en a beaucoup (c'est juste une question de lisibilité).

Copy link
Collaborator Author

@CharlesGaydon CharlesGaydon Nov 3, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On ne passe dans cette ligne (return self.codes.detailed.both_unsure) que si on est à "not high_entropy", je suggère de la passer dans le bloc "if" correspondant, en ligne 365.

On y passe si on n'est pas high entropy, mais on laisse tout de même la possibilité de passer par la BDUni avant, donc impossible de mettre ce return dans le premier bloc.
C'est un peu complexe en l'état mais je ne vois pas vraiment comment simplifier, à part en se passant totalement de l'entropie. Ça ne me semble pas forcément souhaitable, puisque lorsqu'elle a été ajoutée c'est qu'elle conduisait à une amélioration...



Expand Down
47 changes: 26 additions & 21 deletions lidar_prod/tasks/building_validation_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from tqdm import tqdm
import os.path as osp
import pdal
import math

from lidar_prod.tasks.building_validation import (
BuildingValidator,
Expand Down Expand Up @@ -94,26 +95,27 @@ def setup(self):
codes to adapt to those of the optimization dataset.

"""
las_paths = glob(osp.join(self.paths.input_las_dir, "*.las"))
laz_paths = glob(osp.join(self.paths.input_las_dir, "*.laz"))
self.las_filepaths = sorted(las_paths + laz_paths)
if not self.las_filepaths:
raise ValueError(
"No LAS/LAZ found in {self.paths.input_las_dir} (i.e. input_las_dir) while"
"globbing *las and *laz extensions (lowercase)."
)
if self.debug:
self.las_filepaths = self.las_filepaths[:1]
os.makedirs(self.paths.prepared_las_dir, exist_ok=True)
self.prepared_las_filepaths = [
osp.join(self.paths.prepared_las_dir, osp.basename(f))
for f in self.las_filepaths
]
os.makedirs(self.paths.updated_las_dir, exist_ok=True)
self.out_las_filepaths = [
osp.join(self.paths.updated_las_dir, osp.basename(f))
for f in self.las_filepaths
]
if "prepare" in self.todo or "update" in self.todo:
las_paths = glob(osp.join(self.paths.input_las_dir, "*.las"))
laz_paths = glob(osp.join(self.paths.input_las_dir, "*.laz"))
self.las_filepaths = sorted(las_paths + laz_paths)
CharlesGaydon marked this conversation as resolved.
Show resolved Hide resolved
if not self.las_filepaths:
raise ValueError(
"No LAS/LAZ found in {self.paths.input_las_dir} (i.e. input_las_dir) while"
"globbing *las and *laz extensions (lowercase)."
)
if self.debug:
self.las_filepaths = self.las_filepaths[:1]
os.makedirs(self.paths.prepared_las_dir, exist_ok=True)
self.prepared_las_filepaths = [
osp.join(self.paths.prepared_las_dir, osp.basename(f))
for f in self.las_filepaths
]
os.makedirs(self.paths.updated_las_dir, exist_ok=True)
self.out_las_filepaths = [
osp.join(self.paths.updated_las_dir, osp.basename(f))
for f in self.las_filepaths
]

# We must adapt BuildingValidator to corrected data by specifying the codes to use as candidates
self.bv.candidate_buildings_codes = (
Expand Down Expand Up @@ -332,8 +334,11 @@ def _objective(
"min_frac_confirmation_factor_if_bd_uni_overlay": trial.suggest_float(
"min_frac_confirmation_factor_if_bd_uni_overlay", 0.5, 1.0
),
# Max entropy for 7 classes. When looking at prediction's entropy,
# the observed maximal value is equal to the Shannon entropy divided by two,
# so this is what we consider as the max for the min entropy for uncertainty.
"min_entropy_uncertainty": trial.suggest_float(
"min_entropy_uncertainty", 0.5, 1.0
"min_entropy_uncertainty", 0.0, -math.log2(1 / 7) / 2.0
CharlesGaydon marked this conversation as resolved.
Show resolved Hide resolved
),
"min_frac_entropy_uncertain": trial.suggest_float(
"min_frac_entropy_uncertain", 0.33, 1.0
Expand Down
2 changes: 1 addition & 1 deletion package_metadata.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__: "1.7.4"
__version__: "1.8.0"
__name__: "lidar_prod"
__url__: "https://github.com/IGNF/lidar-prod-quality-control"
__description__: "A 3D semantic segmentation production tool to augment rules-based Lidar classification with AI and databases."
Expand Down