metrics[4 and 5 refactored] save results and compare models [GSoC23 cont'd] #12

Closed

Changes from all commits (27 commits)
baff328
normalize axes to ax
jpcbertoldo Aug 18, 2023
d95231c
implement save
jpcbertoldo Aug 18, 2023
af90b25
add and fix tests to save and load
jpcbertoldo Aug 18, 2023
101962e
add demo notebook
jpcbertoldo Aug 18, 2023
a5cf4ff
make auLOGpimo boxplot plot functional
jpcbertoldo Aug 18, 2023
6df11f6
add imgix vs metric or rank plots
jpcbertoldo Aug 21, 2023
19401a1
put models dict validation in common
jpcbertoldo Aug 21, 2023
baf2586
enable higher_is_better=False
jpcbertoldo Aug 21, 2023
c897f89
correct mistake in ranks and make rank plot show draws
jpcbertoldo Aug 21, 2023
20f440c
add pairwise statistical tests
jpcbertoldo Aug 21, 2023
5ab33bc
small changes and comparison of 3 models in nb
jpcbertoldo Aug 21, 2023
26a76e6
add tests and fix small issues
jpcbertoldo Aug 22, 2023
eb0f421
[0] fpr plot bounds colors
jpcbertoldo Aug 22, 2023
48d0085
complete some missing text in notebook
jpcbertoldo Aug 22, 2023
621f58e
complete some module docstrings
jpcbertoldo Aug 22, 2023
837fa7a
make saturation colors and transparent superimposed colormap
jpcbertoldo Aug 24, 2023
ee2cf55
Revert "make saturation colors and transparent superimposed colormap"
jpcbertoldo Aug 25, 2023
c444bbe
write exception messages
jpcbertoldo Aug 25, 2023
9617076
make saturation colors configurable
jpcbertoldo Aug 25, 2023
f018d7c
add tests and fix small issues
jpcbertoldo Aug 25, 2023
2b2c96e
Merge branch 'metrics/next04-compare-models' into metrics/next05-viz
jpcbertoldo Aug 25, 2023
f120e7a
refactor save/load and comparisions
jpcbertoldo Sep 5, 2023
31d869f
fix tests
jpcbertoldo Sep 5, 2023
104d53b
fix notebook kernelspec
jpcbertoldo Sep 5, 2023
33eef64
small fixes
jpcbertoldo Sep 5, 2023
7e1cd36
roll back viz stuff (next pr)
jpcbertoldo Sep 5, 2023
7e696b6
fix types in boxplot
jpcbertoldo Sep 8, 2023
855 changes: 855 additions & 0 deletions notebooks/500_use_cases/502_perimg_metrics/502b_compare_models.ipynb

Large diffs are not rendered by default.

35 changes: 33 additions & 2 deletions src/anomalib/utils/metrics/perimg/__init__.py
@@ -1,8 +1,39 @@
"""Per-Image Metrics.

Overall approach:
Thresholds are computed across all images, but the metrics are computed per-image.
Metrics here are based on binary classification metrics (e.g. FPR, TPR, Precision) over a range of thresholds.

Thresholds are applied across all images, but each image is measured independently.
In other words, thresholds are shared, but metrics are *per-image*.

Thresholds are then indexed by a metric \\in [0, 1] so that any (model, dataset) can be compared.
Key insight: the indexing metric is **only measured on normal images** in the test set.

`PImO`: the shared metric is the mean of per-image FPR (`shared_fpr`).

The indexing metric is then used as the X-axis of a curve, where the Y-axis is the per-image metric.

`PImO`: the Y-axis is the per-image TPR, or "Overlap" [between the predicted and ground-truth masks].
Therefore `PImO` stands for "Per-Image Overlap [curve]".

Note: by definition, the per-image overlap is only defined on anomalous images.

Finally, the area under each curve is computed.

`PImO` --> `AUPImO` (Area Under the PImO curve).

The shared metric is also used to restrict the threshold range.

`PImO`: one can limit the upper bound (maximum value) of the shared FPR, which is the lower bound of thresholds.

In such cases, the area under the curve is computed over the restricted range and normalized to [0, 1].
Note: this corresponds to taking the average value of the Y-axis over the restricted range.


Metrics here are generally based on binary classification metrics (e.g. FPR, TPR, Precision) over a range of thresholds.

Several plot functions are provided to visualize these metrics.

Utilities are also provided to measure statistics over the per-image metric values, especially using boxplots.
"""

from .binclf_curve import PerImageBinClfCurve
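To make the docstring above concrete, here is a minimal NumPy sketch of the PImO/AUPImO idea: thresholds shared across images, per-image curves indexed by the mean FPR of the normal images, and an area normalized over a restricted FPR range. This is an illustration only, not the anomalib API; the function and argument names (`pimo_sketch`, `anomaly_maps`, `masks`, `fpr_upper_bound`) are hypothetical, and the actual implementation in this PR may differ.

```python
import numpy as np


def pimo_sketch(anomaly_maps, masks, num_thresholds=300, fpr_upper_bound=1.0):
    """Hypothetical sketch of PImO/AUPImO; not the anomalib implementation."""
    anomaly_maps = np.asarray(anomaly_maps, dtype=float)  # (N, H, W) anomaly scores
    masks = np.asarray(masks, dtype=bool)                  # (N, H, W) ground-truth masks

    # Thresholds are shared by all images; ordered so the shared FPR increases along the curve.
    thresholds = np.linspace(anomaly_maps.max(), anomaly_maps.min(), num_thresholds)

    is_normal = ~masks.any(axis=(1, 2))      # images with no anomalous pixels
    is_anomalous = ~is_normal

    fprs = np.empty((num_thresholds, int(is_normal.sum())))
    tprs = np.empty((num_thresholds, int(is_anomalous.sum())))
    for t_idx, thresh in enumerate(thresholds):
        preds = anomaly_maps >= thresh
        # Per-image FPR on normal images: fraction of (all-negative) pixels flagged positive.
        fprs[t_idx] = preds[is_normal].mean(axis=(1, 2))
        # Per-image TPR ("overlap") on anomalous images: fraction of anomalous pixels recovered.
        anom_preds, anom_masks = preds[is_anomalous], masks[is_anomalous]
        tprs[t_idx] = (anom_preds & anom_masks).sum(axis=(1, 2)) / anom_masks.sum(axis=(1, 2))

    # X-axis: indexing metric measured only on normal images (mean per-image FPR), in [0, 1].
    shared_fpr = fprs.mean(axis=1)

    # AUPImO over a restricted FPR range, normalized to [0, 1]: integrating each per-image
    # curve over the kept X range and dividing by the range width equals the average Y value.
    keep = shared_fpr <= fpr_upper_bound
    x = shared_fpr[keep]
    aupimo = np.trapz(tprs[keep], x=x, axis=0) / (x[-1] - x[0])  # one value per anomalous image
    return thresholds, shared_fpr, tprs, aupimo
```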
13 changes: 4 additions & 9 deletions src/anomalib/utils/metrics/perimg/binclf_curve.py
@@ -1,15 +1,10 @@
"""Per-Image Binary Classification Curve.
"""Per-Image Binary Classification (BinClf) Curve.

Binary classification (threshold-dependent) matrix with shared thresholds but per-image counts/rates.
This is a generalization of the binary classification matrix (TP, FP, FN, TN) to a range of thresholds.

Known issue:
At each threshold (shared by all images), the binary classification matrix is computed for each image independently.

Computing the binary classification matrix curve depends on knowing the min and max anomaly scores across all images,
and the current approach is to just store all anomaly maps and masks in memory and compute the min and max at the end;
a better approach would be to do the computation in two phases/epochs:
1. compute the min and max anomaly scores across all images (no need to store the anomaly maps and masks)
2. do the actual computation of the binary classification matrix curve, which can actually be done in batches
once the thresholds are known
This module is used as a building block for other modules like `pimo`.
"""

from __future__ import annotations
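For reference, a minimal sketch of the computation this docstring describes: one binary classification matrix (TN, FP, FN, TP) per (image, threshold) pair, with the thresholds shared across images. This is not the `PerImageBinClfCurve` implementation; the function and argument names are illustrative, and no batching or two-phase min/max optimization is attempted here.

```python
import numpy as np


def per_image_binclf_curve_sketch(anomaly_maps, masks, thresholds):
    """Return confusion counts of shape (num_images, num_thresholds, 2, 2).

    [..., 0, 0] = TN, [..., 0, 1] = FP, [..., 1, 0] = FN, [..., 1, 1] = TP
    """
    anomaly_maps = np.asarray(anomaly_maps, dtype=float)  # (N, H, W) anomaly scores
    masks = np.asarray(masks, dtype=bool)                  # (N, H, W) ground-truth masks
    binclf = np.zeros((anomaly_maps.shape[0], len(thresholds), 2, 2), dtype=np.int64)

    for t_idx, thresh in enumerate(thresholds):
        preds = anomaly_maps >= thresh  # the same threshold is applied to every image
        binclf[:, t_idx, 1, 1] = (preds & masks).sum(axis=(1, 2))    # TP
        binclf[:, t_idx, 0, 1] = (preds & ~masks).sum(axis=(1, 2))   # FP
        binclf[:, t_idx, 1, 0] = (~preds & masks).sum(axis=(1, 2))   # FN
        binclf[:, t_idx, 0, 0] = (~preds & ~masks).sum(axis=(1, 2))  # TN
    return binclf
```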