From fa580a0d2226d9b3000bac1b3d48d37da3a00a9f Mon Sep 17 00:00:00 2001 From: Enrique Noriega Date: Tue, 16 Jan 2024 13:12:25 -0700 Subject: [PATCH] Fixed #755 --- skema/rest/utils.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/skema/rest/utils.py b/skema/rest/utils.py index 319a6265f23..765add351a6 100644 --- a/skema/rest/utils.py +++ b/skema/rest/utils.py @@ -18,6 +18,7 @@ async def get_client(): yield client # close the client when the request is done + def fn_preprocessor(function_network: Dict[str, Any]): fn_data = function_network.copy() @@ -180,23 +181,32 @@ def compute_text_reading_evaluation(gt_data: list, attributes: AttributeCollecti page = a["page"] annotations_by_page[page].append(a) + def annotation_key(a: Dict): + return a['page'], tuple(a['start_xy']), a['text'] + # Count the matches tp, tn, fp, fn = 0, 0, 0, 0 + matched_annotations = set() for e in extractions: + matched = False for m in e.mentions: - if m.extraction_source is not None: - te = m.extraction_source - if te.page is not None: - e_page = te.page - page_annotations = annotations_by_page[e_page] - matched = False - for a in page_annotations: - if extraction_matches_annotation(m, a, json_contents): - matched = True - tp += 1 - break - if not matched: - fp += 1 + if not matched: + if m.extraction_source is not None: + te = m.extraction_source + if te.page is not None: + e_page = te.page + page_annotations = annotations_by_page[e_page] + + for a in page_annotations: + key = annotation_key(a) + if key not in matched_annotations: + if extraction_matches_annotation(m, a, json_contents): + matched_annotations.add(key) + matched = True + tp += 1 + break + if not matched: + fp += 1 recall = tp / len(gt_data) precision = tp / (tp + fp + 0.00000000001)