From 19d62278c654b593e263194bb0ee7607a8123820 Mon Sep 17 00:00:00 2001
From: dilyararimovna
Date: Tue, 13 Dec 2022 15:33:04 +0800
Subject: [PATCH 1/2] feat: consider cls and regexp intents both

---
 .../IntentCatcherTransformers/server.py       | 25 +++++++++++--------
 annotators/IntentCatcherTransformers/utils.py |  4 +--
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/annotators/IntentCatcherTransformers/server.py b/annotators/IntentCatcherTransformers/server.py
index 30eff91212..0018f570ac 100644
--- a/annotators/IntentCatcherTransformers/server.py
+++ b/annotators/IntentCatcherTransformers/server.py
@@ -1,3 +1,4 @@
+import json
 import logging
 import os
 from itertools import chain
@@ -37,14 +38,17 @@
 parsed = parse_config(CONFIG_NAME)
 with open(expand_path(parsed["metadata"]["variables"]["MODEL_PATH"]).joinpath("classes.dict"), "r") as f:
     intents = f.read().strip().splitlines()
-intents = [el.strip().split("\t")[0] for el in intents]
-logger.info(f"Considered intents: {intents}")
+CLS_INTENTS = [el.strip().split("\t")[0] for el in intents]
+ALL_INTENTS = list(json.load(open(INTENT_PHRASES_PATH))["intent_phrases"].keys())
+logger.info(f"Considered intents for classifier: {CLS_INTENTS}")
+logger.info(f"Considered intents from json file: {ALL_INTENTS}")
 
 
-def get_classifier_predictions(batch_texts: List[List[str]], intents, intents_model, thresholds):
+def get_classifier_predictions(batch_texts: List[List[str]], intents_model, thresholds):
+    global CLS_INTENTS
     if thresholds is None:
         # if we do not given thresholds, use 0.5 as default
-        thresholds = [0.5] * len(intents)
+        thresholds = [0.5] * len(CLS_INTENTS)
     thresholds = np.array(thresholds)
     # make a 1d-list of texts for classifier
     sentences = list(chain.from_iterable(batch_texts))
@@ -61,18 +65,19 @@ def get_classifier_predictions(batch_texts: List[List[str]], intents_mo
         maximized_probas = np.max(pred_probas[sentences_text_ids == text_id], axis=0)
         resp = {
             intent: {"detected": int(float(proba) > thresh), "confidence": round(float(proba), 3)}
-            for intent, thresh, proba in zip(intents, thresholds, maximized_probas)
+            for intent, thresh, proba in zip(CLS_INTENTS, thresholds, maximized_probas)
         }
         result += [resp]
     return result
 
 
-def predict_intents(batch_texts: List[List[str]], intents, regexp, intents_model, thresholds=None):
+def predict_intents(batch_texts: List[List[str]], regexp, intents_model, thresholds=None):
+    global ALL_INTENTS
     responds = []
     not_detected_utterances = []
     for text_id, text in enumerate(batch_texts):
-        resp = {intent: {"detected": 0, "confidence": 0.0} for intent in intents}
+        resp = {intent: {"detected": 0, "confidence": 0.0} for intent in ALL_INTENTS}
         not_detected_utterance = text.copy()
         for intent, reg in regexp.items():
             for i, utt in enumerate(text):
@@ -86,8 +91,8 @@ def predict_intents(batch_texts: List[List[str]], intents, regexp, intents_model
         responds.append(resp)
 
     if len(not_detected_utterances) > 0 and len(not_detected_utterances[0]) > 0:
-        classifier_result = get_classifier_predictions(not_detected_utterances, intents, intents_model, thresholds)
-        return unite_responses(classifier_result, responds)
+        classifier_result = get_classifier_predictions(not_detected_utterances, intents_model, thresholds)
+        return unite_responses(classifier_result, responds, ALL_INTENTS)
     else:
         return responds
 
@@ -96,7 +101,7 @@ def predict_intents(batch_texts: List[List[str]], intents, regexp, intents_model
 def detect():
     utterances = request.json["sentences"]
     logger.info(f"Input: `{utterances}`.")
-    results = predict_intents(utterances, intents, regexp, intents_model)
+    results = predict_intents(utterances, regexp, intents_model)
     logger.info(f"Output: `{results}`.")
     return jsonify(results)
 
diff --git a/annotators/IntentCatcherTransformers/utils.py b/annotators/IntentCatcherTransformers/utils.py
index fc0e2a1d67..870ca1e97f 100644
--- a/annotators/IntentCatcherTransformers/utils.py
+++ b/annotators/IntentCatcherTransformers/utils.py
@@ -22,12 +22,12 @@ def get_regexp(intent_phrases_path):
     return regexp
 
 
-def unite_responses(responses_a: List[dict], responses_b: List[dict]):
+def unite_responses(responses_a: List[dict], responses_b: List[dict], all_intents_to_consider):
     assert len(responses_a) == len(responses_b)
     result = []
     for a, b in zip(responses_a, responses_b):
         resp = {}
-        for intent in a:
+        for intent in all_intents_to_consider:
             resp[intent] = {
                 "detected": max(a[intent]["detected"], b[intent]["detected"]),
                 "confidence": max(a[intent]["confidence"], b[intent]["confidence"]),

From e8179a385266c1786c926d55039c32746b75376d Mon Sep 17 00:00:00 2001
From: dilyararimovna
Date: Tue, 13 Dec 2022 16:32:22 +0300
Subject: [PATCH 2/2] fix: get intent

---
 annotators/IntentCatcherTransformers/utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/annotators/IntentCatcherTransformers/utils.py b/annotators/IntentCatcherTransformers/utils.py
index 870ca1e97f..826135e0b8 100644
--- a/annotators/IntentCatcherTransformers/utils.py
+++ b/annotators/IntentCatcherTransformers/utils.py
@@ -28,9 +28,11 @@ def unite_responses(responses_a: List[dict], responses_b: List[dict], all_intent
     for a, b in zip(responses_a, responses_b):
         resp = {}
         for intent in all_intents_to_consider:
+            a_intent = a.get(intent, {"detected": 0, "confidence": 0.0})
+            b_intent = b.get(intent, {"detected": 0, "confidence": 0.0})
             resp[intent] = {
-                "detected": max(a_intent["detected"], b_intent["detected"]),
-                "confidence": max(a_intent["confidence"], b_intent["confidence"]),
+                "detected": max(a_intent["detected"], b_intent["detected"]),
+                "confidence": max(a_intent["confidence"], b_intent["confidence"]),
             }
         result.append(resp)
     return result
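
Below is a minimal, self-contained sketch (not part of either patch) of the merge behaviour the two patches establish together: the classifier scores only CLS_INTENTS, the regexp pass covers ALL_INTENTS, and the patched unite_responses falls back to {"detected": 0, "confidence": 0.0} for any intent missing from one side before taking the element-wise maximum. The intent names "yes" and "exit" are made-up examples.

from typing import List


def unite_responses(responses_a: List[dict], responses_b: List[dict], all_intents_to_consider):
    # Same logic as the patched utils.py: intents missing from either response
    # default to {"detected": 0, "confidence": 0.0} before the element-wise max.
    assert len(responses_a) == len(responses_b)
    result = []
    for a, b in zip(responses_a, responses_b):
        resp = {}
        for intent in all_intents_to_consider:
            a_intent = a.get(intent, {"detected": 0, "confidence": 0.0})
            b_intent = b.get(intent, {"detected": 0, "confidence": 0.0})
            resp[intent] = {
                "detected": max(a_intent["detected"], b_intent["detected"]),
                "confidence": max(a_intent["confidence"], b_intent["confidence"]),
            }
        result.append(resp)
    return result


# Hypothetical example: the classifier scored only "yes" (a CLS_INTENTS member),
# while the regexp pass matched "exit"; the union covers both intents.
classifier_result = [{"yes": {"detected": 1, "confidence": 0.973}}]
regexp_result = [{"yes": {"detected": 0, "confidence": 0.0}, "exit": {"detected": 1, "confidence": 1.0}}]
print(unite_responses(classifier_result, regexp_result, ["yes", "exit"]))
# [{'yes': {'detected': 1, 'confidence': 0.973}, 'exit': {'detected': 1, 'confidence': 1.0}}]

Using dict.get with a neutral default keeps the union well-defined even though the two responders no longer share the same intent set.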