From 19d62278c654b593e263194bb0ee7607a8123820 Mon Sep 17 00:00:00 2001
From: dilyararimovna
Date: Tue, 13 Dec 2022 15:33:04 +0800
Subject: [PATCH 1/2] feat: consider cls and regexp intents both

---
 .../IntentCatcherTransformers/server.py       | 25 +++++++++++--------
 annotators/IntentCatcherTransformers/utils.py |  4 +--
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/annotators/IntentCatcherTransformers/server.py b/annotators/IntentCatcherTransformers/server.py
index 30eff91212..0018f570ac 100644
--- a/annotators/IntentCatcherTransformers/server.py
+++ b/annotators/IntentCatcherTransformers/server.py
@@ -1,3 +1,4 @@
+import json
 import logging
 import os
 from itertools import chain
@@ -37,14 +38,17 @@
 parsed = parse_config(CONFIG_NAME)
 with open(expand_path(parsed["metadata"]["variables"]["MODEL_PATH"]).joinpath("classes.dict"), "r") as f:
     intents = f.read().strip().splitlines()
-intents = [el.strip().split("\t")[0] for el in intents]
-logger.info(f"Considered intents: {intents}")
+CLS_INTENTS = [el.strip().split("\t")[0] for el in intents]
+ALL_INTENTS = list(json.load(open(INTENT_PHRASES_PATH))["intent_phrases"].keys())
+logger.info(f"Considered intents for classifier: {CLS_INTENTS}")
+logger.info(f"Considered intents from json file: {ALL_INTENTS}")
 
 
-def get_classifier_predictions(batch_texts: List[List[str]], intents, intents_model, thresholds):
+def get_classifier_predictions(batch_texts: List[List[str]], intents_model, thresholds):
+    global CLS_INTENTS
     if thresholds is None:
         # if we do not given thresholds, use 0.5 as default
-        thresholds = [0.5] * len(intents)
+        thresholds = [0.5] * len(CLS_INTENTS)
     thresholds = np.array(thresholds)
     # make a 1d-list of texts for classifier
     sentences = list(chain.from_iterable(batch_texts))
@@ -61,18 +65,19 @@ def get_classifier_predictions(batch_texts: List[List[str]], intents_mo
         maximized_probas = np.max(pred_probas[sentences_text_ids == text_id], axis=0)
         resp = {
             intent: {"detected": int(float(proba) > thresh), "confidence": round(float(proba), 3)}
-            for intent, thresh, proba in zip(intents, thresholds, maximized_probas)
+            for intent, thresh, proba in zip(CLS_INTENTS, thresholds, maximized_probas)
         }
         result += [resp]
     return result
 
 
-def predict_intents(batch_texts: List[List[str]], intents, regexp, intents_model, thresholds=None):
+def predict_intents(batch_texts: List[List[str]], regexp, intents_model, thresholds=None):
+    global ALL_INTENTS
     responds = []
     not_detected_utterances = []
     for text_id, text in enumerate(batch_texts):
-        resp = {intent: {"detected": 0, "confidence": 0.0} for intent in intents}
+        resp = {intent: {"detected": 0, "confidence": 0.0} for intent in ALL_INTENTS}
         not_detected_utterance = text.copy()
         for intent, reg in regexp.items():
             for i, utt in enumerate(text):
@@ -86,8 +91,8 @@ def predict_intents(batch_texts: List[List[str]], intents, regexp, intents_model
         responds.append(resp)
 
     if len(not_detected_utterances) > 0 and len(not_detected_utterances[0]) > 0:
-        classifier_result = get_classifier_predictions(not_detected_utterances, intents, intents_model, thresholds)
-        return unite_responses(classifier_result, responds)
+        classifier_result = get_classifier_predictions(not_detected_utterances, intents_model, thresholds)
+        return unite_responses(classifier_result, responds, ALL_INTENTS)
     else:
         return responds
 
@@ -96,7 +101,7 @@ def predict_intents(batch_texts: List[List[str]], intents, regexp, intents_model
 def detect():
     utterances = request.json["sentences"]
     logger.info(f"Input: `{utterances}`.")
-    results = predict_intents(utterances, intents, regexp, intents_model)
+    results = predict_intents(utterances, regexp, intents_model)
     logger.info(f"Output: `{results}`.")
     return jsonify(results)
 
diff --git a/annotators/IntentCatcherTransformers/utils.py b/annotators/IntentCatcherTransformers/utils.py
index fc0e2a1d67..870ca1e97f 100644
--- a/annotators/IntentCatcherTransformers/utils.py
+++ b/annotators/IntentCatcherTransformers/utils.py
@@ -22,12 +22,12 @@ def get_regexp(intent_phrases_path):
     return regexp
 
 
-def unite_responses(responses_a: List[dict], responses_b: List[dict]):
+def unite_responses(responses_a: List[dict], responses_b: List[dict], all_intents_to_consider):
     assert len(responses_a) == len(responses_b)
     result = []
     for a, b in zip(responses_a, responses_b):
         resp = {}
-        for intent in a:
+        for intent in all_intents_to_consider:
             resp[intent] = {
                 "detected": max(a[intent]["detected"], b[intent]["detected"]),
                 "confidence": max(a[intent]["confidence"], b[intent]["confidence"]),

From e8179a385266c1786c926d55039c32746b75376d Mon Sep 17 00:00:00 2001
From: dilyararimovna
Date: Tue, 13 Dec 2022 16:32:22 +0300
Subject: [PATCH 2/2] fix: get intent

---
 annotators/IntentCatcherTransformers/utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/annotators/IntentCatcherTransformers/utils.py b/annotators/IntentCatcherTransformers/utils.py
index 870ca1e97f..826135e0b8 100644
--- a/annotators/IntentCatcherTransformers/utils.py
+++ b/annotators/IntentCatcherTransformers/utils.py
@@ -28,9 +28,11 @@ def unite_responses(responses_a: List[dict], responses_b: List[dict], all_intent
     for a, b in zip(responses_a, responses_b):
         resp = {}
         for intent in all_intents_to_consider:
+            a_intent = a.get(intent, {"detected": 0, "confidence": 0.0})
+            b_intent = b.get(intent, {"detected": 0, "confidence": 0.0})
             resp[intent] = {
-                "detected": max(a_intent["detected"], b_intent["detected"]),
-                "confidence": max(a_intent["confidence"], b_intent["confidence"]),
+                "detected": max(a_intent["detected"], b_intent["detected"]),
+                "confidence": max(a_intent["confidence"], b_intent["confidence"]),
             }
         result.append(resp)
     return result
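
Below is a minimal, self-contained sketch (not part of either patch) of the merge behaviour the two patches establish together: the classifier scores only CLS_INTENTS, the regexp pass covers ALL_INTENTS, and the patched unite_responses falls back to {"detected": 0, "confidence": 0.0} for any intent missing from one side before taking the element-wise maximum. The intent names "yes" and "exit" are made-up examples.

from typing import List


def unite_responses(responses_a: List[dict], responses_b: List[dict], all_intents_to_consider):
    # Same logic as the patched utils.py: intents missing from either response
    # default to {"detected": 0, "confidence": 0.0} before the element-wise max.
    assert len(responses_a) == len(responses_b)
    result = []
    for a, b in zip(responses_a, responses_b):
        resp = {}
        for intent in all_intents_to_consider:
            a_intent = a.get(intent, {"detected": 0, "confidence": 0.0})
            b_intent = b.get(intent, {"detected": 0, "confidence": 0.0})
            resp[intent] = {
                "detected": max(a_intent["detected"], b_intent["detected"]),
                "confidence": max(a_intent["confidence"], b_intent["confidence"]),
            }
        result.append(resp)
    return result


# Hypothetical example: the classifier scored only "yes" (a CLS_INTENTS member),
# while the regexp pass matched "exit"; the union covers both intents.
classifier_result = [{"yes": {"detected": 1, "confidence": 0.973}}]
regexp_result = [{"yes": {"detected": 0, "confidence": 0.0}, "exit": {"detected": 1, "confidence": 1.0}}]
print(unite_responses(classifier_result, regexp_result, ["yes", "exit"]))
# [{'yes': {'detected': 1, 'confidence': 0.973}, 'exit': {'detected': 1, 'confidence': 1.0}}]

Using dict.get with a neutral default keeps the union well-defined even though the two responders no longer share the same intent set.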