diff --git a/annotators/BadlistedWordsDetector/requirements.txt b/annotators/BadlistedWordsDetector/requirements.txt index a49c964a8c..7e1ff810e3 100644 --- a/annotators/BadlistedWordsDetector/requirements.txt +++ b/annotators/BadlistedWordsDetector/requirements.txt @@ -6,4 +6,4 @@ sentry-sdk==0.12.3 spacy==3.0.5 click==7.1.2 jinja2<=3.0.3 -Werkzeug<=2.0.3 \ No newline at end of file +Werkzeug<=2.0.3 diff --git a/annotators/BadlistedWordsDetector_ru/requirements.txt b/annotators/BadlistedWordsDetector_ru/requirements.txt index 5311bf5bc4..750a4c8864 100644 --- a/annotators/BadlistedWordsDetector_ru/requirements.txt +++ b/annotators/BadlistedWordsDetector_ru/requirements.txt @@ -7,4 +7,4 @@ spacy==3.0.5 click==7.1.2 pymorphy2==0.9.1 jinja2<=3.0.3 -Werkzeug<=2.0.3 \ No newline at end of file +Werkzeug<=2.0.3 diff --git a/annotators/combined_classification/Dockerfile b/annotators/combined_classification/Dockerfile index c72ffa395a..1104ccd9af 100644 --- a/annotators/combined_classification/Dockerfile +++ b/annotators/combined_classification/Dockerfile @@ -1,14 +1,8 @@ -FROM deeppavlov/base-gpu:0.12.1 -RUN pip install git+https://github.com/deeppavlov/DeepPavlov.git@0.12.1 +FROM deeppavlov/base-gpu:0.17.5 -#RUN rm DeepPavlov +RUN pip install git+https://github.com/deeppavlov/DeepPavlov.git@a53c42062e4bccf6ec63021ec6bd7b9fbe23f091 -#Set up git lfs for your user account: git lfs install -WORKDIR /base -RUN rm -rf DeepPavlov -RUN git clone https://github.com/dimakarp1996/DeepPavlov.git WORKDIR /base/DeepPavlov -RUN git checkout pal-bert+ner ARG CONFIG @@ -21,9 +15,7 @@ RUN mkdir common COPY annotators/combined_classification/ ./ COPY common/ common/ -RUN ls /tmp RUN pip install -r requirements.txt -ARG DATA_URL=http://files.deeppavlov.ai/alexaprize_data/pal_bert_7in1/model.pth.tar -ADD $DATA_URL /tmp + CMD gunicorn --workers=1 --bind 0.0.0.0:8087 --timeout=300 server:app diff --git a/annotators/combined_classification/README.md b/annotators/combined_classification/README.md index 8f0615c925..98fa4681f7 100644 --- a/annotators/combined_classification/README.md +++ b/annotators/combined_classification/README.md @@ -1 +1,22 @@ -BERT Base model for 6 tasks - cobot topics cobot dialogact topics cobot dialogact intent emotion sentiment toxic +This model is based on the transformer-agnostic multitask neural architecture. It can solve several tasks simultaneously, performing almost as well as single-task models. + +The models were trained on the following datasets: + +**Factoid classification**: For the Factoid task, we used the same Yahoo ConversVsInfo dataset that was used to train the Dream socialbot in the Alexa Prize. Note that the valid set in this task was equal to the test set. + +**Midas classification**: For the Midas task, we used the same Midas classification dataset that was used to train the Dream socialbot in the Alexa Prize. Note that the valid set in this task was equal to the test set. + +**Emotion classification**: For the Emotion classification task, we used the emo\_go\_emotions dataset, with all 28 classes compressed into the seven basic emotions as in the original paper. Note that these 7 emotions are not exactly the same as the 7 emotions in the original Dream socialbot in the Alexa Prize: one emotion differs (love vs disgust), so the scores are not comparable with the original model. Note that this task is multilabel. + +**Topic classification**: For the Topic classification task, we used the dataset made by Dilyara Zharikova.
The dataset was further filtered and improved for the final model version to make the model suitable for DREAM. Note that the original topics model doesn’t account for these dataset changes (which also affected the number of classes), so its scores are not comparable with ours. + +**Sentiment classification**: For the Sentiment classification task, we used the Dynabench dataset (r1 + r2). + +**Toxic classification**: For the Toxic classification task, we used the dataset from Kaggle, keeping the 7 toxic classes of interest to us. Note that this task is multilabel. + +The model also includes replacement models for 3 Amazon services. + +The models (multitask and comparative single-task) were trained with an initial learning rate of 2e-5 (with validation patience 2, it could be dropped 2 times), batch size 32, the AdamW optimizer (betas (0.9, 0.99)), and early stopping on 3 epochs. The early stopping criterion was the average accuracy over all tasks for multitask models, or the single-task accuracy for single-task models. + +This model (with a distilbert-base-uncased backbone) takes only 2439 Mb for all 9 tasks, whereas a single-task model with the same backbone takes up almost the same memory (~2437 Mb) for each of these 9 tasks. + diff --git a/annotators/combined_classification/combined_classifier.json b/annotators/combined_classification/combined_classifier.json index 9737826142..fd6dad87de 100644 --- a/annotators/combined_classification/combined_classifier.json +++ b/annotators/combined_classification/combined_classifier.json @@ -1,182 +1,214 @@ { - "chainer":{ - "in":[ - "x", - "x_with_history" + "metadata": { + "variables": { + "BACKBONE": "distilbert-base-uncased", + "MODELS_PATH": "~/.deeppavlov/models", + "SINGLE_SENTENCE_LEN": 64, + "MULTI_SENTENCE_LEN": 128 + }, + "download":[{ + "url": "http://files.deeppavlov.ai/dream_data/distil9in1/9in1_v1.tar.gz", + "subdir": "{MODELS_PATH}" + }] + }, + "chainer": { + "in": [ + "x_emo", + "x_sentiment", + "x_toxic", + "x_factoid", + "x_midas", + "x_deeppavlov_topics", + "x_topics", + "x_dialogact_topics", + "x_dialogact_intents" + ], + "pipe": [ + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{BACKBONE}", + "max_seq_length": "{SINGLE_SENTENCE_LEN}", + "in": [ + "x_emo" ], - "pipe":[ - { - "class_name":"torch_transformers_preprocessor", - "vocab_file":"bert-base-uncased", - "max_seq_length":32, - "in":[ - "x" - ], - "out":[ - "bert_features_x" - ] - }, - { - "class_name":"torch_transformers_preprocessor", - "vocab_file":"bert-base-uncased", - "max_seq_length":64, - "in":[ - "x_with_history" - ], - "out":[ - "bert_features_x_with_history" - ] - }, - { - "id":"multitask_pal_bert", - "class_name":"multitask_pal_bert", - "optimizer_parameters":{ - "lr":4e-5 - }, - "gradient_accumulation_steps":1, - "steps_per_epoch":1, - "learning_rate_drop_patience":2, - "learning_rate_drop_div":2.0, - "return_probas":true, - "multilabel": false, - "one_hot_labels":true, - "log_model_summary":false, - "save_path":"{MODELS_PATH}/model", - "load_path":"{MODELS_PATH}/model", - "tasks":{ - "topics":{ - "n_classes":22 - }, - "da_topics":{ - "n_classes":11 - }, - "da_intents":{ - "n_classes":11 - }, - "emo":{ - "n_classes":7 - }, - "sentiment":{ - "n_classes":3 - }, - "toxic":{ - "n_classes":8 - }, - "factoid":{"n_classes": 2} - }, - "in_distribution":{ - "topics":1, - "da_topics":1, - "da_intents":1, - "emo":1, - "sentiment":1, - "toxic":1, - "factoid":1 - }, - "in":[ - "bert_features_x_with_history", - "bert_features_x_with_history", -
"bert_features_x_with_history", - "bert_features_x_with_history", - "bert_features_x", - "bert_features_x", - "bert_features_x", - "bert_features_x" - ], - "out":[ - "y_topics_pred", - "y_da_topics_pred", - "y_da_intents_pred", - "y_emo_pred", - "y_sentiment_pred", - "y_toxic_pred", - "y_factoid_pred" - ] - }, - { - "in": [ - "y_topics_pred" - ], - "out": [ - "y_topics_pred_ids" - ], - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": [ - "y_da_topics_pred" - ], - "out": [ - "y_da_topics_pred_ids" - ], - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": [ - "y_da_intents_pred" - ], - "out": [ - "y_da_intents_pred_ids" - ], - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": [ - "y_emo_pred" - ], - "out": [ - "y_emo_pred_ids" - ], - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": [ - "y_sentiment_pred" - ], - "out": [ - "y_sentiment_pred_ids" - ], - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": [ - "y_toxic_pred" - ], - "out": [ - "y_toxic_pred_ids" - ], - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": [ - "y_factoid_pred" - ], - "out": [ - "y_factoid_pred_ids" - ], - "class_name": "proba2labels", - "max_proba": true - } + "out": [ + "bert_features_x_emo" + ] + }, + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{BACKBONE}", + "max_seq_length": "{SINGLE_SENTENCE_LEN}", + "in": [ + "x_sentiment" ], - "out":[ - "y_topics_pred", - "y_da_topics_pred", - "y_da_intents_pred", - "y_emo_pred", - "y_sentiment_pred", - "y_toxic_pred", - "y_factoid_pred" + "out": [ + "bert_features_x_sentiment" ] - }, - "metadata":{ - "variables":{ - "MODELS_PATH":"/tmp" - } - } + }, + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{BACKBONE}", + "max_seq_length": "{SINGLE_SENTENCE_LEN}", + "in": [ + "x_toxic" + ], + "out": [ + "bert_features_x_toxic" + ] + }, + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{BACKBONE}", + "max_seq_length": "{SINGLE_SENTENCE_LEN}", + "in": [ + "x_factoid" + ], + "out": [ + "bert_features_x_factoid" + ] + }, + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{BACKBONE}", + "max_seq_length": "{SINGLE_SENTENCE_LEN}", + "in": [ + "x_midas" + ], + "out": [ + "bert_features_x_midas" + ] + }, + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{BACKBONE}", + "max_seq_length": "{SINGLE_SENTENCE_LEN}", + "in": [ + "x_deeppavlov_topics" + ], + "out": [ + "bert_features_x_deeppavlov_topics" + ] + }, + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{BACKBONE}", + "max_seq_length": "{SINGLE_SENTENCE_LEN}", + "in": [ + "x_topics" + ], + "out": [ + "bert_features_x_topics" + ] + }, + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{BACKBONE}", + "max_seq_length": "{SINGLE_SENTENCE_LEN}", + "in": [ + "x_dialogact_topics" + ], + "out": [ + "bert_features_x_dialogact_topics" + ] + }, + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{BACKBONE}", + "max_seq_length": "{SINGLE_SENTENCE_LEN}", + "in": [ + "x_dialogact_intents" + ], + "out": [ + "bert_features_x_dialogact_intents" + ] + }, + { + "id": "multitask_bert", + "class_name": "multitask_bert", + "optimizer_parameters": { + "lr": 2e-5 + }, + "learning_rate_drop_patience": 2, + "learning_rate_drop_div": 2.0, + "return_probas": true, + "backbone_model": "{BACKBONE}", + "save_path": "{MODELS_PATH}/model", + "load_path": "{MODELS_PATH}/model", + "tasks": { + "emo": { + 
"type": "classification", + "options": 7, + "multilabel": true + }, + "sentiment": { + "type": "classification", + "options": 3 + }, + "toxic": { + "type": "classification", + "options": 7, + "multilabel": true + }, + "factoid": { + "type": "classification", + "options": 2 + }, + "midas": { + "type": "classification", + "options": 15 + }, + "deeppavlov_topics": { + "type": "classification", + "options": 33 + }, + "topics": { + "type": "classification", + "options": 22 + }, + "dialogact_topics": { + "type": "classification", + "options": 11 + }, + "dialogact_intents": { + "type": "classification", + "options": 11 + } + }, + "in": [ + "bert_features_x_emo", + "bert_features_x_sentiment", + "bert_features_x_toxic", + "bert_features_x_factoid", + "bert_features_x_midas", + "bert_features_x_deeppavlov_topics", + "bert_features_x_topics", + "bert_features_x_dialogact_topics", + "bert_features_x_dialogact_intents" + ], + "out": [ + "y_emo_pred_probas", + "y_sentiment_pred_probas", + "y_toxic_pred_probas", + "y_factoid_pred_probas", + "y_midas_pred_probas", + "y_deeppavlov_topics_pred_probas", + "y_topics_pred_probas", + "y_dialogact_topics_pred_probas", + "y_dialogact_intents_pred_probas" + ] + } + ], + "out": [ + "y_emo_pred_probas", + "y_sentiment_pred_probas", + "y_toxic_pred_probas", + "y_factoid_pred_probas", + "y_midas_pred_probas", + "y_deeppavlov_topics_pred_probas", + "y_topics_pred_probas", + "y_dialogact_topics_pred_probas", + "y_dialogact_intents_pred_probas" + ] + } } diff --git a/annotators/combined_classification/requirements.txt b/annotators/combined_classification/requirements.txt index 1cbd31ba2c..652b4d18ed 100644 --- a/annotators/combined_classification/requirements.txt +++ b/annotators/combined_classification/requirements.txt @@ -3,10 +3,11 @@ sentry-sdk[flask]==0.14.1 flask==1.1.1 itsdangerous==2.0.1 requests==2.22.0 -uvicorn==0.11.7 +uvicorn==0.3 prometheus-client==0.7.1 -filelock==3.0.12 -torch==1.5.1 -transformers==4.6.0 +filelock==3.4.2 +torch==1.9.1 +transformers==4.15.0 jinja2<=3.0.3 Werkzeug<=2.0.3 +pytorch-crf==0.7.2 diff --git a/annotators/combined_classification/server.py b/annotators/combined_classification/server.py index 37f881c9fd..4e30ec6a72 100644 --- a/annotators/combined_classification/server.py +++ b/annotators/combined_classification/server.py @@ -9,66 +9,58 @@ from deeppavlov import build_model from common.utils import combined_classes -task_names = [ - "cobot_topics", - "cobot_dialogact_topics", - "cobot_dialogact_intents", - "emotion_classification", - "sentiment_classification", - "toxic_classification", - "factoid_classification", -] # ORDER MATTERS! 
- logger = logging.getLogger(__name__) sentry_sdk.init(dsn=os.getenv("SENTRY_DSN"), integrations=[FlaskIntegration()]) app = Flask(__name__) -def get_result(sentences, sentences_with_history): - st_time = time.time() +def get_result(sentences, sentences_with_history, postannotations=False): + logger.debug((sentences, sentences_with_history, postannotations)) ans = [{} for _ in sentences] - if not sentences: logger.exception("Input sentences not received") sentences = [" "] if not sentences_with_history: logger.exception("Input sentences with history not received") sentences_with_history = sentences - + if not postannotations: + data = [ + sentences, # emo was trained without history + sentences, # sentiment was trained without history + sentences, # toxic was trained without history + sentences, # factoid was trained without history + sentences_with_history, # midas was trained with history + sentences, # deeppavlov topics was trained without history + sentences, # cobot topics was trained without history + sentences, # cobot dialogact topics is now trained without history + sentences, # cobot dialogact intents is now trained without history + ] + else: + # While using postannotations, we annotate only for tasks we use in response_selector + data = [[] for _ in range(9)] + data[2] = sentences + data[-1] = sentences + data[-2] = sentences + data[-3] = sentences try: - if sentences and sentences_with_history: - res = model(sentences, sentences_with_history) - else: - raise Exception( - f"Empty list of sentences or sentences with history received." - f"Sentences: {sentences} " - f"Sentences with history: {sentences_with_history}" - ) - - for name, value in zip(task_names, res): - for i in range(len(value)): - is_toxic = "toxic" in name and value[i][-1] < 0.5 - if is_toxic: # sum of probs of all toxic classes >0.5 - value[i][-1] = 0 - value[i] = [k / sum(value[i]) for k in value[i]] - for class_, prob in zip(combined_classes[name], value[i]): - if prob == max(value[i]): - if class_ != "not_toxic" and name == "toxic_classification": - prob = 1 - ans[i][name] = {class_: float(prob)} + prob_lists = model(*data) + for task_name, prob_list in zip(combined_classes, prob_lists): + # we assume toxic has 7 classes + for i in range(len(prob_list)): + if prob_list[i]: + ans[i][task_name] = { + class_: round(float(prob), 2) for class_, prob in zip(combined_classes[task_name], prob_list[i]) + } except Exception as e: sentry_sdk.capture_exception(e) logger.exception(e) - total_time = time.time() - st_time - logger.info(f"7in1 exec time: {total_time:.3f}s") - logger.info(ans) return ans try: - model = build_model("combined_classifier.json", download=False) + model = build_model("combined_classifier.json", download=True) logger.info("Making test res") test_res = get_result(["a"], ["a"]) logger.info("model loaded, test query processed") @@ -83,23 +75,22 @@ def respond(): t = time.time() sentences = request.json.get("sentences", [" "]) sentences_with_hist = request.json.get("sentences_with_history", sentences) - answer = get_result(sentences, sentences_with_hist) - - logger.info(f"7in1 result: {answer}") - logger.info(f"Combined classifier exec time: {time.time() - t}") + answer = get_result(sentences, sentences_with_hist, postannotations=False) + logger.debug(f"combined_classification result: {answer}") + logger.info(f"combined_classification exec time: {time.time() - t}") return jsonify(answer) @app.route("/batch_model", methods=["POST"]) def batch_respond(): t = time.time() + sep = " [SEP] " 
utterances_with_histories = request.json.get("utterances_with_histories", [[" "]]) - sentences_with_hist = [" [SEP] ".join(s) for s in utterances_with_histories] - sentences = [s[-1] for s in utterances_with_histories] - answer = get_result(sentences, sentences_with_hist) - - logger.info(f"7in1 batch result: {answer}") - logger.info(f"Combined classifier exec time: {time.time() - t}") + sentences_with_hist = [sep.join(s) for s in utterances_with_histories] + sentences = [s[-1].split(sep)[-1] for s in utterances_with_histories] + answer = get_result(sentences, sentences_with_hist, postannotations=True) + logger.debug(f"combined_classification batch result: {answer}") + logger.info(f"combined_classification exec time: {time.time() - t}") return jsonify([{"batch": answer}]) diff --git a/annotators/combined_classification/test.py b/annotators/combined_classification/test.py index 007f9b4864..9514335e8f 100644 --- a/annotators/combined_classification/test.py +++ b/annotators/combined_classification/test.py @@ -1,5 +1,5 @@ import requests -import logging +from time import time def main_test(): @@ -7,47 +7,118 @@ def main_test(): batch_url = "http://0.0.0.0:8087/batch_model" configs = [ { - "sentences": ["how do I empty my DNS cache?", "which do you prefer?"], + "sentences": ["do you like porn"], + "task": "all", + "possible_answers": { + "cobot_topics": ["Sex_Profanity"], + "cobot_dialogact_topics": ["Inappropriate_Content"], + }, + }, + { + "sentences": ["let's talk about movies", "do you like porn"], + "task": "cobot_dialogact_topics", + "answers_bert": [["Entertainment_Movies"], ["Inappropriate_Content"]], + }, + { + "sentences": ["let's talk about games", "do you like watching films"], + "task": "cobot_topics", + "answers_bert": [["Games"], ["Movies_TV"]], + }, + { + "sentences_with_history": ["What is the capital of Great Britain [SEP] I don't know"], + "sentences": ["I don't know"], + "task": "cobot_dialogact_intents", + "answers_bert": [["Information_DeliveryIntent"]], + }, + { + "sentences": ["how do I empty my DNS cache?", "which do you prefer?", "where is montreal"], "task": "factoid_classification", - "answers": [["is_factoid"], ["is_conversational"]], + "answers_bert": [["is_factoid"], ["is_conversational"], ["is_factoid"]], }, { "sentences": ["i love you", "i hate you", "It is now"], "task": "sentiment_classification", - "answers": [["positive"], ["negative"], ["neutral"]], + "answers_bert": [["positive"], ["negative"], ["neutral"]], }, { - "sentences": ["you son of the bitch", "yes"], - "task": "toxic_classification", - "answers": [["obscene"], ["not_toxic"]], + "sentences": ["why you are such a fool"], + "task": "emotion_classification", + "answers_bert": [["anger"]], + "multilabel": True, }, { - "sentences": ["why you are so dumb"], - "task": "emotion_classification", - "answers": [["anger"]], + "sentences_with_history": ["this is the best dog [SEP] so what you think"], + "sentences": ["so what you think"], + "task": "midas_classification", + "answers_bert": [["opinion"]], }, { - "sentences": ["let's talk about movies"], - "task": "cobot_dialogact_topics", - "answers": [["Entertainment_Movies"]], + "sentences": [ + "do you like porn", + "have you been to Alaska", + "please talk about movies", + "please talk about books", + "talk about games", + ], + "task": "deeppavlov_topics", + "answers_bert": [["Music"], ["Disasters"], ["Movies_TV"], ["Books&Literature"], ["Videogames"]], + }, + { + "sentences": ["you son of the bitch", "yes", "do you like porn"], + "task": 
"toxic_classification", + "answers_bert": [["insult", "obscene", "toxic"], []], + "multilabel": True, }, - {"sentences": ["let's talk about games"], "task": "cobot_topics", "answers": [["Games"]]}, - {"sentences": ["let's switch topic"], "task": "cobot_dialogact_intents", "answers": [["Topic_SwitchIntent"]]}, ] + t = time() for config in configs: - config["utterances_with_histories"] = [[k] for k in config["sentences"]] + if "sentences_with_history" in config: + config["utterances_with_histories"] = [[k] for k in config["sentences_with_history"]] + else: + config["utterances_with_histories"] = [[k] for k in config["sentences"]] responses = requests.post(url, json=config).json() batch_responses = requests.post(batch_url, json=config).json() - assert batch_responses[0]["batch"] == responses, ( - f"Batch responses {batch_responses} " f"not match to responses {responses}" - ) - responses = [j[config["task"]] for j in responses] - for response, answer, sentence in zip(responses, config["answers"], config["sentences"]): - predicted_classes = [class_ for class_ in response if response[class_] > 0.5] - assert sorted(answer) == sorted(predicted_classes), " * ".join( - [str(j) for j in [sentence, config["task"], answer, predicted_classes, response]] - ) - logging.info("SUCCESS!") + batch_error_msg = f"Batch responses {batch_responses} not match to responses {responses}" + assert ( + batch_responses[0]["batch"][0]["toxic_classification"] == responses[0]["toxic_classification"] + ), batch_error_msg + if config["task"] == "all": + for i in range(len(responses)): + print(f"Checking that at least 1 annotator works for {config['sentences'][i]}") + predicted_cobot_topics = [ + class_ + for class_ in responses[i]["cobot_topics"] + if responses[i]["cobot_topics"][class_] == max(responses[0]["cobot_topics"].values()) + ] + predicted_cobot_da_topics = [ + class_ + for class_ in responses[i]["cobot_dialogact_topics"] + if responses[i]["cobot_dialogact_topics"][class_] + == max(responses[i]["cobot_dialogact_topics"].values()) + ] + error_msg1 = ( + f"Predicted cobot topics {predicted_cobot_topics} and da topics {predicted_cobot_da_topics}" + f"not match with sensitive cobot_topics {config['possible_answers']['cobot_topics']}" + f"and sensitive cobot da topics {config['possible_answers']['cobot_dialogact_topics']}" + ) + assert any( + [ + set(predicted_cobot_topics) & set(config["possible_answers"]["cobot_topics"]), + set(predicted_cobot_da_topics) & set(config["possible_answers"]["cobot_dialogact_topics"]), + ] + ), error_msg1 + else: + responses = [j[config["task"]] for j in responses] + for response, answer, sentence in zip(responses, config["answers_bert"], config["sentences"]): + if config.get("multilabel", False): # multilabel_task + predicted_classes = [class_ for class_ in response if response[class_] > 0.5] + else: + predicted_classes = [class_ for class_ in response if response[class_] == max(response.values())] + assert sorted(answer) == sorted(predicted_classes), " * ".join( + [str(j) for j in [sentence, config["task"], answer, predicted_classes, response]] + ) + print("SUCCESS!") + print(time() - t) return 0 diff --git a/annotators/entity_detection/requirements.txt b/annotators/entity_detection/requirements.txt index 57c084b17a..1f8f7e4de8 100644 --- a/annotators/entity_detection/requirements.txt +++ b/annotators/entity_detection/requirements.txt @@ -2,7 +2,7 @@ pyopenssl==22.0.0 Flask==1.1.1 itsdangerous==2.0.1 nltk==3.4.5 -numpy==1.18.0 +setuptools<=65.5.1 gunicorn==19.9.0 requests==2.22.0 
jinja2<=3.0.3 diff --git a/annotators/sentseg_ru/requirements.txt b/annotators/sentseg_ru/requirements.txt index 9650f64b60..3c956e614e 100644 --- a/annotators/sentseg_ru/requirements.txt +++ b/annotators/sentseg_ru/requirements.txt @@ -9,4 +9,4 @@ Werkzeug<=2.0.3 transformers==4.6.0 torch==1.6.0 torchvision==0.7.0 -cryptography==2.8 \ No newline at end of file +cryptography==2.8 diff --git a/annotators/spacy_annotator/requirements.txt b/annotators/spacy_annotator/requirements.txt index dd14077e1c..e8186065ef 100644 --- a/annotators/spacy_annotator/requirements.txt +++ b/annotators/spacy_annotator/requirements.txt @@ -4,6 +4,7 @@ gunicorn==20.0.4 sentry-sdk==0.13.4 requests==2.22.0 spacy==3.2.0 +typer==0.4.1 click<=8.0.4 jinja2<=3.0.3 -Werkzeug<=2.0.3 \ No newline at end of file +Werkzeug<=2.0.3 diff --git a/assistant_dists/dream/dev.yml b/assistant_dists/dream/dev.yml index 0451a44d05..a89f4411eb 100755 --- a/assistant_dists/dream/dev.yml +++ b/assistant_dists/dream/dev.yml @@ -267,6 +267,7 @@ services: volumes: - "./common:/src/common" - "./annotators/combined_classification:/src" + - "~/.deeppavlov:/root/.deeppavlov" ports: - 8087:8087 midas-classification: diff --git a/common/factoid.py b/common/factoid.py index 9148cef68d..70f106ecc9 100644 --- a/common/factoid.py +++ b/common/factoid.py @@ -1,6 +1,7 @@ import re FACTOID_NOTSURE_CONFIDENCE = 0.1 +FACTOID_THRESHOLD = 0.5 FACT_REGEXP = re.compile(r"fact about", re.IGNORECASE) WHAT_REGEXP = re.compile( diff --git a/common/universal_templates.py b/common/universal_templates.py index ddf1c02cd5..31d3d4f3f8 100644 --- a/common/universal_templates.py +++ b/common/universal_templates.py @@ -348,10 +348,17 @@ def if_switch_topic(uttr): def book_movie_music_found(annotated_uttr): - cobot_dialogacts = set(get_topics(annotated_uttr, which="cobot_dialogact_topics")) - named_cobot_dialogacts = {"Entertainment_Books", "Entertainment_Movies", "Entertainment_Music"} - dialogact_met = len(named_cobot_dialogacts & cobot_dialogacts) > 0 - return dialogact_met + topics = set(get_topics(annotated_uttr, which="all")) + target_topics = { + "Entertainment_Books", + "Books&Literature", + "Movies_TV", + "Entertainment_Movies", + "Music", + "Entertainment_Music", + } + target_topic_met = len(target_topics & topics) > 0 + return target_topic_met def is_switch_topic(annotated_uttr): diff --git a/common/utils.py b/common/utils.py index 019dcc2875..a0fcbaef1a 100644 --- a/common/utils.py +++ b/common/utils.py @@ -5,6 +5,7 @@ from random import choice from common.custom_requests import request_triples_wikidata +from common.factoid import FACTOID_THRESHOLD import sentry_sdk logger = logging.getLogger(__name__) @@ -108,9 +109,9 @@ low_priority_intents = {"dont_understand", "what_time", "choose_topic"} -combined_classes = { - "factoid_classification": ["is_factoid", "is_conversational"], - "emotion_classification": ["anger", "fear", "joy", "love", "sadness", "surprise", "neutral"], +combined_classes = { # ORDER MATTERS!!!! DO NOT CHANGE IT!!!! 
+ "emotion_classification": ["anger", "fear", "joy", "disgust", "sadness", "surprise", "neutral"], + "sentiment_classification": ["positive", "neutral", "negative"], "toxic_classification": [ "identity_hate", "insult", @@ -119,9 +120,60 @@ "sexual_explicit", "threat", "toxic", - "not_toxic", ], - "sentiment_classification": ["positive", "negative", "neutral"], + "factoid_classification": ["is_factoid", "is_conversational"], + "midas_classification": [ + "open_question_factual", + "open_question_opinion", + "open_question_personal", + "yes_no_question", + "clarifying_question", + "command", + "dev_command", + "appreciation", + "opinion", + "complaint", + "comment", + "statement", + "other_answers", + "pos_answer", + "neg_answer", + ], + "deeppavlov_topics": [ + "Food", + "Books&Literature", + "Music", + "Gadgets", + "Movies_TV", + "Leisure", + "Beauty", + "Clothes", + "Travel", + "News", + "Art&Hobbies", + "Videogames", + "Job", + "Home&Design", + "Depression", + "Celebrities&Events", + "Politics", + "Toys&Games", + "Animals&Pets", + "PersonalTransport", + "Garden", + "Family&Relationships", + "Health&Medicine", + "Religion", + "ArtificialIntelligence", + "Finance", + "Space", + "Disasters", + "Science_and_Technology", + "Psychology", + "MassTransit", + "Education", + "Sports", + ], "cobot_topics": [ "Phatic", "Other", @@ -174,6 +226,34 @@ ], } +multilabel_tasks = [ + "emotion_classification", + "toxic_classification", +] + +dp_thresholds = { + "Food": 0, + "Movies_TV": 0, + "Leisure": 0, + "Beauty": 0, + "Clothes": 0, + "Depression": 0, + "Celebrities&Events": 0, + "Family&Relationships": 0, + "Health&Medicine": 0, + "Education": 0, + "Sports": 0, + "Books&Literature": 0.3, + "Videogames": 0.3, + "Politics": 0.3, + "ArtificialIntelligence": 0.3, + "MassTransit": 0.3, +} + +thresholds = { + "deeppavlov_topics": {class_: dp_thresholds.get(class_, 0.9) for class_ in combined_classes["deeppavlov_topics"]} +} + midas_classes = { "semantic_request": { "question": [ @@ -491,9 +571,15 @@ def get_all_not_used_templates(used_templates, all_templates): def _probs_to_labels(answer_probs, max_proba=True, threshold=0.5): - answer_labels = [label for label in answer_probs if answer_probs[label] > threshold] - if not answer_labels and max_proba: - answer_labels = [key for key in answer_probs if answer_probs[key] == max(answer_probs.values())] + if isinstance(threshold, dict): + assert len(threshold) == len(answer_probs), f"{threshold} {answer_probs}" + answer_labels = [key for key in answer_probs if answer_probs[key] > threshold[key]] + if max_proba: + answer_labels = [key for key in answer_labels if answer_probs[key] == max(answer_probs.values())] + else: + answer_labels = [label for label in answer_probs if answer_probs[label] > threshold] + if not answer_labels and max_proba: + answer_labels = [key for key in answer_probs if answer_probs[key] == max(answer_probs.values())] return answer_labels @@ -507,7 +593,7 @@ def _labels_to_probs(answer_labels, all_labels): return answer_probs -def _get_combined_annotations(annotated_utterance, model_name): +def _get_combined_annotations(annotated_utterance, model_name, threshold=0.5): answer_probs, answer_labels = {}, [] try: annotations = annotated_utterance["annotations"] @@ -517,11 +603,18 @@ def _get_combined_annotations(annotated_utterance, model_name): if model_name in combined_annotations: answer_probs = combined_annotations[model_name] else: - raise Exception(f"Not found Model name {model_name} in combined annotations {combined_annotations}") - if 
model_name == "toxic_classification" and "factoid_classification" not in combined_annotations: - answer_labels = _probs_to_labels(answer_probs, max_proba=False, threshold=0.5) + logger.warning(f"Not found Model name {model_name} in combined annotations {combined_annotations}") + old_style_toxic = all( + [model_name == "toxic_classification", "factoid_classification" not in combined_annotations] + ) + if model_name in multilabel_tasks or old_style_toxic: + answer_labels = _probs_to_labels(answer_probs, max_proba=False, threshold=threshold) + elif model_name == "factoid_classification" and answer_probs.get("is_factoid", 0) < threshold: + answer_labels = ["is_conversational"] + elif model_name == "deeppavlov_topics": + answer_labels = _probs_to_labels(answer_probs, max_proba=True, threshold=thresholds["deeppavlov_topics"]) else: - answer_labels = _probs_to_labels(answer_probs, max_proba=True, threshold=0.5) + answer_labels = _probs_to_labels(answer_probs, max_proba=True, threshold=threshold) except Exception as e: sentry_sdk.capture_exception(e) logger.exception(e) @@ -553,7 +646,7 @@ def _process_old_sentiment(answer): return answer -def _get_plain_annotations(annotated_utterance, model_name): +def _get_plain_annotations(annotated_utterance, model_name, threshold=0.5): answer_probs, answer_labels = {}, [] try: annotations = annotated_utterance["annotations"] @@ -563,7 +656,7 @@ def _get_plain_annotations(annotated_utterance, model_name): if isinstance(answer, list): if model_name == "sentiment_classification": answer_probs = _process_old_sentiment(answer) - answer_labels = _probs_to_labels(answer_probs, max_proba=True, threshold=0.5) + answer_labels = _probs_to_labels(answer_probs, max_proba=True, threshold=threshold) else: answer_labels = answer answer_probs = _labels_to_probs(answer_labels, combined_classes[model_name]) @@ -571,9 +664,11 @@ def _get_plain_annotations(annotated_utterance, model_name): answer_probs = answer if model_name == "toxic_classification": # this function is only for plain annotations (when toxic_classification is a separate annotator) - answer_labels = _probs_to_labels(answer_probs, max_proba=False, threshold=0.5) + answer_labels = _probs_to_labels(answer_probs, max_proba=False, threshold=threshold) + elif model_name == "factoid_classification" and answer_probs.get("is_factoid", 0) < threshold: + answer_labels = ["is_conversational"] else: - answer_labels = _probs_to_labels(answer_probs, max_proba=True, threshold=0.5) + answer_labels = _probs_to_labels(answer_probs, max_proba=True, threshold=threshold) except Exception as e: logger.warning(e) @@ -589,22 +684,28 @@ def print_combined(combined_output): logger.info(f"Combined classifier output is {combined_output}") -def _get_etc_model(annotated_utterance, model_name, probs, default_probs, default_labels): +def _get_etc_model(annotated_utterance, model_name, probs, default_probs, default_labels, threshold=0.5): """Function to get emotion classifier annotations from annotated utterance. Args: annotated_utterance: dictionary with annotated utterance, or annotations probs: return probabilities or not - default: default value to return. If it is None, returns empty dict/list depending on probs argument + default_probs: default probs to return. + default_labels: default labels to return. 
+ Threshold: threshold for classification Returns: dictionary with emotion probablilties, if probs == True, or emotion labels if probs != True """ try: if model_name in annotated_utterance.get("annotations", {}): - answer_probs, answer_labels = _get_plain_annotations(annotated_utterance, model_name=model_name) + answer_probs, answer_labels = _get_plain_annotations( + annotated_utterance, model_name=model_name, threshold=threshold + ) elif "combined_classification" in annotated_utterance.get("annotations", {}): - answer_probs, answer_labels = _get_combined_annotations(annotated_utterance, model_name=model_name) + answer_probs, answer_labels = _get_combined_annotations( + annotated_utterance, model_name=model_name, threshold=threshold + ) else: answer_probs, answer_labels = default_probs, default_labels except Exception as e: @@ -655,6 +756,7 @@ def get_factoid(annotated_utterance, probs=True, default_probs=None, default_lab probs=probs, default_probs=default_probs, default_labels=default_labels, + threshold=FACTOID_THRESHOLD, ) @@ -708,7 +810,6 @@ def get_emotions(annotated_utterance, probs=True, default_probs=None, default_la def get_topics(annotated_utterance, probs=False, default_probs=None, default_labels=None, which="all"): """Function to get topics from particular annotator or all detected. - Args: annotated_utterance: dictionary with annotated utterance probs: if False we return labels, otherwise we return probs @@ -718,6 +819,7 @@ def get_topics(annotated_utterance, probs=False, default_probs=None, default_lab 'all' means topics by `cobot_topics` and `cobot_dialogact_topics`, 'cobot_topics' means topics by `cobot_topics`, 'cobot_dialogact_topics' means topics by `cobot_dialogact_topics`. + 'deeppavlov_topics' means topics by `deeppavlov_topics`. 
Returns: list of topic labels, if probs == False, @@ -729,7 +831,6 @@ def get_topics(annotated_utterance, probs=False, default_probs=None, default_lab cobot_topics_probs, cobot_topics_labels = {}, [] if "cobot_topics" in annotations: cobot_topics_labels = _process_text(annotations.get("cobot_topics", {})) - cobot_topics_probs = _labels_to_probs(cobot_topics_labels, combined_classes.get("cobot_topics", {})) if "combined_classification" in annotations and not cobot_topics_labels: cobot_topics_probs, cobot_topics_labels = _get_combined_annotations( annotated_utterance, model_name="cobot_topics" @@ -740,9 +841,9 @@ def get_topics(annotated_utterance, probs=False, default_probs=None, default_lab cobot_da_topics_probs, cobot_da_topics_labels = {}, [] if "cobot_dialogact" in annotations and "topics" in annotations["cobot_dialogact"]: - cobot_da_topics_labels = annotated_utterance["annotations"]["cobot_dialogact"]["topics"] + cobot_da_topics_labels = annotations["cobot_dialogact"]["topics"] elif "cobot_dialogact_topics" in annotations: - cobot_da_topics_labels = annotated_utterance["annotations"]["cobot_dialogact_topics"] + cobot_da_topics_labels = annotations["cobot_dialogact_topics"] if "combined_classification" in annotations and not cobot_da_topics_labels: cobot_da_topics_probs, cobot_da_topics_labels = _get_combined_annotations( @@ -752,13 +853,20 @@ def get_topics(annotated_utterance, probs=False, default_probs=None, default_lab if not cobot_da_topics_probs: cobot_da_topics_probs = _labels_to_probs(cobot_da_topics_labels, combined_classes["cobot_dialogact_topics"]) + dp_topics_probs, dp_topics_labels = {}, [] + if "combined_classification" in annotations and not dp_topics_labels: + dp_topics_probs, dp_topics_labels = _get_combined_annotations( + annotated_utterance, model_name="deeppavlov_topics" + ) if which == "all": - answer_labels = cobot_topics_labels + cobot_da_topics_labels - answer_probs = {**cobot_topics_probs, **cobot_da_topics_probs} + answer_labels = cobot_topics_labels + cobot_da_topics_labels + dp_topics_labels + answer_probs = {**cobot_topics_probs, **cobot_da_topics_probs, **dp_topics_probs} elif which == "cobot_topics": answer_probs, answer_labels = cobot_topics_probs, cobot_topics_labels elif which == "cobot_dialogact_topics": answer_probs, answer_labels = cobot_da_topics_probs, cobot_da_topics_labels + elif which == "deeppavlov_topics": + answer_probs, answer_labels = dp_topics_probs, dp_topics_labels else: logger.exception(f"Unknown input type in get_topics: {which}") answer_probs, answer_labels = default_probs, default_labels @@ -771,7 +879,6 @@ def get_topics(annotated_utterance, probs=False, default_probs=None, default_lab def get_intents(annotated_utterance, probs=False, default_probs=None, default_labels=None, which="all"): """Function to get intents from particular annotator or all detected. 
- Args: annotated_utterance: dictionary with annotated utterance probs: if False we return labels, otherwise we return probs @@ -793,8 +900,11 @@ def get_intents(annotated_utterance, probs=False, default_probs=None, default_la intents = annotations.get("intent_catcher", {}) detected_intents = [k for k, v in intents.items() if v.get("detected", 0) == 1] detected_intent_probs = {key: 1 for key in detected_intents} - midas_intent_probs = annotations.get("midas_classification", {}) + if "combined_classification" in annotations and not midas_intent_probs: + midas_intent_probs, midas_intent_labels = _get_combined_annotations( + annotated_utterance, model_name="midas_classification" + ) if isinstance(midas_intent_probs, dict) and midas_intent_probs: semantic_midas_probs = {k: v for k, v in midas_intent_probs.items() if k in MIDAS_SEMANTIC_LABELS} functional_midas_probs = {k: v for k, v in midas_intent_probs.items() if k in MIDAS_FUNCTIONAL_LABELS} diff --git a/dev_requirements.txt b/dev_requirements.txt index 23d0547c37..91b1f6e9d0 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,5 +1,5 @@ sphinx==2.1.2 recommonmark==0.5.0 -sphinx_rtd_theme +sphinx_rtd_theme==1.0.0 Pygments==2.4.2 -flake8 \ No newline at end of file +flake8 diff --git a/response_selectors/convers_evaluation_based_selector/test_data.json b/response_selectors/convers_evaluation_based_selector/test_data.json index 315cbbb3c2..c08ed424ff 100644 --- a/response_selectors/convers_evaluation_based_selector/test_data.json +++ b/response_selectors/convers_evaluation_based_selector/test_data.json @@ -42,7 +42,7 @@ "anger": 0.2401413470506668, "fear": 0.2673250734806061, "joy": 0.25821077823638916, - "love": 0.3577530086040497, + "disgust": 0.3577530086040497, "sadness": 0.21969465911388397, "surprise": 0.2795693576335907, "neutral": 0.9991581439971924 @@ -366,7 +366,7 @@ "anger": 0.2401413470506668, "fear": 0.2673250734806061, "joy": 0.25821077823638916, - "love": 0.3577530086040497, + "disgust": 0.3577530086040497, "sadness": 0.21969465911388397, "surprise": 0.2795693576335907, "neutral": 0.9991581439971924 diff --git a/services/wiki_facts/requirements.txt b/services/wiki_facts/requirements.txt index 3b916aa9b7..337c9e4e48 100644 --- a/services/wiki_facts/requirements.txt +++ b/services/wiki_facts/requirements.txt @@ -8,4 +8,4 @@ jinja2<=3.0.3 Werkzeug<=2.0.3 cryptography==2.8 inflect==5.3.0 -blinker==1.5.0 \ No newline at end of file +blinker==1.5.0 diff --git a/skill_selectors/rule_based_selector/connector.py b/skill_selectors/rule_based_selector/connector.py index 919c97e655..b1f7c79e8b 100644 --- a/skill_selectors/rule_based_selector/connector.py +++ b/skill_selectors/rule_based_selector/connector.py @@ -54,7 +54,7 @@ async def send(self, payload: Dict, callback: Callable): detected_topics = set(get_topics(user_uttr, which="all")) - is_factoid = get_factoid(user_uttr).get("is_factoid", 0.0) > 0.96 + is_factoid = "is_factoid" in get_factoid(user_uttr, probs=False) is_celebrity_mentioned = check_is_celebrity_mentioned(user_uttr) if_choose_topic_detected = if_choose_topic(user_uttr, bot_uttr) diff --git a/skills/dff_animals_skill/requirements.txt b/skills/dff_animals_skill/requirements.txt index d3f78c1587..4adbf2f2b9 100644 --- a/skills/dff_animals_skill/requirements.txt +++ b/skills/dff_animals_skill/requirements.txt @@ -1,2 +1,2 @@ inflect==5.3.0 -spacy==3.0.6 \ No newline at end of file +spacy==3.0.6 diff --git a/skills/dff_bot_persona_skill/requirements.txt b/skills/dff_bot_persona_skill/requirements.txt index 
08b20d98bc..2a4be4471e 100644 --- a/skills/dff_bot_persona_skill/requirements.txt +++ b/skills/dff_bot_persona_skill/requirements.txt @@ -1 +1 @@ -spacy==3.0.6 \ No newline at end of file +spacy==3.0.6 diff --git a/skills/dff_food_skill/requirements.txt b/skills/dff_food_skill/requirements.txt index 08b20d98bc..2a4be4471e 100644 --- a/skills/dff_food_skill/requirements.txt +++ b/skills/dff_food_skill/requirements.txt @@ -1 +1 @@ -spacy==3.0.6 \ No newline at end of file +spacy==3.0.6 diff --git a/skills/dff_movie_skill/dialogflows/flows/templates.py b/skills/dff_movie_skill/dialogflows/flows/templates.py index dc72fab5bd..1748e67c11 100644 --- a/skills/dff_movie_skill/dialogflows/flows/templates.py +++ b/skills/dff_movie_skill/dialogflows/flows/templates.py @@ -10,7 +10,7 @@ from common.movies import extract_movies_names_from_annotations from common.universal_templates import LIKE_PATTERN, NOT_LIKE_PATTERN -from common.utils import get_intents, is_opinion_request +from common.utils import get_intents, is_opinion_request, midas_classes logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO) @@ -397,11 +397,14 @@ def faq(self, dialog): result = [] user_uttr = dialog["human_utterances"][-1]["text"].lower() - intents = get_intents(dialog["human_utterances"][-1], which="cobot_dialogact_intents") + intents = get_intents(dialog["human_utterances"][-1], which="all") opinion_request_detected = is_opinion_request(dialog["human_utterances"][-1]) # favorite movies - if "Information_RequestIntent" in intents or opinion_request_detected: + information_request_detected = any( + [set(midas_classes["semantic_request"]["question"]) & set(intents), "Information_RequestIntent" in intents] + ) + if information_request_detected or opinion_request_detected: if re.search(self.LESSFAVORITE_PATTERN, user_uttr) or re.search(NOT_LIKE_PATTERN, user_uttr): # less favorite movie if re.search( diff --git a/skills/dff_sport_skill/requirements.txt b/skills/dff_sport_skill/requirements.txt index c0a41fa4ce..606c1f781f 100644 --- a/skills/dff_sport_skill/requirements.txt +++ b/skills/dff_sport_skill/requirements.txt @@ -1,3 +1,3 @@ numpy==1.17.2 jinja2<=3.0.3 -Werkzeug<=2.0.3 \ No newline at end of file +Werkzeug<=2.0.3 diff --git a/skills/dff_wiki_skill/requirements.txt b/skills/dff_wiki_skill/requirements.txt index d3f78c1587..4adbf2f2b9 100644 --- a/skills/dff_wiki_skill/requirements.txt +++ b/skills/dff_wiki_skill/requirements.txt @@ -1,2 +1,2 @@ inflect==5.3.0 -spacy==3.0.6 \ No newline at end of file +spacy==3.0.6 diff --git a/skills/dummy_skill_dialog/test_dialog.json b/skills/dummy_skill_dialog/test_dialog.json index 31879d6fa9..a3ae072e36 100644 --- a/skills/dummy_skill_dialog/test_dialog.json +++ b/skills/dummy_skill_dialog/test_dialog.json @@ -43,7 +43,7 @@ "anger": 0.24306881427764893, "fear": 0.26745349168777466, "joy": 0.2533573806285858, - "love": 0.3893212378025055, + "disgust": 0.3893212378025055, "sadness": 0.22046224772930145, "surprise": 0.26883628964424133, "neutral": 0.999222993850708 @@ -267,7 +267,7 @@ "anger": 0.2731664180755615, "fear": 0.2290324866771698, "joy": 0.30796509981155396, - "love": 0.3771042227745056, + "disgust": 0.3771042227745056, "sadness": 0.20978295803070068, "surprise": 0.24185597896575928, "neutral": 0.9992513060569763 @@ -634,7 +634,7 @@ "anger": 0.34788748621940613, "fear": 0.25068363547325134, "joy": 0.5115033388137817, - "love": 0.22743268311023712, + "disgust": 0.22743268311023712, "sadness": 0.3952408730983734, "surprise": 
0.109250009059906, "neutral": 0.9980328679084778 @@ -1028,7 +1028,7 @@ "anger": 0.4007805585861206, "fear": 0.2774290144443512, "joy": 0.3479142487049103, - "love": 0.18069173395633698, + "disgust": 0.18069173395633698, "sadness": 0.17806752026081085, "surprise": 0.36261337995529175, "neutral": 0.9987896084785461 @@ -1394,7 +1394,7 @@ "anger": 0.6923832893371582, "fear": 0.08624736964702606, "joy": 0.9737956523895264, - "love": 0.4466567635536194, + "disgust": 0.4466567635536194, "sadness": 0.6964444518089294, "surprise": 0.025097211822867393, "neutral": 0.7845231294631958 @@ -1822,7 +1822,7 @@ "anger": 0.3024616539478302, "fear": 0.22481660544872284, "joy": 0.6362035274505615, - "love": 0.242213636636734, + "disgust": 0.242213636636734, "sadness": 0.28126445412635803, "surprise": 0.12868888676166534, "neutral": 0.9980267882347107 @@ -2268,7 +2268,7 @@ "anger": 0.39962050318717957, "fear": 0.2144581526517868, "joy": 0.3991815149784088, - "love": 0.27542662620544434, + "disgust": 0.27542662620544434, "sadness": 0.2945675551891327, "surprise": 0.14458975195884705, "neutral": 0.9990655779838562 @@ -2666,7 +2666,7 @@ "anger": 0.30681750178337097, "fear": 0.21370574831962585, "joy": 0.423123300075531, - "love": 0.3117046654224396, + "disgust": 0.3117046654224396, "sadness": 0.3091293275356293, "surprise": 0.1595301628112793, "neutral": 0.9989328980445862 @@ -3000,7 +3000,7 @@ "anger": 0.24859990179538727, "fear": 0.16168983280658722, "joy": 0.34527307748794556, - "love": 0.5824165344238281, + "disgust": 0.5824165344238281, "sadness": 0.18655908107757568, "surprise": 0.23074844479560852, "neutral": 0.9989860653877258 @@ -3310,7 +3310,7 @@ "anger": 0.3416433334350586, "fear": 0.16034436225891113, "joy": 0.6926525235176086, - "love": 0.32836219668388367, + "disgust": 0.32836219668388367, "sadness": 0.3493943214416504, "surprise": 0.10167970508337021, "neutral": 0.9974168539047241 @@ -3652,7 +3652,7 @@ "anger": 0.9946691393852234, "fear": 0.2611047923564911, "joy": 0.18044663965702057, - "love": 0.06932078301906586, + "disgust": 0.06932078301906586, "sadness": 0.8477791547775269, "surprise": 0.08949814736843109, "neutral": 0.3594503700733185 @@ -3900,7 +3900,7 @@ "anger": 0.24306881427764893, "fear": 0.26745349168777466, "joy": 0.2533573806285858, - "love": 0.3893212378025055, + "disgust": 0.3893212378025055, "sadness": 0.22046224772930145, "surprise": 0.26883628964424133, "neutral": 0.999222993850708 @@ -4089,7 +4089,7 @@ "anger": 0.2731664180755615, "fear": 0.2290324866771698, "joy": 0.30796509981155396, - "love": 0.3771042227745056, + "disgust": 0.3771042227745056, "sadness": 0.20978295803070068, "surprise": 0.24185597896575928, "neutral": 0.9992513060569763 @@ -4338,7 +4338,7 @@ "anger": 0.34788748621940613, "fear": 0.25068363547325134, "joy": 0.5115033388137817, - "love": 0.22743268311023712, + "disgust": 0.22743268311023712, "sadness": 0.3952408730983734, "surprise": 0.109250009059906, "neutral": 0.9980328679084778 @@ -4628,7 +4628,7 @@ "anger": 0.4007805585861206, "fear": 0.2774290144443512, "joy": 0.3479142487049103, - "love": 0.18069173395633698, + "disgust": 0.18069173395633698, "sadness": 0.17806752026081085, "surprise": 0.36261337995529175, "neutral": 0.9987896084785461 @@ -4900,7 +4900,7 @@ "anger": 0.6923832893371582, "fear": 0.08624736964702606, "joy": 0.9737956523895264, - "love": 0.4466567635536194, + "disgust": 0.4466567635536194, "sadness": 0.6964444518089294, "surprise": 0.025097211822867393, "neutral": 0.7845231294631958 @@ -5180,7 +5180,7 @@ "anger": 0.3024616539478302, 
"fear": 0.22481660544872284, "joy": 0.6362035274505615, - "love": 0.242213636636734, + "disgust": 0.242213636636734, "sadness": 0.28126445412635803, "surprise": 0.12868888676166534, "neutral": 0.9980267882347107 @@ -5488,7 +5488,7 @@ "anger": 0.39962050318717957, "fear": 0.2144581526517868, "joy": 0.3991815149784088, - "love": 0.27542662620544434, + "disgust": 0.27542662620544434, "sadness": 0.2945675551891327, "surprise": 0.14458975195884705, "neutral": 0.9990655779838562 @@ -5800,7 +5800,7 @@ "anger": 0.30681750178337097, "fear": 0.21370574831962585, "joy": 0.423123300075531, - "love": 0.3117046654224396, + "disgust": 0.3117046654224396, "sadness": 0.3091293275356293, "surprise": 0.1595301628112793, "neutral": 0.9989328980445862 @@ -6071,7 +6071,7 @@ "anger": 0.24859990179538727, "fear": 0.16168983280658722, "joy": 0.34527307748794556, - "love": 0.5824165344238281, + "disgust": 0.5824165344238281, "sadness": 0.18655908107757568, "surprise": 0.23074844479560852, "neutral": 0.9989860653877258 @@ -6323,7 +6323,7 @@ "anger": 0.3416433334350586, "fear": 0.16034436225891113, "joy": 0.6926525235176086, - "love": 0.32836219668388367, + "disgust": 0.32836219668388367, "sadness": 0.3493943214416504, "surprise": 0.10167970508337021, "neutral": 0.9974168539047241 diff --git a/skills/emotion_skill/data/data.json b/skills/emotion_skill/data/data.json index 91af2b3d3f..16cac81a1c 100644 --- a/skills/emotion_skill/data/data.json +++ b/skills/emotion_skill/data/data.json @@ -102,13 +102,20 @@ "next_step":["offered_advice"], "link": "" }, - "love":{ + "disgust":{ "answers": [ - "Oh, you are in a lovely mood, I see. That's cool. I love some drama movies a lot." + "I see that you are in disgust. I am sorry. Maybe we should talk about movies instead?" ], "next_step":[""], "link": "dff_movie_skill" }, + "surprise":{ + "answers": [ + "I see that you are surprised." + ], + "next_step":[""], + "link": "" + }, "anger":{ "answers": [ "Frequent feelings of displeasure have been linked to higher risk of heart disease, high blood pressure, depression and difficulty sleeping. 
" diff --git a/skills/emotion_skill/scenario.py b/skills/emotion_skill/scenario.py index 35345e1eb3..0fd4e35b69 100644 --- a/skills/emotion_skill/scenario.py +++ b/skills/emotion_skill/scenario.py @@ -38,7 +38,7 @@ def __init__(self, steps, jokes, advices, logger): "anger": 0.9, "fear": 0.8, "joy": 0.8, - "love": 0.8, + "disgust": 0.8, "sadness": 0.95, "surprise": 0.8, "neutral": 0, @@ -47,7 +47,7 @@ def __init__(self, steps, jokes, advices, logger): "anger": 0.99999, "fear": 0.5, "joy": 0.5, - "love": 0.5, + "disgust": 0.5, "sadness": 0.6, "surprise": 0.5, "neutral": 0.5, @@ -114,7 +114,7 @@ def _get_reply_and_conf(self, annotated_user_phrase, bot_phrase, emotion, emotio "fear": "fear", "anger": "anger", "surprise": "surprise", - "love": "love", + "disgust": "disgust", } if "emotion_skill" not in human_attr: human_attr["emotion_skill"] = {} diff --git a/skills/emotion_skill/test.py b/skills/emotion_skill/test.py index 5598de6f04..9d1faac42d 100644 --- a/skills/emotion_skill/test.py +++ b/skills/emotion_skill/test.py @@ -24,7 +24,7 @@ def make_input_data(curr_sent, bot_sent, human_attributes=None, emotion=None, in "anger": 0, "fear": 0, "joy": 0, - "love": 0, + "disgust": 0, "sadness": 0.9, "surprise": 0, "neutral": 0, diff --git a/skills/emotion_skill/tests.json b/skills/emotion_skill/tests.json index 0e75172642..201cbe0817 100644 --- a/skills/emotion_skill/tests.json +++ b/skills/emotion_skill/tests.json @@ -6,7 +6,7 @@ "no": {"confidence": 0.0, "detected": 0}, "yes": {"confidence": 0.0, "detected": 0} }, - "emotion": {"anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0.0, "surprise": 0, "neutral": 0}, + "emotion": {"anger": 0, "fear": 0, "joy": 0, "disgust": 0, "sadness": 0.0, "surprise": 0, "neutral": 0}, "results":{"text":""} }, { @@ -16,7 +16,7 @@ "no": {"confidence": 0.0, "detected": 0}, "yes": {"confidence": 0.0, "detected": 0} }, - "emotion": {"anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0.9, "surprise": 0, "neutral": 0}, + "emotion": {"anger": 0, "fear": 0, "joy": 0, "disgust": 0, "sadness": 0.9, "surprise": 0, "neutral": 0}, "results":{"state":["sad_and_lonely"]} }, { @@ -27,7 +27,7 @@ "yes": {"confidence": 0.0, "detected": 0} }, "human_attributes":{"emotion_skill_attributes":{"state":"sad_and_lonely", "emotion":"sadness"}}, - "emotion": {"anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0, "surprise": 0, "neutral": 0}, + "emotion": {"anger": 0, "fear": 0, "joy": 0, "disgust": 0, "sadness": 0, "surprise": 0, "neutral": 0}, "results":{"state":["sad_and_lonely_2"]} }, { @@ -38,7 +38,7 @@ "yes": {"confidence": 0.0, "detected": 0} }, "human_attributes":{"emotion_skill_attributes":{"state":"sad_and_lonely_2", "emotion":"sadness"}}, - "emotion": {"anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0, "surprise": 0, "neutral": 0}, + "emotion": {"anger": 0, "fear": 0, "joy": 0, "disgust": 0, "sadness": 0, "surprise": 0, "neutral": 0}, "results":{"state":["offer_advice"]} }, { @@ -49,7 +49,7 @@ "yes": {"confidence": 1.0, "detected": 1} }, "human_attributes":{"emotion_skill_attributes":{"state":"offered_advice", "emotion":"sadness"}}, - "emotion": {"anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0.0, "surprise": 0, "neutral": 0}, + "emotion": {"anger": 0, "fear": 0, "joy": 0, "disgust": 0, "sadness": 0.0, "surprise": 0, "neutral": 0}, "results":{"state":["offer_another_advice"], "emotion":"sadness"} }, { @@ -60,7 +60,7 @@ "yes": {"confidence": 0.0, "detected": 0} }, "human_attributes":{"emotion_skill_attributes":{"state":"offer_another_advice", 
"emotion":"sadness"}}, - "emotion": {"anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0.0, "surprise": 0, "neutral": 0}, + "emotion": {"anger": 0, "fear": 0, "joy": 0, "disgust": 0, "sadness": 0.0, "surprise": 0, "neutral": 0}, "results":{"state":["offered_advice"], "emotion":"sadness"} }, { @@ -71,7 +71,7 @@ "yes": {"confidence": 0.0, "detected": 0} }, "human_attributes":{"emotion_skill_attributes":{"state":"offered_advice", "emotion":"sadness"}}, - "emotion": {"anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0.0, "surprise": 0, "neutral": 0}, + "emotion": {"anger": 0, "fear": 0, "joy": 0, "disgust": 0, "sadness": 0.0, "surprise": 0, "neutral": 0}, "results":{"state":[""], "emotion":"sadness"} }, { @@ -82,7 +82,7 @@ "yes": {"confidence": 0.0, "detected": 0} }, "human_attributes":{"emotion_skill_attributes":{"state": "joy_link_to_joke", "emotion":"joy"}}, - "emotion": {"anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0.0, "surprise": 0, "neutral": 0}, + "emotion": {"anger": 0, "fear": 0, "joy": 0, "disgust": 0, "sadness": 0.0, "surprise": 0, "neutral": 0}, "results":{"states":["offered_joke", ""], "emotion":"joy"} } ] diff --git a/skills/factoid_qa/requirements.txt b/skills/factoid_qa/requirements.txt index 2995c1500f..eef5f4a646 100644 --- a/skills/factoid_qa/requirements.txt +++ b/skills/factoid_qa/requirements.txt @@ -9,4 +9,4 @@ spacy==2.2.3 https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.5/en_core_web_sm-2.2.5.tar.gz#egg=en_core_web_sm==2.2.5 jinja2<=3.0.3 Werkzeug<=2.0.3 -importlib-metadata<5.0 \ No newline at end of file +importlib-metadata<5.0 diff --git a/skills/factoid_qa/server.py b/skills/factoid_qa/server.py index e06881c019..860b65a72a 100644 --- a/skills/factoid_qa/server.py +++ b/skills/factoid_qa/server.py @@ -28,7 +28,6 @@ use_annotators_output = True FACTOID_DEFAULT_CONFIDENCE = 0.99 # otherwise dummy often beats it ASKED_ABOUT_FACT_PROB = 0.99 -FACTOID_CLASS_THRESHOLD = 0.5 templates_dict = json.load(open("templates_dict.json", "r")) @@ -249,8 +248,7 @@ def respond(): names = [j for j in names + probable_subjects if j in fact_dict.keys()] names = list(set(names)) nounphrases = get_entities(dialog["human_utterances"][-1], only_named=False, with_labels=False) - factoid_conf = get_factoid(uttr) - is_factoid_cls = factoid_conf.get("is_factoid", 0.0) > 0.9 + is_factoid_cls = "is_factoid" in get_factoid(uttr, probs=False) is_factoid = is_factoid_cls and (names or nounphrases) and check_factoid(last_phrase) is_factoid_sents.append(is_factoid) ner_outputs_to_classify.append(names) diff --git a/skills/faq_skill_deepy/requirements.txt b/skills/faq_skill_deepy/requirements.txt index c7b542f3fd..333cd5aa73 100644 --- a/skills/faq_skill_deepy/requirements.txt +++ b/skills/faq_skill_deepy/requirements.txt @@ -9,4 +9,5 @@ deeppavlov==0.14.0 https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.5/en_core_web_sm-2.2.5.tar.gz#egg=en_core_web_sm==2.2.5 jinja2<=3.0.3 Werkzeug<=2.0.3 -cryptography==2.8 \ No newline at end of file +cryptography==2.8 +importlib_metadata<5 diff --git a/skills/game_cooperative_skill/requirements.txt b/skills/game_cooperative_skill/requirements.txt index 92900666d7..6f2506538d 100644 --- a/skills/game_cooperative_skill/requirements.txt +++ b/skills/game_cooperative_skill/requirements.txt @@ -3,8 +3,8 @@ flask==1.1.1 itsdangerous==2.0.1 gunicorn==19.9.0 healthcheck==1.3.3 -numpy==1.18.2 +numpy==1.17.2 programy==4.3 requests==2.22.0 jinja2<=3.0.3 -Werkzeug<=2.0.3 \ No newline at end of file 
+Werkzeug<=2.0.3 diff --git a/skills/game_cooperative_skill/skills/skill_scorer/skill.py b/skills/game_cooperative_skill/skills/skill_scorer/skill.py index 1eb38c4f2a..f3dc034291 100644 --- a/skills/game_cooperative_skill/skills/skill_scorer/skill.py +++ b/skills/game_cooperative_skill/skills/skill_scorer/skill.py @@ -19,7 +19,7 @@ def get_emo_scores(utterances): "anger": 0.0, "fear": 0.0, "joy": 0.0, - "love": 0.0, + "disgust": 0.0, "neutral": 0.0, "sadness": 0.0, "surprise": 0.0, diff --git a/skills/knowledge_grounding_skill/test_configs/test_no_annotations.json b/skills/knowledge_grounding_skill/test_configs/test_no_annotations.json index 16bf98ac04..429442277f 100644 --- a/skills/knowledge_grounding_skill/test_configs/test_no_annotations.json +++ b/skills/knowledge_grounding_skill/test_configs/test_no_annotations.json @@ -40,7 +40,7 @@ "anger": 0.2747260630130768, "fear": 0.2630036473274231, "joy": 0.27582406997680664, - "love": 0.34568631649017334, + "disgust": 0.34568631649017334, "sadness": 0.23109346628189087, "surprise": 0.23477131128311157, "neutral": 0.9991804957389832 @@ -458,7 +458,7 @@ "anger": 0.26627522706985474, "fear": 0.23377177119255066, "joy": 0.27491524815559387, - "love": 0.3455559313297272, + "disgust": 0.3455559313297272, "sadness": 0.23781153559684753, "surprise": 0.2659085988998413, "neutral": 0.9992526173591614 @@ -640,7 +640,7 @@ "anger": 0.2747260630130768, "fear": 0.2630036473274231, "joy": 0.27582406997680664, - "love": 0.34568631649017334, + "disgust": 0.34568631649017334, "sadness": 0.23109346628189087, "surprise": 0.23477131128311157, "neutral": 0.9991804957389832 @@ -1017,7 +1017,7 @@ "anger": 0.26627522706985474, "fear": 0.23377177119255066, "joy": 0.27491524815559387, - "love": 0.3455559313297272, + "disgust": 0.3455559313297272, "sadness": 0.23781153559684753, "surprise": 0.2659085988998413, "neutral": 0.9992526173591614 @@ -1228,4 +1228,4 @@ "channel_type": "cmd_client", "date_start": "2020-04-06 15:41:19.672000", "date_finish": "2020-04-06 15:41:23.456000" -} \ No newline at end of file +} diff --git a/skills/meta_script_skill/requirements.txt b/skills/meta_script_skill/requirements.txt index 1467ae8e1f..564e44259e 100644 --- a/skills/meta_script_skill/requirements.txt +++ b/skills/meta_script_skill/requirements.txt @@ -5,6 +5,7 @@ requests==2.22.0 numpy==1.17.2 sentry-sdk==0.14.1 spacy==2.1.0 +importlib_metadata<5 nltk==3.2.5 jinja2<=3.0.3 -Werkzeug<=2.0.3 \ No newline at end of file +Werkzeug<=2.0.3