diff --git a/annotators/ConveRTBasedNLI/Dockerfile b/annotators/ConveRTBasedNLI/Dockerfile new file mode 100644 index 0000000000..8257b6e737 --- /dev/null +++ b/annotators/ConveRTBasedNLI/Dockerfile @@ -0,0 +1,25 @@ +FROM python:3.9.16-slim + +ARG CONVERT_URL=http://files.deeppavlov.ai/tmp/convert_model.tar.gz +ARG NLI_URL=http://files.deeppavlov.ai/tmp/nli_model.tar.gz +ARG TRAINED_MODEL_PATH +ARG SERVICE_PORT + +ENV TRAINED_MODEL_PATH ${TRAINED_MODEL_PATH} +ENV SERVICE_PORT ${SERVICE_PORT} + +RUN apt-get update && \ + apt-get install -y --allow-unauthenticated wget && \ + rm -rf /var/lib/apt/lists/* + +COPY requirements.txt /src/requirements.txt +RUN pip install -r /src/requirements.txt +COPY . /src +WORKDIR /src + +RUN mkdir /cache /data /data/nli_model/ /data/convert_model/ +RUN wget -c -q $NLI_URL -P /tmp/ && \ + tar -xf /tmp/nli_model.tar.gz -C /data/nli_model/ && \ + wget -c -q $CONVERT_URL -P /tmp/ && \ + tar -xf /tmp/convert_model.tar.gz -C /data/convert_model/ && \ + rm -rf /tmp/ \ No newline at end of file diff --git a/annotators/ConveRTBasedNLI/README.md b/annotators/ConveRTBasedNLI/README.md new file mode 100644 index 0000000000..5fa6bf60e4 --- /dev/null +++ b/annotators/ConveRTBasedNLI/README.md @@ -0,0 +1,11 @@ +This model is designed to solve the Natural Language Inference problem. + +It consists of two parts: +* [ConveRT model](https://arxiv.org/abs/1911.03688) that vectorizes the data +* Custom model consisting of 4 linear layers + +The model was trained on the **Stanford Natural Language Inference** (SNLI) corpus, which contains human-written English sentence pairs with the labels entailment, contradiction, and neutral. + +A pre-trained model is available [here](http://files.deeppavlov.ai/tmp/nli_model.tar.gz). + +To train the model from scratch, omit the TRAINED_MODEL_PATH build argument or set it to _None_. 
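For reference, a minimal usage sketch (not part of the diff): assuming the annotator container is running locally on its default port 8150, the endpoint, payload keys, and response shape below follow the `server.py` and `test.py` files added later in this PR.

```python
# Illustrative only: query the convert-based-nli annotator over HTTP.
# Endpoint, port, and payload keys follow annotators/ConveRTBasedNLI/server.py and test.py.
import requests

payload = {
    # candidate bot responses (hypotheses) to be checked
    "sentences": ["I love dogs", "It's going to be sunny today"],
    # for every candidate, the previous bot utterances it is compared against
    "last_bot_utterances": [["I hate dogs"], []],
}

response = requests.post("http://0.0.0.0:8150/batch_model", json=payload, timeout=10)
# The service replies with [{"batch": [...]}]: one dict per candidate containing
# a "decision" plus probabilities for entailment, neutral, and contradiction.
for candidate_result in response.json()[0]["batch"]:
    print(candidate_result["decision"], candidate_result)
```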
diff --git a/annotators/ConveRTBasedNLI/convert_annotator.py b/annotators/ConveRTBasedNLI/convert_annotator.py new file mode 100644 index 0000000000..b65724919a --- /dev/null +++ b/annotators/ConveRTBasedNLI/convert_annotator.py @@ -0,0 +1,243 @@ +import os +import logging +import numpy as np +import random + +from encoder import Encoder +import tensorflow as tf +import tensorflow_datasets as tfds + + +seed = 1 +os.environ["PYTHONHASHSEED"] = str(seed) +random.seed(seed) +tf.random.set_seed(seed) +np.random.seed(seed) + +logging.basicConfig( + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO +) +logger = logging.getLogger(__name__) + +TRAINED_MODEL_PATH = os.environ.get("TRAINED_MODEL_PATH", None) + + +def data_generation(file_path): + premise = np.load(file_path)["arr_0"][0] + hypothesis = np.load(file_path)["arr_0"][1] + label = np.load(file_path)["arr_1"] + label = label.reshape((len(label), 1)) + return premise, hypothesis, label + + +class DataGenerator(tf.compat.v2.keras.utils.Sequence): + def __init__(self, list_examples, shuffle=False): + self.list_examples = list_examples + self.shuffle = shuffle + self.indexes = None + self.on_epoch_end() + + def __len__(self): + return len(self.list_examples) + + def __getitem__(self, index): + pos = self.indexes[index] + premise, hypothesis, label = data_generation(self.list_examples[pos]) + + return [premise, hypothesis], label + + def on_epoch_end(self): + self.indexes = np.arange(len(self.list_examples)) + if self.shuffle: + np.random.shuffle(self.indexes) + + +class ConveRTAnnotator: + def __init__(self): + self.encoder = Encoder() + self.model = None + + if TRAINED_MODEL_PATH: + self.model_path = TRAINED_MODEL_PATH + "/model.h5" + else: + self.batch_size = 1024 + self.__prepare_data() + self.__create_model() + self.__train_model() + + def __prepare_data(self): + logger.info("The download of SNLI dataset has begun.") + snli_dataset = tfds.text.Snli() + snli_dataset.download_and_prepare(download_dir="/cache") + + datasets = snli_dataset.as_dataset() + train_dataset, test_dataset, val_dataset = ( + datasets["train"], + datasets["test"], + datasets["validation"], + ) + val_dataset = val_dataset.batch(self.batch_size).prefetch( + tf.data.experimental.AUTOTUNE + ) + test_dataset = test_dataset.batch(self.batch_size).prefetch( + tf.data.experimental.AUTOTUNE + ) + train_dataset = train_dataset.batch(self.batch_size).prefetch( + tf.data.experimental.AUTOTUNE + ) + + logger.info("Dataset downloaded.") + + common_path = "/cache/data" + val_path = common_path + "/validation/" + test_path = common_path + "/test/" + train_path = common_path + "/train/" + if not os.path.exists(val_path): + os.makedirs(val_path) + if not os.path.exists(test_path): + os.makedirs(test_path) + if not os.path.exists(train_path): + os.makedirs(train_path) + + logger.info("Started making validation dataset.") + self.__vectorize_data(val_path + "val_", val_dataset) + logger.info("Started making test dataset.") + self.__vectorize_data(test_path + "test_", test_dataset) + logger.info("Started making train dataset.") + self.__vectorize_data(train_path + "train_", train_dataset) + + train_examples = os.listdir(train_path) + train_examples = [train_path + f_name for f_name in train_examples] + test_examples = os.listdir(test_path) + test_examples = [test_path + f_name for f_name in test_examples] + val_examples = os.listdir(val_path) + val_examples = [val_path + f_name for f_name in val_examples] + + self.train_generator = 
DataGenerator(train_examples) + self.test_generator = DataGenerator(test_examples) + self.val_generator = DataGenerator(val_examples) + + logger.info("All datasets have been created.") + + def __vectorize_data(self, data_path, dataset): + counter = 0 + for example in tfds.as_numpy(dataset): + counter += 1 + premise, hypothesis, label = ( + example["premise"], + example["hypothesis"], + example["label"], + ) + + useless_pos = np.where(label == -1)[0] + premise = np.delete(premise, useless_pos) + hypothesis = np.delete(hypothesis, useless_pos) + label = np.delete(label, useless_pos) + + premise_encoded = self.encoder.encode_sentences(premise) + hypothesis_encoded = self.encoder.encode_sentences(hypothesis) + np.savez( + data_path + str(counter), [premise_encoded, hypothesis_encoded], label + ) + + if counter % 10 == 0: + logger.info(f"Prepared {counter} files.") + logger.info("Prepared all files.") + + def __create_model(self): + inp_p = tf.keras.layers.Input(shape=self.batch_size) + inp_h = tf.keras.layers.Input(shape=self.batch_size) + combined = tf.keras.layers.concatenate([inp_p, inp_h]) + linear_1 = tf.keras.layers.Dense(1024, activation="relu")(combined) + dropout_1 = tf.keras.layers.Dropout(0.45)(linear_1) + linear_2 = tf.keras.layers.Dense(512, activation="relu")(dropout_1) + linear_3 = tf.keras.layers.Dense(256, activation="relu")(linear_2) + output = tf.keras.layers.Dense(3, activation="softmax")(linear_3) + + self.model = tf.keras.models.Model(inputs=[inp_p, inp_h], outputs=output) + self.model.compile( + loss=tf.keras.losses.SparseCategoricalCrossentropy(), + optimizer="adam", + metrics=["accuracy"], + ) + + def __train_model(self): + log_dir = "/cache/logs/" + if not os.path.exists(log_dir): + os.makedirs(log_dir) + csv_logger = tf.keras.callbacks.CSVLogger(log_dir + "log.csv") + + ch_path = "/cache/checkpoints" + if not os.path.exists(ch_path): + os.makedirs(ch_path) + ch_path += "/cp-{epoch:04d}.ckpt" + model_checkpoint = tf.keras.callbacks.ModelCheckpoint( + filepath=ch_path, save_weights_only=True + ) + + early_stopping = tf.keras.callbacks.EarlyStopping( + monitor="val_loss", patience=10 + ) + + _ = self.model.fit( + x=self.train_generator, + validation_data=self.val_generator, + use_multiprocessing=True, + workers=6, + epochs=100, + callbacks=[model_checkpoint, csv_logger, early_stopping], + ) + + self.model_path = "/cache/model.h5" + self.model.save(self.model_path) + os.environ["TRAINED_MODEL_PATH"] = self.model_path + logger.info("Model is trained.") + + def candidate_selection(self, candidates, bot_uttr_history, threshold=0.8): + self.model = tf.keras.models.load_model(self.model_path) + labels = {0: "entailment", 1: "neutral", 2: "contradiction"} + base_dict = { + "decision": labels[1], + labels[0]: 0.0, + labels[1]: 1.0, + labels[2]: 0.0, + } + + rez_list = list(base_dict.copy() for _ in range(len(candidates))) + unique_history = {u for b in bot_uttr_history for u in b} + + if unique_history and candidates: + vectorized_candidates = self.__response_encoding(candidates) + vectorized_history = self.__response_encoding(list(unique_history)) + + vectorized_history = dict(zip(unique_history, vectorized_history)) + history_arr = [ + vectorized_history.get(u) for b in bot_uttr_history for u in b + ] + candidates_arr = [] + for i in range(len(candidates)): + candidates_arr.extend( + [vectorized_candidates[i]] * len(bot_uttr_history[i]) + ) + + pred_rez = self.model.predict([history_arr, candidates_arr]) + pred_rez_idx = 0 + for i in range(len(candidates)): + for _ in 
range(len(bot_uttr_history[i])): + row_probab = pred_rez[pred_rez_idx] + if row_probab[2] < threshold: + row_probab[2] = -row_probab[2] + label = int(np.argmax(row_probab, axis=-1)) + if rez_list[i]["decision"] != labels[2]: + rez_list[i] = { + "decision": labels[label], + labels[0]: row_probab[0].astype(float), + labels[1]: row_probab[1].astype(float), + labels[2]: np.abs(row_probab[2]).astype(float), + } + pred_rez_idx += 1 + logger.info(rez_list) + return rez_list + + def __response_encoding(self, responses): + return self.encoder.encode_sentences(responses) diff --git a/annotators/ConveRTBasedNLI/encoder.py b/annotators/ConveRTBasedNLI/encoder.py new file mode 100644 index 0000000000..f893525dcc --- /dev/null +++ b/annotators/ConveRTBasedNLI/encoder.py @@ -0,0 +1,51 @@ +import numpy as np + +import tensorflow as tf +import tensorflow_text +import tensorflow_hub as tfhub + + +tf.compat.v1.disable_eager_execution() +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + +def normalize_vectors(vectors): + vectors = np.vstack(vectors) + norm = np.linalg.norm(vectors, ord=2, axis=-1, keepdims=True) + return vectors / norm + + +class Encoder: + def __init__(self): + self.sess = tf.compat.v1.Session() + self.text_placeholder = tf.compat.v1.placeholder(dtype=tf.string, shape=[None]) + + self.module = tfhub.Module("/data/convert_model") + self.context_encoding_tensor = self.module( + self.text_placeholder, signature="encode_context" + ) + self.encoding_tensor = self.module(self.text_placeholder) + self.response_encoding_tensor = self.module( + self.text_placeholder, signature="encode_response" + ) + + self.sess.run(tf.compat.v1.tables_initializer()) + self.sess.run(tf.compat.v1.global_variables_initializer()) + + def encode_sentences(self, sentences): + vectors = self.sess.run( + self.encoding_tensor, feed_dict={self.text_placeholder: sentences} + ) + return normalize_vectors(vectors) + + def encode_contexts(self, sentences): + vectors = self.sess.run( + self.context_encoding_tensor, feed_dict={self.text_placeholder: sentences} + ) + return normalize_vectors(vectors) + + def encode_responses(self, sentences): + vectors = self.sess.run( + self.response_encoding_tensor, feed_dict={self.text_placeholder: sentences} + ) + return normalize_vectors(vectors) diff --git a/annotators/ConveRTBasedNLI/requirements.txt b/annotators/ConveRTBasedNLI/requirements.txt new file mode 100644 index 0000000000..4d9dd5ac0b --- /dev/null +++ b/annotators/ConveRTBasedNLI/requirements.txt @@ -0,0 +1,13 @@ +tensorflow==2.8.0 +tensorflow_hub==0.12.0 +tensorflow_text==2.8.2 +tensorflow-datasets==4.8.1 +flask==1.1.1 +itsdangerous==2.0.1 +numpy==1.21.6 +gunicorn==19.9.0 +requests==2.22.0 +sentry-sdk==0.12.3 +jinja2<=3.0.3 +Werkzeug<=2.0.3 +protobuf==3.20.3 \ No newline at end of file diff --git a/annotators/ConveRTBasedNLI/server.py b/annotators/ConveRTBasedNLI/server.py new file mode 100644 index 0000000000..31d97913b2 --- /dev/null +++ b/annotators/ConveRTBasedNLI/server.py @@ -0,0 +1,37 @@ +import logging +import time +from os import getenv + +from convert_annotator import ConveRTAnnotator +import sentry_sdk +from flask import Flask, jsonify, request + + +sentry_sdk.init(getenv("SENTRY_DSN")) + +logging.basicConfig( + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO +) +logger = logging.getLogger(__name__) +app = Flask(__name__) +app.config["JSON_SORT_KEYS"] = False + +annotator = ConveRTAnnotator() +logger.info("Annotator is loaded.") + + +@app.route("/batch_model", 
methods=["POST"]) +def respond_batch(): + start_time = time.time() + sentences = request.json.get("sentences", []) + last_bot_utterances = request.json.get("last_bot_utterances", []) + logger.debug(f"Sentences: {sentences}") + logger.debug(f"Last bot utterances: {last_bot_utterances}") + result = annotator.candidate_selection(sentences, last_bot_utterances) + total_time = time.time() - start_time + logger.info(f"convert-based-nli exec time: {round(total_time, 2)} sec") + return jsonify([{"batch": result}]) + + +if __name__ == "__main__": + app.run(debug=False, host="0.0.0.0", port=8150) diff --git a/annotators/ConveRTBasedNLI/service_configs/convert-based-nli/environment.yml b/annotators/ConveRTBasedNLI/service_configs/convert-based-nli/environment.yml new file mode 100644 index 0000000000..e200334473 --- /dev/null +++ b/annotators/ConveRTBasedNLI/service_configs/convert-based-nli/environment.yml @@ -0,0 +1,4 @@ +SERVICE_PORT: 8150 +TRAINED_MODEL_PATH: /data/nli_model +SERVICE_NAME: convert_based_nli +FLASK_APP: server diff --git a/annotators/ConveRTBasedNLI/service_configs/convert-based-nli/service.yml b/annotators/ConveRTBasedNLI/service_configs/convert-based-nli/service.yml new file mode 100644 index 0000000000..bdf034f666 --- /dev/null +++ b/annotators/ConveRTBasedNLI/service_configs/convert-based-nli/service.yml @@ -0,0 +1,28 @@ +name: convert-based-nli +endpoints: +- batch_model +compose: + env_file: + - .env + build: + args: + SERVICE_PORT: 8150 + SERVICE_NAME: convert_based_nli + TRAINED_MODEL_PATH: /data/nli_model + FLASK_APP: server + context: annotators/ConveRTBasedNLI/ + command: flask run -h 0.0.0.0 -p 8150 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 1.5G + reservations: + memory: 1.5G + volumes: + - ./annotators/ConveRTBasedNLI:/src + - ./common:/src/common + ports: + - 8150:8150 +proxy: null \ No newline at end of file diff --git a/annotators/ConveRTBasedNLI/test.py b/annotators/ConveRTBasedNLI/test.py new file mode 100644 index 0000000000..c14e3a59b8 --- /dev/null +++ b/annotators/ConveRTBasedNLI/test.py @@ -0,0 +1,76 @@ +import requests + + +def main(): + url = "http://0.0.0.0:8150/batch_model" + + input_data = { + "sentences": [ + "Do you like ice cream?", + "It's going to be sunny today", + "I love dogs", + "Do you want to know some interesting fact?", + "Wolves have small teeth", + ], + "last_bot_utterances": [ + ["I hate dogs", "The moon is a satellite of the earth"], + [], + [ + "I hate dogs", + "Wolves have big teeth", + "The moon is a satellite of the earth", + ], + ["The moon is a satellite of the earth"], + ["Wolves have big teeth", "The moon is a satellite of the earth"], + ], + } + desired_output = [ + { + "decision": "neutral", + "entailment": 0.0019908840768039227, + "neutral": 0.7070657014846802, + "contradiction": 0.2909433841705322, + }, + { + "decision": "neutral", + "entailment": 0.0, + "neutral": 1.0, + "contradiction": 0.0, + }, + { + "decision": "contradiction", + "entailment": 2.6359959974797675e-06, + "neutral": 0.0002536950050853193, + "contradiction": 0.999743640422821, + }, + { + "decision": "neutral", + "entailment": 0.014720427803695202, + "neutral": 0.9783505797386169, + "contradiction": 0.0069289617240428925, + }, + { + "decision": "contradiction", + "entailment": 0.0019739873241633177, + "neutral": 0.0290225762873888, + "contradiction": 0.9690034985542297, + }, + ] + + result = requests.post(url, json=input_data).json() + + for rez in desired_output: + for k, v in rez.items(): + if type(v) == float: + rez[k] 
= round(v, 2) + + for rez in result[0]["batch"]: + for k, v in rez.items(): + if type(v) == float: + rez[k] = round(v, 2) + assert result[0]["batch"] == desired_output + print("Successfully predicted contradiction!") + + +if __name__ == "__main__": + main() diff --git a/annotators/ConveRTBasedNLI/test.sh b/annotators/ConveRTBasedNLI/test.sh new file mode 100644 index 0000000000..61672db785 --- /dev/null +++ b/annotators/ConveRTBasedNLI/test.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python test.py diff --git a/assistant_dists/dream/dev.yml b/assistant_dists/dream/dev.yml index 5364ec9961..7c7696a4c3 100644 --- a/assistant_dists/dream/dev.yml +++ b/assistant_dists/dream/dev.yml @@ -454,6 +454,11 @@ services: - "~/.deeppavlov/cache:/root/.cache" ports: - 8102:8102 + convert-based-nli: + volumes: + - "./annotators/ConveRTBasedNLI:/src" + ports: + - 8150:8150 dff-template-skill: volumes: - "./skills/dff_template_skill:/src" diff --git a/assistant_dists/dream/docker-compose.override.yml b/assistant_dists/dream/docker-compose.override.yml index 12664f15a6..86fb84ec36 100644 --- a/assistant_dists/dream/docker-compose.override.yml +++ b/assistant_dists/dream/docker-compose.override.yml @@ -1386,6 +1386,22 @@ services: memory: 4G reservations: memory: 4G + + convert-based-nli: + env_file: [.env] + build: + args: + TRAINED_MODEL_PATH: /data/nli_model + context: ./annotators/ConveRTBasedNLI/ + command: flask run -h 0.0.0.0 -p 8150 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 1.5G + reservations: + memory: 1.5G dff-template-skill: env_file: [ .env ] @@ -1402,4 +1418,5 @@ services: memory: 128M reservations: memory: 128M + version: '3.7' diff --git a/assistant_dists/dream/pipeline_conf.json b/assistant_dists/dream/pipeline_conf.json index 8b076b77f8..7047c10587 100644 --- a/assistant_dists/dream/pipeline_conf.json +++ b/assistant_dists/dream/pipeline_conf.json @@ -705,6 +705,25 @@ "component": "components/PbLNvh4hrvs47rPaf2bfYQ.yml", "service": "annotators/combined_classification/service_configs/combined-classification" } + }, + "convert_based_nli": { + "connector": { + "protocol": "http", + "timeout": 10, + "url": "http://convert-based-nli:8150/batch_model" + }, + "dialog_formatter": "state_formatters.dp_formatters:convert_nli_hypotheses_annotator_formatter", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "skills" + ], + "state_manager_method": "add_hypothesis_annotation_batch", + "is_enabled": true, + "source": { + "directory": "annotators/ConveRTBasedNLI", + "container": "convert-based-nli", + "endpoint": "batch_model" + } } }, "skill_selectors": { diff --git a/common/utils.py b/common/utils.py index 0177170916..61e5528eb8 100644 --- a/common/utils.py +++ b/common/utils.py @@ -1308,6 +1308,12 @@ def is_toxic_or_badlisted_utterance(annotated_utterance): return is_toxic_utterance(annotated_utterance) or is_badlisted_utterance(annotated_utterance) +def is_contradiction_utterance(annotated_utterance): + contradiction_result = annotated_utterance.get("annotations", {}).get("convert_based_nli", {}).get("decision", "") + + return "contradiction" in contradiction_result + + FACTOID_PATTERNS = re.compile( r"^(do you know |((can |could )you )tell me )?(please )?" 
r"((what|who|which|where) (is|are|was|were)\b|how to\b|when)", diff --git a/components.tsv b/components.tsv index 189889c450..d021638fd6 100644 --- a/components.tsv +++ b/components.tsv @@ -151,7 +151,7 @@ 8147 dff-universal-prompted-skill 8148 8149 transformers-lm-llama7bru -8150 +8150 convert-based-nli 8151 dff-dream-persona-llama7bru-prompted-skill 8152 dff-deepy-prompted-skill 8153 diff --git a/components/I90h9nwf9IWI9WEneLdT.yml b/components/I90h9nwf9IWI9WEneLdT.yml new file mode 100644 index 0000000000..055696e2f1 --- /dev/null +++ b/components/I90h9nwf9IWI9WEneLdT.yml @@ -0,0 +1,24 @@ +name: convert-based-nli +display_name: ConveRT based NLI +container_name: convert-based-nli +component_type: null +model_type: NN-based +is_customizable: false +author: DeepPavlov +description: Defines wheather 2 sentences are correlated as entailment, neutral or contradiction +ram_usage: 1.5G +gpu_usage: null +connector: + protocol: http + timeout: 2.0 + url: http://convert-based-nli:8150/batch_model +dialog_formatter: state_formatters.dp_formatters:convert_nli_hypotheses_annotator_formatter +response_formatter: state_formatters.dp_formatters:simple_formatter_service +previous_services: +- skills +required_previous_services: null +state_manager_method: add_hypothesis_annotation_batch +tags: null +endpoint: batch_model +service: annotators/ConveRTBasedNLI/service_configs/convert-based-nli +date_created: '2023-06-05T09:45:32' \ No newline at end of file diff --git a/response_selectors/convers_evaluation_based_selector/server.py b/response_selectors/convers_evaluation_based_selector/server.py index a450b5a82d..e67a9cb49e 100644 --- a/response_selectors/convers_evaluation_based_selector/server.py +++ b/response_selectors/convers_evaluation_based_selector/server.py @@ -15,12 +15,17 @@ from nltk.tokenize import sent_tokenize from common.greeting import greeting_spec, HI_THIS_IS_DREAM -from common.universal_templates import if_chat_about_particular_topic, if_choose_topic, DUMMY_DONTKNOW_RESPONSES +from common.universal_templates import ( + if_chat_about_particular_topic, + if_choose_topic, + DUMMY_DONTKNOW_RESPONSES, +) from common.utils import ( get_intent_name, low_priority_intents, substitute_nonwords, is_toxic_or_badlisted_utterance, + is_contradiction_utterance, ) from common.response_selection import ACTIVE_SKILLS from tag_based_selection import tag_based_response_selection @@ -39,12 +44,16 @@ sentry_sdk.init(getenv("SENTRY_DSN")) -logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.DEBUG) +logging.basicConfig( + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.DEBUG +) logger = logging.getLogger(__name__) app = Flask(__name__) -CALL_BY_NAME_PROBABILITY = float(getenv("CALL_BY_NAME_PROBABILITY", 0.5)) # if name is already known +CALL_BY_NAME_PROBABILITY = float( + getenv("CALL_BY_NAME_PROBABILITY", 0.5) +) # if name is already known TAG_BASED_SELECTION = getenv("TAG_BASED_SELECTION", False) MOST_DUMMY_RESPONSES = [ "I really do not know what to answer.", @@ -53,6 +62,8 @@ ] LANGUAGE = getenv("LANGUAGE", "EN") GREETING_FIRST = int(getenv("GREETING_FIRST", 1)) +TOXIC_FILTERING = getenv("TOXIC_FILTERING", True) +CONTRADICTION_FILTERING = getenv("CONTRADICTION_FILTERING", True) @app.route("/respond", methods=["POST"]) @@ -61,7 +72,9 @@ def respond(): st_time = time.time() dialogs_batch = request.json["dialogs"] - all_prev_active_skills_batch = request.json.get("all_prev_active_skills", [[]] * len(dialogs_batch)) + 
all_prev_active_skills_batch = request.json.get( + "all_prev_active_skills", [[]] * len(dialogs_batch) + ) selected_skill_names = [] selected_texts = [] @@ -70,10 +83,12 @@ def respond(): selected_bot_attributes = [] selected_attributes = [] - for i, (dialog, all_prev_active_skills) in enumerate(zip(dialogs_batch, all_prev_active_skills_batch)): + for i, (dialog, all_prev_active_skills) in enumerate( + zip(dialogs_batch, all_prev_active_skills_batch) + ): curr_confidences = [] curr_scores = [] - curr_is_toxics = [] + curr_is_toxics_or_contr = [] try: curr_candidates = dialog["human_utterances"][-1]["hypotheses"] @@ -88,16 +103,28 @@ def respond(): curr_confidences += [skill_data["confidence"]] if skill_data["text"] and skill_data["confidence"]: if not skill_data.get("annotations"): - logger.warning(f"Valid skill data without annotations: {skill_data}") + logger.warning( + f"Valid skill data without annotations: {skill_data}" + ) is_toxic_utterance = is_toxic_or_badlisted_utterance(skill_data) - curr_is_toxics.append(is_toxic_utterance) + is_contr_utterance = is_contradiction_utterance(skill_data) + + is_toxic_or_contr_utterance = False + if is_toxic_utterance and TOXIC_FILTERING: + is_toxic_or_contr_utterance = is_toxic_utterance + if is_contr_utterance and CONTRADICTION_FILTERING: + is_toxic_or_contr_utterance = is_contr_utterance + + curr_is_toxics_or_contr.append(is_toxic_or_contr_utterance) if is_toxic_utterance: with sentry_sdk.push_scope() as scope: scope.set_extra("utterance", skill_data["text"]) scope.set_extra("selected_skills", skill_data) - sentry_sdk.capture_message("response selector got candidate with badlisted phrases") + sentry_sdk.capture_message( + "response selector got candidate with badlisted phrases" + ) msg = ( "response selector got candidate with badlisted phrases:\n" f"utterance: {skill_data['text']}\n" @@ -105,19 +132,42 @@ def respond(): ) logger.info(msg) + if is_contr_utterance: + with sentry_sdk.push_scope() as scope: + scope.set_extra("utterance", skill_data["text"]) + scope.set_extra("selected_skills", skill_data) + sentry_sdk.capture_message( + "response selector got contradicting candidate" + ) + msg = ( + "response selector got contradicting candidate:\n" + f"utterance: {skill_data['text']}\n" + f"skill name: {skill_data['skill_name']}" + ) + logger.info(msg) + curr_scores += [ - calculate_single_evaluator_score(skill_data.get("annotations"), skill_data["confidence"]) + calculate_single_evaluator_score( + skill_data.get("annotations"), skill_data["confidence"] + ) ] - curr_is_toxics = np.array(curr_is_toxics) + curr_is_toxics_or_contr = np.array(curr_is_toxics_or_contr) curr_scores = np.array(curr_scores) curr_confidences = np.array(curr_confidences) # now we collected all current candidates and their annotations. select response among them - best_skill_name, best_text, best_confidence, best_human_attrs, best_bot_attrs, best_attrs = select_response( + ( + best_skill_name, + best_text, + best_confidence, + best_human_attrs, + best_bot_attrs, + best_attrs, + ) = select_response( curr_candidates, curr_scores, curr_confidences, - curr_is_toxics, + curr_is_toxics_or_contr, dialog, all_prev_active_skills, ) @@ -125,10 +175,14 @@ def respond(): logger.exception(e) sentry_sdk.capture_exception(e) if dialog["human_utterances"][-1].get("hypotheses", []): - logger.info("Response Selector Error: randomly choosing final response among hypotheses.") + logger.info( + "Response Selector Error: randomly choosing final response among hypotheses." 
+ ) best_cand = random.choice(dialog["human_utterances"][-1]["hypotheses"]) else: - logger.info("Response Selector Error: randomly choosing response among dummy responses.") + logger.info( + "Response Selector Error: randomly choosing response among dummy responses." + ) best_cand = { "text": random.choice(DUMMY_DONTKNOW_RESPONSES[LANGUAGE]), "confidence": 0.1, @@ -177,7 +231,13 @@ def respond(): def rule_score_based_selection( - dialog, candidates, scores, confidences, is_toxics, bot_utterances, all_prev_active_skills + dialog, + candidates, + scores, + confidences, + is_toxics, + bot_utterances, + all_prev_active_skills, ): curr_single_scores = [] @@ -199,17 +259,30 @@ def rule_score_based_selection( factoid_index = skill_names.index("factoid_qa") logging.debug("factoid") logging.debug(str(candidates[factoid_index])) - if "not sure" in candidates[factoid_index] and candidates[factoid_index]["not sure"]: + if ( + "not sure" in candidates[factoid_index] + and candidates[factoid_index]["not sure"] + ): not_sure_factoid = True for i in range(len(scores)): curr_score = None - is_misheard = misheard_with_spec1 in candidates[i]["text"] or misheard_with_spec2 in candidates[i]["text"] + is_misheard = ( + misheard_with_spec1 in candidates[i]["text"] + or misheard_with_spec2 in candidates[i]["text"] + ) intent_name = get_intent_name(candidates[i]["text"]) - is_intent_candidate = (skill_names[i] in ["dff_intent_responder_skill", "dff_program_y_skill"]) and intent_name - is_intent_candidate = is_intent_candidate and intent_name not in low_priority_intents + is_intent_candidate = ( + skill_names[i] in ["dff_intent_responder_skill", "dff_program_y_skill"] + ) and intent_name + is_intent_candidate = ( + is_intent_candidate and intent_name not in low_priority_intents + ) # print("is intent candidate? " + str(is_intent_candidate), flush=True) - if len(dialog["human_utterances"]) == 1 and greeting_spec[LANGUAGE] not in candidates[i]["text"]: + if ( + len(dialog["human_utterances"]) == 1 + and greeting_spec[LANGUAGE] not in candidates[i]["text"] + ): logger.info("Dialog Beginning detected.") if ( if_chat_about_particular_topic(dialog["utterances"][0]) @@ -220,50 +293,86 @@ def rule_score_based_selection( if skill_names[i] == "factoid_qa": logger.info("Particular topic. Facts + Greeting to very big score.") # I don't have an opinion on that but I know some facts. - resp = candidates[i]["text"].replace("I don't have an opinion on that but I know some facts.", "") + resp = candidates[i]["text"].replace( + "I don't have an opinion on that but I know some facts.", "" + ) candidates[i]["text"] = f"{HI_THIS_IS_DREAM[LANGUAGE]} {resp}" curr_score = very_big_score - elif skill_names[i] == "meta_script_skill" and len(candidates[i]["text"]) > 0 and confidences[i] > 0.98: - logger.info("Particular topic. meta_script_skill + Greeting to very big score.") + elif ( + skill_names[i] == "meta_script_skill" + and len(candidates[i]["text"]) > 0 + and confidences[i] > 0.98 + ): + logger.info( + "Particular topic. meta_script_skill + Greeting to very big score." + ) # I don't have an opinion on that but I know some facts. resp = candidates[i]["text"] candidates[i]["text"] = f"{HI_THIS_IS_DREAM[LANGUAGE]} {resp}" curr_score = very_big_score elif skill_names[i] == "small_talk_skill": - logger.info("Particular topic. Small-talk + Greeting NOT to very big score.") + logger.info( + "Particular topic. Small-talk + Greeting NOT to very big score." 
+ ) # for now do not give small talk a very big score here - candidates[i]["text"] = f"{HI_THIS_IS_DREAM[LANGUAGE]} {candidates[i]['text']}" + candidates[i][ + "text" + ] = f"{HI_THIS_IS_DREAM[LANGUAGE]} {candidates[i]['text']}" # curr_score = very_big_score - elif if_choose_topic(dialog["utterances"][0]) and "about it" not in dialog["utterances"][0]["text"].lower(): + elif ( + if_choose_topic(dialog["utterances"][0]) + and "about it" not in dialog["utterances"][0]["text"].lower() + ): logger.info("User wants bot to choose the topic") # if user says `let's chat about something` if skill_names[i] == "small_talk_skill": logger.info("No topic. Small-talk + Greeting to very big score.") - candidates[i]["text"] = f"{HI_THIS_IS_DREAM[LANGUAGE]} {candidates[i]['text']}" + candidates[i][ + "text" + ] = f"{HI_THIS_IS_DREAM[LANGUAGE]} {candidates[i]['text']}" curr_score = very_big_score - elif skill_names[i] == "meta_script_skill" and len(candidates[i]["text"]) > 0: + elif ( + skill_names[i] == "meta_script_skill" + and len(candidates[i]["text"]) > 0 + ): logger.info("No topic. Meta-script + Greeting to very big score.") - candidates[i]["text"] = f"{HI_THIS_IS_DREAM[LANGUAGE]} {candidates[i]['text']}" + candidates[i][ + "text" + ] = f"{HI_THIS_IS_DREAM[LANGUAGE]} {candidates[i]['text']}" curr_score = very_big_score else: logger.info("User just wants to talk.") # if user says something else - if skill_names[i] == "program_y" and greeting_spec[LANGUAGE] in candidates[i]["text"]: + if ( + skill_names[i] == "program_y" + and greeting_spec[LANGUAGE] in candidates[i]["text"] + ): logger.info("Just chat. Program-y to very big score.") curr_score = very_big_score elif ( skill_names[i] == "dff_friendship_skill" - and (how_are_you_spec in candidates[i]["text"] or what_i_can_do_spec in candidates[i]["text"]) + and ( + how_are_you_spec in candidates[i]["text"] + or what_i_can_do_spec in candidates[i]["text"] + ) and len(dialog["utterances"]) < 16 ): curr_score = very_big_score - elif skill_names[i] == "dff_friendship_skill" and greeting_spec[LANGUAGE] in candidates[i]["text"]: + elif ( + skill_names[i] == "dff_friendship_skill" + and greeting_spec[LANGUAGE] in candidates[i]["text"] + ): if len(dialog["utterances"]) < 2: curr_score = very_big_score else: - confidences[i] = 0.2 # Low confidence for greeting in the middle of dialogue + confidences[ + i + ] = 0.2 # Low confidence for greeting in the middle of dialogue # we don't have 'cobotqa' anymore; instead we have factoid_qa - elif skill_names[i] in ["factoid_qa"] and "Here's something I found on the web." in candidates[i]["text"]: + elif ( + skill_names[i] in ["factoid_qa"] + and "Here's something I found on the web." in candidates[i]["text"] + ): confidences[i] = 0.6 elif ( skill_names[i] == "factoid_qa" @@ -278,17 +387,33 @@ def rule_score_based_selection( curr_score = very_big_score elif is_intent_candidate: curr_score = very_big_score - elif skill_names[i] in ["dummy_skill", "convert_reddit", "alice", "eliza", "tdidf_retrieval", "program_y"]: - if "question" in candidates[i].get("type", "") or "?" in candidates[i]["text"]: + elif skill_names[i] in [ + "dummy_skill", + "convert_reddit", + "alice", + "eliza", + "tdidf_retrieval", + "program_y", + ]: + if ( + "question" in candidates[i].get("type", "") + or "?" in candidates[i]["text"] + ): penalty_start_utt = 1 if skill_names[i] == "program_y": penalty_start_utt = 4 n_questions = 0 - if len(bot_utterances) >= penalty_start_utt and "?" 
in bot_utterances[-1]: + if ( + len(bot_utterances) >= penalty_start_utt + and "?" in bot_utterances[-1] + ): confidences[i] /= 1.5 n_questions += 1 - if len(bot_utterances) >= penalty_start_utt + 1 and "?" in bot_utterances[-2]: + if ( + len(bot_utterances) >= penalty_start_utt + 1 + and "?" in bot_utterances[-2] + ): confidences[i] /= 1.1 n_questions += 1 if n_questions == 2: @@ -298,7 +423,9 @@ def rule_score_based_selection( if "link_to_for_response_selector" in candidates[i].get("type", ""): link_to_question = candidates[i]["text"] link_to_human_attrs = candidates[i].get("human_attributes", {}) - if skill_names[i] == "dummy_skill" and "question" in candidates[i].get("type", ""): + if skill_names[i] == "dummy_skill" and "question" in candidates[i].get( + "type", "" + ): dummy_question = candidates[i]["text"] dummy_question_human_attr = candidates[i].get("human_attributes", {}) @@ -314,22 +441,32 @@ def rule_score_based_selection( confidence = confidences[i] skill_name = skill_names[i] logger.info( - f"Skill {skill_name} has final score: {score}. Confidence: {confidence}. " f"Toxicity: {is_toxics[i]}" + f"Skill {skill_name} has final score: {score}. Confidence: {confidence}. " + f"Toxicity: {is_toxics[i]}" ) curr_single_scores.append(score) else: score = scores[i] skill_name = skill_names[i] - logger.info(f"Skill {skill_name} has final score: {score}. " f"Toxicity: {is_toxics[i]}") + logger.info( + f"Skill {skill_name} has final score: {score}. " + f"Toxicity: {is_toxics[i]}" + ) curr_single_scores.append(score) highest_conf_exist = True if any(confidences >= 1.0) else False if highest_conf_exist: logger.info("Found skill with the highest confidence.") for j in range(len(candidates)): - if highest_conf_exist and confidences[j] < 1.0 and curr_single_scores[j] < very_big_score: + if ( + highest_conf_exist + and confidences[j] < 1.0 + and curr_single_scores[j] < very_big_score + ): # need to drop this candidates - logger.info(f"Dropping {skill_names[j]} which does not have a highest confidence or `very big score`.") + logger.info( + f"Dropping {skill_names[j]} which does not have a highest confidence or `very big score`." 
+ ) curr_single_scores[j] = very_low_score best_id = np.argmax(curr_single_scores) @@ -351,16 +488,24 @@ def rule_score_based_selection( return best_candidate, best_id, curr_single_scores -def select_response(candidates, scores, confidences, is_toxics, dialog, all_prev_active_skills=None): +def select_response( + candidates, scores, confidences, is_toxics, dialog, all_prev_active_skills=None +): # TOXICITY & BADLISTS checks - n_toxic_candidates, scores, confidences = downscore_toxic_badlisted_responses(scores, confidences, is_toxics) + n_toxic_candidates, scores, confidences = downscore_toxic_badlisted_responses( + scores, confidences, is_toxics + ) if n_toxic_candidates == len(candidates): # the most dummy заглушка на случай, когда все абсолютно скиллы вернули токсичные ответы return None, np.random.choice(DUMMY_DONTKNOW_RESPONSES[LANGUAGE]), 1.0, {}, {} # REPEAT checks - bot_utterances = [sent_tokenize(uttr["text"].lower()) for uttr in dialog["bot_utterances"]] - prev_large_utterances = [[sent] for utt in bot_utterances[:-15] for sent in utt if len(sent) >= 40] + bot_utterances = [ + sent_tokenize(uttr["text"].lower()) for uttr in dialog["bot_utterances"] + ] + prev_large_utterances = [ + [sent] for utt in bot_utterances[:-15] for sent in utt if len(sent) >= 40 + ] bot_utterances = prev_large_utterances + bot_utterances[-15:] # flatten 2d list to 1d list of all appeared sentences of bot replies bot_utterances = sum(bot_utterances, []) @@ -369,12 +514,23 @@ def select_response(candidates, scores, confidences, is_toxics, dialog, all_prev if TAG_BASED_SELECTION: logger.info("Tag based selection") best_candidate, best_id, curr_single_scores = tag_based_response_selection( - dialog, candidates, scores, confidences, bot_utterances, all_prev_active_skills + dialog, + candidates, + scores, + confidences, + bot_utterances, + all_prev_active_skills, ) else: logger.info("Confidence & ConvEvaluationAnnotator Scores based selection") best_candidate, best_id, curr_single_scores = rule_score_based_selection( - dialog, candidates, scores, confidences, is_toxics, bot_utterances, all_prev_active_skills + dialog, + candidates, + scores, + confidences, + is_toxics, + bot_utterances, + all_prev_active_skills, ) logger.info(f"Best candidate: {best_candidate}") @@ -384,7 +540,11 @@ def select_response(candidates, scores, confidences, is_toxics, dialog, all_prev best_human_attributes = best_candidate.get("human_attributes", {}) best_bot_attributes = best_candidate.get("bot_attributes", {}) - if len(dialog["bot_utterances"]) == 0 and greeting_spec[LANGUAGE] not in best_text and GREETING_FIRST: + if ( + len(dialog["bot_utterances"]) == 0 + and greeting_spec[LANGUAGE] not in best_text + and GREETING_FIRST + ): # add greeting to the first bot uttr, if it's not already included best_text = f"{HI_THIS_IS_DREAM[LANGUAGE]} {best_text}" @@ -399,7 +559,10 @@ def select_response(candidates, scores, confidences, is_toxics, dialog, all_prev if sum(curr_single_scores) == 0.0: break - if dialog["human"]["profile"].get("name", False) and best_skill_name != "personal_info_skill": + if ( + dialog["human"]["profile"].get("name", False) + and best_skill_name != "personal_info_skill" + ): name = dialog["human"]["profile"].get("name", False) if len(dialog["bot_utterances"]) >= 1: if re.search(r"\b" + name + r"\b", dialog["bot_utterances"][-1]["text"]): @@ -423,7 +586,14 @@ def select_response(candidates, scores, confidences, is_toxics, dialog, all_prev candidates[best_id].pop("annotations", {}) best_attrs = candidates[best_id] - 
return best_skill_name, best_text, best_confidence, best_human_attributes, best_bot_attributes, best_attrs + return ( + best_skill_name, + best_text, + best_confidence, + best_human_attributes, + best_bot_attributes, + best_attrs, + ) if __name__ == "__main__": diff --git a/state_formatters/dp_formatters.py b/state_formatters/dp_formatters.py index 0734ab3e49..af942487b3 100755 --- a/state_formatters/dp_formatters.py +++ b/state_formatters/dp_formatters.py @@ -407,6 +407,14 @@ def last_utt_and_history_dialog(dialog: Dict) -> List: ] +def convert_nli_hypotheses_annotator_formatter(dialog: Dict) -> List[Dict]: + # Used by: convert_based_nli candidate annotators + hypotheses = dialog["human_utterances"][-1]["hypotheses"] + hypots = [h["text"] for h in hypotheses] + last_bot_utterances = [u["text"] for u in dialog["bot_utterances"][-20:]] + return [{"sentences": hypots, "last_bot_utterances": [last_bot_utterances] * len(hypots)}] + + def convers_evaluator_annotator_formatter(dialog: Dict) -> List[Dict]: dialog = utils.get_last_n_turns(dialog) dialog = utils.remove_clarification_turns_from_dialog(dialog)
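To make the new wiring concrete, here is a small, self-contained sketch (illustrative only, using a made-up dialog state) of what `convert_nli_hypotheses_annotator_formatter` produces for the annotator's `batch_model` endpoint: every hypothesis of the last human utterance is paired with the same list of up to 20 recent bot utterances.

```python
# Illustrative sketch: the formatter logic from the diff applied to a hypothetical dialog.
from typing import Dict, List


def convert_nli_hypotheses_annotator_formatter(dialog: Dict) -> List[Dict]:
    hypotheses = dialog["human_utterances"][-1]["hypotheses"]
    hypots = [h["text"] for h in hypotheses]
    last_bot_utterances = [u["text"] for u in dialog["bot_utterances"][-20:]]
    return [{"sentences": hypots, "last_bot_utterances": [last_bot_utterances] * len(hypots)}]


dialog = {
    "human_utterances": [
        {"hypotheses": [{"text": "I love dogs"}, {"text": "Wolves have small teeth"}]}
    ],
    "bot_utterances": [{"text": "I hate dogs"}, {"text": "Wolves have big teeth"}],
}

print(convert_nli_hypotheses_annotator_formatter(dialog))
# [{'sentences': ['I love dogs', 'Wolves have small teeth'],
#   'last_bot_utterances': [['I hate dogs', 'Wolves have big teeth'],
#                           ['I hate dogs', 'Wolves have big teeth']]}]
```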