Skip to content

Commit

Permalink
Feat/ru llama distribution (#383)
Browse files Browse the repository at this point in the history
* feat: ru prompted dist based on llama

* feat: ru persona

* fix: versions

* fix duplicate endpoints, missing keys (#371)

* Make MTL refer to DeepPavlov 1.1 instead of my branch (#366)

* Update Dockerfile

* Update combined_classifier.json

* Update Dockerfile

* Update Dockerfile

* fix: starting openai services without dev.yml (#373)

* fix: reqs

* fix: dockerfile

* fix: pip update

* fix: reqs

* feat: using peft for gusev's model

* fix: reqs

* fix: working load

* feat: separate folder for peft transformers

* fix: revert to dev

* fix: transformers generation

* fix: use peft model

* fix: component yml

* fix: yml configs

* fix: no half precision

* fix: description

* fix: config is optional for some models

* fix: increase timeout

* fix: formatter

* fix: language

* fixed

* fix: rights

* fix: info ymls

* fix: 5sec timeout

* fix: 10sec timeout

* fix: gpu mem

* feat: ru pipeline and dockers

* feat: badlisted words ru

* feat: use fp16 for faster inference

* feat: rank sentences endpoint

* fix: endpoint func

* fix: ping pong

* fix: ranker url

* fix: prompt selector ru

* fix: env ru

* fix: sentence ranker

* fix: no-scripts selector

* fix: timeout 2.0 for ru toxic

* fix: first try for toxic model before ready

* fix: params for language

* fix: language

* fix: timeout for dialogrpt

* fix: no use of toxic cls

* fix: revert timeout to 1sec

* feat: ru lang

* fix: ru persona

* feat: new prompt

* fix: prompt

* fix: prompt

* fix: prompt

* Update dream_persona.json

* Update dream_persona.json

* Update dream_persona_ru.json

* fix: rename distr

* fix: formatter

---------

Co-authored-by: Maxim Talimanchuk <[email protected]>
Co-authored-by: dimakarp1996 <[email protected]>
Co-authored-by: Fedor Ignatov <[email protected]>
Co-authored-by: Lidia Ostyakova <[email protected]>
  • Loading branch information
5 people authored Apr 19, 2023
1 parent bdaa9d0 commit bf07f37
Show file tree
Hide file tree
Showing 25 changed files with 904 additions and 28 deletions.
1 change: 1 addition & 0 deletions .env_ru
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ BADLIST_ANNOTATOR_URL=http://badlisted-words-ru:8018/badlisted_words_batch
DP_WIKIDATA_URL=http://wiki-parser-ru:8077/model
DP_ENTITY_LINKING_URL=http://entity-linking-ru:8075/model
FILE_SERVER_URL=http://files:3000
SENTENCE_RANKER_SERVICE_URL=http://dialogrpt-ru:8122/rank_sentences
35 changes: 19 additions & 16 deletions annotators/toxic_classification_ru/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,6 @@

logger.info(f"toxic-classification is set to run on {device}")

try:
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
model = BertForSequenceClassification.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
model.eval()
if cuda:
model.cuda()
logger.info("toxic-classification model is ready")
except Exception as e:
sentry_sdk.capture_exception(e)
logger.exception(e)
raise e

app = Flask(__name__)
health = HealthCheck(app, "/healthcheck")
logging.getLogger("werkzeug").setLevel("WARNING")


def classify_sentences(sentences):
try:
Expand All @@ -68,6 +52,25 @@ def classify_sentences(sentences):
return result


try:
    # Load the pretrained toxicity classifier; PRETRAINED_MODEL_NAME_OR_PATH
    # is configured at module level (service env/build arg).
    tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
    model = BertForSequenceClassification.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
    model.eval()
    if cuda:
        model.cuda()

    # Warm-up call: the very first inference is the slowest (lazy CUDA/weight
    # initialization), so run it once before declaring the service ready.
    classify_sentences(["this is a simple test sentence without any toxicity."])
    logger.info("toxic-classification model is ready")
except Exception as e:
    # Report startup failures to Sentry, then re-raise so the container dies
    # loudly instead of serving with a half-initialized model.
    sentry_sdk.capture_exception(e)
    logger.exception(e)
    raise  # bare raise preserves the original traceback

app = Flask(__name__)
health = HealthCheck(app, "/healthcheck")
# Silence werkzeug's per-request INFO logging (healthcheck noise).
logging.getLogger("werkzeug").setLevel("WARNING")


@app.route("/respond", methods=["POST"])
def respond():
st_time = time.time()
Expand Down
8 changes: 8 additions & 0 deletions assistant_dists/dream_persona_rullama_prompted/cpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# CPU-only overrides: hide all GPUs from the model services so they
# fall back to CPU inference.
version: '3.7'
services:
  dialogrpt-ru:
    environment:
      CUDA_VISIBLE_DEVICES: ""
  transformers-lm-llama7bru:
    environment:
      CUDA_VISIBLE_DEVICES: ""
6 changes: 6 additions & 0 deletions assistant_dists/dream_persona_rullama_prompted/db_conf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"host": "DB_HOST",
"port": "DB_PORT",
"name": "DB_NAME",
"env": true
}
46 changes: 46 additions & 0 deletions assistant_dists/dream_persona_rullama_prompted/dev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Development overrides: bind-mount source directories into the containers so
# code changes are picked up without rebuilding the images, and publish each
# service's port on the host for direct access while debugging.
services:
  agent:
    volumes:
      - ".:/dp-agent"
    ports:
      - "4242:4242"
  no-restrictions-selector-ru:
    volumes:
      - "./response_selectors/convers_evaluation_based_selector:/src"
      - "./common:/src/common"
    ports:
      - "8009:8009"
  badlisted-words-ru:
    volumes:
      - "./annotators/BadlistedWordsDetector_ru:/src"
      - "./common:/src/common"
    ports:
      - "8018:8018"
  dialogrpt-ru:
    volumes:
      - "./services/dialogrpt_ru:/src"
      # Share the host model cache so weights are not re-downloaded per run.
      - "~/.deeppavlov/cache:/root/.cache"
    ports:
      - "8122:8122"
  prompt-selector-ru:
    volumes:
      - "./annotators/prompt_selector:/src"
      - "./common:/src/common"
    ports:
      - "8135:8135"
  transformers-lm-llama7bru:
    volumes:
      - "./services/transformers_peft_lm:/src"
      - "./common:/src/common"
      - "~/.deeppavlov/cache:/root/.cache"
    ports:
      - "8149:8149"
  dff-dream-persona-llama7bru-prompted-skill:
    volumes:
      - "./skills/dff_template_prompted_skill:/src"
      - "./common:/src/common"
    ports:
      - "8151:8151"

version: "3.7"
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
services:
  # Orchestrating agent: waits for every downstream service before starting
  # the pipeline defined for the Russian LLaMA persona distribution.
  agent:
    command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/dream_persona_rullama_prompted/pipeline_conf.json'
    environment:
      WAIT_HOSTS: "no-restrictions-selector-ru:8009, badlisted-words-ru:8018,
        dialogrpt-ru:8122, prompt-selector-ru:8135, transformers-lm-llama7bru:8149,
        dff-dream-persona-llama7bru-prompted-skill:8151"
      WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1800}
      HIGH_PRIORITY_INTENTS: 0
      RESTRICTION_FOR_SENSITIVE_CASE: 0
      ALWAYS_TURN_ON_ALL_SKILLS: 0
      LANGUAGE: RU

  # Response selector tuned for unrestricted prompted skills.
  no-restrictions-selector-ru:
    # NOTE(review): this service reads `.env` while every sibling uses
    # `.env_ru` — confirm this is intentional.
    env_file: [ .env ]
    build:
      args:
        TAG_BASED_SELECTION: 1
        CALL_BY_NAME_PROBABILITY: 0.5
        PROMPT_PROBA: 0
        ACKNOWLEDGEMENT_PROBA: 0.3
        PRIORITIZE_WITH_REQUIRED_ACT: 0
        PRIORITIZE_NO_DIALOG_BREAKDOWN: 0
        PRIORITIZE_WITH_SAME_TOPIC_ENTITY: 0
        IGNORE_DISLIKED_SKILLS: 0
        GREETING_FIRST: 0
        RESTRICTION_FOR_SENSITIVE_CASE: 0
        PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS: 0
        MAX_TURNS_WITHOUT_SCRIPTS: 100
        ADD_ACKNOWLEDGMENTS_IF_POSSIBLE: 0
        PRIORITIZE_SCRIPTED_SKILLS: 0
        CONFIDENCE_STRENGTH: 0.8
        CONV_EVAL_STRENGTH: 0.4
        PRIORITIZE_HUMAN_INITIATIVE: 1
        QUESTION_TO_QUESTION_DOWNSCORE_COEF: 0.8
        LANGUAGE: RU
      context: .
      dockerfile: ./response_selectors/convers_evaluation_based_selector/Dockerfile
    command: flask run -h 0.0.0.0 -p 8009
    environment:
      - FLASK_APP=server
    deploy:
      resources:
        limits:
          memory: 100M
        reservations:
          memory: 100M

  # Annotator flagging badlisted (profane/toxic) Russian words.
  badlisted-words-ru:
    env_file: [ .env_ru ]
    build:
      args:
        SERVICE_PORT: 8018
        SERVICE_NAME: badlisted_words
      context: annotators/BadlistedWordsDetector_ru/
    command: flask run -h 0.0.0.0 -p 8018
    environment:
      - FLASK_APP=server
    deploy:
      resources:
        limits:
          memory: 128M
        reservations:
          memory: 128M

  # Candidate-response ranker (DialogRPT) — also serves /rank_sentences
  # (see SENTENCE_RANKER_SERVICE_URL in .env_ru).
  dialogrpt-ru:
    env_file: [ .env_ru ]
    build:
      context: ./services/dialogrpt_ru/
      args:
        SERVICE_PORT: 8122
        PRETRAINED_MODEL_FNAME: dialogrpt_ru_ckpt_v0.pth
        TOKENIZER_NAME_OR_PATH: DeepPavlov/rudialogpt3_medium_based_on_gpt2_v2
    command: flask run -h 0.0.0.0 -p 8122
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - FLASK_APP=server
    deploy:
      resources:
        limits:
          memory: 4G
        reservations:
          memory: 4G

  # Selects the most relevant prompts (here: dream_persona_ru) for the turn.
  prompt-selector-ru:
    env_file: [ .env_ru ]
    build:
      args:
        SERVICE_PORT: 8135
        SERVICE_NAME: prompt_selector
        N_SENTENCES_TO_RETURN: 3
        PROMPTS_TO_CONSIDER: dream_persona_ru
      context: .
      dockerfile: ./annotators/prompt_selector/Dockerfile
    command: flask run -h 0.0.0.0 -p 8135
    environment:
      - FLASK_APP=server
    deploy:
      resources:
        limits:
          memory: 100M
        reservations:
          memory: 100M

  # Generative LM: Russian LLaMA-7B with a PEFT/LoRA adapter.
  transformers-lm-llama7bru:
    env_file: [ .env_ru ]
    build:
      args:
        SERVICE_PORT: 8149
        SERVICE_NAME: transformers_peft_lm_llama7bru
        PRETRAINED_MODEL_NAME_OR_PATH: IlyaGusev/llama_7b_ru_turbo_alpaca_lora
        LANGUAGE: RU
      context: .
      dockerfile: ./services/transformers_peft_lm/Dockerfile
    command: flask run -h 0.0.0.0 -p 8149
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - FLASK_APP=server
    deploy:
      resources:
        limits:
          memory: 50G
        reservations:
          memory: 50G

  # Prompted DFF skill that queries the LM above with the RU persona prompt.
  dff-dream-persona-llama7bru-prompted-skill:
    env_file: [ .env_ru ]
    build:
      args:
        SERVICE_PORT: 8151
        SERVICE_NAME: dff_dream_persona_ru_prompted_skill
        PROMPT_FILE: common/prompts/dream_persona_ru.json
        GENERATIVE_SERVICE_URL: http://transformers-lm-llama7bru:8149/respond
        GENERATIVE_TIMEOUT: 10
        N_UTTERANCES_CONTEXT: 3
      context: .
      dockerfile: ./skills/dff_template_prompted_skill/Dockerfile
    command: gunicorn --workers=1 server:app -b 0.0.0.0:8151 --reload
    deploy:
      resources:
        limits:
          memory: 128M
        reservations:
          memory: 128M

version: '3.7'
Loading

0 comments on commit bf07f37

Please sign in to comment.