Skip to content

Commit

Permalink
Feat/ru llama distribution (#383)
Browse files Browse the repository at this point in the history
* feat: ru prompted dist based on llama

* feat: ru persona

* fix: versions

* fix duplicate endpoints, missing keys (#371)

* Make MTL refer to DeepPavlov 1.1 instead of my branch (#366)

* Update Dockerfile

* Update combined_classifier.json

* Update Dockerfile

* Update Dockerfile

* fix: starting openai services without dev.yml (#373)

* fix: reqs

* fix: dockerfile

* fix: pip update

* fix: reqs

* feat: using peft for gusev's model

* fix: reqs

* fix: working load

* feat: separate folder for peft transformers

* fix: revert to dev

* fix: transformers generation

* fix: use peft model

* fix: component yml

* fix: yml configs

* fix: no half precision

* fix: description

* fix: config is optional for some models

* fix: increase timeout

* fix: formatter

* fix: language

* fixed

* fix: rights

* fix: info ymls

* fix: 5sec timeout

* fix: 10sec timeout

* fix: gpu mem

* feat: ru pipeline and dockers

* feat: badlisted words ru

* feat: use fp16 for faster inference

* feat: rank sentences endpoint

* fix: endpoint func

* fix: ping pong

* fix: ranker url

* fix: prompt selector ru

* fix: env ru

* fix: sentence ranker

* fix: no-scripts selector

* fix: timeout 2.0 for ru toxic

* fix: first try for toxic model before ready

* fix: params for language

* fix: language

* fix: timeout for dialogrpt

* fix: no use of toxic cls

* fix: revert timeout to 1sec

* feat: ru lang

* fix: ru persona

* feat: new prompt

* fix: prompt

* fix: prompt

* fix: prompt

* Update dream_persona.json

* Update dream_persona.json

* Update dream_persona_ru.json

* fix: rename distr

* fix: formatter

---------

Co-authored-by: Maxim Talimanchuk <[email protected]>
Co-authored-by: dimakarp1996 <[email protected]>
Co-authored-by: Fedor Ignatov <[email protected]>
Co-authored-by: Lidia Ostyakova <[email protected]>
  • Loading branch information
5 people authored Apr 19, 2023
1 parent bdaa9d0 commit bf07f37
Show file tree
Hide file tree
Showing 25 changed files with 904 additions and 28 deletions.
1 change: 1 addition & 0 deletions .env_ru
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ BADLIST_ANNOTATOR_URL=http://badlisted-words-ru:8018/badlisted_words_batch
DP_WIKIDATA_URL=http://wiki-parser-ru:8077/model
DP_ENTITY_LINKING_URL=http://entity-linking-ru:8075/model
FILE_SERVER_URL=http://files:3000
SENTENCE_RANKER_SERVICE_URL=http://dialogrpt-ru:8122/rank_sentences
35 changes: 19 additions & 16 deletions annotators/toxic_classification_ru/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,6 @@

logger.info(f"toxic-classification is set to run on {device}")

try:
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
model = BertForSequenceClassification.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
model.eval()
if cuda:
model.cuda()
logger.info("toxic-classification model is ready")
except Exception as e:
sentry_sdk.capture_exception(e)
logger.exception(e)
raise e

app = Flask(__name__)
health = HealthCheck(app, "/healthcheck")
logging.getLogger("werkzeug").setLevel("WARNING")


def classify_sentences(sentences):
try:
Expand All @@ -68,6 +52,25 @@ def classify_sentences(sentences):
return result


try:
    # Load the pretrained toxicity classifier; PRETRAINED_MODEL_NAME_OR_PATH
    # is configured at module level (service env/build arg).
    tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
    model = BertForSequenceClassification.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
    model.eval()
    if cuda:
        model.cuda()

    # Warm-up call: the very first inference is the slowest (lazy CUDA/weight
    # initialization), so run it once before declaring the service ready.
    classify_sentences(["this is a simple test sentence without any toxicity."])
    logger.info("toxic-classification model is ready")
except Exception as e:
    # Report startup failures to Sentry, then re-raise so the container dies
    # loudly instead of serving with a half-initialized model.
    sentry_sdk.capture_exception(e)
    logger.exception(e)
    raise  # bare raise preserves the original traceback

app = Flask(__name__)
health = HealthCheck(app, "/healthcheck")
# Silence werkzeug's per-request INFO logging (healthcheck noise).
logging.getLogger("werkzeug").setLevel("WARNING")


@app.route("/respond", methods=["POST"])
def respond():
st_time = time.time()
Expand Down
8 changes: 8 additions & 0 deletions assistant_dists/dream_persona_rullama_prompted/cpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# CPU-only overrides: hide all GPUs from the model services so they
# fall back to CPU inference.
version: '3.7'
services:
  dialogrpt-ru:
    environment:
      CUDA_VISIBLE_DEVICES: ""
  transformers-lm-llama7bru:
    environment:
      CUDA_VISIBLE_DEVICES: ""
6 changes: 6 additions & 0 deletions assistant_dists/dream_persona_rullama_prompted/db_conf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"host": "DB_HOST",
"port": "DB_PORT",
"name": "DB_NAME",
"env": true
}
46 changes: 46 additions & 0 deletions assistant_dists/dream_persona_rullama_prompted/dev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Development overrides: bind-mount source directories into the containers so
# code changes are picked up without rebuilding the images, and publish each
# service's port on the host for direct access while debugging.
services:
  agent:
    volumes:
      - ".:/dp-agent"
    ports:
      - "4242:4242"
  no-restrictions-selector-ru:
    volumes:
      - "./response_selectors/convers_evaluation_based_selector:/src"
      - "./common:/src/common"
    ports:
      - "8009:8009"
  badlisted-words-ru:
    volumes:
      - "./annotators/BadlistedWordsDetector_ru:/src"
      - "./common:/src/common"
    ports:
      - "8018:8018"
  dialogrpt-ru:
    volumes:
      - "./services/dialogrpt_ru:/src"
      # Share the host model cache so weights are not re-downloaded per run.
      - "~/.deeppavlov/cache:/root/.cache"
    ports:
      - "8122:8122"
  prompt-selector-ru:
    volumes:
      - "./annotators/prompt_selector:/src"
      - "./common:/src/common"
    ports:
      - "8135:8135"
  transformers-lm-llama7bru:
    volumes:
      - "./services/transformers_peft_lm:/src"
      - "./common:/src/common"
      - "~/.deeppavlov/cache:/root/.cache"
    ports:
      - "8149:8149"
  dff-dream-persona-llama7bru-prompted-skill:
    volumes:
      - "./skills/dff_template_prompted_skill:/src"
      - "./common:/src/common"
    ports:
      - "8151:8151"

version: "3.7"
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
services:
  # Orchestrating agent: waits for every downstream service before starting
  # the pipeline defined for the Russian LLaMA persona distribution.
  agent:
    command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/dream_persona_rullama_prompted/pipeline_conf.json'
    environment:
      WAIT_HOSTS: "no-restrictions-selector-ru:8009, badlisted-words-ru:8018,
        dialogrpt-ru:8122, prompt-selector-ru:8135, transformers-lm-llama7bru:8149,
        dff-dream-persona-llama7bru-prompted-skill:8151"
      WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1800}
      HIGH_PRIORITY_INTENTS: 0
      RESTRICTION_FOR_SENSITIVE_CASE: 0
      ALWAYS_TURN_ON_ALL_SKILLS: 0
      LANGUAGE: RU

  # Response selector tuned for unrestricted prompted skills.
  no-restrictions-selector-ru:
    # NOTE(review): this service reads `.env` while every sibling uses
    # `.env_ru` — confirm this is intentional.
    env_file: [ .env ]
    build:
      args:
        TAG_BASED_SELECTION: 1
        CALL_BY_NAME_PROBABILITY: 0.5
        PROMPT_PROBA: 0
        ACKNOWLEDGEMENT_PROBA: 0.3
        PRIORITIZE_WITH_REQUIRED_ACT: 0
        PRIORITIZE_NO_DIALOG_BREAKDOWN: 0
        PRIORITIZE_WITH_SAME_TOPIC_ENTITY: 0
        IGNORE_DISLIKED_SKILLS: 0
        GREETING_FIRST: 0
        RESTRICTION_FOR_SENSITIVE_CASE: 0
        PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS: 0
        MAX_TURNS_WITHOUT_SCRIPTS: 100
        ADD_ACKNOWLEDGMENTS_IF_POSSIBLE: 0
        PRIORITIZE_SCRIPTED_SKILLS: 0
        CONFIDENCE_STRENGTH: 0.8
        CONV_EVAL_STRENGTH: 0.4
        PRIORITIZE_HUMAN_INITIATIVE: 1
        QUESTION_TO_QUESTION_DOWNSCORE_COEF: 0.8
        LANGUAGE: RU
      context: .
      dockerfile: ./response_selectors/convers_evaluation_based_selector/Dockerfile
    command: flask run -h 0.0.0.0 -p 8009
    environment:
      - FLASK_APP=server
    deploy:
      resources:
        limits:
          memory: 100M
        reservations:
          memory: 100M

  # Annotator flagging badlisted (profane/toxic) Russian words.
  badlisted-words-ru:
    env_file: [ .env_ru ]
    build:
      args:
        SERVICE_PORT: 8018
        SERVICE_NAME: badlisted_words
      context: annotators/BadlistedWordsDetector_ru/
    command: flask run -h 0.0.0.0 -p 8018
    environment:
      - FLASK_APP=server
    deploy:
      resources:
        limits:
          memory: 128M
        reservations:
          memory: 128M

  # Candidate-response ranker (DialogRPT) — also serves /rank_sentences
  # (see SENTENCE_RANKER_SERVICE_URL in .env_ru).
  dialogrpt-ru:
    env_file: [ .env_ru ]
    build:
      context: ./services/dialogrpt_ru/
      args:
        SERVICE_PORT: 8122
        PRETRAINED_MODEL_FNAME: dialogrpt_ru_ckpt_v0.pth
        TOKENIZER_NAME_OR_PATH: DeepPavlov/rudialogpt3_medium_based_on_gpt2_v2
    command: flask run -h 0.0.0.0 -p 8122
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - FLASK_APP=server
    deploy:
      resources:
        limits:
          memory: 4G
        reservations:
          memory: 4G

  # Selects the most relevant prompts (here: dream_persona_ru) for the turn.
  prompt-selector-ru:
    env_file: [ .env_ru ]
    build:
      args:
        SERVICE_PORT: 8135
        SERVICE_NAME: prompt_selector
        N_SENTENCES_TO_RETURN: 3
        PROMPTS_TO_CONSIDER: dream_persona_ru
      context: .
      dockerfile: ./annotators/prompt_selector/Dockerfile
    command: flask run -h 0.0.0.0 -p 8135
    environment:
      - FLASK_APP=server
    deploy:
      resources:
        limits:
          memory: 100M
        reservations:
          memory: 100M

  # Generative LM: Russian LLaMA-7B with a PEFT/LoRA adapter.
  transformers-lm-llama7bru:
    env_file: [ .env_ru ]
    build:
      args:
        SERVICE_PORT: 8149
        SERVICE_NAME: transformers_peft_lm_llama7bru
        PRETRAINED_MODEL_NAME_OR_PATH: IlyaGusev/llama_7b_ru_turbo_alpaca_lora
        LANGUAGE: RU
      context: .
      dockerfile: ./services/transformers_peft_lm/Dockerfile
    command: flask run -h 0.0.0.0 -p 8149
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - FLASK_APP=server
    deploy:
      resources:
        limits:
          memory: 50G
        reservations:
          memory: 50G

  # Prompted DFF skill that queries the LM above with the RU persona prompt.
  dff-dream-persona-llama7bru-prompted-skill:
    env_file: [ .env_ru ]
    build:
      args:
        SERVICE_PORT: 8151
        SERVICE_NAME: dff_dream_persona_ru_prompted_skill
        PROMPT_FILE: common/prompts/dream_persona_ru.json
        GENERATIVE_SERVICE_URL: http://transformers-lm-llama7bru:8149/respond
        GENERATIVE_TIMEOUT: 10
        N_UTTERANCES_CONTEXT: 3
      context: .
      dockerfile: ./skills/dff_template_prompted_skill/Dockerfile
    command: gunicorn --workers=1 server:app -b 0.0.0.0:8151 --reload
    deploy:
      resources:
        limits:
          memory: 128M
        reservations:
          memory: 128M

version: '3.7'
Loading

0 comments on commit bf07f37

Please sign in to comment.