-
Notifications
You must be signed in to change notification settings - Fork 77
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* fix: add spacy annotator * fix: usage of spacy attributes * fix: test spacy annotator * fix: add params * fix: add params * fix: fix test * fix: rights on file * fix: codestyle * fix: extra f string
- Loading branch information
1 parent
531a34b
commit 40ec573
Showing
10 changed files
with
203 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
FROM python:3.8.4

# Build arguments are re-exported as environment variables so that they are
# also visible to the running service (which reads SPACY_MODEL and
# TOKEN_ATTRIBUTES through getenv).
ARG SRC_DIR
ENV SRC_DIR ${SRC_DIR}
ARG SERVICE_PORT
ENV SERVICE_PORT ${SERVICE_PORT}
ARG SPACY_MODEL
ENV SPACY_MODEL ${SPACY_MODEL}
ARG TOKEN_ATTRIBUTES
ENV TOKEN_ATTRIBUTES ${TOKEN_ATTRIBUTES}

RUN mkdir /src

# Install the dependencies and download the model BEFORE copying the sources:
# these are the slow layers, and this order keeps them cached when only the
# service code changes.
COPY $SRC_DIR/requirements.txt /src/requirements.txt
RUN pip install --no-cache-dir -r /src/requirements.txt
RUN python -m spacy download ${SPACY_MODEL}

COPY $SRC_DIR /src/
COPY ./common/ /src/common/

WORKDIR /src

CMD gunicorn --workers=2 server:app
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
This is the spaCy annotator: for every input sentence it returns the text and the configured spaCy attributes of each token. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
flask==1.1.1 | ||
itsdangerous==2.0.1 | ||
gunicorn==20.0.4 | ||
sentry-sdk==0.13.4 | ||
requests==2.22.0 | ||
spacy==3.2.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import logging | ||
import time | ||
from os import getenv | ||
|
||
import sentry_sdk | ||
import spacy | ||
from flask import Flask, request, jsonify | ||
|
||
|
||
sentry_sdk.init(getenv("SENTRY_DSN"))

# The model is loaded once at import time and shared by all requests.
spacy_nlp = spacy.load(getenv("SPACY_MODEL"))
# "|"-separated names of the token attributes to return. Empty names are
# dropped so that an empty or unset variable means "token text only" instead
# of failing later on getattr(token, "").
TOKEN_ATTRIBUTES = [attr for attr in getenv("TOKEN_ATTRIBUTES", "").split("|") if attr]

logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.DEBUG)
logger = logging.getLogger(__name__)

app = Flask(__name__)
|
||
|
||
def get_result(request):
    """Annotate every sentence of a request with per-token spaCy attributes.

    Args:
        request: Flask request whose JSON body contains a "sentences" list;
            each item is passed to the spaCy pipeline as one text.

    Returns:
        A list with one entry per input sentence. Each entry is a list of
        dicts, one per token, holding the token's "text" plus every attribute
        named in TOKEN_ATTRIBUTES converted with str().

    Raises:
        KeyError: if the JSON body has no "sentences" key.
        AttributeError: if a configured attribute does not exist on the token.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    st_time = time.perf_counter()
    sentences = request.json["sentences"]

    result = []
    for uttr in sentences:
        curr_tokens = []
        for token in spacy_nlp(uttr):
            curr_token = {"text": token.text}
            for attr in TOKEN_ATTRIBUTES:
                # str() makes every attribute value JSON-serialisable,
                # whatever type spaCy returns for it.
                curr_token[attr] = str(getattr(token, attr))
            curr_tokens.append(curr_token)
        result.append(curr_tokens)

    total_time = time.perf_counter() - st_time
    # Lazy %-formatting: the message is only built if the record is emitted.
    logger.info("spacy_annotator exec time: %.3fs", total_time)
    return result
|
||
|
||
@app.route("/respond", methods=["POST"])
def respond():
    """Annotate the posted sentences and return the annotations as JSON."""
    return jsonify(get_result(request))
|
||
|
||
@app.route("/respond_batch", methods=["POST"])
def respond_batch():
    """Annotate the posted sentences and return them wrapped as a one-element batch list."""
    annotations = get_result(request)
    payload = [{"batch": annotations}]
    return jsonify(payload)
|
||
|
||
# Direct-execution entry point: serve the app with Flask's built-in server on
# all interfaces, port 3000, with debug mode off.
if __name__ == "__main__":
    app.run(debug=False, host="0.0.0.0", port=3000)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import os | ||
import requests | ||
|
||
|
||
# Port of the service under test, taken from the environment. The variable
# must be set: os.getenv returns None otherwise and int(None) raises TypeError.
SERVICE_PORT = int(os.getenv("SERVICE_PORT"))
|
||
|
||
def main():
    """Post one sentence to the /respond endpoint and compare the answer with the gold annotation.

    Raises:
        AssertionError: if the response differs from the expected annotation;
            the message contains the actual response.
    """
    url = f"http://0.0.0.0:{SERVICE_PORT}/respond"
    input_data = {"sentences": ["джейсон стетхэм хочет есть."]}
    # Expected output: one list per sentence, one dict per token.
    gold = [
        [
            {
                "dep_": "nsubj",
                "ent_iob_": "B",
                "ent_type_": "PER",
                "lemma_": "джейсон",
                "morph": "Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing",
                "pos_": "PROPN",
                "text": "джейсон",
            },
            {
                "dep_": "appos",
                "ent_iob_": "I",
                "ent_type_": "PER",
                "lemma_": "стетхэм",
                "morph": "Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing",
                "pos_": "PROPN",
                "text": "стетхэм",
            },
            {
                "dep_": "ROOT",
                "ent_iob_": "O",
                "ent_type_": "",
                "lemma_": "хотеть",
                "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=Third|Tense=Pres|VerbForm=Fin|Voice=Act",
                "pos_": "VERB",
                "text": "хочет",
            },
            {
                "dep_": "xcomp",
                "ent_iob_": "O",
                "ent_type_": "",
                "lemma_": "есть",
                "morph": "Aspect=Imp|VerbForm=Inf|Voice=Act",
                "pos_": "VERB",
                "text": "есть",
            },
            {
                "dep_": "punct",
                "ent_iob_": "O",
                "ent_type_": "",
                "lemma_": ".",
                "morph": "",
                "pos_": "PUNCT",
                "text": ".",
            },
        ]
    ]

    result = requests.post(url, json=input_data).json()
    # Put the actual response into the assertion message itself; the previous
    # `print(result)` message expression evaluated to None.
    assert result == gold, f"Unexpected response: {result}"
    print("Success!")
|
||
|
||
# Run the check only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash

# Run the Python test script found in the current working directory.
python test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters