forked from explosion/spacy-course
-
Notifications
You must be signed in to change notification settings - Fork 0
/
solution_03_07.py
29 lines (24 loc) · 1.02 KB
/
solution_03_07.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import spacy
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span
# Load the "en_core_web_sm" pipeline.
nlp = spacy.load("en_core_web_sm")

# Phrases that should be recognised as animals.
animals = ["Golden Retriever", "cat", "turtle", "Rattus norvegicus"]

# Run every phrase through the pipeline so it can be used as a match pattern.
animal_patterns = [pattern_doc for pattern_doc in nlp.pipe(animals)]
print("animal_patterns:", animal_patterns)

# Build a phrase matcher on the shared vocab and register all patterns
# under the "ANIMAL" key.
matcher = PhraseMatcher(nlp.vocab)
matcher.add("ANIMAL", None, *animal_patterns)
# Define the custom pipeline component.
def animal_component(doc):
    """Mark every phrase matched by ``matcher`` in ``doc`` as an ANIMAL entity.

    Args:
        doc: The document passed in by the pipeline.

    Returns:
        The same ``doc`` object, with ``doc.ents`` set to the matched spans.
    """
    # Apply the matcher to the doc; each match unpacks as (match_id, start, end).
    matches = matcher(doc)
    # Create a Span for each match and give it the "ANIMAL" label.
    spans = [Span(doc, start, end, label="ANIMAL") for _, start, end in matches]
    # Assigning to doc.ents replaces its previous value with the matched
    # spans; nothing is merged with what was there before.
    doc.ents = spans
    return doc
# Add the component after the "ner" component.
nlp.add_pipe(animal_component, after="ner")
print(nlp.pipe_names)

# Process the text, then print the text and label of each entity in doc.ents.
doc = nlp("I have a cat and a Golden Retriever")
entity_pairs = []
for ent in doc.ents:
    entity_pairs.append((ent.text, ent.label_))
print(entity_pairs)