-
Notifications
You must be signed in to change notification settings - Fork 0
/
spacy_7_combiningModelsAndRules.py
45 lines (33 loc) · 1.44 KB
/
spacy_7_combiningModelsAndRules.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# -*- coding: utf-8 -*-
"""
Created on Fri May 24 2019

@author: Stacy Bridges

            Statistical Models                Rules-Based Systems
            -------------------------------   -------------------------------
Use cases:  application needs to generalize   dictionary with finite
            based on examples                 number of examples
Examples:   product names, person names,      countries of the world,
            subject/object relationships      cities, drug names, dog breeds
spaCy:      entity recognizer,                tokenizer, Matcher,
            dependency parser,                PhraseMatcher
            pos tagger
"""
import spacy
# import the Doc and Span classes from spaCy tokens
# from spacy.tokens import Doc, Span
from spacy.matcher import Matcher
def main() -> None:
    """Recap rule-based matching: register two patterns and print the matches.

    Loads the ``en_core_web_md`` pipeline, builds a ``Matcher`` on its shared
    vocab, registers the ``LOVE_CATS`` and ``VERY_HAPPY`` token patterns,
    runs the matcher over a sample sentence and prints the result.
    """
    nlp = spacy.load('en_core_web_md')

    # recap of Rules-Based Matching
    # Initialize with the shared vocab
    matcher = Matcher(nlp.vocab)

    # Patterns are lists of dictionaries describing the tokens.
    # NOTE: ``add(key, on_match, *patterns)`` is the spaCy v2 call form used
    # by this file; spaCy v3 expects ``matcher.add(key, [pattern])`` instead.
    love_cats_pattern = [{'LEMMA': 'love', 'POS': 'VERB'}, {'LOWER': 'cats'}]
    matcher.add('LOVE_CATS', None, love_cats_pattern)

    # Operators can specify how often a token should be matched
    very_happy_pattern = [{'TEXT': 'very', 'OP': '+'}, {'TEXT': 'happy'}]
    # Register the operator pattern as well; a pattern that is only assigned
    # to a variable is never consulted by the matcher.
    matcher.add('VERY_HAPPY', None, very_happy_pattern)

    # Calling matcher on doc returns list of (match_id, start, end) tuples
    doc = nlp("I love cats and I'm very very happy")
    matches = matcher(doc)
    print(matches)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()