-
Notifications
You must be signed in to change notification settings - Fork 0
/
spacy_learn.py
32 lines (22 loc) · 951 Bytes
/
spacy_learn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import spacy
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span
nlp = spacy.load("en_core_web_sm")
class EntityMatcher(object):
name = "entity_matcher"
def __init__(self, nlp, terms, label):
patterns = [nlp.make_doc(text) for text in terms]
self.matcher = PhraseMatcher(nlp.vocab)
self.matcher.add(label, None, *patterns)
def __call__(self, doc):
matches = self.matcher(doc)
for match_id, start, end in matches:
span = Span(doc, start, end, label=match_id)
doc.ents = list(doc.ents) + [span]
return doc
terms = (u"cat", u"dog", u"tree kangaroo", u"giant sea spider")
entity_matcher = EntityMatcher(nlp, terms, "ANIMAL")
nlp.add_pipe(entity_matcher, after="ner")
print(nlp.pipe_names) # The components in the pipeline
doc = nlp(u"This is a text about Barack Obama and a tree kangaroo")
print([(ent.text, ent.label_) for ent in doc.ents])