-
Notifications
You must be signed in to change notification settings - Fork 0
/
NerModel.py
61 lines (44 loc) · 1.99 KB
/
NerModel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Model, load_model
from tensorflow.keras import preprocessing
# Named-entity recognition (NER) model module
class NerModel:
    """Named-entity recognition (NER) wrapper around a saved Keras model.

    The instance loads the model once and reuses a caller-supplied
    preprocessing object to turn a raw query into a padded sequence of word
    indices, which is then fed to the model to obtain one tag class per
    sequence position.
    """

    # Fixed sequence length passed to the model (shorter inputs are padded
    # at the end with 0, longer ones are truncated by pad_sequences).
    MAX_LEN = 40
    # Class index of the 'O' tag; predict_tags() skips it.
    OUTSIDE_INDEX = 1

    def __init__(self, model_name, proprocess):
        """Load the NER model and keep a reference to the preprocessor.

        Args:
            model_name: Value passed straight to ``load_model``.
            proprocess: Preprocessing object; it must provide ``pos``,
                ``get_keywords`` and ``get_wordidx_sequence``. (The parameter
                name is kept as-is for backward compatibility.)
        """
        # Label for each BIO tag class index
        self.index_to_ner = {1: 'O', 2: 'B_DT', 3: 'B_DEST', 4: 'I', 5: 'B_OG', 6: 'B_PS', 7: 'B_LC', 8: 'NNP', 9: 'B_TI', 0: 'PAD'}
        # Load the NER model
        self.model = load_model(model_name)
        # Chatbot Preprocess object
        self.p = proprocess

    def _predict_class_ids(self, query):
        """Run the shared preprocessing + inference pipeline for one query.

        Returns:
            A ``(keywords, class_ids)`` pair: the keywords extracted from the
            query, and the arg-max class index for each of the ``MAX_LEN``
            sequence positions.
        """
        # Morphological analysis
        pos = self.p.pos(query)
        # Extract keywords from the sentence (stopword removal)
        keywords = self.p.get_keywords(pos, without_tag=True)
        sequences = [self.p.get_wordidx_sequence(keywords)]
        # Padding
        padded_seqs = preprocessing.sequence.pad_sequences(
            sequences, padding="post", value=0, maxlen=self.MAX_LEN
        )
        scores = self.model.predict(np.array([padded_seqs[0]]))
        class_ids = tf.math.argmax(scores, axis=-1)
        return keywords, class_ids.numpy()[0]

    # Predict named-entity classes
    def predict(self, query):
        """Return ``(keyword, tag)`` pairs for the keywords of ``query``.

        Args:
            query: Raw input sentence.

        Returns:
            A list of ``(keyword, tag_name)`` tuples, at most ``MAX_LEN`` long.
        """
        keywords, class_ids = self._predict_class_ids(query)
        tags = [self.index_to_ner[i] for i in class_ids]
        # pad_sequences truncates from the FRONT by default
        # (truncating='pre'), so when there are more than MAX_LEN keywords
        # the model only saw the last MAX_LEN of them. Pair the tags with
        # those same keywords instead of the first MAX_LEN.
        # Assumes get_wordidx_sequence yields one index per keyword -
        # TODO confirm against the Preprocess implementation.
        visible_keywords = keywords[-self.MAX_LEN:]
        return list(zip(visible_keywords, tags))

    def predict_tags(self, query):
        """Return the tag names predicted for ``query``, skipping 'O'.

        Args:
            query: Raw input sentence.

        Returns:
            A list of tag names for every sequence position whose predicted
            class is not ``OUTSIDE_INDEX``, or ``None`` when that list is
            empty.
        """
        _, class_ids = self._predict_class_ids(query)
        # NOTE(review): all MAX_LEN positions are inspected, including the
        # padded ones, so whatever the model predicts there (e.g. 'PAD') is
        # part of the result. Kept unchanged because callers may rely on it.
        tags = [
            self.index_to_ner[tag_idx]
            for tag_idx in class_ids
            if tag_idx != self.OUTSIDE_INDEX
        ]
        if not tags:
            return None
        return tags