-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathpolarity_predict.py
75 lines (58 loc) · 2.25 KB
/
polarity_predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'ZhangYi'
import os
from sklearn.externals import joblib
from utils.utils import delimiter
from utils.grammar import chinese_only
from utils.data_process import seg_words, gen_text_vec
class PolarityClassifier(object):
    """Document-level polarity classifier for a single text comment.

    On construction a tokenizer (``<model_name>.tk``) and a model
    (``<model_name>.mdl``) are loaded with ``joblib`` from the ``model``
    directory below the ``absa`` directory.  ``predict`` then filters,
    segments and vectorizes one comment and returns the model's score.

    NOTE: the ``absa`` directory is derived from the current working
    directory.  If any component of the working directory equals ``absa``
    the working directory itself is used; otherwise
    ``<cwd>/train/sentiment/absa`` is used.
    """

    DEFAULT_MODEL_NAME = 'polarity_doc'  # doc-based model
    # Class-level default so ``predict`` still works on instances whose
    # ``__init__`` was bypassed or overridden.
    maxlen = 200

    def __init__(self, model_name=DEFAULT_MODEL_NAME, maxlen=200):
        """Load the tokenizer and the model from disk.

        Args:
            model_name: base name of the ``.tk`` / ``.mdl`` files to load.
            maxlen: value forwarded as ``maxlen`` to ``gen_text_vec``.
        """
        self.maxlen = maxlen

        # ``delimiter()`` is the project's path-separator helper; it is used
        # both to split the working directory and to join path parts.
        sep = delimiter()
        cwd = os.path.abspath('.')
        if 'absa' in cwd.split(sep):
            absa_dir = cwd
        else:
            # Invoked from outside ``absa`` (the original note names this
            # location ``path_comment``): descend into the package directory.
            absa_dir = sep.join([cwd, 'train', 'sentiment', 'absa'])
        model_dir = sep.join([absa_dir, 'model'])

        self.tokenizer = joblib.load(
            sep.join([model_dir, '{}.tk'.format(model_name)]))
        self.model = joblib.load(
            sep.join([model_dir, '{}.mdl'.format(model_name)]))

        # ``_make_predict_function`` is a private method of the loaded model,
        # so it is called only when present instead of failing construction.
        # NOTE(review): presumably a Keras model whose predict function is
        # pre-built before use from other threads -- confirm.
        build_predict = getattr(self.model, '_make_predict_function', None)
        if callable(build_predict):
            build_predict()

    @staticmethod
    def _build_result(comment, neg_prob):
        """Pack a score into the response dict returned by ``predict``.

        The score is converted to a built-in ``float`` so that the result can
        be passed to ``json.dumps`` even when the model yields a NumPy scalar.
        """
        prob = float(neg_prob)
        return {
            'items': [{
                'negative_prob': prob,
                # 1 = negative review, 0 = positive review
                'sentiment': int(round(prob)),
            }],
            'log_id': '',
            'text': comment,
        }

    def predict(self, comment):
        """Score one comment and return the result dict.

        Args:
            comment: the raw comment text.

        Returns:
            A dict ``{'items': [{'negative_prob': float, 'sentiment': int}],
            'log_id': '', 'text': comment}``.  The result is also printed to
            standard output.
        """
        # 1. keep Chinese text only
        cleaned = chinese_only([comment])
        # 2. word segmentation (jieba, per the original note); single input,
        #    so take the first entry of the returned batch
        tokens = seg_words(cleaned)[0]
        # 3. turn the tokens into the model's input vector
        vector = gen_text_vec(self.tokenizer, tokens, maxlen=self.maxlen)
        # 4. first score of the first (only) sample
        neg_prob = self.model.predict(vector)[0][0]
        # 5. response dict
        result = self._build_result(comment, neg_prob)
        print("SENTIMENT RESULT: ", result)
        return result
if __name__ == "__main__":
    # Manual smoke run: build the classifier and score one sample review
    # (roughly "this battery looks nice"); predict() prints the result.
    PolarityClassifier().predict('这块电池好看')