# test.py
import json

import jieba
import numpy as np
import tensorflow as tf
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from transformers import (
    BertConfig,
    BertForPreTraining,
    BertModel,
    BertTokenizer,
    load_tf_weights_in_bert,
)

from tokenizer import Tokenizer, load_vocab
# Quick sanity check of multilabel metrics on toy data
# (rows are samples, columns are binary labels).
real = [[1, 1, 1, 0], [1, 1, 1, 1]]
pred = [[1, 0, 1, 0], [1, 0, 1, 1]]
print(f1_score(y_true=real, y_pred=pred, average='macro'))
print(accuracy_score(y_true=real, y_pred=pred))
print(classification_report(y_true=real, y_pred=pred))
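
# confusion_matrix is imported above but never exercised; a minimal sketch on the
# same toy labels. Flattening the multilabel matrix into one binary vector is an
# assumption here, since confusion_matrix does not accept multilabel input directly.
flat_real = np.ravel(real)
flat_pred = np.ravel(pred)
print(confusion_matrix(y_true=flat_real, y_pred=flat_pred))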
# Convert the TensorFlow WoBERT checkpoint to a PyTorch model.
# config = BertConfig.from_pretrained('chinese_wobert/bert_config.json')
# config.vocab_size = 33586
# model = BertForPreTraining(config)
# # List the checkpoint variables for inspection before conversion.
# init_vars = tf.train.list_variables('chinese_wobert/bert_model.ckpt')
# for name, shape in init_vars:
#     print('Loading TF weight {} with shape {}'.format(name, shape))
# model = load_tf_weights_in_bert(model, config, 'chinese_wobert/bert_model.ckpt')
# model.save_pretrained('pytorch_wobert')
#
# # Reload the converted checkpoint as a plain BertModel.
# model = BertModel.from_pretrained('pytorch_wobert')
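#
# # A hedged sketch of a forward pass through the converted checkpoint; it
# # assumes a vocab.txt has also been placed in pytorch_wobert/, since
# # save_pretrained above only writes the model weights and config.
# import torch
# tokenizer = BertTokenizer.from_pretrained('pytorch_wobert')
# inputs = tokenizer('老师好', return_tensors='pt')
# with torch.no_grad():
#     hidden = model(**inputs).last_hidden_state
# print(hidden.shape)  # (batch_size, seq_len, hidden_size)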
# # Toy stratified split; each distinct label row is duplicated because
# # StratifiedShuffleSplit needs at least two members per class.
# X = np.ones((6, 3))
# Y = np.array([[1, 0, 1], [0, 1, 0], [0, 0, 1]] * 2)
# sss = StratifiedShuffleSplit(n_splits=1, test_size=0.5, train_size=0.5, random_state=7)
# for train_idx, test_idx in sss.split(X, Y):
#     print(train_idx, test_idx)
# train_X, test_X, train_y, test_y = train_test_split(X, Y, test_size=0.3)
# print(train_X, train_y)
# from bert4keras.models import build_transformer_model
# vocab = load_vocab('data/split_word/keep_vocab.txt', simplified=False)
# print(len(vocab))
# config = BertConfig.from_pretrained('chinese_wobert/bert_config.json')
# config.num_hidden_layers = 3
# model = BertModel(config=config)
# print(model)
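# # A follow-up sketch: num_parameters() (a standard transformers method)
# # makes the size reduction from truncating to three layers visible.
# print(model.num_parameters())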
# print(load_vocab('chinese_wobert/vocab.txt', simplified=True))
#
# tokenizer = Tokenizer(
#     'pytorch_wobert/vocab.txt',
#     do_lower_case=True,
#     pre_tokenize=lambda s: jieba.cut(s, HMM=False)
# )
#
# # Encode a Chinese sentence pair ("Hello teacher, I'm Classmate He" /
# # "our school rocks") with the jieba word-level tokenizer.
# res = tokenizer.encode(first_text='老师好,我叫何同学', second_text='财大牛逼')
# print(res)
# with open('data/split_word/train.json', 'r', encoding='utf-8') as f:
#     data = json.load(f)
# for d in data:
#     res = tokenizer.encode_plus(d['content'])
#     print(res)
# from scipy import optimize
#
# # Minimize x1 - x2 - 2*x3 subject to
# #   x1 + 3*x2 + 2*x3 <= 12,  x1 + x2 - x3 >= 2,
# #   2*x2 + x3 == 4,  and x1, x2, x3 >= 0.
# z = np.array([1, -1, -2])
# a = np.array([[-1, -3, -2], [1, 1, -1]])
# b = np.array([-12, 2])
# x1_b = x2_b = x3_b = (0, None)
# a_eq = np.array([[0, 2, 1]])
# b_eq = np.array([4])
# # linprog expects A_ub @ x <= b_ub, so the a @ x >= b system is negated on both sides.
# res = optimize.linprog(z, A_ub=-a, b_ub=-b, A_eq=a_eq, b_eq=b_eq, bounds=(x1_b, x2_b, x3_b))
# print(res)
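#
# # Worked check: the equality gives x3 = 4 - 2*x2, reducing the objective to
# # x1 + 3*x2 - 8 with the constraint x1 + 3*x2 >= 6, so the optimum is -2,
# # attained for example at (x1, x2, x3) = (0, 2, 0).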