-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
executable file
·96 lines (63 loc) · 2.18 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from pprint import pprint
from nlptext.base import BasicObject
########### Wiki (English) ###########
# CORPUSPath = 'corpus/WikiEnglish/'
# Corpus2GroupMethod = '.txt'
# Group2TextMethod = 'line'
# Text2SentMethod = 'whole' # this caused the problem
# Sent2TokenMethod = 'pos_en' # ' '
# TOKENLevel = 'word'
# min_token_freq = 10
# use_hyper = ['pos_en'] # ' '
# anno = False
# anno_keywords = {}
# BasicObject.INIT(CORPUSPath,
# Corpus2GroupMethod,
# Group2TextMethod,
# Text2SentMethod,
# Sent2TokenMethod, TOKENLevel, min_token_freq = min_token_freq,
# use_hyper = use_hyper,
# anno = False, anno_keywords = anno_keywords)
# from nlptext.base import BasicObject
# ########### Wiki (Chinese) ###########
# CORPUSPath = 'corpus/WikiChinese/'
# Corpus2GroupMethod = '.txt'
# Group2TextMethod = 'line'
# Text2SentMethod = 'whole'
# Sent2TokenMethod = 'iter'
# TOKENLevel = 'char'
# min_token_freq = 1
# use_hyper = {'pos'}
# anno = False
# anno_keywords = {}
# BasicObject.INIT(CORPUSPath,
# Corpus2GroupMethod,
# Group2TextMethod,
# Text2SentMethod,
# Sent2TokenMethod, TOKENLevel, min_token_freq = min_token_freq,
# use_hyper = use_hyper,
# anno = False, anno_keywords = anno_keywords)
from pprint import pprint
from nlptext.base import BasicObject
# ---------------------------------------------------------------------------
# Active configuration: LuohuNER750Neat corpus.
#
# Every name below is a module-level setting that is handed to
# BasicObject.INIT at the bottom of this section.
# ---------------------------------------------------------------------------

# Settings passed positionally to BasicObject.INIT, in this order.
CORPUSPath = 'corpus/LuohuNER750Neat/'
Corpus2GroupMethod = 'Dir'  # TODO (left open by the original author)
Group2TextMethod = 'file'
Text2SentMethod = 're'
Sent2TokenMethod = 'iter'
TOKENLevel = 'char'

# Settings passed by keyword.
min_token_freq = 1
use_hyper = {'pos', 'medpos'}

# Annotation settings.
anno = 'annofile4text'
anno_keywords = {
    'ANNOIden': '.NER',
    'anno_sep': '\t',
    'notZeroIndex': 0,  # 0 means positions are indexed from zero
    'notRightOpen': 0,  # 0 means the right end is open; 1 means it is not open
}

# Same call shape as before: six positional arguments followed by four
# keyword arguments.
BasicObject.INIT(
    CORPUSPath,
    Corpus2GroupMethod,
    Group2TextMethod,
    Text2SentMethod,
    Sent2TokenMethod,
    TOKENLevel,
    min_token_freq=min_token_freq,
    use_hyper=use_hyper,
    anno=anno,
    anno_keywords=anno_keywords,
)