-
Notifications
You must be signed in to change notification settings - Fork 7
/
hanlp.properties
executable file
·38 lines (38 loc) · 2.59 KB
/
hanlp.properties
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# HanLP configuration: a root directory plus dictionary/model paths and a few options.
# Root directory for the paths in this configuration file; root + other path = full path
# (relative paths are supported, see: https://github.com/hankcs/HanLP/pull/254)
# Windows users: always use / as the path separator.
# NOTE(review): `${data root}` looks like a template placeholder that should be replaced
# with the actual data directory before use — confirm.
root = ${data root}
# Core dictionary path
CoreDictionaryPath = data/dictionary/CoreNatureDictionary.txt
# Bigram dictionary path
BiGramDictionaryPath = data/dictionary/CoreNatureDictionary.ngram.txt
# Stop-word dictionary path
CoreStopWordDictionaryPath = data/dictionary/stopwords.txt
# Synonym dictionary path
# NOTE(review): the key repeats "Dictionary"; presumably it must match the name the loader
# reads, so verify against the loader before renaming.
CoreSynonymDictionaryDictionaryPath = data/dictionary/synonym/CoreSynonym.txt
# Person-name dictionary path
PersonDictionaryPath = data/dictionary/person/nr.txt
# Person-name dictionary transition matrix path
PersonDictionaryTrPath = data/dictionary/person/nr.tr.txt
# Root directory of the Traditional/Simplified Chinese dictionaries
tcDictionaryRoot = data/dictionary/tc
# Custom dictionary paths. Separate multiple custom dictionaries with ';'. An entry that starts
# with a space is located in the same directory. The form 'filename part-of-speech' means that
# dictionary's default part of speech is the given one. Priority is in decreasing order.
# Also, data/dictionary/custom/CustomDictionary.txt is a high-quality lexicon; please do not delete it.
# All dictionaries must use UTF-8 encoding.
CustomDictionaryPath = data/dictionary/custom/CustomDictionary.txt; 现代汉语补充词库.txt; 全国地名大全.txt ns; 人名词典.txt; 机构名词典.txt; 上海地名.txt ns;data/dictionary/person/nrf.txt nrf;
# CRF segmentation model path
CRFSegmentModelPath = data/model/segment/CRFSegmentModel.txt
# HMM segmentation model
HMMSegmentModelPath = data/model/segment/HMMSegmentModel.bin
# Whether segmentation results show the part of speech
ShowTermNature = true
# IO adapter: implement the com.hankcs.hanlp.corpus.io.IIOAdapter interface to run HanLP on
# different platforms (Hadoop, Redis, etc.).
# The default IO adapter is shown below; it is based on the ordinary file system.
#IOAdapter=com.hankcs.hanlp.corpus.io.FileIOAdapter
# Perceptron lexical analyzer
PerceptronCWSModelPath = data/model/perceptron/pku199801/cws.bin
PerceptronPOSModelPath = data/model/perceptron/pku199801/pos.bin
PerceptronNERModelPath = data/model/perceptron/pku199801/ner.bin
# CRF lexical analyzer
CRFCWSModelPath = data/model/crf/pku199801/cws.bin
CRFPOSModelPath = data/model/crf/pku199801/pos.bin
CRFNERModelPath = data/model/crf/pku199801/ner.bin
# For more configuration options, see https://github.com/hankcs/HanLP/blob/master/src/main/java/com/hankcs/hanlp/HanLP.java#L59 and add them yourself.