From 1969ebde69f3d813fe25d5f90e5fcc0d86f2584a Mon Sep 17 00:00:00 2001
From: hankcs
Date: Thu, 16 Nov 2017 22:53:55 -0600
Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96CommonDictionary=E7=9A=84?=
 =?UTF-8?q?=E5=8A=A0=E8=BD=BD=E9=80=9F=E5=BA=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../collection/trie/DoubleArrayTrie.java      | 40 +++++++++++++++++++
 .../dictionary/common/CommonDictionary.java   |  2 +-
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/src/main/java/com/hankcs/hanlp/collection/trie/DoubleArrayTrie.java b/src/main/java/com/hankcs/hanlp/collection/trie/DoubleArrayTrie.java
index 70639b1a7..771f77de3 100644
--- a/src/main/java/com/hankcs/hanlp/collection/trie/DoubleArrayTrie.java
+++ b/src/main/java/com/hankcs/hanlp/collection/trie/DoubleArrayTrie.java
@@ -535,6 +535,46 @@ public boolean load(ByteArray byteArray, V[] value)
         return true;
     }
 
+    /**
+     * Loads the double array from a byte array (found to be faster than
+     * ByteArray on macOS).
+     * <p>
+     * Reads one int (the slot count) at {@code offset}, then that many
+     * (base, check) int pairs, all via ByteUtil.bytesHighFirstToInt.
+     *
+     * @param bytes  serialized double array; may be null
+     * @param offset index in bytes where the slot count starts
+     * @param value  value array to attach to this trie
+     * @return true on success; false if bytes is null or the data is
+     *         truncated or malformed, in which case the trie is unchanged
+     */
+    public boolean load(byte[] bytes, int offset, V[] value)
+    {
+        if (bytes == null) return false;
+        // Validate before touching any field, so corrupt input yields false
+        // instead of an exception and a half-loaded trie.
+        if (offset < 0 || offset > bytes.length - 4) return false;
+        int length = ByteUtil.bytesHighFirstToInt(bytes, offset);
+        offset += 4;
+        // Each slot takes 8 bytes; the long product cannot overflow.
+        if (length < 0 || length * 8L > bytes.length - offset) return false;
+        int[] newBase = new int[length + 65535]; // extra room to avoid going out of bounds
+        int[] newCheck = new int[length + 65535];
+        for (int i = 0; i < length; i++)
+        {
+            newBase[i] = ByteUtil.bytesHighFirstToInt(bytes, offset);
+            offset += 4;
+            newCheck[i] = ByteUtil.bytesHighFirstToInt(bytes, offset);
+            offset += 4;
+        }
+        size = length;
+        base = newBase;
+        check = newCheck;
+        v = value;
+        used = null; // no longer needed, release it
+        return true;
+    }
+
     /**
      * 载入双数组,但是不提供值,此时本trie相当于一个set
      *
diff --git a/src/main/java/com/hankcs/hanlp/dictionary/common/CommonDictionary.java b/src/main/java/com/hankcs/hanlp/dictionary/common/CommonDictionary.java
index 46478e177..c9b7b24a5 100644
--- a/src/main/java/com/hankcs/hanlp/dictionary/common/CommonDictionary.java
+++ b/src/main/java/com/hankcs/hanlp/dictionary/common/CommonDictionary.java
@@ -105,7 +105,7 @@ protected boolean loadDat(ByteArray byteArray)
         {
             return false;
         }
-        return trie.load(byteArray, valueArray);
+        return trie.load(byteArray.getBytes(), byteArray.getOffset(), valueArray);
     }
 
     /**