From a9997d8d80b2c18874468c230f9f6b27198d8738 Mon Sep 17 00:00:00 2001 From: tiandi Date: Tue, 24 Aug 2021 17:18:28 +0800 Subject: [PATCH] =?UTF-8?q?DoubleArrayTrie=E9=87=8C=E7=9A=84LongestSearche?= =?UTF-8?q?r=E7=9A=84next=E6=96=B9=E6=B3=95=E9=9C=80=E8=A6=81=E8=BF=9B?= =?UTF-8?q?=E8=A1=8C=E5=BC=BA=E5=8C=96=EF=BC=8C=E5=BD=93=E4=BC=A0=E5=85=A5?= =?UTF-8?q?=E7=9A=84treemap=E7=9A=84value=E4=B8=BAnull=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E4=BC=9A=E5=BC=95=E5=8F=91bug=EF=BC=8C=E5=8F=AF=E4=BB=A5?= =?UTF-8?q?=E6=A0=B9=E6=8D=AEindex=E6=88=96=E8=80=85length=E5=AD=97?= =?UTF-8?q?=E6=AE=B5=E5=88=A4=E6=96=AD=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../collection/trie/DoubleArrayTrie.java | 6 ++-- .../collection/trie/DoubleArrayTrieTest.java | 31 +++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/hankcs/hanlp/collection/trie/DoubleArrayTrie.java b/src/main/java/com/hankcs/hanlp/collection/trie/DoubleArrayTrie.java index a5bba4d6c..e0f9b7c45 100644 --- a/src/main/java/com/hankcs/hanlp/collection/trie/DoubleArrayTrie.java +++ b/src/main/java/com/hankcs/hanlp/collection/trie/DoubleArrayTrie.java @@ -1299,7 +1299,7 @@ public LongestSearcher(int offset, char[] charArray) */ public boolean next() { - value = null; + length = 0; begin = i; int b = base[0]; int n; @@ -1309,7 +1309,7 @@ public boolean next() { if (i >= arrayLength) // 指针到头了,将起点往前挪一个,重新开始,状态归零 { - return value != null; + return length > 0; } p = b + (int) (charArray[i]) + 1; // 状态转移 p = base[char[i-1]] + char[i] + 1 if (b == check[p]) // base[char[i-1]] == check[base[char[i-1]] + char[i] + 1] @@ -1317,7 +1317,7 @@ public boolean next() else { if (begin == arrayLength) break; - if (value != null) + if (length > 0) { i = begin + length; // 输出最长词后,从该词语的下一个位置恢复扫描 return true; diff --git a/src/test/java/com/hankcs/hanlp/collection/trie/DoubleArrayTrieTest.java b/src/test/java/com/hankcs/hanlp/collection/trie/DoubleArrayTrieTest.java index ed9131e56..c8219fe7a 100644 --- a/src/test/java/com/hankcs/hanlp/collection/trie/DoubleArrayTrieTest.java +++ b/src/test/java/com/hankcs/hanlp/collection/trie/DoubleArrayTrieTest.java @@ -54,6 +54,37 @@ public void testLongestSearcher() throws Exception } } + public void testLongestSearcherWithNullValue() { + TreeMap buildFrom = new TreeMap(); + TreeMap buildFromValueNull = new TreeMap(); + String[] keys = new String[]{"he", "her", "his"}; + for (String key : keys) { + buildFrom.put(key, key); + buildFromValueNull.put(key, null); + } + DoubleArrayTrie trie = new DoubleArrayTrie(buildFrom); + DoubleArrayTrie trieValueNull = new DoubleArrayTrie(buildFromValueNull); + + String text = "her3he6his-hers! "; + + DoubleArrayTrie.LongestSearcher searcher = trie.getLongestSearcher(text.toCharArray(), 0); + DoubleArrayTrie.LongestSearcher searcherValueNull = trieValueNull.getLongestSearcher(text.toCharArray(), 0); + + while (true) { + boolean next = searcher.next(); + boolean nextValueNull = searcherValueNull.next(); + + if (next && nextValueNull) { + assertTrue(searcher.begin == searcherValueNull.begin && searcher.length == searcherValueNull.length); + } else if (next || nextValueNull) { + assert false; + break; + } else { + break; + } + } + } + public void testTransmit() throws Exception { DoubleArrayTrie dat = CustomDictionary.DEFAULT.dat;