Skip to content

Commit

Permalink
词法分析器seg接口支持自定义词性覆盖统计词性 fix #1156
Browse files Browse the repository at this point in the history
  • Loading branch information
hankcs committed Apr 20, 2019
1 parent 6888837 commit 259825c
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,16 @@ protected List<Term> segSentence(char[] sentence)
{
childrenList.add(iterator.next());
}
if (attributeList != null)
{
attributeIterator = attributeList.iterator();
for (int i = 0; i < wordArray.length; i++)
{
CoreDictionary.Attribute attribute = attributeIterator.next();
if (attribute != null)
posArray[i] = attribute.nature[0].toString();
}
}
String prePos = posArray[0];
offset = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.model.crf.CRFLexicalAnalyzer;
import com.hankcs.hanlp.model.perceptron.PerceptronLexicalAnalyzer;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import junit.framework.TestCase;

import java.io.IOException;
import java.util.List;

public class AbstractLexicalAnalyzerTest extends TestCase
Expand Down Expand Up @@ -34,4 +36,13 @@ public void testCustomDictionary() throws Exception
CustomDictionary.add("攻城狮");
System.out.println(analyzer.segment(text));
}

/**
 * Smoke test for overriding a word's tag through the custom dictionary.
 * <p>
 * Segments the same sentence twice with a {@link CRFLexicalAnalyzer}: once
 * before and once after registering the word "修改" in
 * {@link CustomDictionary} with a user-defined tag. Both results are printed
 * to standard output; nothing is asserted, so the outcome has to be
 * inspected manually.
 * <p>
 * NOTE(review): presumably the second printout should show the user-defined
 * tag on "修改" instead of the statistically predicted one — confirm against
 * the analyzer implementation.
 *
 * @throws IOException declared by the {@link CRFLexicalAnalyzer} constructor
 */
public void testOverwriteTag() throws IOException
{
final String sentence = "强行修改词性";
final CRFLexicalAnalyzer lexicalAnalyzer = new CRFLexicalAnalyzer();

// Baseline: segmentation before the custom entry is registered.
final List<Term> termsBefore = lexicalAnalyzer.seg(sentence);
System.out.println(termsBefore);

// Register the word together with a user-defined tag.
CustomDictionary.add("修改", "自定义词性");

// Same sentence again, now with the custom entry in place.
final List<Term> termsAfter = lexicalAnalyzer.seg(sentence);
System.out.println(termsAfter);
}
}

0 comments on commit 259825c

Please sign in to comment.