使用 mmseg4j 1.9.1 搭配 lucene 4.3 时,以下代码可以正常运行。
package com.artbulb.search.utils;

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import com.chenlb.mmseg4j.analysis.MaxWordAnalyzer;

/**
 * Tokenization demo for mmseg4j 1.9.1 on Lucene 4.3: runs the MaxWord
 * analyzer over a sample string and prints each produced term.
 *
 * <p>Alternative analyzers that were tried here: {@code ComplexAnalyzer}
 * (a locally modified, synonym-aware variant), {@code StandardAnalyzer}
 * and {@code SimpleAnalyzer}.
 */
public class Test {

    public static void main(String[] args) throws Exception {
        Analyzer analyzer = new MaxWordAnalyzer();
        try {
            String text = "goods_zone_bate";
            TokenStream ts = analyzer.tokenStream("field", new StringReader(text));
            try {
                // Attribute must be obtained before consuming the stream.
                CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
                ts.reset(); // Lucene workflow: reset() before incrementToken()
                while (ts.incrementToken()) {
                    System.out.println(term.toString());
                }
                ts.end(); // signal end-of-stream per the TokenStream contract
            } finally {
                // Guarantee the stream is released even if tokenization throws.
                ts.close();
            }
        } finally {
            analyzer.close();
        }
    }
}
如果使用 lucene 4.7,则需要注释掉 ts.reset(); 这一行,如下所示。
package com.artbulb.search.utils;

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import com.chenlb.mmseg4j.analysis.MaxWordAnalyzer;

/**
 * Tokenization demo for mmseg4j 1.9.1 on Lucene 4.7: runs the MaxWord
 * analyzer over a sample string and prints each produced term.
 *
 * <p>Alternative analyzers that were tried here: {@code ComplexAnalyzer}
 * (a locally modified, synonym-aware variant), {@code StandardAnalyzer}
 * and {@code SimpleAnalyzer}.
 */
public class Test {

    public static void main(String[] args) throws Exception {
        Analyzer analyzer = new MaxWordAnalyzer();
        try {
            String text = "goods_zone_bate";
            TokenStream ts = analyzer.tokenStream("field", new StringReader(text));
            try {
                // Attribute must be obtained before consuming the stream.
                CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
                // ts.reset();
                // NOTE(review): reset() is intentionally skipped — with mmseg4j
                // 1.9.1 on Lucene 4.7 calling reset() reportedly fails. Lucene's
                // documented TokenStream workflow normally requires reset()
                // before incrementToken(); confirm against the exact mmseg4j
                // version in use before relying on this.
                while (ts.incrementToken()) {
                    System.out.println(term.toString());
                }
                ts.end(); // signal end-of-stream per the TokenStream contract
            } finally {
                // Guarantee the stream is released even if tokenization throws.
                ts.close();
            }
        } finally {
            analyzer.close();
        }
    }
}