// IK Analyzer (Chinese word segmentation) demo

import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;


/**
 * Demonstrates the IK Analyzer (org.wltea) Chinese word segmenter, running the
 * same text through both "smart" (coarse, disambiguated) mode and fine-grained
 * mode and printing the results.
 */
public class TestJeAnalyzer {

    /** Sample sentence segmented by {@link #main(String[])}. */
    private static final String TEST_SENTENCE = "中国人是最美丽的,钓鱼岛是中国的地盘";

    /**
     * Segments {@code text} with both analyzer modes.
     *
     * @param text the text to segment
     * @return a map from mode label to the space-separated segmentation result;
     *         the keys are the user-facing Chinese labels "智能切分" (smart
     *         segmentation) and "细粒度切分" (fine-grained segmentation)
     */
    public static Map<String, String> segMore(String text) {
        Map<String, String> map = new HashMap<>();
        map.put("智能切分", segText(text, true));
        map.put("细粒度切分", segText(text, false));
        return map;
    }

    /**
     * Runs the IK segmenter over {@code text} and joins the lexemes with spaces.
     * The result carries a trailing space whenever at least one lexeme is
     * produced; this is preserved for backward compatibility with callers.
     *
     * @param text     the text to segment
     * @param useSmart {@code true} for smart mode, {@code false} for fine-grained mode
     * @return the space-separated lexeme texts
     * @throws RuntimeException wrapping any {@link IOException} thrown by the segmenter
     */
    private static String segText(String text, boolean useSmart) {
        StringBuilder result = new StringBuilder();
        IKSegmenter ik = new IKSegmenter(new StringReader(text), useSmart);
        try {
            Lexeme word;
            while ((word = ik.next()) != null) {
                result.append(word.getLexemeText()).append(' ');
            }
        } catch (IOException ex) {
            // ik.next() reads from the underlying Reader; surface failures unchecked.
            throw new RuntimeException(ex);
        }
        return result.toString();
    }

    /** Entry point: segments the sample sentence and prints both results. */
    public static void main(String[] args) throws Exception {
        System.out.println(segMore(TEST_SENTENCE));
    }
}

// The required IK Analyzer JAR was attached to the original blog post.
// Reposted from: 5keit.iteye.com/blog/2316323