例子:
import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.wltea.analyzer.lucene.IKAnalyzer; public class IkAnalyzerTest { public static void main(String[] args) { // String keyWord = // "IKAnalyzer的分词效果到底怎么样呢,我们来看一下吧IKAnalyzer的分词效果到底怎么样呢,我们来看一下吧我们我们我们我们我们我们我们我们"; String keyWord = "排骨猪肉的write,猪 牛,肉猪肉的,猪 牛,肉"; // String keyWord = ""; // 创建IKAnalyzer中文分词对象 IKAnalyzer analyzer = new IKAnalyzer(); // 使用智能分词 analyzer.setUseSmart(true); // 打印分词结果 try { printAnalysisResult(analyzer, keyWord); } catch (Exception e) { e.printStackTrace(); } } /** * 打印出给定分词器的分词结果 * * @param analyzer分词器 * @param keyWord关键词 * @throws Exception */ private static String[] printAnalysisResult(Analyzer analyzer, String keyWord) throws Exception { System.out.println("[" + keyWord + "]分词效果如下"); String logString = "GetKeyWordArray getKeyWordArray "; String[] returnMsgArray = null; String returnMsgTemp = ""; TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(keyWord)); tokenStream.addAttribute(CharTermAttribute.class); try { while (tokenStream.incrementToken()) { CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class); if (charTermAttribute != null) { System.out.println(logString + "charTermAttribute ====== " + charTermAttribute.toString()); if (charTermAttribute.toString() != null) { returnMsgTemp += charTermAttribute.toString(); returnMsgTemp += ","; } else { System.out.println(logString + "charTermAttribute.toString() is null"); } } else { System.out.println(logString + "charTermAttribute is null"); } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } System.out.println(logString + "returnMsgTemp == " + returnMsgTemp); returnMsgArray = returnMsgTemp.split(","); if (returnMsgArray == null) { System.out.println(logString + "returnMsgArray is null"); return null; } System.out.println(logString + "returnMsgArray len == " + returnMsgArray.length); // 去掉数组中重复元素 List<String> list = new ArrayList<String>(); for (int i = 0; i < returnMsgArray.length; i++) { if (!list.contains(returnMsgArray[i])) {// 如果数组 list不包含当前项,则增加该项到数组中 if (returnMsgArray[i].equals("")) { System.out.println(logString + "returnMsgArray[" + i + "].equals(\"\")"); continue; } list.add(returnMsgArray[i]); } } String[] newStr = list.toArray(new String[1]); if (newStr == null) { System.out.println(logString + "newStr is null"); return null; } returnMsgTemp = ""; System.out.println(logString + "newStr.length ==" + newStr.length); for (int i = 0; i < newStr.length; i++) { if (newStr[i] == null) { System.out.println(logString + "newStr[" + i + "] is null"); } returnMsgTemp = returnMsgTemp + "[" + newStr[i] + "]"; } System.out.println(logString + "returnMsgArray ==" + returnMsgTemp); System.out.println(logString + "newStr ==" + newStr.toString()); return newStr; } }
jar包下载:附件