Apache OpenNLP使用


import java.io.*;

import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.Span;




/**
 * Small demonstration of Apache OpenNLP sentence detection and tokenization.
 *
 * <p>References:
 * <ul>
 *   <li>http://opennlp.apache.org/documentation/1.5.3/manual/opennlp.html -
 *       the official Apache OpenNLP Developer Documentation</li>
 *   <li>http://opennlp.sourceforge.net/models-1.5/ -
 *       the various OpenNLP models can be downloaded here</li>
 *   <li>http://www.programcreek.com/2012/05/opennlp-tutorial/ -
 *       a good tutorial about the OpenNLP tools</li>
 * </ul>
 */
public class Testing_openNLP {

	/** Relative path of the sentence-detection model file. */
	private static final String SENTENCE_MODEL_PATH = "en-sent.bin";

	/** Relative path of the tokenizer model file. */
	private static final String TOKENIZER_MODEL_PATH = "en-token.bin";

	/**
	 * Runs the tokenizer and the sentence detector on a sample string and prints
	 * what each of them produced.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		// Alternative sample input:
		// "This isn't the greatest example sentence in the world because I've seen better.  Neither is this one.  This one's not bad, though."
		String testString = "Hi. How are you?  This is      &3 $444 Mike.";

		printAll("Tokens", Token(testString));
		printAll("Sentences", sentenceSegmentation(testString));
	}

	/**
	 * Splits text into sentences using the model stored in {@code en-sent.bin}.
	 *
	 * @param str the text to split
	 * @return the sentences returned by {@code SentenceDetectorME.sentDetect}, or
	 *         {@code null} if the model file cannot be opened, read, or closed
	 */
	public static String[] sentenceSegmentation(String str) {
		SentenceModel model;
		// try-with-resources closes the stream on every path, including failures.
		try (InputStream modelIn = new FileInputStream(SENTENCE_MODEL_PATH)) {
			model = new SentenceModel(modelIn);
		} catch (IOException e) {
			// Covers FileNotFoundException as well; never continue with a null model.
			e.printStackTrace();
			return null;
		}

		SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
		return sentenceDetector.sentDetect(str);
	}

	/**
	 * Splits text into tokens using the model stored in {@code en-token.bin}.
	 *
	 * @param str the text to tokenize
	 * @return the tokens returned by {@code TokenizerME.tokenize}, or {@code null}
	 *         if the model file cannot be opened, read, or closed
	 */
	public static String[] Token(String str) {
		TokenizerModel model;
		// try-with-resources closes the stream on every path, including failures.
		try (InputStream modelIn = new FileInputStream(TOKENIZER_MODEL_PATH)) {
			model = new TokenizerModel(modelIn);
		} catch (IOException e) {
			// Report the failure the same way sentenceSegmentation does instead of
			// swallowing it silently.
			e.printStackTrace();
			return null;
		}

		TokenizerME tokenizer = new TokenizerME(model);
		// Note kept from the original author: tokenizer.getTokenProbabilities()
		// must be called directly after one of the tokenize methods was called.
		return tokenizer.tokenize(str);
	}

	/** Prints a labelled array one element per line, or a marker when it is null. */
	private static void printAll(String label, String[] items) {
		if (items == null) {
			System.out.println(label + ": <unavailable>");
			return;
		}
		System.out.println(label + " (" + items.length + "):");
		for (String item : items) {
			// Brackets make leading/trailing whitespace in an element visible.
			System.out.println("  [" + item + "]");
		}
	}

}



猜你喜欢

转载自blog.csdn.net/dongweionly/article/details/45038213