3) 第一章 初识Lucene:一个简单的实例

还是直接看代码来得直观:

1. 索引

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Example Lucene indexer: indexes the {@code *.txt} files found directly
 * inside a data directory into a Lucene index directory.
 * Code adapted from Lucene In Action, 2nd Edition.
 */
public class Indexer {

	/**
	 * Command-line entry point.
	 *
	 * @param args {@code args[0]} is the index directory, {@code args[1]} is
	 *             the data directory whose {@code *.txt} files are indexed
	 * @throws Exception if the argument count is wrong or indexing fails
	 */
	public static void main(String[] args) throws Exception {
		if (args.length != 2) {
			throw new Exception("Usage: java " + Indexer.class.getName()
					+ " <index dir> <data dir>");
		}
		// 1 Location where the Lucene index files are stored
		String indexDir = args[0];
		// 2 Index the *.txt files in this directory
		String dataDir = args[1];

		long start = System.currentTimeMillis();
		Indexer indexer = new Indexer(indexDir);
		int numIndexed;
		try {
			numIndexed = indexer.index(dataDir);
		} finally {
			// Always close the writer, even when indexing fails part-way.
			indexer.close();
		}
		long end = System.currentTimeMillis();
		System.out.println("Indexing " + numIndexed + " files took "
				+ (end - start) + " milliseconds");
	}

	private final IndexWriter writer;

	/**
	 * Opens an index writer on the given directory.
	 *
	 * @param indexDir path of the directory holding the index files
	 * @throws IOException if the index writer cannot be created
	 */
	public Indexer(String indexDir) throws IOException {
		Directory dir = new SimpleFSDirectory(new File(indexDir), null);
		// 3 Create the Lucene IndexWriter
		writer = new IndexWriter(dir,
				new StandardAnalyzer(Version.LUCENE_30), true,
				IndexWriter.MaxFieldLength.UNLIMITED);
	}

	/**
	 * Closes the underlying index writer.
	 *
	 * @throws IOException if closing the writer fails
	 */
	public void close() throws IOException {
		// 4 Close the IndexWriter
		writer.close();
	}

	/**
	 * Indexes every accepted file found directly inside {@code dataDir}
	 * (sub-directories are not descended into).
	 *
	 * @param dataDir path of the directory containing the files to index
	 * @return the value of {@code writer.numDocs()} after indexing
	 * @throws Exception if the directory cannot be listed or a file cannot
	 *                   be indexed
	 */
	public int index(String dataDir) throws Exception {
		File[] files = new File(dataDir).listFiles();
		if (files == null) {
			// listFiles() returns null when the path is not a directory or
			// an I/O error occurs; fail with a clear message instead of a
			// NullPointerException in the loop below.
			throw new IOException("Cannot list files in data directory: "
					+ dataDir);
		}
		for (File f : files) {
			if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
					&& acceptFile(f)) {
				indexFile(f);
			}
		}
		// 5 Return the number of indexed documents
		return writer.numDocs();
	}

	/**
	 * Decides whether a file should be indexed. Subclasses may override.
	 *
	 * @param f candidate file
	 * @return {@code true} if the file name ends with {@code ".txt"}
	 */
	protected boolean acceptFile(File f) {
		// 6 Only index .txt files
		return f.getName().endsWith(".txt");
	}

	/**
	 * Builds the Lucene document for a file: a {@code contents} field fed
	 * from the file's text and a stored, non-analyzed {@code filename}
	 * field holding the canonical path.
	 *
	 * @param f file to convert
	 * @return the document to add to the index
	 * @throws Exception if the path cannot be resolved or the file cannot
	 *                   be opened
	 */
	protected Document getDocument(File f) throws Exception {
		// Resolve the path before opening the reader so that a failure here
		// does not leave an open FileReader behind.
		String canonicalPath = f.getCanonicalPath();
		Document doc = new Document();
		// 7 Index the text content of the file
		doc.add(new Field("contents", new FileReader(f)));
		// 8 Index the file path
		doc.add(new Field("filename", canonicalPath,
				Field.Store.YES, Field.Index.NOT_ANALYZED));
		return doc;
	}

	/**
	 * Logs the file being processed and adds its document to the index.
	 *
	 * @param f file to index
	 * @throws Exception if the document cannot be built or added
	 */
	private void indexFile(File f) throws Exception {
		System.out.println("Indexing " + f.getCanonicalPath());
		Document doc = getDocument(f);
		// getDocument may be overridden; a null result means "skip this file".
		if (doc != null) {
			// 9 Add the document to the index
			writer.addDocument(doc);
		}
	}
}

2. 搜索

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Example Lucene searcher: runs a query against the {@code contents} field
 * of an existing index and prints the {@code filename} of each hit.
 * Code adapted from Lucene In Action, 2nd Edition.
 */
public class Searcher {

	/**
	 * Command-line entry point.
	 *
	 * @param args {@code args[0]} is the index directory, {@code args[1]} is
	 *             the query string
	 * @throws Exception if the argument count is wrong or the search fails
	 */
	public static void main(String[] args) throws Exception {
		if (args.length != 2) {
			throw new Exception("Usage: java " + Searcher.class.getName() + " <index dir> <query>");
		}
		// 1 Location of the index files created by the Indexer class
		String indexDir = args[0];
		// 2 Query string
		String q = args[1];
		search(indexDir, q);
	}

	/**
	 * Searches the index for {@code q} and prints up to 10 matching file
	 * names to standard output; a summary line goes to standard error.
	 *
	 * @param indexDir path of the directory holding the index files
	 * @param q        query string, parsed against the {@code contents} field
	 * @throws Exception any failure raised while opening the index, parsing
	 *                   the query or searching is propagated to the caller
	 */
	public static void search(String indexDir, String q) throws Exception {

		Directory dir = new SimpleFSDirectory(new File(indexDir), null);
		// 3 Open the index
		IndexSearcher is = new IndexSearcher(dir);
		try {
			// 4 Parse the query string
			QueryParser parser = new QueryParser(
					Version.LUCENE_30,
					"contents",
					new StandardAnalyzer(Version.LUCENE_30));
			Query query = parser.parse(q);

			long start = System.currentTimeMillis();
			// 5 Search the index. TopDocs only holds references to the
			//   underlying documents; they are actually loaded in step 7.
			TopDocs hits = is.search(query, 10);
			long end = System.currentTimeMillis();

			// 6 Print some basic information about the search
			System.err.println(
					"Found " + hits.totalHits + " document(s) (in " +
					(end - start) + " milliseconds) that matched query '" +
					q + "':");

			for (ScoreDoc scoreDoc : hits.scoreDocs) {
				// 7 Retrieve the matching document
				Document doc = is.doc(scoreDoc.doc);
				// 8 Print the matching file name
				System.out.println(doc.get("filename"));
			}
		} finally {
			// 9 Close the searcher, even if parsing or searching failed
			is.close();
		}
	}
}
 

猜你喜欢

转载自bun-ny.iteye.com/blog/1064610