还是看代码来得直接:
1. 索引
import java.io.File; import java.io.FileReader; import java.io.IOException; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.util.Version; /** * Lucene索引类示例 * 代码摘自 Lucene In Action 2nd Edition */ public class Indexer { public static void main(String[] args) throws Exception { if (args.length != 2) { throw new Exception("Usage: java " + Indexer.class.getName() + " <index dir> <data dir>"); } // 1 Lucene索引文件存放的位置 String indexDir = args[0]; // 2 索引此目录下的 *.txt 文件 String dataDir = args[1]; long start = System.currentTimeMillis(); Indexer indexer = new Indexer(indexDir); int numIndexed = indexer.index(dataDir); indexer.close(); long end = System.currentTimeMillis(); System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds"); } private IndexWriter writer; public Indexer(String indexDir) throws IOException { Directory dir = new SimpleFSDirectory(new File(indexDir), null); // 3 创建Lucene的 IndexWriter 类 writer = new IndexWriter( dir, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED); } public void close() throws IOException { // 4 关闭IndexWriter writer.close(); } public int index(String dataDir) throws Exception { File[] files = new File(dataDir).listFiles(); for (int i = 0; i < files.length; i++) { File f = files[i]; if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead() && acceptFile(f)) { indexFile(f); } } // 5 返回被索引的文档的数量 return writer.numDocs(); } protected boolean acceptFile(File f) { // 6 仅索引 .txt 文件类型 return f.getName().endsWith(".txt"); } protected Document getDocument(File f) throws Exception { Document doc = new Document(); // 7 索引文件的文本内容 doc.add(new Field("contents", new FileReader(f))); // 8 索引文件路径 doc.add(new Field("filename", 
f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); return doc; } private void indexFile(File f) throws Exception { System.out.println("Indexing " + f.getCanonicalPath()); Document doc = getDocument(f); if (doc != null) { // 9 将document添加到索引中 writer.addDocument(doc); } } }
2. 搜索
import java.io.File; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.util.Version; /** * Lucene搜索类示例 * 代码摘自 Lucene In Action 2nd Edition */ public class Searcher { public static void main(String[] args) throws Exception { if (args.length != 2) { throw new Exception("Usage: java " + Searcher.class.getName() + " <index dir> <query>"); } //1 由Indexer类创建的索引文件的位置 String indexDir = args[0]; //2 查询条件 String q = args[1]; search(indexDir, q); } public static void search(String indexDir, String q) throws Exception { Directory dir = new SimpleFSDirectory(new File(indexDir), null); //3 打开索引文件 IndexSearcher is = new IndexSearcher(dir); //4 分析查询条件 QueryParser parser = new QueryParser( Version.LUCENE_30, "contents", new StandardAnalyzer(Version.LUCENE_30)); Query query = parser.parse(q); long start = System.currentTimeMillis(); //5 搜索索引 TopDocs对象仅保存对底层文档的引用,第7步才实际加载 TopDocs hits = is.search(query, 10); long end = System.currentTimeMillis(); //6 输出一些基本查询信息 System.err.println( "Found " + hits.totalHits + " document(s) (in " + (end - start) + " milliseconds) that matched query '" + q + "':"); for(int i=0;i<hits.scoreDocs.length;i++) { ScoreDoc scoreDoc = hits.scoreDocs[i]; //7 检索符合的文档 Document doc = is.doc(scoreDoc.doc); //8 输出符合的文件名 System.out.println(doc.get("filename")); } //9 关闭索引 is.close(); } }