Lucene学习笔记(1)--实现简单查询

1、用 Maven 创建一个简单的 Maven 项目

2、配置pom.xml文件,增加jar包依赖

<dependencies>
  <!-- Lucene modules used by the Indexer and Search classes below: core (index
       reading/writing), queryparser (classic QueryParser) and analyzers-common
       (StandardAnalyzer). All three are pinned to the same version, 5.3.1. -->
  <dependency>
		<groupId>org.apache.lucene</groupId>
		<artifactId>lucene-core</artifactId>
		<version>5.3.1</version>
	</dependency>
	
	<dependency>
	<groupId>org.apache.lucene</groupId>
		<artifactId>lucene-queryparser</artifactId>
		<version>5.3.1</version>
	</dependency>
	
	<dependency>
		<groupId>org.apache.lucene</groupId>
		<artifactId>lucene-analyzers-common</artifactId>
		<version>5.3.1</version>
	</dependency>
  </dependencies>

3、增加索引类,Indexer.java

import java.io.File;
import java.io.FileReader;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;


/**
 * Builds a Lucene index from the regular files found directly inside a data directory.
 *
 * <p>Each file becomes one document with three fields: {@code contents} (tokenized
 * from the file body, not stored), {@code fileName} and {@code fullPath} (both stored).
 * Instances hold an open {@link IndexWriter} and must be closed; the class implements
 * {@link AutoCloseable} so it can be used in a try-with-resources statement.
 */
public class Indexer implements AutoCloseable {
	/** Index storage; closed together with the writer. */
	private final Directory dir;
	/** Writer used to add documents to the index. */
	private final IndexWriter writer;

	/**
	 * Opens (or creates) the index at the given location and prepares a writer
	 * that tokenizes text with {@link StandardAnalyzer}.
	 *
	 * @param indexDir path of the directory that holds the index
	 * @throws Exception if the directory or the writer cannot be opened
	 */
	public Indexer(String indexDir) throws Exception {
		Directory directory = FSDirectory.open(Paths.get(indexDir));
		try {
			Analyzer analyzer = new StandardAnalyzer();
			IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
			writer = new IndexWriter(directory, iwc);
		} catch (Exception e) {
			// Do not leak the directory handle when the writer cannot be created.
			try {
				directory.close();
			} catch (Exception closeError) {
				e.addSuppressed(closeError);
			}
			throw e;
		}
		dir = directory;
	}

	/**
	 * Closes the writer and then the underlying directory.
	 *
	 * @throws Exception if closing either resource fails
	 */
	@Override
	public void close() throws Exception {
		try {
			writer.close();
		} finally {
			dir.close();
		}
	}

	/**
	 * Indexes every regular file located directly in {@code dataDir}.
	 * Sub-directories and other non-regular entries are skipped.
	 *
	 * @param dataDir path of the directory whose files are indexed
	 * @return the value of {@code writer.numDocs()} after indexing
	 * @throws IllegalArgumentException if {@code dataDir} cannot be listed
	 *         (it does not exist, is not a directory, or is unreadable)
	 * @throws Exception if reading a file or writing to the index fails
	 */
	public int index(String dataDir) throws Exception {
		File[] files = new File(dataDir).listFiles();
		if (files == null) {
			// listFiles() returns null instead of throwing when the path is unusable.
			throw new IllegalArgumentException("Not a readable directory: " + dataDir);
		}
		for (File f : files) {
			// FileReader cannot open a directory, so only regular files are indexed.
			if (f.isFile()) {
				indexFile(f);
			}
		}
		return writer.numDocs();
	}

	/**
	 * Indexes a single file and logs its canonical path to standard output.
	 *
	 * @param f the file to index
	 * @throws Exception if the file cannot be read or added to the index
	 */
	private void indexFile(File f) throws Exception {
		System.out.println("索引文件:" + f.getCanonicalPath());
		Document doc = getDocument(f);
		writer.addDocument(doc);
	}

	/**
	 * Builds the document for a file and populates each of its fields.
	 *
	 * @param f the file to describe
	 * @return a document with {@code contents}, {@code fileName} and {@code fullPath} fields
	 * @throws Exception if the file cannot be opened or its path cannot be resolved
	 */
	private Document getDocument(File f) throws Exception {
		Document doc = new Document();
		// NOTE(review): FileReader decodes with the platform default charset;
		// confirm that matches the encoding of the data files.
		doc.add(new TextField("contents", new FileReader(f)));
		doc.add(new TextField("fileName", f.getName(), Field.Store.YES));
		doc.add(new TextField("fullPath", f.getCanonicalPath(), Field.Store.YES));
		return doc;
	}

	/**
	 * Indexes the sample data directory and prints the document count and elapsed time.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String indexDir = "E:\\lucene";
		String dataDir = "E:\\lucene\\data";
		int numIndexed = 0;
		long start = System.currentTimeMillis();
		// try-with-resources closes the indexer only if it was actually created,
		// so a failing constructor no longer triggers a NullPointerException.
		try (Indexer indexer = new Indexer(indexDir)) {
			numIndexed = indexer.index(dataDir);
		} catch (Exception e) {
			e.printStackTrace();
		}
		long end = System.currentTimeMillis();
		long time = end - start;
		System.out.println("索引:" + numIndexed + "个文件,用时" + time + "毫秒");
	}
}

4、增加查询类实现查询操作,Search.java

import java.nio.file.Paths;
import java.util.Iterator;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Runs a query against the {@code contents} field of an existing Lucene index
 * and prints the stored {@code fullPath} of each hit.
 */
public class Search {

	/**
	 * Parses {@code q} with the classic query parser, retrieves up to 10 top hits
	 * and prints the hit count, the search time and the path of every hit.
	 *
	 * @param indexDir path of the directory that holds the index
	 * @param q query string in classic query-parser syntax
	 * @throws Exception if the index cannot be opened, the query cannot be parsed,
	 *         or the search fails
	 */
	public static void search(String indexDir, String q) throws Exception {
		// try-with-resources releases the reader and the directory even when
		// parsing or searching throws.
		try (Directory directory = FSDirectory.open(Paths.get(indexDir));
				IndexReader reader = DirectoryReader.open(directory)) {
			IndexSearcher is = new IndexSearcher(reader);
			Analyzer analyzer = new StandardAnalyzer();
			QueryParser parser = new QueryParser("contents", analyzer);
			Query query = parser.parse(q);
			// Only the search call itself is timed, not index opening or parsing.
			long start = System.currentTimeMillis();
			TopDocs hits = is.search(query, 10);
			long end = System.currentTimeMillis();
			System.out.println("匹配 "+q+" ,总共花费"+(end-start)+"毫秒"+"查询到"+hits.totalHits+"个记录");
			for (ScoreDoc scoreDoc : hits.scoreDocs) {
				Document doc = is.doc(scoreDoc.doc);
				System.out.println(doc.get("fullPath"));
			}
		}
	}

	/**
	 * Searches the sample index for a fixed query string.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String indexDir = "E:\\lucene";
		String q = "yuyuyuyuyuyuy-yyyyyyy";
		try {
			search(indexDir, q);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}


猜你喜欢

转载自blog.csdn.net/maonian1762/article/details/79978868