一、环境搭建
1. 新建 Java Project 工程,导入以下三个 jar 包(示例代码中的 @Test 注解还需要额外引入 JUnit)。
commons-io-2.2.jar
lucene-analyzers-common-7.4.0.jar
lucene-core-7.4.0.jar
2. 新建索引保存的文件夹。例如:I:\Lucene\index
3. 新建一个文件目录,包含一些测试文本文档。例如目录为:I:\Lucene\searchsource
二、创建索引
/**
 * Builds an on-disk Lucene index from the files in the source directory.
 *
 * <ol>
 *   <li>Open a {@code Directory} that points at the index location.</li>
 *   <li>Create an {@code IndexWriter} on top of that directory.</li>
 *   <li>Read each file from disk and create one {@code Document} per file.</li>
 *   <li>Add the name, path, content and size fields to the document.</li>
 *   <li>Write the document to the index.</li>
 *   <li>Close the {@code IndexWriter} (and the directory).</li>
 * </ol>
 *
 * @throws IllegalStateException if the source directory cannot be listed
 * @throws Exception if the index cannot be opened/written or a source file cannot be read
 */
@Test
public void createIndex() throws Exception {
    File sourceDir = new File("I:\\Lucene\\searchsource");
    // listFiles() returns null (not an empty array) when the path is missing or not a directory.
    File[] sourceFiles = sourceDir.listFiles();
    if (sourceFiles == null) {
        throw new IllegalStateException(
                "Source directory does not exist or cannot be listed: " + sourceDir);
    }

    // try-with-resources closes the writer first, then the directory, even if indexing fails.
    try (Directory directory = FSDirectory.open(new File("I:\\Lucene\\index").toPath());
         IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig())) {
        for (File file : sourceFiles) {
            // Only regular files can be read as text; sub-directories are skipped.
            if (!file.isFile()) {
                continue;
            }
            String content = FileUtils.readFileToString(file, "utf8");
            long size = FileUtils.sizeOf(file);

            Document document = new Document();
            document.add(new TextField("name", file.getName(), Field.Store.YES));
            document.add(new TextField("path", file.getPath(), Field.Store.YES));
            document.add(new TextField("content", content, Field.Store.YES));
            document.add(new TextField("size", String.valueOf(size), Field.Store.YES));
            indexWriter.addDocument(document);
        }
    }
}
三、查询索引
/**
 * Searches the index for documents whose {@code content} field contains the term
 * {@code spring} and prints the stored {@code name} value of up to 10 hits.
 *
 * <p>The index location is the same directory that {@code createIndex} writes to.
 *
 * @throws Exception if the index cannot be opened or read
 */
@Test
public void searchIndex() throws Exception {
    // try-with-resources closes the reader first, then the directory, even if the search fails.
    try (Directory directory = FSDirectory.open(new File("I:\\Lucene\\index").toPath());
         IndexReader indexReader = DirectoryReader.open(directory)) {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        Query query = new TermQuery(new Term("content", "spring"));
        TopDocs topDocs = indexSearcher.search(query, 10);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document document = indexSearcher.doc(scoreDoc.doc);
            // get(...) returns the stored string value; getField(...) would print the
            // field object's debug representation instead of the file name.
            System.out.println(document.get("name"));
        }
    }
}
四、查看分析器的分词效果
/**
 * Prints, one per line, the terms that {@code StandardAnalyzer} produces for a sample sentence.
 *
 * <p>Follows the TokenStream consumer workflow: {@code reset()}, repeated
 * {@code incrementToken()}, {@code end()}, then {@code close()}.
 *
 * @throws Exception if the token stream cannot be created or consumed
 */
@Test
public void testTokenStream() throws Exception {
    // try-with-resources closes the token stream first, then the analyzer.
    try (Analyzer analyzer = new StandardAnalyzer();
         TokenStream tokenStream = analyzer.tokenStream("", "the spring Framework is ...")) {
        // The attribute instance is reused: it reflects the current token after each increment.
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            System.out.println(charTermAttribute.toString());
        }
        // Signals end-of-stream so the stream can finalize its state before being closed.
        tokenStream.end();
    }
}