Lucene的API使用

1）创建索引（使用6.0.0的方式创建）

pom.xml

 <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>

        <!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>6.0.0</version>
        </dependency>

        <!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-queryparser -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>6.0.0</version>
        </dependency>

        <!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-analyzers-common -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>6.0.0</version>
        </dependency>

        <!-- http://mvnrepository.com/artifact/commons-io/commons-io -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.5</version>
        </dependency>

    </dependencies>

IndexRepository.java

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * 索引存储
 */
public class IndexRepository {
    // 注意：此处使用的是Lucene6.0.0最新版本与4.X版本有一些区别，可以查看源码或者API进行了解
    public static void main(String[] args) throws IOException {
        // 指定索引库的存放路径，需要在系统中首先进行索引库的创建
        // 指定索引库存放路径
        File indexrepository_file = new File("此处是索引存放地址");
        Path path = indexrepository_file.toPath();
        Directory directory = FSDirectory.open(path);
        // 读取原始文档内容
        File files = new File("此处是源文件地址");
        // 创建一个分析器对象
        // 使用标准分析器
        Analyzer analyzer = new StandardAnalyzer();
        // 创建一个IndexwriterConfig对象
        // 分析器
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        // 创建一个IndexWriter对象，对于索引库进行写操作
        IndexWriter indexWriter = new IndexWriter(directory, config);
        // 遍历一个文件
        for (File f : files.listFiles()) {
            // 文件名
            String fileName = f.getName();
            // 文件内容
            @SuppressWarnings("deprecation")
            String fileContent = FileUtils.readFileToString(f);
            // 文件路径
            String filePath = f.getPath();
            // 文件大小
            long fileSize = FileUtils.sizeOf(f);

            // 创建一个Document对象
            Document document = new Document();
            // 向Document对象中添加域信息
            // 参数：1、域的名称；2、域的值；3、是否存储；
            Field nameField = new TextField("name", fileName, Store.YES);
            Field contentField = new TextField("content", fileContent , Store.YES);
            // storedFiled默认存储
            Field pathField = new StoredField("path", filePath);
            Field sizeField = new StoredField("size", fileSize);
            // 将域添加到document对象中
            document.add(nameField);
            document.add(contentField);
            document.add(pathField);
            document.add(sizeField);
            // 将信息写入到索引库中
            indexWriter.addDocument(document);

        }

        // 关闭indexWriter
        indexWriter.close();
    }

}

运行结果：
运行结果-01

2）创建索引（使用4.10.3的方式创建）

pom.xml

    <dependencies>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
        <!-- http://mvnrepository.com/artifact/commons-io/commons-io -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.5</version>
        </dependency>
        <!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-analyzers-common -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-queryparser -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>4.10.3</version>
        </dependency>

    </dependencies>

IndexRepository.java

import java.io.File;
import java.io.IOException;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * 索引的创建
 */
public class IndexRepository {

    public static void main(String[] args) throws IOException {
        Directory directory = FSDirectory.open(new File("此处是索引文件存放地址"));
        File files = new File("此处是源文件地址");

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);

        IndexWriter indexWriter = new IndexWriter(directory,config);

        for (File f : files.listFiles()) {
            // 文件名
            String fileName = f.getName();
            // 文件内容
            @SuppressWarnings("deprecation")
            String fileContent = FileUtils.readFileToString(f);
            // 文件路径
            String filePath = f.getPath();
            // 文件大小
            long fileSize = FileUtils.sizeOf(f);

            // 创建一个Document对象
            Document document = new Document();
            // 向Document对象中添加域信息
            // 参数：1、域的名称；2、域的值；3、是否存储；
            Field nameField = new TextField("name", fileName, Store.YES);
            Field contentField = new TextField("content", fileContent , Store.YES);
            // storedFiled默认存储
            Field pathField = new StoredField("path", filePath);
            Field sizeField = new LongField("size", fileSize, Store.YES);
            // 将域添加到document对象中
            document.add(nameField);
            document.add(contentField);
            document.add(pathField);
            document.add(sizeField);
            // 将信息写入到索引库中
            indexWriter.addDocument(document);
        }

        indexWriter.close();
    }

}

3）查询索引库

import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * 文档搜索
 * 通过关键词搜索文档
 *
 */
public class DocSearch {

    public static void main(String[] args) throws IOException {

        // 打开索引库
        // 找到索引库的位置
        Directory directory = FSDirectory.open(new File("此处是索引文件存放地址"));
        IndexReader indexReader = DirectoryReader.open(directory);
        // 创建一个IndexSearcher对象
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        // 创建一个查询对象
        TermQuery query = new TermQuery(new Term("name","apache"));
        // 执行查询
        // 返回的最大值，在分页的时候使用
        TopDocs topDocs = indexSearcher.search(query, 5);
        // 取查询结果总数量
        System.out.println("总共的查询结果：" + topDocs.totalHits);
        // 查询结果，就是documentID列表
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;

        for (ScoreDoc scoreDoc : scoreDocs) {
            // 取对象document的对象id
            int docID = scoreDoc.doc;

            // 相关度得分
            float score = scoreDoc.score;

            // 根据ID去document对象
            Document document = indexSearcher.doc(docID);

            System.out.println("相关度得分：" + score);
            System.out.println("");
            System.out.println(document.get("name"));
            System.out.println("");
            // 另外的一种使用方法
            System.out.println(document.getField("content").stringValue());
            System.out.println(document.get("path"));
            System.out.println();
            System.out.println("=======================");
        }

        indexReader.close();
    }

}

运行结果：

本文截取自：https://blog.csdn.net/yangqian201175/article/details/51462413

猜你喜欢