基于Lucene 7.1.0 实现搜索引擎

引入 Lucene 7.1.0 所需的依赖（Maven 坐标如下）

<!-- Lucene core library; every other Lucene artifact below is pinned to the same 7.1.0 version. -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>7.1.0</version>
</dependency>
<!-- SmartCN analyzer module (Chinese text analysis). -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-smartcn</artifactId>
    <version>7.1.0</version>
</dependency>
<!-- Highlighter module: provides Highlighter / SimpleHTMLFormatter / QueryScorer. -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>7.1.0</version>
</dependency>
<!-- Codecs for reading indexes written by older Lucene versions. -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-backward-codecs</artifactId>
    <version>7.1.0</version>
</dependency>
<!-- IK Analyzer (third-party Chinese analyzer).
     NOTE(review): the 2012_u6 release appears to target a much older Lucene API;
     confirm it actually works against Lucene 7.1.0 before relying on it. -->
<dependency>
    <groupId>com.janeluo</groupId>
    <artifactId>ikanalyzer</artifactId>
    <version>2012_u6</version>
</dependency>
<!-- Suggest module (auto-suggest / spell-check support). -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-suggest</artifactId>
    <version>7.1.0</version>
</dependency>
<!-- Query parser module: provides the classic QueryParser. -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>7.1.0</version>
</dependency>
创建索引
/**
 * Adds one document to the on-disk index and commits it.
 *
 * <p>The numeric values are written under several field types that share a name, because
 * each type serves a different purpose: a point field makes the value queryable
 * (exact / range queries), a stored field makes it retrievable from search hits, and a
 * doc-values field makes it usable as a sort key.
 *
 * <p>I/O failures are printed to stderr and otherwise swallowed, so callers get no signal
 * that indexing failed.
 *
 * @param id      document ID
 * @param content text to analyze, index and store
 * @param time    time value used for range filtering and sorting; must not be null
 * @throws IllegalArgumentException if {@code content} or {@code time} is null
 */
public void createIndex(int id, String content, Long time) {
    // Validate before any resource is opened: a failure after the IndexWriter is created
    // would otherwise leave the index write lock held.
    if (content == null || time == null) {
        throw new IllegalArgumentException("content and time must not be null");
    }
    long timeValue = time;

    Document document = new Document();
    // Point field: makes the id searchable, but does not store it.
    document.add(new IntPoint("id", id));
    // Stored field: without it the id could not be read back from a hit.
    document.add(new StoredField("id", id));

    // Analyzed full-text field; Store.YES also keeps the original text in the document.
    document.add(new TextField("content", content, Field.Store.YES));

    document.add(new LongPoint("time", timeValue));
    document.add(new StoredField("time", timeValue));
    // Doc-values field: required for sorting results by time.
    document.add(new NumericDocValuesField("time", timeValue));

    // try-with-resources closes the writer (releasing the write lock) and then the
    // directory, even when addDocument/commit throws.
    try (FSDirectory directory = FSDirectory.open(file.toPath());
         IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
        writer.addDocument(document);
        // Make the new document visible to readers opened after this point.
        writer.commit();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
修改索引
/**
 * Replaces the content of the document with the given ID.
 *
 * <p>The existing document is looked up through the {@code id} point field, its stored
 * fields are loaded, the {@code content} field is replaced, and the document is
 * re-indexed. Point and doc-values fields are not part of the stored document, so the
 * {@code id} point and the {@code time} point / doc-values are rebuilt from the stored
 * values. Any other field that was indexed without being stored cannot be recovered and
 * is lost by the update.
 *
 * <p>If no document with this ID exists the method does nothing. I/O failures are printed
 * to stderr and otherwise swallowed.
 *
 * @param id      ID of the document to update
 * @param content new content
 */
public void updateIndex(int id, String content) {
    try (FSDirectory directory = FSDirectory.open(file.toPath())) {
        Document document;
        try (IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher search = new IndexSearcher(reader);
            // Look the document up by its id point value.
            TopDocs topDocs = search.search(IntPoint.newExactQuery("id", id), 1);
            if (topDocs.scoreDocs.length == 0) {
                // Nothing to update.
                return;
            }
            document = search.doc(topDocs.scoreDocs[0].doc);
        }

        // Replace the old content instead of adding a second "content" value.
        document.removeFields("content");
        document.add(new TextField("content", content, Field.Store.YES));

        // Only stored fields survive a load; rebuild the search/sort fields.
        document.add(new IntPoint("id", id));
        IndexableField storedTime = document.getField("time");
        if (storedTime != null && storedTime.numericValue() != null) {
            long time = storedTime.numericValue().longValue();
            document.add(new LongPoint("time", time));
            document.add(new NumericDocValuesField("time", time));
        }

        try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            // "id" is indexed as a point, not as a term, so a Term-based updateDocument
            // would never match the old document and would leave a duplicate behind.
            // Delete by point query and re-add within the same commit instead.
            writer.deleteDocuments(IntPoint.newExactQuery("id", id));
            writer.addDocument(document);
            writer.commit();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
删除索引
/**
 * Deletes every document that contains the given term and commits the deletion.
 *
 * <p>Matching is done on the exact indexed term: {@code value} is not analyzed, and
 * fields that are indexed only as numeric points or stored values have no terms and
 * therefore never match.
 *
 * <p>I/O failures are printed to stderr and otherwise swallowed.
 *
 * @param field name of the field to match
 * @param value exact term the field must contain
 */
public void deleteIndex(String field, String value) {
    // try-with-resources closes the writer (releasing the write lock) and then the
    // directory, even when the delete or the commit throws.
    try (FSDirectory directory = FSDirectory.open(file.toPath());
         IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
        writer.deleteDocuments(new Term(field, value));
        writer.commit();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
搜索
/**
 * Runs a paged full-text search over the index.
 *
 * <p>The result object holds {@code count} (total number of matching documents) and
 * {@code data} (the hits of the requested page). Each hit contains the document's stored
 * fields plus {@code score}. Fields that were searched are returned as a highlighted
 * fragment; all other values are cut to at most 240 characters.
 *
 * <p>If the search fails, the exception is printed to stderr and the object returned may
 * be missing {@code count} and/or {@code data}.
 *
 * @param string    query text, parsed with the classic query parser against
 *                  {@code content}; blank means "no keyword"
 * @param startTime lower bound (inclusive) of the time filter
 * @param endTime   upper bound (inclusive) of the time filter; the filter is applied only
 *                  when both bounds are non-null and non-zero
 * @param order     0 = sort by relevance, 1 = sort by time; any other value falls back to
 *                  relevance
 * @param reverse   only used when sorting by time: {@code false} = ascending,
 *                  {@code true} = descending
 * @param page      1-based page number
 * @param size      page size
 * @return JSONObject with {@code count} and {@code data}; an empty page (count 0) when
 *         {@code page} or {@code size} is less than 1
 */
public JSONObject search(String string, Long startTime, Long endTime, int order, boolean reverse, int page, int size) {
    JSONObject object = new JSONObject();
    if (page < 1 || size < 1) {
        // Lucene rejects a non-positive hit count and the page window would be negative.
        object.put("count", 0L);
        object.put("data", new JSONArray());
        return object;
    }
    // Computed in long so that page * size cannot overflow int.
    long first = (long) (page - 1) * size;
    int limit = (int) Math.min(first + size, Integer.MAX_VALUE);

    // Names of the fields whose values get highlighted.
    List<String> highlightFields = new ArrayList<>();
    try (Directory directory = FSDirectory.open(file.toPath());
         IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher search = new IndexSearcher(reader);
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        if (StringUtils.isBlank(string)) {
            // NOTE(review): this only matches documents that have a "title" term;
            // confirm the index really contains that field.
            builder.add(new WildcardQuery(new Term("title", "*")), BooleanClause.Occur.MUST);
        } else {
            builder.add(new QueryParser("content", analyzer).parse(string), BooleanClause.Occur.MUST);
            highlightFields.add("content");
        }

        boolean hasTimeRange = startTime != null && endTime != null
                && startTime != 0 && endTime != 0;
        if (hasTimeRange) {
            builder.add(LongPoint.newRangeQuery("time", startTime, endTime), BooleanClause.Occur.MUST);
        }
        BooleanQuery query = builder.build();

        // Fetch everything up to the end of the requested page.
        TopDocs topDocs;
        if (order == 1) {
            Sort sort = new Sort(new SortField("time", SortField.Type.LONG, reverse));
            // doDocScores = true: a field sort does not compute scores by default,
            // which would leave every hit with a NaN score.
            topDocs = search.search(query, limit, sort, true, false);
        } else {
            // Plain search ranks by relevance and always fills in the scores.
            topDocs = search.search(query, limit);
        }

        // HTML wrapper for highlighted keywords.
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));

        // Total number of matches, independent of paging.
        long count = topDocs.totalHits;
        object.put("count", count);

        JSONArray array = new JSONArray();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        // scoreDocs already ends at the page boundary, so only the start has to be skipped.
        for (int i = (int) Math.min(first, scoreDocs.length); i < scoreDocs.length; i++) {
            JSONObject json = new JSONObject();
            Document doc = search.doc(scoreDocs[i].doc);
            for (IndexableField field : doc.getFields()) {
                String name = field.name();
                String value = field.stringValue();
                if (value == null) {
                    // Field without a string form (e.g. binary): nothing to output.
                    continue;
                }
                if (highlightFields.contains(name)) {
                    TokenStream stream = analyzer.tokenStream(name, new StringReader(value));
                    String highlight = highlighter.getBestFragment(stream, value);
                    // No fragment means the value contains no query term; keep it as is.
                    json.put(name, highlight == null ? value : highlight);
                } else {
                    json.put(name, value.length() > 240 ? value.substring(0, 240) : value);
                }
            }
            json.put("score", scoreDocs[i].score);
            array.add(json);
        }
        object.put("data", array);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return object;
}
相关文章推荐
/**
 * Recommends documents whose content is similar to that of the given document.
 *
 * <p>The source document is located through its {@code id} point field, a MoreLikeThis
 * query is built from its {@code content}, and up to 10 of the best matching other
 * documents are returned. The source document itself is never part of the result.
 *
 * <p>I/O failures are printed to stderr; whatever was collected up to that point is
 * returned.
 *
 * @param id ID of the document to find related documents for
 * @return JSONArray of objects with {@code id} and {@code content}; empty when the
 *         document does not exist or nothing similar is found
 */
public JSONArray moreLikeThis(int id) {
    final int maxResults = 10;
    JSONArray array = new JSONArray();
    try (Directory directory = FSDirectory.open(file.toPath());
         IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);

        // Resolve the application-level id to Lucene's internal document number.
        TopDocs source = searcher.search(IntPoint.newExactQuery("id", id), 1);
        if (source.scoreDocs.length == 0) {
            return array;
        }
        int sourceDoc = source.scoreDocs[0].doc;

        MoreLikeThis mlt = new MoreLikeThis(reader);
        // Field(s) the similarity is computed on.
        mlt.setFieldNames(new String[]{"content"});
        mlt.setMinTermFreq(1);
        mlt.setMinDocFreq(1);
        mlt.setAnalyzer(analyzer);

        Query query = mlt.like(sourceDoc);
        // The source document normally matches its own query, so ask for one extra hit.
        TopDocs topDocs = searcher.search(query, maxResults + 1);
        int added = 0;
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            if (added >= maxResults) {
                break;
            }
            if (scoreDoc.doc == sourceDoc) {
                continue;
            }
            Document doc = searcher.doc(scoreDoc.doc);
            IndexableField idField = doc.getField("id");
            IndexableField contentField = doc.getField("content");
            if (idField == null || contentField == null) {
                // Document without the stored fields this result needs.
                continue;
            }
            JSONObject json = new JSONObject();
            json.put("id", idField.stringValue());
            json.put("content", contentField.stringValue());
            array.add(json);
            added++;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return array;
}

猜你喜欢

转载自blog.csdn.net/T_amo/article/details/81125880