Java——Lucene 实现全文检索(四)索引检索

先对输入的查询词进行分词,再将得到的词项传入短语查询(PhraseQuery)进行检索

/**
 * Runs a phrase query against the {@code contents} field of a Lucene index and prints the
 * stored {@code filename} field of every hit to standard output.
 *
 * <p>The index reader and directory are opened for the duration of the call and are always
 * closed before the method returns, whether it returns normally, returns early because there
 * are no terms, or fails with an exception.
 *
 * @param indexPath file-system directory that holds the index
 * @param searchStr terms that make up the phrase (already tokenized); the list is printed to
 *                  standard output, and when it is empty no search is performed
 * @param limit     maximum number of hits to retrieve; {@code null} falls back to 10
 * @throws IOException if the index cannot be opened or read
 */
public static void indexSearch(String indexPath, List<String> searchStr, Integer limit) throws IOException {
    int maxHits = (limit == null) ? 10 : limit;
    // try-with-resources releases the reader first and the directory second on every exit
    // path; the previous manual close() calls were skipped on early return and on exceptions.
    try (Directory directory = FSDirectory.open(Paths.get(indexPath));
         IndexReader indexReader = DirectoryReader.open(directory)) {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        System.out.println(searchStr);
        if (searchStr.isEmpty()) {
            return;
        }
        String[] terms = searchStr.toArray(new String[0]);
        PhraseQuery phraseQuery = new PhraseQuery("contents", terms);
        // Fetch at most maxHits results; topDocs.totalHits carries the overall match count.
        TopDocs topDocs = indexSearcher.search(phraseQuery, maxHits);
        for (ScoreDoc sdoc : topDocs.scoreDocs) {
            // Load the stored document by its internal document id.
            Document hitDoc = indexSearcher.doc(sdoc.doc);
            // Print the stored "filename" field of the hit.
            System.out.println(hitDoc.get("filename"));
        }
    }
}

/**
 * Consumes a token stream and collects the text of every token it produces.
 *
 * <p>The stream is reset before reading and is always closed before this method returns,
 * even if reading fails, so the caller must not use it afterwards. A blank line is written
 * to standard output once all tokens have been read.
 *
 * @param ts token stream to consume
 * @return the term text of each token, in the order the stream produced them; empty when the
 *         stream yields no tokens
 * @throws IOException if the stream fails while being reset, read, or ended
 */
private static List<String> doToken(TokenStream ts) throws IOException {
    List<String> stringList = new ArrayList<>();
    // Binding the stream in try-with-resources guarantees close() even when
    // reset()/incrementToken() throws.
    try (TokenStream stream = ts) {
        // addAttribute returns the existing attribute instance or registers one if absent.
        CharTermAttribute cta = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            stringList.add(cta.toString());
        }
        // Blank line on stdout, kept so the console output stays unchanged.
        System.out.println();
        stream.end();
    }
    return stringList;
}

/**
 * Demo entry point: tokenizes a fixed query string with a {@code StandardAnalyzer} and runs
 * the resulting terms as a phrase search against a hard-coded index directory, asking for at
 * most 10 hits.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if tokenizing or searching the index fails
 */
public static void main(String[] args) throws IOException {
    final String indexDir = "C:\\Users\\admin\\Desktop\\te\\suoyin";
    final String queryText = "十三";
    // The analyzer is closed automatically when the try block exits.
    try (Analyzer analyzer = new StandardAnalyzer()) {
        TokenStream queryTokens = analyzer.tokenStream("contents", queryText);
        List<String> queryTerms = doToken(queryTokens);
        indexSearch(indexDir, queryTerms, 10);
    }
}

猜你喜欢

转载自blog.csdn.net/yilia_jia/article/details/86628242