lucene6中自定义QueryParser实现自定义格式搜索

仍然在原有案例基础上进行

创建索引FileIndexUtils

/**
 * Builds the Lucene index used by the search examples.
 *
 * <p>Every regular file found directly inside the source folder becomes one document.
 * Each sortable/searchable numeric value is indexed three ways because a single Lucene
 * field type does not cover all uses: a {@code LongPoint} for range queries, a
 * {@code NumericDocValuesField} for sorting, and a {@code StoredField} for retrieval.
 */
public class FileIndexUtils {
    private static Directory directory = null;

    static {
        try {
            directory = FSDirectory.open(Paths.get("D:\\indexFile\\test-search-advance\\"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the shared index directory.
     *
     * @return the directory opened by the static initializer, or {@code null} if opening failed
     */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * Recreates the index from scratch ({@code OpenMode.CREATE}) from the files in the
     * source folder. Sub-directories are skipped. I/O failures are reported on stderr and
     * end the run; the writer is always closed.
     */
    public static void index() {
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        File sourceDir = new File("D:\\indexFile\\test-search-advance-files");
        // listFiles() returns null when the path is missing or is not a directory.
        File[] files = sourceDir.listFiles();
        if (files == null) {
            System.err.println("Cannot list files in " + sourceDir.getAbsolutePath());
            files = new File[0];
        }

        try (IndexWriter writer = new IndexWriter(directory, iwc)) {
            for (File f : files) {
                // FileReader cannot open a directory; skip anything that is not a regular file
                // instead of aborting the whole indexing run.
                if (!f.isFile()) {
                    continue;
                }
                Document doc = new Document();
                // TextField: tokenized, indexed and stored; it carries no doc values.
                doc.add(new TextField("content", readContent(f), Field.Store.YES));
                doc.add(new TextField("filename", f.getName(), Field.Store.YES));
                // Doc values are required to sort on a text value, hence SortedDocValuesField.
                doc.add(new SortedDocValuesField("fname", new BytesRef(f.getName())));
                // StringField: indexed as a single un-tokenized term, and stored.
                doc.add(new StringField("path", f.getAbsolutePath(), Field.Store.YES));
                // NumericDocValuesField: doc values for sorting/aggregation; not stored.
                doc.add(new NumericDocValuesField("date", f.lastModified()));
                // LongPoint: consumed by LongPoint.newRangeQuery.
                doc.add(new LongPoint("date_search", f.lastModified()));
                // StoredField: only keeps the value so it can be read back from a hit.
                doc.add(new StoredField("date_store", f.lastModified()));
                doc.add(new NumericDocValuesField("size", f.length()));
                doc.add(new LongPoint("size_search", f.length()));
                doc.add(new StoredField("size_store", f.length()));

                writer.addDocument(doc);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads a whole file into a string.
     *
     * <p>NOTE(review): {@code FileReader} decodes with the JVM default charset; confirm
     * that this matches the encoding of the indexed files.
     *
     * @param f the file to read
     * @return the decoded file content
     * @throws IOException if the file cannot be opened or read
     */
    private static String readContent(File f) throws IOException {
        StringBuilder sb = new StringBuilder();
        char[] buffer = new char[4096];
        try (FileReader reader = new FileReader(f)) {
            int n;
            while ((n = reader.read(buffer)) != -1) {
                // Append only the chars actually read; the tail of the buffer may be stale.
                sb.append(buffer, 0, n);
            }
        }
        return sb.toString();
    }
}

注意：由于在后面自定义的CustomParser中要使用LongPoint.newRangeQuery，所以在创建索引的时候要为size和date分别加入LongPoint类型的field（即size_search和date_search）

先来看一下我们的索引结构

当我们用luke执行search操作时，可以看到

现在有这样一个需求：对文件大小和时间范围添加范围搜索，这种自定义查询格式的搜索需求需要扩展QueryParser

QueryParser中处理范围查询的方法是getRangeQuery，其默认实现会把part1和part2当作字符串词项来构造TermRangeQuery，无法匹配用LongPoint索引的数值字段，因此并不能满足我们的自定义需求

新建CustomParser类，override其getRangeQuery方法

public class CustomParser extends QueryParser {
    public CustomParser(String f, Analyzer a) {
        super(f, a);
    }

    private static Pattern DATE_PATTERN = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");

    @Override
    protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean
            endInclusive) throws ParseException {

        if ("size_search".equals(field)) {
            return LongPoint.newRangeQuery(field, Long.parseLong(part1), Long.parseLong(part2));
        } else if ("date_search".equals(field)) {
            String dateType = "yyyy-MM-dd";
            if (DATE_PATTERN.matcher(part1).matches() && DATE_PATTERN.matcher(part2).matches()) {
                SimpleDateFormat sdf = new SimpleDateFormat(dateType);
                try {
                    long start = sdf.parse(part1).getTime();
                    long end = sdf.parse(part2).getTime();
                    return LongPoint.newRangeQuery(field, start, end);
                } catch (java.text.ParseException e) {
                    e.printStackTrace();
                }
            }else{
                throw new ParserException("要检索的日期格式不正确，请使用"+dateType+"这种格式");
            }

        }
        //return new TermRangeQuery(field, new BytesRef(part1), new BytesRef(part2),startInclusive,endInclusive);
        return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
    }
}

在SearchUtils中使用自定义的CustomParser

/**
 * Runs query-string searches against the index built by {@code FileIndexUtils}, parsing
 * them with {@code CustomParser} so range queries on the numeric point fields work.
 *
 * <p>The index reader is held in a static field and replaced when the index has changed.
 * Access to it is not synchronized.
 */
public class SearchUtils {
    private static DirectoryReader reader = null;

    static {
        try {
            reader = DirectoryReader.open(FileIndexUtils.getDirectory());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a searcher over the latest view of the index, opening the reader if needed
     * and swapping in a fresh one when the index has changed since the last call.
     *
     * @return a searcher over the current reader
     * @throws IllegalStateException if no index reader could be opened
     */
    public IndexSearcher getSearcher() {
        try {
            if (reader == null) {
                reader = DirectoryReader.open(FileIndexUtils.getDirectory());
            } else {
                // openIfChanged returns null when the current reader is still up to date.
                DirectoryReader refreshed = DirectoryReader.openIfChanged(reader);
                if (refreshed != null) {
                    reader.close();
                    reader = refreshed;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (reader == null) {
            // Fail with a clear message instead of an NPE inside the IndexSearcher constructor.
            throw new IllegalStateException("Index reader could not be opened");
        }
        return new IndexSearcher(reader);
    }

    /**
     * Parses {@code queryStr} against the {@code content} default field and prints the
     * top five hits (file name, path, size and modification time) to stdout. I/O and
     * query-syntax errors are printed to stderr.
     *
     * @param queryStr query in Lucene classic query syntax
     */
    public void search(String queryStr) {
        try {
            IndexSearcher searcher = getSearcher();

            // The stock QueryParser finds nothing for ranges on the point fields,
            // so the custom parser is used instead.
            QueryParser parser = new CustomParser("content", new StandardAnalyzer());

            Query query = parser.parse(queryStr);
            System.out.println("Query:" + query);
            TopDocs tds = searcher.search(query, 5);
            // HH = 24-hour clock; "hh" would print afternoon times ambiguously without am/pm.
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            for (ScoreDoc sd : tds.scoreDocs) {
                Document d = searcher.doc(sd.doc);
                System.out.println(sd.doc + ":(score:" + sd.score + ")[filename:" + d.get("filename") + "][path:" + d
                        .get("path")
                        + "][size:" + d.get("size_store") + "][date:" + sdf.format(Long.valueOf(d.get("date_store")))
                        + "]");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }
}

测试一下

/**
 * JUnit smoke test: rebuilds the index, then runs a date range query through
 * {@code SearchUtils} and lets it print the hits.
 */
public class MainTest {
    private SearchUtils searchUtils;

    /** Builds the index before each test so the search has data to work on. */
    @Before
    public void init() {
        FileIndexUtils.index();
        searchUtils = new SearchUtils();
    }

    /**
     * Runs the date range search. Other range queries that can be tried here:
     * {@code filename:[a TO c]} and {@code size_search:[40 TO 55]}.
     */
    @Test
    public void test() {
        final String dateRange = "date_search:[2018-07-20 TO 2018-07-21]";
        try {
            searchUtils.search(dateRange);
        } catch (ParserException e) {
            // Show only the parser's message rather than the full stack trace.
            System.err.println(e.getMessage());
        }
    }
}

可以看到结果是符合我们需求的，这样就实现了原有QueryParser不支持，但我们希望其支持的自定义格式的搜索功能

lucene6中自定义QueryParser实现自定义格式搜索

猜你喜欢