仍然在原有案例基础上进行
案例参考:lucene6中的创建正排索引以及进行字段排序的方法
创建索引FileIndexUtils
/**
 * Builds the Lucene index used by the search examples from the files found in a
 * fixed source directory.
 *
 * <p>Each file becomes one document. File size and last-modified time are each
 * indexed three ways: as doc values (sorting), as a {@code LongPoint} (range
 * queries) and as a stored field (display).
 */
public class FileIndexUtils {
    private static Directory directory = null;

    static {
        try {
            directory = FSDirectory.open(Paths.get("D:\\indexFile\\test-search-advance\\"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the shared index directory.
     *
     * @return the directory opened by the static initializer, or {@code null} if opening it failed
     */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * Recreates the index ({@code OpenMode.CREATE}) from the regular files in the source directory.
     *
     * <p>I/O failures are printed and abort the run; the writer is always closed.
     */
    public static void index() {
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        File sourceDir = new File("D:\\indexFile\\test-search-advance-files");
        // listFiles() returns null when the path is missing or is not a directory.
        File[] files = sourceDir.listFiles();
        if (files == null) {
            System.err.println("Source directory cannot be listed: " + sourceDir.getAbsolutePath());
            files = new File[0];
        }

        // try-with-resources closes the writer on every path, including failures.
        try (IndexWriter writer = new IndexWriter(directory, iwc)) {
            for (File f : files) {
                // Sub-directories cannot be read as text and would abort the whole run.
                if (!f.isFile()) {
                    continue;
                }
                writer.addDocument(toDocument(f));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Builds the Lucene document for one file. */
    private static Document toDocument(File f) throws IOException {
        Document doc = new Document();
        // TextField: tokenized and indexed; Store.YES also stores the value. No doc values.
        doc.add(new TextField("content", readContent(f), Field.Store.YES));
        doc.add(new TextField("filename", f.getName(), Field.Store.YES));
        // Doc values for a text value require a SortedDocValuesField.
        doc.add(new SortedDocValuesField("fname", new BytesRef(f.getName())));
        // StringField: indexed as a single token (not analyzed) and stored.
        doc.add(new StringField("path", f.getAbsolutePath(), Field.Store.YES));
        // NumericDocValuesField: doc values for sorting/aggregation; the value is not stored.
        doc.add(new NumericDocValuesField("date", f.lastModified()));
        // LongPoint: needed for LongPoint.newRangeQuery.
        doc.add(new LongPoint("date_search", f.lastModified()));
        // StoredField: keeps the value so it can be shown in search results.
        doc.add(new StoredField("date_store", f.lastModified()));
        doc.add(new NumericDocValuesField("size", f.length()));
        doc.add(new LongPoint("size_search", f.length()));
        doc.add(new StoredField("size_store", f.length()));
        return doc;
    }

    /** Reads the whole file as text using the platform default charset. */
    private static String readContent(File f) throws IOException {
        StringBuilder text = new StringBuilder();
        char[] buffer = new char[8192];
        try (FileReader in = new FileReader(f)) {
            int n;
            while ((n = in.read(buffer)) != -1) {
                // Append only the chars actually read in this pass.
                text.append(buffer, 0, n);
            }
        }
        return text.toString();
    }
}
注意:由于后面自定义的CustomParser中要使用LongPoint.newRangeQuery,所以在创建索引的时候要为文件大小和日期分别加入LongPoint类型的字段(即size_search和date_search)
先来看一下我们的索引结构
当我们用luke执行search操作时,可以看到
现在有这样一个需求:对文件大小和时间范围添加范围搜索,这种自定义查询格式的搜索需求需要扩展QueryParser
QueryParser默认通过getRangeQuery方法处理范围查询,它把part1和part2当作普通词项生成TermRangeQuery,无法匹配以LongPoint方式索引的size_search和date_search字段,因此并不能满足我们的自定义需求
新建CustomParser类,override其getRangeQuery方法
/**
 * Query parser that turns range syntax on the {@code size_search} and
 * {@code date_search} fields into {@code LongPoint} range queries.
 *
 * <p>Ranges on any other field are handled by the default implementation.
 */
public class CustomParser extends QueryParser {
    /** Format required for {@code date_search} bounds. */
    private static final String DATE_FORMAT = "yyyy-MM-dd";
    private static final Pattern DATE_PATTERN = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");
    private static final String DATE_FORMAT_ERROR = "要检索的日期格式不正确,请使用" + DATE_FORMAT + "这种格式";

    public CustomParser(String f, Analyzer a) {
        super(f, a);
    }

    /**
     * Builds the range query for one {@code field:[part1 TO part2]} clause.
     *
     * @param field          field the range applies to
     * @param part1          lower bound text, or {@code null} for an open lower bound
     * @param part2          upper bound text, or {@code null} for an open upper bound
     * @param startInclusive whether the lower bound itself is part of the range
     * @param endInclusive   whether the upper bound itself is part of the range
     * @return a long point range query for {@code size_search}/{@code date_search},
     *         otherwise whatever the default implementation returns
     * @throws ParseException if a size bound is not a valid long or an exclusive bound overflows
     */
    @Override
    protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean
            endInclusive) throws ParseException {
        if ("size_search".equals(field)) {
            return newLongRangeQuery(field, parseSize(part1), parseSize(part2), startInclusive, endInclusive);
        }
        if ("date_search".equals(field)) {
            return newDateRangeQuery(field, part1, part2, startInclusive, endInclusive);
        }
        return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
    }

    /**
     * Builds the range for {@code date_search}. Each bound is parsed as {@code yyyy-MM-dd}
     * and becomes the millisecond timestamp {@code SimpleDateFormat} returns for that date.
     * A malformed or impossible date raises {@code ParserException}.
     */
    private static Query newDateRangeQuery(String field, String part1, String part2, boolean startInclusive,
            boolean endInclusive) throws ParseException {
        if (!isDateOrOpen(part1) || !isDateOrOpen(part2)) {
            throw new ParserException(DATE_FORMAT_ERROR);
        }
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
        SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
        // Strict parsing: reject impossible dates such as 2018-02-30 instead of rolling them over.
        sdf.setLenient(false);
        try {
            Long start = part1 == null ? null : Long.valueOf(sdf.parse(part1).getTime());
            Long end = part2 == null ? null : Long.valueOf(sdf.parse(part2).getTime());
            return newLongRangeQuery(field, start, end, startInclusive, endInclusive);
        } catch (java.text.ParseException e) {
            // Fail loudly: falling back to a term range would never match a point field.
            throw new ParserException(DATE_FORMAT_ERROR);
        }
    }

    /** Returns true for an open bound ({@code null}) or text shaped like {@code yyyy-MM-dd}. */
    private static boolean isDateOrOpen(String text) {
        return text == null || DATE_PATTERN.matcher(text).matches();
    }

    /** Parses a size bound; {@code null} (open bound) is passed through. */
    private static Long parseSize(String text) throws ParseException {
        if (text == null) {
            return null;
        }
        try {
            return Long.valueOf(text);
        } catch (NumberFormatException e) {
            ParseException failure = new ParseException("Invalid size bound '" + text + "': expected a whole number");
            failure.initCause(e);
            throw failure;
        }
    }

    /**
     * Creates the point range query. {@code LongPoint.newRangeQuery} is inclusive on both
     * ends, so an exclusive bound is moved one step inward; a {@code null} bound is open.
     */
    private static Query newLongRangeQuery(String field, Long lower, Long upper, boolean startInclusive,
            boolean endInclusive) throws ParseException {
        long lo = Long.MIN_VALUE;
        long hi = Long.MAX_VALUE;
        try {
            if (lower != null) {
                lo = startInclusive ? lower.longValue() : Math.addExact(lower.longValue(), 1L);
            }
            if (upper != null) {
                hi = endInclusive ? upper.longValue() : Math.subtractExact(upper.longValue(), 1L);
            }
        } catch (ArithmeticException e) {
            ParseException failure = new ParseException("Exclusive range bound out of range for field '" + field + "'");
            failure.initCause(e);
            throw failure;
        }
        return LongPoint.newRangeQuery(field, lo, hi);
    }
}
在SearchUtils中使用CustomParser
/**
 * Runs query-string searches against the index built by {@code FileIndexUtils},
 * parsing them with {@code CustomParser}.
 *
 * <p>A single index reader is shared through a static field.
 */
public class SearchUtils {
    private static IndexReader reader = null;

    static {
        try {
            reader = DirectoryReader.open(FileIndexUtils.getDirectory());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a searcher over the current state of the index, opening the shared
     * reader if needed and replacing it when the index has changed.
     *
     * @return a new searcher wrapping the shared reader
     * @throws IllegalStateException if no reader exists and the index cannot be opened
     */
    public IndexSearcher getSearcher() {
        try {
            if (reader == null) {
                reader = DirectoryReader.open(FileIndexUtils.getDirectory());
            } else {
                // openIfChanged returns null when the existing reader is still current.
                IndexReader refreshed = DirectoryReader.openIfChanged((DirectoryReader) reader);
                if (refreshed != null) {
                    // Install the new reader before closing the old one, so a failing
                    // close() never leaves the shared field pointing at a closed reader.
                    IndexReader stale = reader;
                    reader = refreshed;
                    stale.close();
                }
            }
        } catch (IOException e) {
            if (reader == null) {
                throw new IllegalStateException("Cannot open the index reader", e);
            }
            // A reader is still available, so report the failure and keep using it.
            e.printStackTrace();
        }
        return new IndexSearcher(reader);
    }

    /**
     * Parses {@code queryStr}, runs it and prints the top 5 hits to standard output.
     *
     * <p>I/O and query-syntax failures are printed rather than rethrown.
     *
     * @param queryStr query in Lucene query-parser syntax; the default field is {@code content}
     */
    public void search(String queryStr) {
        try {
            IndexSearcher searcher = getSearcher();
            // The stock QueryParser finds nothing for ranges on the point fields,
            // so the custom parser is used instead.
            QueryParser parser = new CustomParser("content", new StandardAnalyzer());
            Query query = parser.parse(queryStr);
            System.out.println("Query:" + query);
            TopDocs tds = searcher.search(query, 5);
            // HH is the 24-hour clock; hh would print 13:05 as 01:05 with no AM/PM marker.
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            for (ScoreDoc sd : tds.scoreDocs) {
                Document d = searcher.doc(sd.doc);
                System.out.println(sd.doc + ":(score:" + sd.score + ")[filename:" + d.get("filename") + "][path:" + d
                        .get("path")
                        + "][size:" + d.get("size_store") + "][date:" + sdf.format(Long.valueOf(d.get("date_store")))
                        + "]");
            }
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
    }
}
测试一下
/**
 * JUnit driver for the custom range-query example: rebuilds the index, then runs
 * one query through {@code SearchUtils}.
 */
public class MainTest {
    private SearchUtils searchUtils;

    /** Creates the index first, then the search helper used by the test. */
    @Before
    public void init() {
        FileIndexUtils.index();
        searchUtils = new SearchUtils();
    }

    /** Runs a date range search; a rejected query only has its message printed. */
    @Test
    public void test() {
        // Other range queries that can be tried here:
        //   filename:[a TO c]
        //   size_search:[40 TO 55]
        final String query = "date_search:[2018-07-20 TO 2018-07-21]";
        try {
            searchUtils.search(query);
        } catch (ParserException rejected) {
            System.err.println(rejected.getMessage());
        }
    }
}
可以看到结果是符合我们需求的,这样就实现了原有QueryParser不支持,但我们希望其支持的自定义格式的搜索功能