需求:
内容包括两部分:一部分为唯一标识(即 id),另一部分为正文内容。目标是基于 Lucene 实现对正文内容的检索;这里做一个模拟实现:以 UUID 作为 id,正文内容为普通字符串。
环境:
<dependency> <groupId>org.safehaus.jug</groupId> <artifactId>jug</artifactId> <version>2.0.0</version> <classifier>lgpl</classifier> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>3.4.0</version> </dependency> <dependency> <groupId>IKAnalyzer</groupId> <artifactId>IKAnalyzer</artifactId> <version>IKAnalyzer2012_u6</version> <scope>system</scope> <systemPath>D:/tools/lib/IKAnalyzer2012_u6.jar</systemPath> </dependency>
实现:
public class IndexService { private static final UUIDGenerator generator = UUIDGenerator.getInstance(); private static final String indexDir = "D:/tools/index"; private static final Occur[] QUERY_FLAGS = { Occur.SHOULD, Occur.SHOULD }; public void createIndex(String content) throws Exception { UUID uuid = generator.generateRandomBasedUUID(); Directory directory = new org.apache.lucene.store.SimpleFSDirectory( new File(indexDir)); Analyzer analyzer = new IKAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_34, analyzer); IndexWriter writer = new IndexWriter(directory, config); Document doc = new Document(); doc.add(new Field("str", content, Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("id", uuid.toString(), Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); writer.optimize(); writer.close(true); } public void queryIndex(String queryString) throws Exception { Analyzer analyzer = new IKAnalyzer(); BooleanQuery bq = new BooleanQuery(); Query q = MultiFieldQueryParser.parse(Version.LUCENE_34, queryString, new String[] { "str", "id" }, QUERY_FLAGS, analyzer); bq.add(q, BooleanClause.Occur.MUST); Directory directory = new org.apache.lucene.store.SimpleFSDirectory( new File(indexDir)); IndexSearcher indexSearcher = new IndexSearcher(directory); TopDocs topDocs = indexSearcher.search(bq, 2); System.out.println(topDocs.totalHits); ScoreDoc[] hits = topDocs.scoreDocs; for (ScoreDoc scoreDoc : hits) { Document document = indexSearcher.doc(scoreDoc.doc); System.out.println(String.format("doc id is %s ,and score is %s:", document.get("id"), scoreDoc.score + "")); } indexSearcher.close(); } public static void main(String[] ar) throws Exception { IndexService indexService = new IndexService(); StringBuilder content = new StringBuilder(); content.append("茶道,就是品赏茶的美感之道。茶道是一种烹茶饮茶的生活艺术,一种以茶为媒的生活礼仪,一种以茶修身的生活方式。"); content.append("它通过沏茶、赏茶、闻茶、饮茶、增进友谊,美心修德,学习礼法,是很有益的一种和美仪式。"); content.append("喝茶能静心、静神,有助于陶冶情操、去除杂念,符合儒道的“内省修行”思想。"); 
content.append("茶道精神是茶文化的核心,是茶文化的灵魂。本账户的宗旨是传播茶叶,茶道,品茶,辨别的知识,让大家了解中华国粹。"); // 模拟100篇文本内容 for (int i = 0; i < 100; i++) { indexService.createIndex(content.toString()); } indexService.queryIndex("茶叶"); } }