之前也学习过Solr。Solr是在Lucene的基础上设计的,帮我们封装好了一些API,但使用起来总感觉不够灵活,所以在之前公司的一个小项目中,尝试直接使用Lucene来提供全文检索的支持。
文章实体类如下
/**
 * Article entity.
 *
 * <p>Exposes JavaBean accessors for every field, plus the {@code content}
 * property and the {@code (id, title, content)} constructor that
 * {@code LuceneUtil} relies on when indexing and when building search results.
 */
public class Article {
    private Integer id;
    private String title;
    // Body text of the article; read by LuceneUtil via getContent() when indexing.
    private String content;
    private Integer author;
    private Integer type;
    private String isrecommend;
    private Integer typenumber;
    private Date postmodified;
    private Date postdate;

    /** Creates an empty article; populate it through the setters. */
    public Article() {
    }

    /**
     * Creates an article carrying only the fields used for search results.
     *
     * @param id      article identifier
     * @param title   article title
     * @param content article body text
     */
    public Article(Integer id, String title, String content) {
        this.id = id;
        this.title = title;
        this.content = content;
    }

    public Integer getId() {
        return id;
    }

    public void setId(Integer id) {
        this.id = id;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public Integer getAuthor() {
        return author;
    }

    public void setAuthor(Integer author) {
        this.author = author;
    }

    public Integer getType() {
        return type;
    }

    public void setType(Integer type) {
        this.type = type;
    }

    public String getIsrecommend() {
        return isrecommend;
    }

    public void setIsrecommend(String isrecommend) {
        this.isrecommend = isrecommend;
    }

    public Integer getTypenumber() {
        return typenumber;
    }

    public void setTypenumber(Integer typenumber) {
        this.typenumber = typenumber;
    }

    public Date getPostmodified() {
        return postmodified;
    }

    public void setPostmodified(Date postmodified) {
        this.postmodified = postmodified;
    }

    public Date getPostdate() {
        return postdate;
    }

    public void setPostdate(Date postdate) {
        this.postdate = postdate;
    }
}

/**
 * Content record of an article.
 *
 * <p>NOTE(review): presumably one row per article per language, linked through
 * {@code articleid} — confirm against the schema.
 */
public class ArticleContent {
    private Integer id;
    private String lang;
    private String title;
    private Integer articleid;
    private String content;

    public Integer getId() {
        return id;
    }

    public void setId(Integer id) {
        this.id = id;
    }

    public String getLang() {
        return lang;
    }

    public void setLang(String lang) {
        this.lang = lang;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public Integer getArticleid() {
        return articleid;
    }

    public void setArticleid(Integer articleid) {
        this.articleid = articleid;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }
}
Lucene工具类:分词器使用的是IK分词器。由于前端传到后台的是用编辑器生成的HTML代码,所以需要用HTMLStripCharFilter过滤器将HTML标签过滤掉,不然会产生大量冗余的内容,影响查询效率。
"luceneUtil") (public class LuceneUtil { private String lang; private String PATH = null; private Analyzer analyzer = null; private Analyzer analyzerCn = new IKAnalyzer(true); private Analyzer analyzerEn = new StandardAnalyzer(); public IndexSearcher createIndexSearcher() { Path path2 = FileSystems.getDefault().getPath(PATH); try { FSDirectory directory = FSDirectory.open(path2); DirectoryReader reader = DirectoryReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(reader); return indexSearcher; } catch (IOException e) { e.printStackTrace(); } return null; } public IndexWriter createIndexWriter() { Path path2 = FileSystems.getDefault().getPath(PATH); try { FSDirectory directory = FSDirectory.open(path2); // String string = directory.getDirectory().toFile().getAbsolutePath(); // System.out.println(string); Analyzer analyzer2 = getAnalyzer(); IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer); IndexWriter indexWriter = new IndexWriter(directory, writerConfig); return indexWriter; } catch (IOException e) { e.printStackTrace(); } return null; } //创建索引 public Map<String, String> createIndex(Article article) { HashMap<String, String> hashMap = new HashMap<String, String>(); IndexWriter indexWriter = createIndexWriter(); Document document = new Document(); document.add(new StringField("id", String.valueOf(article.getId()), Field.Store.YES)); TextField titleField = new TextField("title", article.getTitle(), Field.Store.YES); // 设置标题的权重 titleField.setBoost(4F); document.add(titleField); String content = article.getContent(); StringBuilder sb = new StringBuilder(); // html过滤 HTMLStripCharFilter htmlscript = new HTMLStripCharFilter(new StringReader(content)); // 增加映射过滤 主要过滤掉换行符 NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder(); builder.add("\r", "");// 回车 builder.add("\t", "");// 横向跳格 builder.add("\n", "");// 换行 CharFilter cs = new MappingCharFilter(builder.build(), htmlscript); try { char[] buffer = new char[10240]; int count; 
while ((count = cs.read(buffer)) != -1) { sb.append(new String(buffer, 0, count)); } content = sb.toString(); cs.close(); document.add(new TextField("content", content, Field.Store.YES)); indexWriter.addDocument(document); indexWriter.commit(); indexWriter.close(); hashMap.put("state", "0"); return hashMap; } catch (IOException e) { e.printStackTrace(); } hashMap.put("state", "1"); return hashMap; } public Map<String, String> createIndexByList(List<Article> list) { HashMap<String, String> hashMap = new HashMap<String, String>(); IndexWriter indexWriter = createIndexWriter(); try { for (Article article : list) { Document document = new Document(); document.add(new StringField("id", String.valueOf(article.getId()), Field.Store.YES)); document.add(new TextField("title", article.getTitle(), Field.Store.YES)); document.add(new TextField("content", article.getContent(), Field.Store.YES)); indexWriter.addDocument(document); } indexWriter.commit(); indexWriter.close(); hashMap.put("state", "0"); return hashMap; } catch (IOException e) { e.printStackTrace(); } hashMap.put("state", "1"); return hashMap; } // 页数从0开始,全文检索分页 public PageUtil<Article> searcherByKey(Integer cp, Integer ps, String key) { IndexSearcher indexSearcher = createIndexSearcher(); String[] strings = { "title", "content" }; MultiFieldQueryParser parser = new MultiFieldQueryParser(strings, analyzer); ArrayList<Article> arrayList = new ArrayList<Article>(); try { Query query = parser.parse(key); // 高亮显示 TopDocs docs = indexSearcher.search(query, 200); ScoreDoc[] scoreDocs = docs.scoreDocs; QueryScorer scorer = new QueryScorer(query); SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color=red>", "</font>"); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer); // 返回多少个字符 highlighter.setTextFragmenter(new SimpleFragmenter(100)); Integer start = cp * ps; Integer end = (cp + 1) * ps; int length = scoreDocs.length; Integer count = length > end ? 
end : length; System.out.println(length); Integer allPage = (length + ps - 1) / ps; for (int i = start; i < count; i++) { int doc = scoreDocs[i].doc; Document doc2 = indexSearcher.doc(doc); String string = doc2.get("id"); Integer id = Integer.valueOf(string); String title = doc2.get("title"); String content = doc2.get("content"); String strTitle = highlighter.getBestFragment(analyzer, "title", title); String strContent = highlighter.getBestFragment(analyzer, "content", content); title = strTitle == null ? title : strTitle; content = strContent == null ? content.substring(0, 100) : strContent; Article article = new Article(id, title, content); arrayList.add(article); } PageUtil<Article> pageUtil = new PageUtil<Article>(cp, ps, arrayList, allPage); return pageUtil; } catch (ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } return null; } public void deleteIndex(String id) { IndexWriter indexWriter = createIndexWriter(); Analyzer analyzer2 = getAnalyzer(); String[] strings = { "id" }; MultiFieldQueryParser parser = new MultiFieldQueryParser(strings, analyzer2); try { Query query = parser.parse(id); indexWriter.deleteDocuments(query); indexWriter.close(); } catch (ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public String getLang() { return lang; } /** * 通过语言配置相应的目录和分词器 * * @param lang */ public void setLang(String lang) { this.lang = lang; if (lang.equals("cn")) { this.PATH = "../article"; this.analyzer = analyzerCn; } else { this.PATH = "../articleEn"; this.analyzer = analyzerEn; } } public String getPATH() { return PATH; } public void setPATH(String pATH) { PATH = pATH; } public Analyzer getAnalyzer() { return analyzer; } public void setAnalyzer(Analyzer analyzer) { this.analyzer = analyzer; } }