Lucene创建索引 IKAnalyzer中文分词器案例

1.下载需要的jar包和配置,github资源链接:https://github.com/zhangliqingyun/jarlist/tree/master/lucene

2.创建一个目录索引

//创建一个文件索引

@Test

public void addFileDirectory() throws Exception{

//创建一个索引目录

Directory directory = FSDirectory.open(new File("./myindex6"));

//创建一个分词器

IKAnalyzer analyzer = new IKAnalyzer();

//创建一个indexWriter

IndexWriter indexWriter = new IndexWriter(directory,analyzer,MaxFieldLength.LIMITED);

        //创建一个文档

Document document = new Document();

//创建一个实体类

People people = new People();

//向实体类中添加字段数据

people.setId(9);

people.setName("美女真多啊fjhtjhtfgsjtsj方式都会认同一天我要提问");

people.setTitle("这是标题");

//向文档中添加字段数据

Field fieldId = new Field("id",people.getId().toString(),Store.YES,Index.ANALYZED);

Field fieldName = new Field("name",people.getName(),Store.YES,Index.ANALYZED);

Field fieldTitle = new Field("title",people.getTitle(),Store.YES,Index.ANALYZED);

//向文档中添加字段

document.add(fieldId);

document.add(fieldName);

document.add(fieldTitle);

//设置添加文档的相关度更大

document.setBoost(100f);

//向indexWriter中添加文档

indexWriter.addDocument(document);

//关闭indexWriter

indexWriter.close();

}

3.查询目录索引:

//查询文档索引

@Test

public void searchIndex() throws Exception{

//定义一个文档索引路径

Directory directory = FSDirectory.open(new File("./myindex"));

//创建IndexSearcher

IndexSearcher indexSearcher = new IndexSearcher(directory);

//定义一个分词器

Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

//定义一个查询器

QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30, new String[]{"title","name"}, analyzer);

//给查询条件赋值

Query query = queryParser.parse("lucene");

//向indexSearcher中添加查询条件

TopDocs topDocs =  indexSearcher.search(query, 20);

//得到文档集合

ScoreDoc[] scoreDocs = topDocs.scoreDocs;

//保存查询到的实体类集合

List<People> list = new ArrayList<People>();

//遍历文档集合

for(ScoreDoc scoreDoc : scoreDocs){

int index = scoreDoc.doc;

Document document = indexSearcher.doc(index);

People people = new People();

people.setId(Integer.parseInt(document.get("id")));

people.setName(document.get("name"));

people.setTitle(document.get("title"));

list.add(people);

}

//打印输出查询到的结果集

for(int i = 0;i < list.size();i++){

System.out.println(list.get(i).getId());

System.out.println(list.get(i).getName());

System.out.println(list.get(i).getTitle());

}

}

4.使用term查询目录索引:

//使用Term查询

@Test

public void TermQuery() throws Exception{

//查询条件

Term term = new Term("name","lucene");

Query query = new TermQuery(term);

//定义一个文档索引路径

Directory directory = FSDirectory.open(new File("./myindex"));

//创建IndexSearcher

IndexSearcher indexSearcher = new IndexSearcher(directory);

//向indexSearcher中添加查询条件

TopDocs topDocs =  indexSearcher.search(query, 20);

//得到文档集合

ScoreDoc[] scoreDocs = topDocs.scoreDocs;

//保存查询到的实体类集合

List<People> list = new ArrayList<People>();

//遍历文档集合

for(ScoreDoc scoreDoc : scoreDocs){

int index = scoreDoc.doc;

Document document = indexSearcher.doc(index);

People people = new People();

people.setId(Integer.parseInt(document.get("id")));

people.setName(document.get("name"));

people.setTitle(document.get("title"));

list.add(people);

}

//打印输出查询到的结果集

for(int i = 0;i < list.size();i++){

System.out.println(list.get(i).getId());

System.out.println(list.get(i).getName());

System.out.println(list.get(i).getTitle());

}

}

5.查询所有的目录索引

//查询所有的文档

@Test

public void queryAllDocs() throws Exception{

//查询所有的文档的query

Query query = new MatchAllDocsQuery();

//定义一个文档索引路径

Directory directory = FSDirectory.open(new File("./myindex"));

//创建IndexSearcher

IndexSearcher indexSearcher = new IndexSearcher(directory);

//向indexSearcher中添加查询条件

TopDocs topDocs =  indexSearcher.search(query, 20);

//得到文档集合

ScoreDoc[] scoreDocs = topDocs.scoreDocs;

//保存查询到的实体类集合

List<People> list = new ArrayList<People>();

//遍历文档集合

for(ScoreDoc scoreDoc : scoreDocs){

int index = scoreDoc.doc;

Document document = indexSearcher.doc(index);

People people = new People();

people.setId(Integer.parseInt(document.get("id")));

people.setName(document.get("name"));

people.setTitle(document.get("title"));

list.add(people);

}

//打印输出查询到的结果集

for(int i = 0;i < list.size();i++){

System.out.println(list.get(i).getId());

System.out.println(list.get(i).getName());

System.out.println(list.get(i).getTitle());

}

}

6.按照范围查询目录索引:

//按照范围查询

@Test   

public void queryRange() throws Exception{

//按照范围查询

Query query = NumericRangeQuery.newLongRange("id", 1L, 3L, true, true);

//定义一个文档索引路径

Directory directory = FSDirectory.open(new File("./myindex2"));

//创建IndexSearcher

IndexSearcher indexSearcher = new IndexSearcher(directory);

//向indexSearcher中添加查询条件

TopDocs topDocs =  indexSearcher.search(query, 20);

//得到文档集合

ScoreDoc[] scoreDocs = topDocs.scoreDocs;

//保存查询到的实体类集合

List<People> list = new ArrayList<People>();

//遍历文档集合

for(ScoreDoc scoreDoc : scoreDocs){

int index = scoreDoc.doc;

Document document = indexSearcher.doc(index);

People people = new People();

people.setId(NumericUtils.prefixCodedToInt(document.get("id")));

people.setName(document.get("name"));

people.setTitle(document.get("title"));

list.add(people);

}

//打印输出查询到的结果集

for(int i = 0;i < list.size();i++){

System.out.println(list.get(i).getId());

System.out.println(list.get(i).getName());

System.out.println(list.get(i).getTitle());

}

}

7.使用通配符查询目录索引:

//通配符查询

@Test   

public void wildcardQuery() throws Exception{

//组织查询语句

Term term = new Term("name","l*");

//按照范围查询

Query query = new WildcardQuery(term);

//定义一个文档索引路径

Directory directory = FSDirectory.open(new File("./myindex3"));

//创建IndexSearcher

IndexSearcher indexSearcher = new IndexSearcher(directory);

//向indexSearcher中添加查询条件

TopDocs topDocs =  indexSearcher.search(query, 20);

//得到文档集合

ScoreDoc[] scoreDocs = topDocs.scoreDocs;

//保存查询到的实体类集合

List<People> list = new ArrayList<People>();

//遍历文档集合

for(ScoreDoc scoreDoc : scoreDocs){

int index = scoreDoc.doc;

Document document = indexSearcher.doc(index);

People people = new People();

people.setId(Integer.parseInt(document.get("id")));

people.setName(document.get("name"));

people.setTitle(document.get("title"));

list.add(people);

}

//打印输出查询到的结果集

for(int i = 0;i < list.size();i++){

System.out.println(list.get(i).getId());

System.out.println(list.get(i).getName());

System.out.println(list.get(i).getTitle());

}

}

8.高亮显示查询:

//高亮查询

@Test

public void highLighterQuery() throws Exception{

//组织查询语句

Term term = new Term("name","北京");

TermQuery termQuery = new TermQuery(term);

Term term2 = new Term("name","美女");

TermQuery termQuery2 = new TermQuery(term2);

Term term3 = new Term("name","北京美女");   

TermQuery termQuery3 = new TermQuery(term3);

 

BooleanQuery booleanQuery = new BooleanQuery();

booleanQuery.add(termQuery, Occur.SHOULD);

booleanQuery.add(termQuery2,Occur.SHOULD);

booleanQuery.add(termQuery3,Occur.SHOULD);

//索引查询的路径

Directory directory = FSDirectory.open(new File("./myindex6"));

//创建一个indexSearcher

IndexSearcher indexSearcher = new IndexSearcher(directory);

//设置高亮显示的样式

Formatter formatter = new SimpleHTMLFormatter("<font color='red'>","</font>");

//设置一个结果查询

Scorer scorer = new QueryScorer(booleanQuery);

//创建一个高亮器

Highlighter highLighter = new Highlighter(formatter, scorer);

//搜索的摘要

Fragmenter fragmenter = new SimpleFragmenter(10);

//设置高亮摘要

highLighter.setTextFragmenter(fragmenter);

//添加查询条件

TopDocs topDocs = indexSearcher.search(booleanQuery, 20);

System.out.println(topDocs.totalHits);

//赋值到结果集中

ScoreDoc[] scoreDocs = topDocs.scoreDocs;

//保存people的结果集

List<People> list = new ArrayList<People>();

//遍历结果集

for(ScoreDoc scoreDoc : scoreDocs){

int index = scoreDoc.doc;

System.out.println("相关度得分:"+scoreDoc.score);

Document document = indexSearcher.doc(index);

People people = new People();

people.setId(Integer.parseInt(document.get("id")));

people.setName(document.get("name"));

people.setTitle(document.get("title"));

Analyzer analyzer = new IKAnalyzer();

String name = highLighter.getBestFragment(analyzer, "name", people.getName());

people.setName(name);  

list.add(people);

}

//遍历打印输出list中存储的值

for(int i = 0;i < list.size();i++){

System.out.println(list.get(i).getId());

System.out.println(list.get(i).getName());

System.out.println(list.get(i).getTitle());

}

indexSearcher.close();

 

}

9.创建内存索引

//创建一个内存索引库

@Test

public void addMemoryIndex() throws Exception{

//创建一个内存索引库目录

Directory ramDirectory = new RAMDirectory();

//创建一个分词器

Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

//创建一个IndexWriter

IndexWriter indexWriter = new IndexWriter(ramDirectory, analyzer, MaxFieldLength.LIMITED);

//创建一个实体类people

People people = new People();

//给实体类people添加字段值

people.setId(1);

people.setAge(24);

people.setName("lucene是一个搜搜服务器");

//创建一个添加文档

Document document = new Document();

//创建文档的添加字段

Field fieldId = new Field("id",people.getId().toString(),Store.YES,Index.ANALYZED);

Field fieldAge = new Field("age",people.getAge().toString(),Store.YES,Index.ANALYZED);

Field fieldName = new Field("name",people.getName(),Store.YES,Index.ANALYZED);

document.add(fieldId);

document.add(fieldAge);

document.add(fieldName);

indexWriter.addDocument(document);

indexWriter.close();

//调用查询索引的方法

searchIndex(ramDirectory);

}

10.删除目录索引:

//删除索引

@Test

public void deleteIndex() throws Exception{

//创建一个索引目录

Directory directory = FSDirectory.open(new File("./myindex5"));

//创建一个分词器

Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

//创建一个indexwriter

IndexWriter indexWriter = new IndexWriter(directory,analyzer,MaxFieldLength.LIMITED);

//组织删除的条件语句

Term term = new Term("name","hello");

indexWriter.deleteDocuments(term);

indexWriter.close();  

}

11.更新目录索引(先删除,后增加):

//更新索引

@Test

public void updateIndex() throws Exception{

//创建一个索引目录

Directory directory = FSDirectory.open(new File("./myindex5"));

//创建一个分词器

Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

//创建一个indexwriter

IndexWriter indexWriter = new IndexWriter(directory,analyzer,MaxFieldLength.LIMITED);

//要更新的查询条件

Term term = new Term("name","hello");

//创建一个实体类对象

People people = new People();

//给实体对象赋值

people.setAge(24);

people.setId(2);

people.setName("这是更新后的hello");

//更新为的新文档

Document document = new Document();

//给文档赋值

Field fieldId = new Field("id",people.getId().toString(),Store.YES,Index.ANALYZED);

Field fieldAge = new Field("age",people.getAge().toString(),Store.YES,Index.ANALYZED);

Field fieldName = new Field("name",people.getName(),Store.YES,Index.ANALYZED);

//把字段添加到文档中

document.add(fieldId);

document.add(fieldAge);

document.add(fieldName);

//执行更新操作

indexWriter.updateDocument(term, document);

//关闭indexWriter

indexWriter.close();   

}

12.优化目录索引(合并相同项):

//优化(合并相同的数据)

@Test

public void optimize() throws Exception{

//操作索引的目录

Directory directory = FSDirectory.open(new File("./myindex5"));

//创建一个分词器

Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

//创建一个indexWriter

IndexWriter indexWriter = new IndexWriter(directory,analyzer,MaxFieldLength.LIMITED);

//优化indexWriter

indexWriter.optimize();

indexWriter.close();

 

}

13.中文分词器:

①导jar包:

②导入配置文件:

③配置IKAnalyzer.cfg.xml内容

猜你喜欢

转载自blog.csdn.net/ZHANGLIZENG/article/details/87862212
今日推荐