java操作ES

package com.test.es;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.InetAddress;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.QueryStringQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.junit.Test;
import org.springframework.stereotype.Service;

/**
 * Elasticsearch helper for the {@code hww-opinion} index.
 *
 * <p>The class can (re)create the index with its {@code htmlbean} mapping, import rows from an
 * Excel workbook as documents, and run a paged, highlighted keyword search.
 *
 * <p>Index administration goes through {@code client.admin().indices()}; document operations go
 * through {@code client.prepareIndex(...)} / {@code client.prepareSearch(...)}.
 */
@Service
public class IndexService {

    /** Index that every operation in this class targets. */
    private static final String INDEX_NAME = "hww-opinion";

    /** Mapping/document type used inside {@link #INDEX_NAME}. */
    private static final String TYPE_NAME = "htmlbean";

    /** Analyzer name configured for the full-text fields, both at index and at search time. */
    private static final String IK_ANALYZER = "ik_max_word";

    /** Cluster nodes registered with the transport client, in this order. */
    private static final String[] NODE_HOSTS = {"192.168.12.114", "192.168.12.115", "192.168.12.116"};

    /** Transport port used for every entry of {@link #NODE_HOSTS}. */
    private static final int TRANSPORT_PORT = 9300;

    /** Number of hits requested per search page. */
    private static final int PAGE_SIZE = 10;

    /** Length of one highlighted fragment. */
    private static final int FRAGMENT_SIZE = 14;

    /** Maximum number of highlighted fragments per field; fragments are joined with "...". */
    private static final int MAX_FRAGMENTS = 3;

    private static final String HIGHLIGHT_PRE_TAG = "<font color=\"red\">";
    private static final String HIGHLIGHT_POST_TAG = "</font>";

    /**
     * Path of the Excel workbook imported by {@link #addHtmlToES()}.
     * Chinese data: 20180116-c.xlsx, English data: 20180116-e.xlsx.
     */
    public static String DATA_DIR = "D:\\javaTool\\20180116-e.xlsx";

    /** Shared transport client; stays {@code null} when the connection set-up below fails. */
    public static Client client;

    static {
        Settings settings = Settings.settingsBuilder()
                .put("cluster.name", "es-hww") // name of the cluster to join
                .build();
        TransportClient transport = null;
        try {
            transport = TransportClient.builder().settings(settings).build();
            for (String host : NODE_HOSTS) {
                transport.addTransportAddress(
                        new InetSocketTransportAddress(InetAddress.getByName(host), TRANSPORT_PORT));
            }
            // Publish the client only once every node address has been registered.
            client = transport;
        } catch (Exception e) {
            e.printStackTrace();
            if (transport != null) {
                // Do not leak a half-configured client when an address lookup fails.
                transport.close();
            }
        }
    }

    /**
     * Returns the shared client or fails with a descriptive error.
     *
     * @return the non-null shared client
     * @throws IllegalStateException if {@link #client} is {@code null}, e.g. because the static
     *         initializer could not set up the connection
     */
    private static Client requireClient() {
        Client current = client;
        if (current == null) {
            throw new IllegalStateException(
                    "Elasticsearch client is not initialised; check the connection error printed during class loading");
        }
        return current;
    }

    /**
     * Drops the index if it already exists, creates it again and installs the field mapping.
     *
     * <p>WARNING: every document previously stored in the index is deleted.
     *
     * @throws Exception if building the mapping or any index-administration call fails
     */
    @Test
    public void createIndex() throws Exception {
        Client es = requireClient();

        // Start from a clean slate: delete an existing index before creating it.
        IndicesExistsResponse existing = es.admin().indices().prepareExists(INDEX_NAME).execute().actionGet();
        if (existing.isExists()) {
            es.admin().indices().prepareDelete(INDEX_NAME).execute().actionGet();
        }
        es.admin().indices().prepareCreate(INDEX_NAME).execute().actionGet();

        PutMappingRequest mapping =
                Requests.putMappingRequest(INDEX_NAME).type(TYPE_NAME).source(buildMapping());
        es.admin().indices().putMapping(mapping).actionGet();
    }

    /**
     * Builds the JSON mapping of the document type.
     *
     * <p>{@code url} is a plain string; {@code title}, {@code introduction} and {@code content}
     * are analyzed with {@link #IK_ANALYZER}; all remaining fields are {@code not_analyzed}.
     */
    private static XContentBuilder buildMapping() throws java.io.IOException {
        XContentBuilder builder = XContentFactory.jsonBuilder()
                .startObject()
                .startObject(TYPE_NAME)
                .startObject("properties");

        builder.startObject("url").field("type", "string").endObject(); // link address
        analyzedField(builder, "title");                                 // title
        notAnalyzedField(builder, "author");                             // author
        analyzedField(builder, "introduction");                          // summary
        analyzedField(builder, "content");                               // body text
        notAnalyzedField(builder, "publishDate");                        // publish time
        notAnalyzedField(builder, "mediaType");                          // media type
        notAnalyzedField(builder, "area");                               // region
        notAnalyzedField(builder, "attribute");                          // opinion attribute
        notAnalyzedField(builder, "languages");                          // language
        notAnalyzedField(builder, "source");                             // source

        return builder.endObject().endObject().endObject();
    }

    /** Appends a string field that uses {@link #IK_ANALYZER} for indexing and searching. */
    private static void analyzedField(XContentBuilder builder, String name) throws java.io.IOException {
        builder.startObject(name)
                .field("type", "string")
                .field("analyzer", IK_ANALYZER)
                .field("search_analyzer", IK_ANALYZER)
                .endObject();
    }

    /** Appends a string field that is indexed as-is ({@code not_analyzed}). */
    private static void notAnalyzedField(XContentBuilder builder, String name) throws java.io.IOException {
        builder.startObject(name)
                .field("type", "string")
                .field("index", "not_analyzed")
                .endObject();
    }

    /**
     * Imports the workbook at {@link #DATA_DIR} into the index.
     *
     * <p>Failures are reported on standard error and not propagated, so callers never see an
     * exception from this method.
     */
    @Test
    public void addHtmlToES() {
        try {
            readExcel(new File(DATA_DIR));
        } catch (Exception e) {
            System.err.println("Import of " + DATA_DIR + " failed:");
            e.printStackTrace();
        }
    }

    /**
     * Reads every sheet of an .xlsx workbook and indexes one document per non-empty row.
     *
     * <p>Row 0 of each sheet is skipped. Columns 0-4 are stored as url, title, author,
     * introduction and content; the remaining document fields are filled with fixed values and
     * the current timestamp. A running per-sheet row counter is printed after each document.
     *
     * @param file the workbook to import
     * @throws Exception if the file cannot be read or parsed, or if indexing a row fails
     */
    public void readExcel(File file) throws Exception {
        Client es = requireClient();
        // Hoisted out of the row loop; the formatter is confined to this call.
        SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

        // try-with-resources guarantees the file handle is released even when parsing or indexing fails.
        try (InputStream in = new FileInputStream(file)) {
            XSSFWorkbook workbook = new XSSFWorkbook(in);
            for (int sheetIndex = 0; sheetIndex < workbook.getNumberOfSheets(); sheetIndex++) {
                XSSFSheet sheet = workbook.getSheetAt(sheetIndex);
                if (sheet == null) {
                    continue;
                }
                int indexed = 0; // documents written for this sheet
                for (int rowIndex = 1; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
                    XSSFRow row = sheet.getRow(rowIndex);
                    if (row == null) {
                        continue;
                    }
                    indexed++;

                    Map<String, String> document = new HashMap<String, String>();
                    document.put("url", cellText(row.getCell(0)));          // link address
                    document.put("title", cellText(row.getCell(1)));        // title
                    document.put("author", cellText(row.getCell(2)));       // author
                    document.put("introduction", cellText(row.getCell(3))); // summary
                    document.put("content", cellText(row.getCell(4)));      // body text
                    document.put("publishDate", timestampFormat.format(new Date())); // import time
                    document.put("mediaType", "新闻");
                    document.put("area", "东京");
                    document.put("attribute", "正面");
                    document.put("languages", "英文");
                    document.put("source", "人民网");

                    es.prepareIndex(INDEX_NAME, TYPE_NAME).setSource(document).execute().actionGet();
                    System.out.println(indexed + "===================");
                }
            }
        }
    }

    /** Returns the cell's {@code toString()} text, or an empty string for a missing cell. */
    private static String cellText(XSSFCell cell) {
        return cell == null ? "" : cell.toString();
    }

    /**
     * Converts a cell to text according to its type: boolean and numeric cells are rendered via
     * {@link String#valueOf}, every other cell is read as a string cell.
     *
     * <p>Currently not called; kept for callers that need type-aware conversion.
     */
    private String getValue(XSSFCell cell) {
        if (cell.getCellType() == XSSFCell.CELL_TYPE_BOOLEAN) {
            return String.valueOf(cell.getBooleanCellValue());
        } else if (cell.getCellType() == XSSFCell.CELL_TYPE_NUMERIC) {
            return String.valueOf(cell.getNumericCellValue());
        } else {
            return String.valueOf(cell.getStringCellValue());
        }
    }

    /**
     * Runs a paged keyword search and returns the hits with highlighted title and content.
     *
     * @param kw    the keyword(s) to search for
     * @param num   the page number; page 1 starts at offset 0
     * @param count value handed to {@code PageBean.setTotalCount} before the start row is read
     *              for pages other than the first (presumably the total hit count of an earlier
     *              search; confirm against PageBean)
     * @return the page bean carrying the total hit count and one bean per hit
     * @throws Exception if the search request fails
     */
    public PageBean<HWWHtmlBean> search(String kw, int num, int count) throws Exception {
        Client es = requireClient();
        PageBean<HWWHtmlBean> page = new PageBean<HWWHtmlBean>();
        page.setIndex(num);

        // NOTE(review): the query targets "area" and "content" while highlighting is requested
        // for "title" and "content"; confirm whether "title" was meant to be queried as well.
        MultiMatchQueryBuilder query = new MultiMatchQueryBuilder(kw, new String[] {"area", "content"});

        // Only the offset differs between the first page and later pages.
        int from;
        if (page.getIndex() == 1) {
            from = 0;
        } else {
            page.setTotalCount(count);
            from = page.getStartRow();
        }

        SearchResponse response = es.prepareSearch(INDEX_NAME)
                .setTypes(TYPE_NAME)
                .setQuery(query)
                .addHighlightedField("title")
                .addHighlightedField("content")
                .setHighlighterPreTags(HIGHLIGHT_PRE_TAG)
                .setHighlighterPostTags(HIGHLIGHT_POST_TAG)
                .setHighlighterFragmentSize(FRAGMENT_SIZE)
                .setHighlighterNumOfFragments(MAX_FRAGMENTS)
                .setFrom(from)
                .setSize(PAGE_SIZE)
                .execute().actionGet();

        SearchHits hits = response.getHits();
        // Clamp instead of silently wrapping when the hit count exceeds the int range.
        page.setTotalCount((int) Math.min(hits.getTotalHits(), (long) Integer.MAX_VALUE));

        for (SearchHit hit : hits.getHits()) {
            HWWHtmlBean bean = new HWWHtmlBean();

            Text[] titleFragments = highlightFragments(hit, "title");
            if (titleFragments.length == 0) {
                // No highlight for the title: fall back to the stored value.
                bean.setTitle(sourceText(hit, "title"));
            } else {
                bean.setTitle(titleFragments[0].toString());
            }

            Text[] contentFragments = highlightFragments(hit, "content");
            if (contentFragments.length == 0) {
                // No highlight for the content: fall back to the stored value.
                bean.setContent(sourceText(hit, "content"));
            } else {
                StringBuilder excerpt = new StringBuilder();
                for (Text fragment : contentFragments) {
                    excerpt.append(fragment.toString()).append("...");
                }
                bean.setContent(excerpt.toString());
            }

            bean.setUrl(sourceText(hit, "url"));
            page.setBean(bean);
        }
        return page;
    }

    /** Returns the highlight fragments of a field, or an empty array when there are none. */
    private static Text[] highlightFragments(SearchHit hit, String field) {
        if (hit.getHighlightFields().get(field) == null) {
            return new Text[0];
        }
        Text[] fragments = hit.getHighlightFields().get(field).getFragments();
        return fragments == null ? new Text[0] : fragments;
    }

    /** Returns a stored source field as text, or an empty string when the document lacks it. */
    private static String sourceText(SearchHit hit, String field) {
        Object value = hit.getSource().get(field);
        return value == null ? "" : value.toString();
    }
}

猜你喜欢

转载自blog.csdn.net/u011418530/article/details/80475228
今日推荐