17 ElasticSearch集成SpringBoot实现模仿京东搜索引擎

ElasticSearch集成SpringBoot实现模仿京东搜索引擎

使用的版本:

	SpringBoot:2.3.4.RELEASE
	ElasticSearch:7.17.3(需与本机安装的Elasticsearch服务端版本保持一致)

1.创建空的Maven项目,并在pom.xml中指定使用的SpringBoot和ElasticSearch的版本

	<!-- Parent POM: this declaration fixes the Spring Boot version (2.3.4.RELEASE) for the project. -->
	<parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.3.4.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>

    <!-- Maven coordinates of this demo project. -->
    <groupId>org.example</groupId>
    <artifactId>springboot-elasticsearch</artifactId>
    <version>1.0-SNAPSHOT</version>
    <!--pom.xml-->
    <properties>
        <!-- Java language level used for compilation. -->
        <java.version>1.8</java.version>
        <!-- Elasticsearch version; set to 7.17.3 to match the locally installed server.
             NOTE(review): presumably read by Spring Boot's dependency management to replace
             its default Elasticsearch version; confirm against the Spring Boot 2.3 docs. -->
        <elasticsearch.version>7.17.3</elasticsearch.version>
    </properties>
SpringBoot的版本在parent中指定;ElasticSearch的版本则通过properties中的elasticsearch.version指定,用来覆盖SpringBoot默认管理的ElasticSearch版本。

2.引入依赖

<dependencies>

        <!-- Elasticsearch core library; the version comes from the elasticsearch.version
             property so that every Elasticsearch artifact stays on the same release. -->
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
            <version>${elasticsearch.version}</version>
        </dependency>
        <!-- Spring Data Elasticsearch starter (explicit version equals the parent's version). -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
            <version>2.3.4.RELEASE</version>
        </dependency>
        <!-- Elasticsearch Java API client; its transitive low-level REST client is excluded.
             NOTE(review): none of the Java classes in this article reference this client; confirm it is needed. -->
        <dependency>
            <groupId>co.elastic.clients</groupId>
            <artifactId>elasticsearch-java</artifactId>
            <version>${elasticsearch.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.elasticsearch.client</groupId>
                    <artifactId>elasticsearch-rest-client</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- Transport client; the Elasticsearch core artifacts are excluded so the
             explicitly declared version above is the one that is used.
             NOTE(review): none of the Java classes in this article reference this client; confirm it is needed. -->
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>transport</artifactId>
            <version>${elasticsearch.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>elasticsearch</artifactId>
                    <groupId>org.elasticsearch</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>elasticsearch-core</artifactId>
                    <groupId>org.elasticsearch</groupId>
                </exclusion>
            </exclusions>
        </dependency>


        <!-- Web layer: Thymeleaf views and Spring MVC. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <!-- Development-time helpers. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-configuration-processor</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- JSON serialization of the scraped entities.
             Raised from 1.2.73 to 1.2.83, the release that fixes the autoType
             deserialization vulnerability (CVE-2022-25845). -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>
        <!-- HTML parser used to scrape the JD result page.
             Raised from 1.13.1 to 1.15.3 because the scraper parses untrusted remote HTML
             and the older release has published parser advisories (CVE-2021-37714, CVE-2022-36033). -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.15.3</version>
        </dependency>
    </dependencies>

3.编写Elasticsearch配置类,注册RestHighLevelClient这个Bean

package com.qidi.config;

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * HEU
 * Harbin Engineering University
 *
 * <p>Spring configuration that registers the Elasticsearch high-level REST client as a bean.
 *
 * @author QiDi
 * @date 2022/12/15 15:16
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Creates the client that the rest of the application uses to talk to Elasticsearch.
     *
     * @return a {@link RestHighLevelClient} connected to {@code http://localhost:9200}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        // Single node: host, port and scheme of the Elasticsearch instance.
        HttpHost node = new HttpHost("localhost", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }

}

4.配置实体类

package com.qidi.entity;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * HEU
 * Harbin Engineering University
 *
 * <p>One product entry extracted from a JD search result page. Accessors, the no-args
 * constructor and the all-args constructor are generated by Lombok.
 *
 * @author QiDi
 * @date 2022/12/15 15:26
 */
@AllArgsConstructor
@NoArgsConstructor
@Data
public class HtmlContent {

    // Field order is significant: the Lombok all-args constructor takes (title, price, imgUrl).

    /** Text of the product title. */
    private String title;

    /** Price text exactly as scraped; kept as a string, not parsed into a number. */
    private String price;

    /** Image address of the product picture. */
    private String imgUrl;

}

5.配置工具类

爬取JD页面数据工具类

package com.qidi.utils;

import com.qidi.entity.HtmlContent;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
 * HEU
 * Harbin Engineering University
 *
 * <p>Scrapes product listings from JD search result pages with Jsoup.
 *
 * @author QiDi
 * @date 2022/12/15 15:28
 */
public class HtmlParseUtils {

    /** JD search page address; the URL-encoded keyword is appended to it. */
    private static final String SEARCH_URL = "https://search.jd.com/Search?keyword=";

    /** Timeout passed to Jsoup when fetching the page, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches the JD search result page for the given keyword and extracts the title,
     * price text and image address of every {@code li} found inside the
     * {@code J_goodsList} container.
     *
     * @param keyword the search term; it is URL-encoded as UTF-8 before being sent
     * @return the extracted entries; empty (never {@code null}) when the page has no
     *         {@code J_goodsList} container
     * @throws IllegalArgumentException if {@code keyword} is {@code null}
     * @throws IOException if the page cannot be fetched
     */
    public List<HtmlContent> parseJD(String keyword) throws IOException {
        if (keyword == null) {
            throw new IllegalArgumentException("keyword must not be null");
        }

        // Encode the keyword so reserved characters (&, #, +, %) and non-ASCII text
        // cannot corrupt the query string. The (String, String) overload is used because
        // the project targets Java 8.
        String url = SEARCH_URL + java.net.URLEncoder.encode(keyword, "UTF-8");

        // Download and parse the result page.
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        List<HtmlContent> contents = new ArrayList<>();

        // All products live inside the div with id J_goodsList.
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The container is absent when the response is not a regular result page;
            // report "no products" instead of failing with a NullPointerException.
            return contents;
        }

        // Each li below the container describes one product.
        for (Element li : goodsList.getElementsByTag("li")) {
            String title = li.getElementsByClass("p-name").eq(0).text();
            // The page lazy-loads images, so the address is in data-lazy-img rather than src.
            String img = li.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = li.getElementsByClass("p-price").eq(0).text();

            contents.add(new HtmlContent(title, price, img));
        }

        return contents;
    }
}

索引常量工具类

package com.qidi.utils;

import java.util.Stack;

/**
 * HEU
 * Harbin Engineering University
 *
 * <p>Holder for constants shared across the application. It only contains static
 * members, so it is final and cannot be instantiated.
 *
 * @author QiDi
 * @date 2022/12/15 18:53
 */
public final class CommonConstant {

    /** Name of the Elasticsearch index used by the application. */
    public static final String INDEX = "test_jd_goods";

    /** Prevents instantiation of this constants holder. */
    private CommonConstant() {
    }
}

6.编写Service层

package com.qidi.service;

import com.alibaba.fastjson.JSON;
import com.qidi.entity.HtmlContent;
import com.qidi.utils.CommonConstant;
import com.qidi.utils.HtmlParseUtils;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.AbstractHighlighterBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.elasticsearch.xcontent.XContentType;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import javax.naming.directory.SearchResult;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.TimeUnit;

/**
 * HEU
 * Harbin Engineering University
 *
 * <p>Indexes scraped JD products into Elasticsearch and searches them with
 * highlighted titles.
 *
 * @author QiDi
 * @date 2022/12/15 18:42
 */
@Service
public class ContentService {

    /** Document field that is both queried and highlighted. */
    private static final String TITLE_FIELD = "title";

    @Autowired
    RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the JD products for the keyword and bulk-indexes them into the
     * {@link CommonConstant#INDEX} index.
     *
     * @param keyword the search term forwarded to {@link HtmlParseUtils#parseJD(String)}
     * @return {@code true} when the bulk request completed without failures;
     *         {@code false} when it had failures or when nothing was scraped
     * @throws IOException if scraping or the Elasticsearch request fails
     */
    public Boolean addContent(String keyword) throws IOException {
        List<HtmlContent> contents = new HtmlParseUtils().parseJD(keyword);

        // A bulk request without any action fails client-side validation, so stop here
        // when there is nothing to index.
        if (contents == null || contents.isEmpty()) {
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");    // give the bulk operation up to two minutes

        for (HtmlContent content : contents) {
            // One index action per product, with the entity serialized as JSON.
            bulkRequest.add(
                    new IndexRequest(CommonConstant.INDEX)
                            .source(JSON.toJSONString(content), XContentType.JSON));
        }

        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

    /**
     * Searches the {@link CommonConstant#INDEX} index for documents whose title matches
     * the keyword and returns one page of results, with the title replaced by its
     * highlighted version where a highlight is available.
     *
     * @param keyword  the term to look up in the title field
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source maps of the hits on the requested page
     * @throws IOException if the Elasticsearch request fails
     */
    public List<Map<String, Object>> searchPageForHighLight(String keyword, int pageNo, int pageSize) throws IOException {
        int page = Math.max(pageNo, 1);

        SearchRequest searchRequest = new SearchRequest(CommonConstant.INDEX);
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

        // Paging: "from" is a zero-based document offset, not a page number, so page 1
        // must start at offset 0 and page n at (n - 1) * pageSize.
        searchSourceBuilder.from((page - 1) * pageSize);
        searchSourceBuilder.size(pageSize);

        // Term-level query on the title.
        // NOTE(review): a term query does not analyze the keyword; if the title is an
        // analyzed text field, multi-character keywords may not match. Confirm the mapping.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(TITLE_FIELD, keyword);
        searchSourceBuilder.query(termQueryBuilder);
        searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        // Highlighting: wrap matches in the title with a red span.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field(TITLE_FIELD);
        highlightBuilder.requireFieldMatch(false) // highlight the field even if the query matched elsewhere
                .preTags("<span style='color:red'>").postTags("</span>");
        searchSourceBuilder.highlighter(highlightBuilder);

        searchRequest.source(searchSourceBuilder);
        SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        List<Map<String, Object>> results = new ArrayList<>();
        for (SearchHit hit : response.getHits().getHits()) {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();   // the stored document
            HighlightField highlightedTitle = hit.getHighlightFields().get(TITLE_FIELD);

            // Replace the plain title with the concatenated highlighted fragments.
            if (highlightedTitle != null) {
                StringBuilder joined = new StringBuilder();
                for (Text fragment : highlightedTitle.fragments()) {
                    joined.append(fragment);
                }
                sourceAsMap.put(TITLE_FIELD, joined.toString());
            }
            results.add(sourceAsMap);
        }
        return results;
    }
}

7.编写Controller层

package com.qidi.controller;

import com.qidi.service.ContentService;
import com.sun.org.apache.xpath.internal.operations.Bool;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * HEU
 * Harbin Engineering University
 *
 * <p>REST endpoints that trigger scraping/indexing and run highlighted searches by
 * delegating to {@link ContentService}.
 *
 * @author QiDi
 * @date 2022/12/15 19:07
 */
@RestController
public class ContentController {

    @Autowired
    ContentService contentService;

    /**
     * Handles {@code GET /parse/{keyword}} by passing the keyword to
     * {@link ContentService#addContent(String)}.
     *
     * @param keyword the keyword taken from the request path
     * @return whatever the service reports for the indexing attempt
     * @throws IOException propagated from the service
     */
    @GetMapping("/parse/{keyword}")
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        Boolean indexed = contentService.addContent(keyword);
        return indexed;
    }

    /**
     * Handles {@code GET /search/{keywords}/{pageNo}/{pageSize}} by passing the path
     * values to {@link ContentService#searchPageForHighLight(String, int, int)}.
     *
     * @param keywords the search term taken from the request path
     * @param pageNo   the page number taken from the request path
     * @param pageSize the page size taken from the request path
     * @return the list of result maps produced by the service
     * @throws IOException propagated from the service
     */
    @GetMapping("/search/{keywords}/{pageNo}/{pageSize}")
    public List<Map<String, Object>> search(
            @PathVariable("keywords") String keywords,
            @PathVariable("pageNo") int pageNo,
            @PathVariable("pageSize") int pageSize) throws IOException {
        List<Map<String, Object>> hits =
                contentService.searchPageForHighLight(keywords, pageNo, pageSize);
        return hits;
    }
}

package com.qidi.controller;

import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;

/**
 * HEU
 * Harbin Engineering University
 *
 * <p>MVC controller that serves the landing page.
 *
 * @author QiDi
 * @date 2022/12/15 19:04
 */
@Controller
public class IndexController {

    /** Logical view name returned for the landing page. */
    private static final String INDEX_VIEW = "index";

    /**
     * Handles GET requests for both {@code /} and {@code /index}.
     *
     * @return the view name {@code "index"} (presumably resolved to a Thymeleaf
     *         template; confirm against the templates directory)
     */
    @GetMapping({"/", "/index"})
    public String index() {
        return INDEX_VIEW;
    }
}

gitee地址:https://gitee.com/zidiqqq6/springboot-elasticsearch.git

猜你喜欢

转载自blog.csdn.net/weixin_68930048/article/details/128330232
17