ElasticSearch —— 使用 ES 实现一个搜索项目

依赖

Spring Boot 2.2.5.RELEASE,其他依赖如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
<!-- fastjson: JSON serialization library. -->
<!-- NOTE(review): 1.2.62 is an old release; check it against current fastjson security advisories before reuse. -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.62</version>
</dependency>

<!-- jsoup: HTML fetching and parsing library. -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>

<!-- Spring Boot starters for Elasticsearch and web support. -->
<!-- No version is given here; presumably it is managed by the Spring Boot parent/BOM (confirm in the full pom). -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
</dependency>

<!-- Development-time helpers: declared optional so they are not passed on to dependent modules. -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-devtools</artifactId>
    <scope>runtime</scope>
    <optional>true</optional>
</dependency>
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
    <optional>true</optional>
</dependency>

配置 ES 连接

指定了用户名、密码和地址、端口:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
/**
 * Spring configuration that exposes a {@link RestHighLevelClient} bean talking to a single
 * Elasticsearch node over HTTP with basic authentication.
 */
@Configuration
public class ElasticSearchConfig {
    /**
     * Builds the Elasticsearch client.
     *
     * <p>NOTE(review): the host, port, user name and password are hard-coded below; for anything
     * beyond a demo they should come from external configuration instead of source code.
     *
     * @return a client bound to {@code http://192.168.79.131:9200} using the {@code elastic} user
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        UsernamePasswordCredentials login =
                new UsernamePasswordCredentials("elastic", "XCkZUSgrTHOF0krUsXgJ");
        CredentialsProvider provider = new BasicCredentialsProvider();
        provider.setCredentials(AuthScope.ANY, login);

        HttpHost node = new HttpHost("192.168.79.131", 9200, "http");

        // Auth caching is switched off and the credentials provider is installed on the
        // underlying HTTP client builder.
        return new RestHighLevelClient(
                RestClient.builder(node).setHttpClientConfigCallback(
                        clientBuilder -> clientBuilder
                                .disableAuthCaching()
                                .setDefaultCredentialsProvider(provider)));
    }
}

准备数据

从京东爬点数据存到 ES。

使用 Jsoup 解析 HTML:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
/**
 * Fetches a JD.com search result page with Jsoup and converts every {@code li} entry of the
 * result list into a {@code JdContent}.
 */
public class HtmlParseUtil {
    /** Id of the element whose {@code li} descendants are read as search results. */
    private static final String GOODS_LIST_ID = "J_goodsList";

    /** Timeout, in milliseconds, for fetching the search page. */
    private static final int FETCH_TIMEOUT_MILLIS = 30000;

    /** Small manual check: prints everything scraped for a sample keyword. */
    public static void main(String[] args) throws IOException {
        new HtmlParseUtil().parseJd("高效").forEach(System.out::println);
    }

    /**
     * Scrapes the search results for the given keyword.
     *
     * @param keyword search keyword; it is URL-encoded as UTF-8 before being placed in the query
     * @return one {@code JdContent} (title, image, price) per {@code li} found in the result list;
     *         an empty list when the page contains no result list element
     * @throws IOException if the page cannot be fetched or parsed
     */
    public List<JdContent> parseJd(String keyword) throws IOException {
        List<JdContent> jdContents = new ArrayList<>();

        // Encode the keyword so non-ASCII text, spaces, '&' or '#' cannot corrupt the query string.
        // The page is requested with enc=utf-8, so UTF-8 is the matching encoding.
        String encodedKeyword = java.net.URLEncoder.encode(keyword, "UTF-8");
        String url = "https://search.jd.com/Search?keyword=" + encodedKeyword + "&enc=utf-8";

        Document document = Jsoup.parse(new URL(url), FETCH_TIMEOUT_MILLIS);
        Element jGoodsList = document.getElementById(GOODS_LIST_ID);
        if (jGoodsList == null) {
            // The response did not contain the result list; report "nothing found" instead of
            // failing with a NullPointerException.
            return jdContents;
        }

        for (Element element : jGoodsList.getElementsByTag("li")) {
            // The image URL is read from the data-lazy-img attribute of the first img tag.
            String img = element.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = element.getElementsByClass("p-price").eq(0).text();
            String title = element.getElementsByClass("p-name").eq(0).text();
            jdContents.add(new JdContent(title, img, price));
        }
        return jdContents;
    }
}

准备一个 service,传入搜索关键字,调用上面的 HtmlParseUtil 爬取数据、存入 ES:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
/**
 * Scrapes the search results for {@code keyword} and bulk-indexes them into the {@code goods} index.
 *
 * @param keyword search keyword handed to {@code HtmlParseUtil#parseJd}
 * @return the bulk response's {@code hasFailures()} value: {@code false} when no item failed
 *         (including the case where nothing was scraped), {@code true} when at least one failed
 * @throws IOException if scraping or the bulk request fails
 */
public Boolean parseContent(String keyword) throws IOException {
    List<JdContent> contents = new HtmlParseUtil().parseJd(keyword);
    if (contents.isEmpty()) {
        // A bulk request without any action is rejected by request validation,
        // so there is nothing to send and nothing that could have failed.
        return false;
    }

    BulkRequest bulkRequest = new BulkRequest();
    bulkRequest.timeout("2m");
    for (JdContent content : contents) {
        // Each scraped item becomes one JSON document; no explicit id is set on the request.
        bulkRequest.add(
                new IndexRequest("goods")
                        .source(JSON.toJSONString(content), XContentType.JSON));
    }
    BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
    return bulk.hasFailures();
}

该方法返回的是 `bulk.hasFailures()` 的结果:返回 false 表示数据存入 ES 时未发生错误,返回 true 则表示至少有一条数据写入失败。

搜索

关键字搜索

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/**
 * Runs a paged wildcard search on the {@code title} field of the {@code goods} index.
 *
 * @param keyword     text searched for as {@code *keyword*}; note that {@code *} and {@code ?}
 *                    inside the keyword are themselves interpreted as wildcards
 * @param currentPage 1-based page number; values below 1 are treated as 1
 * @param pageSize    maximum number of hits returned for the page
 * @return the source document of every hit on the requested page, one map per hit
 * @throws IOException if the search request fails
 */
public List<Map<String, Object>> searchPageable(String keyword, Integer currentPage, Integer pageSize) throws IOException {
    int page = Math.max(currentPage, 1);

    SearchRequest searchRequest = new SearchRequest("goods");
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

    // Paging: "from" is the offset of the first hit, not a page number,
    // so page 1 starts at offset 0, page 2 at pageSize, and so on.
    searchSourceBuilder.from((page - 1) * pageSize);
    searchSourceBuilder.size(pageSize);

    // Alternatives that were tried instead of the wildcard query:
    //        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
    //        MatchQueryBuilder queryBuilder = QueryBuilders.matchQuery("title", keyword).analyzer("ik_max_word");
    WildcardQueryBuilder queryBuilder = QueryBuilders.wildcardQuery("title", "*" + keyword + "*");
    searchSourceBuilder.query(queryBuilder);
    searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

    searchRequest.source(searchSourceBuilder);
    SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

    List<Map<String, Object>> ret = new ArrayList<>();
    for (SearchHit searchHit : searchResponse.getHits().getHits()) {
        ret.add(searchHit.getSourceAsMap());
    }
    return ret;
}

测试:

高亮

和关键字搜索非常相似,只是给搜索结果中 title 字段里命中关键字的部分包上 span 标签,并加上 color: red 样式~~(我不太喜欢这种方式)~~:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
/**
 * Runs a paged wildcard search on the {@code title} field of the {@code goods} index and
 * replaces each hit's {@code title} with its highlighted version when one is returned.
 *
 * <p>Highlighted parts are wrapped in {@code <span style='color:red'>...</span>}.
 *
 * @param keyword     text searched for as {@code *keyword*}; note that {@code *} and {@code ?}
 *                    inside the keyword are themselves interpreted as wildcards
 * @param currentPage 1-based page number; values below 1 are treated as 1
 * @param pageSize    maximum number of hits returned for the page
 * @return the source document of every hit on the requested page, with {@code title}
 *         overwritten by the concatenated highlight fragments where available
 * @throws IOException if the search request fails
 */
public List<Map<String, Object>> highlightSearchPageable(String keyword, Integer currentPage, Integer pageSize) throws IOException {
    int page = Math.max(currentPage, 1);

    SearchRequest searchRequest = new SearchRequest("goods");
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

    // Paging: "from" is the offset of the first hit, not a page number,
    // so page 1 starts at offset 0, page 2 at pageSize, and so on.
    searchSourceBuilder.from((page - 1) * pageSize);
    searchSourceBuilder.size(pageSize);

    // Alternatives that were tried instead of the wildcard query:
    //        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
    //        MatchQueryBuilder queryBuilder = QueryBuilders.matchQuery("title", keyword).analyzer("ik_max_word");
    WildcardQueryBuilder queryBuilder = QueryBuilders.wildcardQuery("title", "*" + keyword + "*");
    searchSourceBuilder.query(queryBuilder);
    searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

    // Highlighting of the title field.
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.field("title");
    highlightBuilder.preTags("<span style='color:red'>");
    highlightBuilder.postTags("</span>");
    searchSourceBuilder.highlighter(highlightBuilder);

    searchRequest.source(searchSourceBuilder);
    SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

    List<Map<String, Object>> ret = new ArrayList<>();
    for (SearchHit searchHit : searchResponse.getHits().getHits()) {
        Map<String, Object> sourceAsMap = searchHit.getSourceAsMap();
        HighlightField title = searchHit.getHighlightFields().get("title");

        if (title != null) {
            // Join all highlight fragments into one string and use it as the title.
            StringBuilder highlightedTitle = new StringBuilder();
            for (Text fragment : title.fragments()) {
                highlightedTitle.append(fragment);
            }
            sourceAsMap.put("title", highlightedTitle.toString());
        }

        ret.add(sourceAsMap);
    }
    return ret;
}

测试:

加载评论