ElasticSearch用于数据检索,效率非常高,尤其是在大数据环境下,所以非常有必要学习!
1. 安装: 这里我使用阿里云服务器,并且采用Docker安装ES
安装elasticsearch
# Install Elasticsearch 7.7.1 with Docker.

# 1. Pull the image
docker pull elasticsearch:7.7.1

# 2. Create and start the container: single-node discovery, 128 MB JVM heap,
#    plugins and data directories mounted from /root/es on the host.
docker run -d \
  -p 9300:9300 -p 9200:9200 \
  --name es \
  -e ES_JAVA_OPTS="-Xms128m -Xmx128m" \
  -e "discovery.type=single-node" \
  -v /root/es/plugins:/usr/share/elasticsearch/plugins \
  -v /root/es/data:/usr/share/elasticsearch/data \
  elasticsearch:7.7.1
安装kibana
# Install Kibana 7.7.1 with Docker.

# 1. Pull the Kibana image
docker pull kibana:7.7.1

# 2. Start the Kibana container.
#    Kibana 7.x images read ELASTICSEARCH_HOSTS; ELASTICSEARCH_URL was the 6.x name
#    and is ignored by 7.x, which would leave Kibana pointing at its default host.
docker run -d \
  --name kibana \
  -e ELASTICSEARCH_HOSTS=http://47.101.52.63:9200 \
  -p 5601:5601 \
  kibana:7.7.1
安装elasticsearch-head
# Install elasticsearch-head with Docker.

# 1. Pull the image
docker pull mobz/elasticsearch-head:5

# 2. Create and start the container
docker run -d -p 9100:9100 --name es-head docker.io/mobz/elasticsearch-head:5

# 3. Requests from the head UI may be rejected because of cross-origin restrictions.
#    Enter the Elasticsearch container and edit its elasticsearch.yml.
docker ps -a                      # look up the running elasticsearch container
docker exec -it es /bin/bash      # "es" is the container name restarted below; its id from `docker ps -a` works too

# The next two commands are typed inside the container shell opened above.
cd ./config
vi elasticsearch.yml
# Add these two lines to elasticsearch.yml:
#   http.cors.enabled: true
#   http.cors.allow-origin: "*"

# Leave the container shell, then restart the container
docker restart es
安装IK分词器
# 1.下载对应版本的IK分词器(IK版本必须与ES版本一致,这里是7.7.1)
wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.7.1/elasticsearch-analysis-ik-7.7.1.zip
# 2.解压到plugins/elasticsearch文件夹中
yum install -y unzip #下载unzip
unzip -d plugins/elasticsearch elasticsearch-analysis-ik-7.7.1.zip
# 3.添加自定义扩展词和停用词
cd plugins/elasticsearch/config
vim IKAnalyzer.cfg.xml

2.项目实战(基于es的仿京东搜索)

# 4.在ik分词器目录下config目录中创建ext_dict.dic文件 编码一定要为UTF-8才能生效
vim ext_dict.dic 加入扩展词即可
# 5. 在ik分词器目录下config目录中创建ext_stopwords.dic文件
vim ext_stopwords.dic 加入停用词即可
# 6.将此容器提交成为一个新的镜像
docker commit -a="zk" -m="with IKAnalyzer" b35d35f72b8d zk/elasticsearch:6.8.2
# 7.使用新生成的这个es镜像创建容器,并挂载数据卷
docker run -d --name es -p 9200:9200 -p 9300:9300 -e ES_JAVA_OPTS="-Xms128m -Xmx128m" -v /usr/local/IKAnalyzer:/usr/share/elasticsearch/plugins/elasticsearch/config zk/elasticsearch:6.8.2

IKAnalyzer.cfg.xml 的内容:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer 扩展配置</comment>
    <entry key="ext_dict">ext_dict.dic</entry>
    <entry key="ext_stopwords">ext_stopwords.dic</entry>
</properties>
- 爬虫
- 导入jsoup依赖
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>
- 编写测试,生成工具类
package com.ittao.utils;

import com.ittao.entity.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

/**
 * Scrapes the JD search result page for a keyword and maps each result
 * item to a {@link Content}.
 */
public class HtmlParseUtil {

    // public static void main(String[] args) throws IOException {
    //     HtmlParseUtil.parseJd("黄涛").forEach(System.out::println);
    // }

    /**
     * Fetches {@code https://search.jd.com/Search} for the given keyword and
     * extracts image, price and title from every {@code <li>} under the
     * element with id {@code J_goodsList}.
     *
     * @param keyword search keyword; it is URL-encoded before being placed in the query string
     * @return one {@link Content} per result item; empty when the page has no {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static List<Content> parseJd(String keyword) throws IOException {
        // 1. Build the search URL. Encoding keeps spaces, '&', '#' and non-ASCII
        //    keywords from corrupting the query string.
        String url = "https://search.jd.com/Search?keyword="
                + URLEncoder.encode(keyword, "UTF-8") + "&enc=utf-8";

        // 2. Fetch and parse the page with jsoup (30 s timeout).
        Document document = Jsoup.parse(new URL(url), 30000);

        // 3. From here on the API mirrors DOM access in JavaScript.
        List<Content> contents = new ArrayList<>();
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The page came back without a result list; report "no results"
            // instead of failing with a NullPointerException.
            return contents;
        }

        Elements items = goodsList.getElementsByTag("li");
        for (Element item : items) {
            String img = item.getElementsByTag("img").eq(0).attr("src");
            String price = item.getElementsByClass("p-price").text();
            String title = item.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setTitle(title);
            content.setImg(img);
            content.setPrice(price);
            contents.add(content);
        }
        return contents;
    }
}
- 前后端分离实现
-
后端实现
整体结构
pom.xml
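<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.3.0.RELEASE</version>
    </parent>
    <groupId>com.ittao</groupId>
    <artifactId>elasticsearch_study</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>elasticsearch_study</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <java.version>1.8</java.version>
        <elasticsearch.version>7.7.1</elasticsearch.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-configuration-processor</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.61</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.junit.vintage</groupId>
                    <artifactId>junit-vintage-engine</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>
config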
package com.ittao.config;

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticsearchConfig {

    /**
     * Builds a {@link RestHighLevelClient} that talks to the single node at
     * {@code http://47.101.52.63:9200}.
     *
     * @return the client bean, registered under the name {@code restHighLevelClient}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost esNode = new HttpHost("47.101.52.63", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(esNode));
    }
}
entity
package com.ittao.entity;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * One scraped product entry. Accessors, {@code equals}/{@code hashCode}/{@code toString}
 * and both constructors are generated by Lombok.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class Content {

    /** Text taken from the item's {@code p-name} element. */
    private String title;

    /** Value of the {@code src} attribute of the item's first {@code img} tag. */
    private String img;

    /** Text taken from the item's {@code p-price} element. */
    private String price;
}
package com.ittao.entity;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;
import org.springframework.stereotype.Component;

/**
 * Simple user bean with a name and an age. Lombok generates the accessors and
 * constructors; {@code chain = true} makes the generated setters return {@code this}.
 * The class is also registered as a Spring component.
 */
@Component
@Data
@NoArgsConstructor
@AllArgsConstructor
@Accessors(chain = true)
public class User {

    /** User name. */
    private String name;

    /** User age. */
    private int age;
}
service
package com.ittao.service.impl; import com.alibaba.fastjson.JSON; import com.ittao.entity.Content; import com.ittao.service.ContentService; import com.ittao.utils.HtmlParseUtil; import org.elasticsearch.action.bulk.BulkRequest; import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.unit.Timevalue; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.query.FuzzyQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.stereotype.Service; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @Service public class ContentServiceImpl implements ContentService { @Autowired @Qualifier("restHighLevelClient") private RestHighLevelClient client; @Override public boolean addToEs(String keyword) throws IOException { //1.获取要添加的数据 List
contentList = HtmlParseUtil.parseJd(keyword); //2.创建批量添加请求 BulkRequest request = new BulkRequest(); //3.批量添加数据 for (Content content : contentList) { request.add(new IndexRequest("jd_goods"). //添加到jd_goods这个索引中 source(JSON.toJSONString(content), XContentType.JSON)); } request.timeout(new Timevalue(2, TimeUnit.MINUTES)); //4.执行批量添加请求 BulkResponse response = client.bulk(request, RequestOptions.DEFAULT); //5.获取响应 return !response.hasFailures(); } @Override public List controller
package com.ittao.Controller; import com.ittao.service.ContentService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.web.bind.annotation.CrossOrigin; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RestController; import java.io.IOException; import java.util.List; import java.util.Map; @RestController @CrossOrigin public class ContentController { @Autowired private ContentService contentService; @GetMapping("/addToEs/{keyword}") public boolean addToEs(@PathVariable("keyword") String keyword) throws IOException { return contentService.addToEs(keyword); } @GetMapping("/searchPage/{keyword}/{pageNo}/{pageSize}") public List
> searchPage(@PathVariable("keyword") String keyword, @PathVariable("pageNo") int pageNo, @PathVariable("pageSize") int pageSize) throws IOException { return contentService.searchPage(keyword, pageNo, pageSize); } } utils
package com.ittao.utils;

import com.ittao.entity.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

/**
 * Scrapes the JD search result page for a keyword and maps each result
 * item to a {@link Content}.
 */
public class HtmlParseUtil {

    // public static void main(String[] args) throws IOException {
    //     HtmlParseUtil.parseJd("黄涛").forEach(System.out::println);
    // }

    /**
     * Fetches {@code https://search.jd.com/Search} for the given keyword and
     * extracts image, price and title from every {@code <li>} under the
     * element with id {@code J_goodsList}.
     *
     * @param keyword search keyword; it is URL-encoded before being placed in the query string
     * @return one {@link Content} per result item; empty when the page has no {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static List<Content> parseJd(String keyword) throws IOException {
        // 1. Build the search URL. Encoding keeps spaces, '&', '#' and non-ASCII
        //    keywords from corrupting the query string.
        String url = "https://search.jd.com/Search?keyword="
                + URLEncoder.encode(keyword, "UTF-8") + "&enc=utf-8";

        // 2. Fetch and parse the page with jsoup (30 s timeout).
        Document document = Jsoup.parse(new URL(url), 30000);

        // 3. From here on the API mirrors DOM access in JavaScript.
        List<Content> contents = new ArrayList<>();
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The page came back without a result list; report "no results"
            // instead of failing with a NullPointerException.
            return contents;
        }

        Elements items = goodsList.getElementsByTag("li");
        for (Element item : items) {
            String img = item.getElementsByTag("img").eq(0).attr("src");
            String price = item.getElementsByClass("p-price").text();
            String title = item.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setTitle(title);
            content.setImg(img);
            content.setPrice(price);
            contents.add(content);
        }
        return contents;
    }
}
前端实现
首页
ElasticSearch的简单实战 第一个功能:从京东商城中爬取我们搜索的数据,存放到elasticsearch中
第二个功能:从elasticsearch中根据关键字查询我们的数据,进行展示
点我去搜索数据
点我去爬取数据
/* Home page styles */
.text {
  font-size: 20px;
}

.link {
  text-align: left;
  margin-left: 450px;
}

.logo {
  height: 200px;
}
查询页面
!搜索 点我去爬取数据 点我去首页
- {{item.price}} 1300+条评价 文轩网旗舰店
/* Search page styles: Element UI layout containers */
.el-header,
.el-footer {
  text-align: center;
  line-height: 80px;
}

.el-main {
  text-align: center;
  line-height: 800px;
}

body > .el-container {
  margin-bottom: 40px;
}

.el-container:nth-child(5) .el-aside,
.el-container:nth-child(6) .el-aside {
  line-height: 260px;
}

.el-container:nth-child(7) .el-aside {
  line-height: 320px;
}

/* Result grid: rows 400px high, four 25%-wide columns per row */
.content {
  width: 100%;
  height: 1200px;
  margin: 50px auto;
}

.row {
  width: 100%;
  height: 400px;
  float: left;
}

.col {
  width: 25%;
  height: 400px;
  float: left;
}

/* Individual product card parts */
.image {
  text-align: left;
  margin-left: 20px;
}

.p-price {
  text-align: left;
  margin-left: 20px;
  color: red;
}

.p-title1 {
  font-size: 10px;
}

.p-commit {
  text-align: left;
}

.p-shop {
  text-align: left;
}

ul li {
  list-style-type: none;
}
生成数据页面
生成 点我去查询 点我去首页

/* Generate-data page styles: Element UI layout containers */
.el-header,
.el-footer {
  text-align: center;
  line-height: 80px;
}

.el-main {
  text-align: center;
  line-height: 800px;
}

body > .el-container {
  margin-bottom: 40px;
}

.el-container:nth-child(5) .el-aside,
.el-container:nth-child(6) .el-aside {
  line-height: 260px;
}

.el-container:nth-child(7) .el-aside {
  line-height: 320px;
}
route中index.js
import Vue from 'vue'
import VueRouter from 'vue-router'
import Search from '../views/Search.vue'
import Home from '../views/Home.vue'
import GenerateData from '../views/GenerateData.vue'

Vue.use(VueRouter)

// Route table: home page, search page and data-generation page.
const routes = [
  {
    path: '/',
    name: 'Home',
    component: Home
  },
  {
    path: '/search',
    name: 'Search',
    component: Search
  },
  {
    path: '/generateData',
    name: 'GenerateData',
    component: GenerateData
  }
]

const router = new VueRouter({
  mode: 'history',
  // Vue CLI exposes the public path as BASE_URL; environment variable names are
  // case-sensitive, so `base_URL` would always be undefined.
  base: process.env.BASE_URL,
  routes
})

export default router
main.js
import Vue from 'vue' import App from './App.vue' import router from './router' import store from './store' import ElementUI from 'element-ui' import 'element-ui/lib/theme-chalk/index.css' import axios from 'axios' Vue.config.productionTip = false Vue.use(ElementUI); Vue.prototype.$http = axios Vue.prototype.$http.defaults.baseURL = 'http://localhost:8989' // `baseURL` 将自动加在 `url` 前面,除非 `url` 是一个绝对 URL new Vue({ router, store, render: h => h(App) }).$mount('#app')3.总结
通过学习,对es的基本使用算是初步的掌握了.学习es的步骤如下
1.es的安装,尤其通过docker安装
2.es的简单restful api的使用,包括简单查询和复杂查询,通过kibana可视化界面操作
3.es的java客户端工具api学习,通过java语句去实现增删改查,其实本质上和查询语句类似,该有的方法都有。
4.最后通过es仿京东搜索的实战练习,具备了对es的基本运用能力。