大神请看
本文不一定全部都写得对,如果有哪里写得不好或者不对,欢迎指出,但请不要喷,不喜欢的话绕道即可。虽然有可能在一些小知识点上会误导小白,但还是可以看看基础的ES操作,对于实际使用还是有一点帮助的
数据的查询
前两回说了Index的创建和数据的操作,那现在既然有了数据,自然要有查询的功能。说到查询,我们PHP开发最熟悉的应该就是MySQL数据库的查询:and、or、in、=、gt、lt、egt、elt、between等等。这里我们讲一下思路,学会怎么根据ES的官方查询api反推 ruflin/elastica 这个包的使用。
相关的文档
https://www.elastic.co/guide/cn/elasticsearch/php/current/_search_operations.html
https://www.elastic.co/guide/en/elasticsearch/reference/current/search.html
相关api
GET /index名称/_search
参数为JSON格式,放在请求体(raw body)中传输
插入数据
为了方便我们做查询,我们先插入一批数据,共计101条
// Seed the "test" index with 101 sample documents (ids 10000 through 10100)
// so that the search examples have data to query.
$config = $this->config();
$client = new Client($config);
$indexName = 'test';
$index = $client->getIndex($indexName);

// Start from an explicit empty list so the batch holds only the documents
// built below, even if $documents already exists in the enclosing scope.
$documents = [];
for ($i = 10000; $i <= 10100; $i++) {
    // The numeric id is used both as the document id and as the "id" field.
    $documents[] = new Document($i, [
        'id' => $i,
        'name' => '白鹭 ' . $i,
        'en_name' => 'bai lu ' . $i,
    ]);
}

// Index the whole batch with a single addDocuments() call.
$index->addDocuments($documents);
查询所有数据
举个例子,查询Index中的所有数据
假如用api去查
GET /test/_search
参数:
{
"query": {
"match_all": {}
}
}
结果:默认只返回10条(返回条数可以通过 size 参数设置),但是从 hits.total.value 可以看到一共匹配到了101条
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 101,###### 看这里
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "test",
"_type": "_doc",
"_id": "10000",
"_score": 1.0,
"_source": {
"id": 10000,
"name": "白鹭 10000",
"en_name": "bai lu 10000"
}
},
{
"_index": "test",
"_type": "_doc",
"_id": "10001",
"_score": 1.0,
"_source": {
"id": 10001,
"name": "白鹭 10001",
"en_name": "bai lu 10001"
}
},
{
"_index": "test",
"_type": "_doc",
"_id": "10002",
"_score": 1.0,
"_source": {
"id": 10002,
"name": "白鹭 10002",
"en_name": "bai lu 10002"
}
},
{
"_index": "test",
"_type": "_doc",
"_id": "10003",
"_score": 1.0,
"_source": {
"id": 10003,
"name": "白鹭 10003",
"en_name": "bai lu 10003"
}
},
{
"_index": "test",
"_type": "_doc",
"_id": "10004",
"_score": 1.0,
"_source": {
"id": 10004,
"name": "白鹭 10004",
"en_name": "bai lu 10004"
}
},
{
"_index": "test",
"_type": "_doc",
"_id": "10005",
"_score": 1.0,
"_source": {
"id": 10005,
"name": "白鹭 10005",
"en_name": "bai lu 10005"
}
},
{
"_index": "test",
"_type": "_doc",
"_id": "10006",
"_score": 1.0,
"_source": {
"id": 10006,
"name": "白鹭 10006",
"en_name": "bai lu 10006"
}
},
{
"_index": "test",
"_type": "_doc",
"_id": "10007",
"_score": 1.0,
"_source": {
"id": 10007,
"name": "白鹭 10007",
"en_name": "bai lu 10007"
}
},
{
"_index": "test",
"_type": "_doc",
"_id": "10008",
"_score": 1.0,
"_source": {
"id": 10008,
"name": "白鹭 10008",
"en_name": "bai lu 10008"
}
},
{
"_index": "test",
"_type": "_doc",
"_id": "10009",
"_score": 1.0,
"_source": {
"id": 10009,
"name": "白鹭 10009",
"en_name": "bai lu 10009"
}
}
]
}
}
代码实现
// Build the client from the shared configuration and select the "test" index.
$client = new Client($this->config());
$index = $client->getIndex('test');

// Calling search() is all that is needed to query; passing an empty string
// queries all of the data.
$data = $index->search('');
// The result set is large, so it is not shown here.
好,那现在假如我们只要查id在10040 ~ 10042这3条数据,那我们该怎么做呢
以官方的restful api为例,我们根据文档可以轻松写出
GET /test/_search
参数(我们挨个讲解一下):
{
"query": {
"range": { # 代表范围查询
"id": { # 查询字段为Id
"lte": "10042", # lte为小于等于
"gte": "10040" # gte为大于等于
}
}
}
}
结果:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "test",
"_type": "_doc",
"_id": "10040",
"_score": 1.0,
"_source": {
"id": 10040,
"name": "白鹭 10040",
"en_name": "bai lu 10040"
}
},
{
"_index": "test",
"_type": "_doc",
"_id": "10041",
"_score": 1.0,
"_source": {
"id": 10041,
"name": "白鹭 10041",
"en_name": "bai lu 10041"
}
},
{
"_index": "test",
"_type": "_doc",
"_id": "10042",
"_score": 1.0,
"_source": {
"id": 10042,
"name": "白鹭 10042",
"en_name": "bai lu 10042"
}
}
]
}
}
转换为php代码实现
// Range query on the "id" field: 10040 <= id <= 10042.
$idBounds = [
    'lte' => '10042', // less than or equal to
    'gte' => '10040', // greater than or equal to
];

$range = new Query\Range();
$range->addField('id', $idBounds);

$data = $index->search($range);
还是这个例子,在数据库中,范围查询除了可以用between来写,还可以写成 id >= 10040 and id <= 10042,这就涉及到了ES中的AND语法。接下来我们继续实操一下:在这个例子里这样写反而会更复杂,这里只是为了演示AND的用法。
GET /test/_search
参数(我们挨个讲解一下):
{
"query": {
"bool": { # bool是进行判断时必须携带的一个字段
"must": [ # must就相当于数据库的and,必须满足must这个数组里的所有条件
{
"range": { # 代表范围查询
"id": { # 查询字段为Id
"lte": "10042" # lte为小于等于
}
}
},
{
"range": {
"id": {
"gte": "10040" # gte为大于等于
}
}
}
]
}
}
}
结果和上述一样
转换成php代码
// bool/must: every clause has to match, i.e. id <= 10042 AND id >= 10040.
$upperBound = new Query\Range();
$upperBound->addField('id', ['lte' => '10042']);

$lowerBound = new Query\Range();
$lowerBound->addField('id', ['gte' => '10040']);

// Clauses are added in the same order as in the REST request body.
$bool = new Query\BoolQuery();
$bool->addMust($upperBound);
$bool->addMust($lowerBound);

$data = $index->search($bool);
其实到这里我们就可以看得出,每一个es的api的关键字都能在 ruflin/elastica 找到类与之相对应,然后按照api参数的层级关系,就不难转换成代码了,我们再举个例子,现在要查小于10001或者大于10099的数据。
GET /test/_search
参数(我们讲解一下不同的点):
{
"query": {
"bool": {
"should": [ # should其实就相当于数据库中的or,该数组下不需要所有条件都满足
{
"range": {
"id": {
"lt": "10001"
}
}
},
{
"range": {
"id": {
"gt": "10099"
}
}
}
]
}
}
}
结果:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "test",
"_type": "_doc",
"_id": "10000",
"_score": 1.0,
"_source": {
"id": 10000,
"name": "白鹭 10000",
"en_name": "bai lu 10000"
}
},
{
"_index": "test",
"_type": "_doc",
"_id": "10100",
"_score": 1.0,
"_source": {
"id": 10100,
"name": "白鹭 10100",
"en_name": "bai lu 10100"
}
}
]
}
}
转换成php代码
// bool/should: a document matches when at least one clause matches,
// i.e. id < 10001 OR id > 10099.
$bool = new Query\BoolQuery();

// Exclusive upper bound. It must be 10001 (not 10002) to mirror the REST
// request for this example, so that only id 10000 satisfies this clause.
$range = new Query\Range();
$range->addField('id', [
    'lt' => '10001',
]);

// Exclusive lower bound: only id 10100 satisfies this clause.
$range1 = new Query\Range();
$range1->addField('id', [
    'gt' => '10099',
]);

$bool->addShould($range);
$bool->addShould($range1);
$data = $index->search($bool);
基本上查询就是照着官方api的参数,转变成一个个类去设置对应字段的值,其实理解到这一点之后,多看看官方的api就基本能写明白es的查询了。
最后讲一下复杂一点的查询,也就是复合查询,这里直接以api参数讲解
这个例子是查询满足以下任意一组条件的数据:(app_code是2000或者2001,并且app_id=A100201)或者(app_code是2000或者2001,并且app_id=A100100)
{
"query": {
"bool": {
"should": [ # or查询
{
"bool": {
"must": [ # and查询
{"terms": {"app_code": ["2000","2001"]}}, # terms相当于in查询
{"term": {"app_id": "A100201"}} # keyword匹配
]
}
},
{
"bool": {
"must": [
{"terms": {"app_code": ["2000","2001"]}},
{"term": {"app_id": "A100100"}}
]
}
}
]
}
}
}
复合查询比较麻烦,存在多层嵌套的关系,但只要花点时间,还是可以处理好的。