【clickhouse】Clickhouse 查询之 Array JOIN

在这里插入图片描述

1.概述

转载:Clickhouse 查询之 Array JOIN

Array JOIN 子句允许在数据表的内部,与数组或者嵌套的字段进行JOIN操作,从而将一行数据变多行。适用于行转列操作。

CREATE TABLE datasets.city
(
    `province` String,
    `arr_city` Array(String),
    `arr_rank` Array(UInt8)
)
ENGINE = Log
 
 
 
Ok.
 
0 rows in set. Elapsed: 0.012 sec. 
 
insert into city values('hubei',['wuhan','xiangyang'],[1,2]),
('guangdong',['guangzhou','shenzhen','zhuhai'],[1,2,3]);
 
insert into city values('beijing',[],[10]),('shanghai',[],[20]);
 

查看原始数据(下面输出中的 hongkong 记录两个数组均为空,其插入语句在上文中被省略,例如:insert into city values('hongkong',[],[]);):

Clickhouse> select * from city;
 
SELECT *
FROM city
 
┌─province──┬─arr_city──────────────────────────┬─arr_rank─┐
│ hubei     │ ['wuhan','xiangyang']             │ [1,2]    │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ [1,2,3]  │
│ beijing   │ []                                │ [10]     │
│ shanghai  │ []                                │ [20]     │
│ hongkong  │ []                                │ []       │
└───────────┴───────────────────────────────────┴──────────┘
 
5 rows in set. Elapsed: 0.003 sec. 

1.INNER ARRAY JOIN:

 
Clickhouse> select province,city from city array join arr_city as city;
 
SELECT 
    province,
    city
FROM city
ARRAY JOIN arr_city AS city
 
┌─province──┬─city──────┐
│ hubei     │ wuhan     │
│ hubei     │ xiangyang │
│ guangdong │ guangzhou │
│ guangdong │ shenzhen  │
│ guangdong │ zhuhai    │
└───────────┴───────────┘
 
5 rows in set. Elapsed: 0.002 sec. 
 
 
Clickhouse> select province,arr_city,city from city array join arr_city as city;
 
SELECT 
    province,
    arr_city,
    city
FROM city
ARRAY JOIN arr_city AS city
 
┌─province──┬─arr_city──────────────────────────┬─city──────┐
│ hubei     │ ['wuhan','xiangyang']             │ wuhan     │
│ hubei     │ ['wuhan','xiangyang']             │ xiangyang │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ guangzhou │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ shenzhen  │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ zhuhai    │
└───────────┴───────────────────────────────────┴───────────┘
 
5 rows in set. Elapsed: 0.002 sec. 

2. LEFT Array JOIN :

Clickhouse> select province,arr_city,city from city LEFT array join arr_city as city FORMAT PrettyCompactMonoBlock;
 
SELECT 
    province,
    arr_city,
    city
FROM city
LEFT ARRAY JOIN arr_city AS city
FORMAT PrettyCompactMonoBlock
 
┌─province──┬─arr_city──────────────────────────┬─city──────┐
│ hubei     │ ['wuhan','xiangyang']             │ wuhan     │
│ hubei     │ ['wuhan','xiangyang']             │ xiangyang │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ guangzhou │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ shenzhen  │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ zhuhai    │
│ beijing   │ []                                │           │
│ shanghai  │ []                                │           │
│ hongkong  │ []                                │           │
└───────────┴───────────────────────────────────┴───────────┘
 
8 rows in set. Elapsed: 0.010 sec. 

当同时对多个数组字段进行 ARRAY JOIN 操作的时候,查询的计算逻辑是按行(按下标位置)合并,而不是产生笛卡尔积:

 
Clickhouse> select province,arr_city,arr_rank,v ,arrayMap(x->x*x,arr_rank) mapv,v1 from city c left array join arr_rank as v,mapv as v1 FORMAT PrettyCompactMonoBlock;
 
SELECT 
    province, 
    arr_city, 
    arr_rank, 
    v, 
    arrayMap(x -> (x * x), arr_rank) AS mapv, 
    v1
FROM city AS c
LEFT ARRAY JOIN 
    arr_rank AS v, 
    mapv AS v1
FORMAT PrettyCompactMonoBlock
 
┌─province──┬─arr_city──────────────────────────┬─arr_rank─┬──v─┬─mapv────┬──v1─┐
│ hubei     │ ['wuhan','xiangyang']             │ [1,2]    │  1 │ [1,4]   │   1 │
│ hubei     │ ['wuhan','xiangyang']             │ [1,2]    │  2 │ [1,4]   │   4 │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ [1,2,3]  │  1 │ [1,4,9] │   1 │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ [1,2,3]  │  2 │ [1,4,9] │   4 │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ [1,2,3]  │  3 │ [1,4,9] │   9 │
│ beijing   │ []                                │ [10]     │ 10 │ [100]   │ 100 │
│ shanghai  │ []                                │ [20]     │ 20 │ [400]   │ 400 │
│ hongkong  │ []                                │ []       │  0 │ []      │   0 │
└───────────┴───────────────────────────────────┴──────────┴────┴─────────┴─────┘
 
8 rows in set. Elapsed: 0.004 sec. 
 
 
 
 
--- 
Clickhouse> SELECT province , arr_rank, a, num, mapped
:-] FROM city
:-] ARRAY JOIN arr_rank AS a, arrayEnumerate(arr_rank) AS num, arrayMap(x -> x + 1, arr_rank) AS mapped;
 
SELECT 
    province, 
    arr_rank, 
    a, 
    num, 
    mapped
FROM city
ARRAY JOIN 
    arr_rank AS a, 
    arrayEnumerate(arr_rank) AS num, 
    arrayMap(x -> (x + 1), arr_rank) AS mapped
 
┌─province──┬─arr_rank─┬──a─┬─num─┬─mapped─┐
│ hubei     │ [1,2]    │  1 │   1 │      2 │
│ hubei     │ [1,2]    │  2 │   2 │      3 │
│ guangdong │ [1,2,3]  │  1 │   1 │      2 │
│ guangdong │ [1,2,3]  │  2 │   2 │      3 │
│ guangdong │ [1,2,3]  │  3 │   3 │      4 │
│ beijing   │ [10]     │ 10 │   1 │     11 │
│ shanghai  │ [20]     │ 20 │   1 │     21 │
└───────────┴──────────┴────┴─────┴────────┘
 
7 rows in set. Elapsed: 0.005 sec. 

arr_rank 和mapped 数组并没有产生笛卡尔积。

嵌套数据类型本质是数组,ARRAY JOIN 也支持嵌套数据类型。
嵌套表创建:

Clickhouse> create table t_nest(province String, addr Nested(no UInt8,city String,area String)) engine=MergeTree() order by province;
 
CREATE TABLE t_nest
(
    `province` String,
    `addr` Nested(    no UInt8,     city String,     area String)
)
ENGINE = MergeTree()
ORDER BY province
 
 
Clickhouse> desc t_nest;
 
DESCRIBE TABLE t_nest
 
┌─name──────┬─type──────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ province  │ String        │              │                    │         │                  │                │
│ addr.no   │ Array(UInt8)  │              │                    │         │                  │                │
│ addr.city │ Array(String) │              │                    │         │                  │                │
│ addr.area │ Array(String) │              │                    │         │                  │                │
└───────────┴───────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
 
4 rows in set. Elapsed: 0.003 sec. 
 
Clickhouse> insert into t_nest values('Hubei',[1,2,3],['wuhan','wuhan','wuhan'],['wuchang','hankou','hanyang']);
 
INSERT INTO t_nest VALUES
 
Ok.
 
1 rows in set. Elapsed: 0.005 sec. 
 
Clickhouse> insert into t_nest values('Guangdong',[10,20,30],['guangzhou','shenzhen','shenzhen'],['baiyun','nanshan','baoan']);
 
INSERT INTO t_nest VALUES
 
Ok.
 
1 rows in set. Elapsed: 0.002 sec. 
 
Clickhouse> insert into t_nest values('Shanghai',[],[],[]);
 
INSERT INTO t_nest VALUES
 
Ok.
Clickhouse> select * from t_nest FORMAT PrettyCompactMonoBlock;
 
SELECT *
FROM t_nest
FORMAT PrettyCompactMonoBlock
 
┌─province──┬─addr.no────┬─addr.city───────────────────────────┬─addr.area──────────────────────┐
│ Guangdong │ [10,20,30] │ ['guangzhou','shenzhen','shenzhen'] │ ['baiyun','nanshan','baoan']   │
│ Hubei     │ [1,2,3]    │ ['wuhan','wuhan','wuhan']           │ ['wuchang','hankou','hanyang'] │
│ Shanghai  │ []         │ []                                  │ []                             │
└───────────┴────────────┴─────────────────────────────────────┴────────────────────────────────┘
 
3 rows in set. Elapsed: 0.003 sec. 
 

对嵌套类型数据的访问 :
1.ARRAY JOIN 可以直接使用字段列名:

Clickhouse> select province,addr.no,addr.city,addr.area from t_nest array join addr FORMAT PrettyCompactMonoBlock;
 
SELECT 
    province,
    addr.no,
    addr.city,
    addr.area
FROM t_nest
ARRAY JOIN addr
FORMAT PrettyCompactMonoBlock
 
┌─province──┬─addr.no─┬─addr.city─┬─addr.area─┐
│ Hubei     │       1 │ wuhan     │ wuchang   │
│ Hubei     │       2 │ wuhan     │ hankou    │
│ Hubei     │       3 │ wuhan     │ hanyang   │
│ Guangdong │      10 │ guangzhou │ baiyun    │
│ Guangdong │      20 │ shenzhen  │ nanshan   │
│ Guangdong │      30 │ shenzhen  │ baoan     │
└───────────┴─────────┴───────────┴───────────┘
 
6 rows in set. Elapsed: 0.005 sec. 
2.也可以使用点访问符号的形式:
Clickhouse> select province,addr.no,addr.city,addr.area from t_nest array join addr.no,addr.city,addr.area FORMAT PrettyCompactMonoBlock;
 
SELECT 
    province,
    addr.no,
    addr.city,
    addr.area
FROM t_nest
ARRAY JOIN 
    addr.no,
    addr.city,
    addr.area
FORMAT PrettyCompactMonoBlock
 
┌─province──┬─addr.no─┬─addr.city─┬─addr.area─┐
│ Hubei     │       1 │ wuhan     │ wuchang   │
│ Hubei     │       2 │ wuhan     │ hankou    │
│ Hubei     │       3 │ wuhan     │ hanyang   │
│ Guangdong │      10 │ guangzhou │ baiyun    │
│ Guangdong │      20 │ shenzhen  │ nanshan   │
│ Guangdong │      30 │ shenzhen  │ baoan     │
└───────────┴─────────┴───────────┴───────────┘
 
6 rows in set. Elapsed: 0.004 sec. 

可以看到少了一条记录:Shanghai 的嵌套数组为空,被(INNER)ARRAY JOIN 过滤掉了。改用 LEFT ARRAY JOIN 可以保留该记录,空数组对应的字段以默认值填充:

Clickhouse> select province,addr.no,addr.city,addr.area from t_nest left array join addr.no,addr.city,addr.area FORMAT PrettyCompactMonoBlock;
 
SELECT 
    province,
    addr.no,
    addr.city,
    addr.area
FROM t_nest
LEFT ARRAY JOIN 
    addr.no,
    addr.city,
    addr.area
FORMAT PrettyCompactMonoBlock
 
┌─province──┬─addr.no─┬─addr.city─┬─addr.area─┐
│ Shanghai  │       0 │           │           │
│ Guangdong │      10 │ guangzhou │ baiyun    │
│ Guangdong │      20 │ shenzhen  │ nanshan   │
│ Guangdong │      30 │ shenzhen  │ baoan     │
│ Hubei     │       1 │ wuhan     │ wuchang   │
│ Hubei     │       2 │ wuhan     │ hankou    │
│ Hubei     │       3 │ wuhan     │ hanyang   │
└───────────┴─────────┴───────────┴───────────┘
 
7 rows in set. Elapsed: 0.003 sec. 

3.嵌套类型也支持ARRAY JOIN部分嵌套字段:

Clickhouse> select province,addr.no,addr.city,addr.area from t_nest left array join addr.no FORMAT PrettyCompactMonoBlock;
 
SELECT 
    province,
    addr.no,
    addr.city,
    addr.area
FROM t_nest
LEFT ARRAY JOIN addr.no
FORMAT PrettyCompactMonoBlock
 
┌─province──┬─addr.no─┬─addr.city───────────────────────────┬─addr.area──────────────────────┐
│ Shanghai  │       0 │ []                                  │ []                             │
│ Hubei     │       1 │ ['wuhan','wuhan','wuhan']           │ ['wuchang','hankou','hanyang'] │
│ Hubei     │       2 │ ['wuhan','wuhan','wuhan']           │ ['wuchang','hankou','hanyang'] │
│ Hubei     │       3 │ ['wuhan','wuhan','wuhan']           │ ['wuchang','hankou','hanyang'] │
│ Guangdong │      10 │ ['guangzhou','shenzhen','shenzhen'] │ ['baiyun','nanshan','baoan']   │
│ Guangdong │      20 │ ['guangzhou','shenzhen','shenzhen'] │ ['baiyun','nanshan','baoan']   │
│ Guangdong │      30 │ ['guangzhou','shenzhen','shenzhen'] │ ['baiyun','nanshan','baoan']   │
└───────────┴─────────┴─────────────────────────────────────┴────────────────────────────────┘
 
7 rows in set. Elapsed: 0.002 sec. 
 

在此种情形下只有array join的数组才会被展开。

4.为嵌套类型指定别名后,通过原列名仍可访问原始数组,通过别名则访问展开后的值:

Clickhouse> select province,addr.no,addr.city,addr.area,a.no,a.city,a.area from t_nest left array join addr as a  FORMAT PrettyCompactMonoBlock;
 
SELECT 
    province,
    addr.no,
    addr.city,
    addr.area,
    a.no,
    a.city,
    a.area
FROM t_nest
LEFT ARRAY JOIN addr AS a
FORMAT PrettyCompactMonoBlock
 
┌─province──┬─addr.no────┬─addr.city───────────────────────────┬─addr.area──────────────────────┬─a.no─┬─a.city────┬─a.area──┐
│ Guangdong │ [10,20,30] │ ['guangzhou','shenzhen','shenzhen'] │ ['baiyun','nanshan','baoan']   │   10 │ guangzhou │ baiyun  │
│ Guangdong │ [10,20,30] │ ['guangzhou','shenzhen','shenzhen'] │ ['baiyun','nanshan','baoan']   │   20 │ shenzhen  │ nanshan │
│ Guangdong │ [10,20,30] │ ['guangzhou','shenzhen','shenzhen'] │ ['baiyun','nanshan','baoan']   │   30 │ shenzhen  │ baoan   │
│ Shanghai  │ []         │ []                                  │ []                             │    0 │           │         │
│ Hubei     │ [1,2,3]    │ ['wuhan','wuhan','wuhan']           │ ['wuchang','hankou','hanyang'] │    1 │ wuhan     │ wuchang │
│ Hubei     │ [1,2,3]    │ ['wuhan','wuhan','wuhan']           │ ['wuchang','hankou','hanyang'] │    2 │ wuhan     │ hankou  │
│ Hubei     │ [1,2,3]    │ ['wuhan','wuhan','wuhan']           │ ['wuchang','hankou','hanyang'] │    3 │ wuhan     │ hanyang │
└───────────┴────────────┴─────────────────────────────────────┴────────────────────────────────┴──────┴───────────┴─────────┘
 
7 rows in set. Elapsed: 0.004 sec. 
 
 

猜你喜欢

转载自blog.csdn.net/qq_21383435/article/details/113742309