1.概述
Array JOIN 子句允许在数据表的内部,与数组或者嵌套的字段进行JOIN操作,从而将一行数据变多行。适用于行转列操作。
CREATE TABLE datasets.city
(
`province` String,
`arr_city` Array(String),
`arr_rank` Array(UInt8)
)
ENGINE = Log
Ok.
0 rows in set. Elapsed: 0.012 sec.
insert into city values('hubei',['wuhan','xiangyang'],[1,2]),
('guangdong',['guangzhou','shenzhen','zhuhai'],[1,2,3]);
insert into city values('beijing',[],[10]),('shanghai',[],[20]);
查看原始数据:
Clickhouse> select * from city;
SELECT *
FROM city
┌─province──┬─arr_city──────────────────────────┬─arr_rank─┐
│ hubei │ ['wuhan','xiangyang'] │ [1,2] │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ [1,2,3] │
│ beijing │ [] │ [10] │
│ shanghai │ [] │ [20] │
│ hongkong │ [] │ [] │
└───────────┴───────────────────────────────────┴──────────┘
5 rows in set. Elapsed: 0.003 sec.
1.INNER ARRAY JOIN:
Clickhouse> select province,city from city array join arr_city as city;
SELECT
province,
city
FROM city
ARRAY JOIN arr_city AS city
┌─province──┬─city──────┐
│ hubei │ wuhan │
│ hubei │ xiangyang │
│ guangdong │ guangzhou │
│ guangdong │ shenzhen │
│ guangdong │ zhuhai │
└───────────┴───────────┘
5 rows in set. Elapsed: 0.002 sec.
Clickhouse> select province,arr_city,city from city array join arr_city as city;
SELECT
province,
arr_city,
city
FROM city
ARRAY JOIN arr_city AS city
┌─province──┬─arr_city──────────────────────────┬─city──────┐
│ hubei │ ['wuhan','xiangyang'] │ wuhan │
│ hubei │ ['wuhan','xiangyang'] │ xiangyang │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ guangzhou │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ shenzhen │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ zhuhai │
└───────────┴───────────────────────────────────┴───────────┘
5 rows in set. Elapsed: 0.002 sec.
2. LEFT Array JOIN :
Clickhouse> select province,arr_city,city from city LEFT array join arr_city as city FORMAT PrettyCompactMonoBlock;
SELECT
province,
arr_city,
city
FROM city
LEFT ARRAY JOIN arr_city AS city
FORMAT PrettyCompactMonoBlock
┌─province──┬─arr_city──────────────────────────┬─city──────┐
│ hubei │ ['wuhan','xiangyang'] │ wuhan │
│ hubei │ ['wuhan','xiangyang'] │ xiangyang │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ guangzhou │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ shenzhen │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ zhuhai │
│ beijing │ [] │ │
│ shanghai │ [] │ │
│ hongkong │ [] │ │
└───────────┴───────────────────────────────────┴───────────┘
8 rows in set. Elapsed: 0.010 sec. .
当同时对数组字段进行ARRAY JOIN 操作时候,查询的计算逻辑是按行合并而不是产生笛卡尔积:
Clickhouse> select province,arr_city,arr_rank,v ,arrayMap(x->x*x,arr_rank) mapv,v1 from city c left array join arr_rank as v,mapv as v1 FORMAT PrettyCompactMonoBlock;
SELECT
province,
arr_city,
arr_rank,
v,
arrayMap(x -> (x * x), arr_rank) AS mapv,
v1
FROM city AS c
LEFT ARRAY JOIN
arr_rank AS v,
mapv AS v1
FORMAT PrettyCompactMonoBlock
┌─province──┬─arr_city──────────────────────────┬─arr_rank─┬──v─┬─mapv────┬──v1─┐
│ hubei │ ['wuhan','xiangyang'] │ [1,2] │ 1 │ [1,4] │ 1 │
│ hubei │ ['wuhan','xiangyang'] │ [1,2] │ 2 │ [1,4] │ 4 │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ [1,2,3] │ 1 │ [1,4,9] │ 1 │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ [1,2,3] │ 2 │ [1,4,9] │ 4 │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ [1,2,3] │ 3 │ [1,4,9] │ 9 │
│ beijing │ [] │ [10] │ 10 │ [100] │ 100 │
│ shanghai │ [] │ [20] │ 20 │ [400] │ 400 │
│ hongkong │ [] │ [] │ 0 │ [] │ 0 │
└───────────┴───────────────────────────────────┴──────────┴────┴─────────┴─────┘
8 rows in set. Elapsed: 0.004 sec.
---
Clickhouse> SELECT province , arr_rank, a, num, mapped
:-] FROM city
:-] ARRAY JOIN arr_rank AS a, arrayEnumerate(arr_rank) AS num, arrayMap(x -> x + 1, arr_rank) AS mapped;
SELECT
province,
arr_rank,
a,
num,
mapped
FROM city
ARRAY JOIN
arr_rank AS a,
arrayEnumerate(arr_rank) AS num,
arrayMap(x -> (x + 1), arr_rank) AS mapped
┌─province──┬─arr_rank─┬──a─┬─num─┬─mapped─┐
│ hubei │ [1,2] │ 1 │ 1 │ 2 │
│ hubei │ [1,2] │ 2 │ 2 │ 3 │
│ guangdong │ [1,2,3] │ 1 │ 1 │ 2 │
│ guangdong │ [1,2,3] │ 2 │ 2 │ 3 │
│ guangdong │ [1,2,3] │ 3 │ 3 │ 4 │
│ beijing │ [10] │ 10 │ 1 │ 11 │
│ shanghai │ [20] │ 20 │ 1 │ 21 │
└───────────┴──────────┴────┴─────┴────────┘
7 rows in set. Elapsed: 0.005 sec.
arr_rank 和mapped 数组并没有产生笛卡尔积。
嵌套数据类型本质是数组,ARRAY JOIN 也支持嵌套数据类型。
嵌套表创建:
Clickhouse> create table t_nest(province String, addr Nested(no UInt8,city String,area String)) engine=MergeTree() order by province;
CREATE TABLE t_nest
(
`province` String,
`addr` Nested( no UInt8, city String, area String)
)
ENGINE = MergeTree()
ORDER BY province
Clickhouse> desc t_nest;
DESCRIBE TABLE t_nest
┌─name──────┬─type──────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ province │ String │ │ │ │ │ │
│ addr.no │ Array(UInt8) │ │ │ │ │ │
│ addr.city │ Array(String) │ │ │ │ │ │
│ addr.area │ Array(String) │ │ │ │ │ │
└───────────┴───────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
4 rows in set. Elapsed: 0.003 sec.
Clickhouse> insert into t_nest values('Hubei',[1,2,3],['wuhan','wuhan','wuhan'],['wuchang','hankou','hanyang']);
INSERT INTO t_nest VALUES
Ok.
1 rows in set. Elapsed: 0.005 sec.
Clickhouse> insert into t_nest values('Guangdong',[10,20,30],['guangzhou','shenzhen','shenzhen'],['baiyun','nanshan','baoan']);
INSERT INTO t_nest VALUES
Ok.
1 rows in set. Elapsed: 0.002 sec.
Clickhouse> insert into t_nest values('Shanghai',[],[],[]);
INSERT INTO t_nest VALUES
Ok.
Clickhouse> select * from t_nest FORMAT PrettyCompactMonoBlock;
SELECT *
FROM t_nest
FORMAT PrettyCompactMonoBlock
┌─province──┬─addr.no────┬─addr.city───────────────────────────┬─addr.area──────────────────────┐
│ Guangdong │ [10,20,30] │ ['guangzhou','shenzhen','shenzhen'] │ ['baiyun','nanshan','baoan'] │
│ Hubei │ [1,2,3] │ ['wuhan','wuhan','wuhan'] │ ['wuchang','hankou','hanyang'] │
│ Shanghai │ [] │ [] │ [] │
└───────────┴────────────┴─────────────────────────────────────┴────────────────────────────────┘
3 rows in set. Elapsed: 0.003 sec.
对嵌套类型数据的访问 :
1.ARRAY JOIN 可以直接使用字段列名:
Clickhouse> select province,addr.no,addr.city,addr.area from t_nest array join addr FORMAT PrettyCompactMonoBlock;
SELECT
province,
addr.no,
addr.city,
addr.area
FROM t_nest
ARRAY JOIN addr
FORMAT PrettyCompactMonoBlock
┌─province──┬─addr.no─┬─addr.city─┬─addr.area─┐
│ Hubei │ 1 │ wuhan │ wuchang │
│ Hubei │ 2 │ wuhan │ hankou │
│ Hubei │ 3 │ wuhan │ hanyang │
│ Guangdong │ 10 │ guangzhou │ baiyun │
│ Guangdong │ 20 │ shenzhen │ nanshan │
│ Guangdong │ 30 │ shenzhen │ baoan │
└───────────┴─────────┴───────────┴───────────┘
6 rows in set. Elapsed: 0.005 sec.
2.也可以使用点访问符号的形式:
Clickhouse> select province,addr.no,addr.city,addr.area from t_nest array join addr.no,addr.city,addr.area FORMAT PrettyCompactMonoBlock;
SELECT
province,
addr.no,
addr.city,
addr.area
FROM t_nest
ARRAY JOIN
addr.no,
addr.city,
addr.area
FORMAT PrettyCompactMonoBlock
┌─province──┬─addr.no─┬─addr.city─┬─addr.area─┐
│ Hubei │ 1 │ wuhan │ wuchang │
│ Hubei │ 2 │ wuhan │ hankou │
│ Hubei │ 3 │ wuhan │ hanyang │
│ Guangdong │ 10 │ guangzhou │ baiyun │
│ Guangdong │ 20 │ shenzhen │ nanshan │
│ Guangdong │ 30 │ shenzhen │ baoan │
└───────────┴─────────┴───────────┴───────────┘
6 rows in set. Elapsed: 0.004 sec.
可以看到少了一条记录:
Clickhouse> select province,addr.no,addr.city,addr.area from t_nest left array join addr.no,addr.city,addr.area FORMAT PrettyCompactMonoBlock;
SELECT
province,
addr.no,
addr.city,
addr.area
FROM t_nest
LEFT ARRAY JOIN
addr.no,
addr.city,
addr.area
FORMAT PrettyCompactMonoBlock
┌─province──┬─addr.no─┬─addr.city─┬─addr.area─┐
│ Shanghai │ 0 │ │ │
│ Guangdong │ 10 │ guangzhou │ baiyun │
│ Guangdong │ 20 │ shenzhen │ nanshan │
│ Guangdong │ 30 │ shenzhen │ baoan │
│ Hubei │ 1 │ wuhan │ wuchang │
│ Hubei │ 2 │ wuhan │ hankou │
│ Hubei │ 3 │ wuhan │ hanyang │
└───────────┴─────────┴───────────┴───────────┘
7 rows in set. Elapsed: 0.003 sec.
3.嵌套类型也支持ARRAY JOIN部分嵌套字段:
Clickhouse> select province,addr.no,addr.city,addr.area from t_nest left array join addr.no FORMAT PrettyCompactMonoBlock;
SELECT
province,
addr.no,
addr.city,
addr.area
FROM t_nest
LEFT ARRAY JOIN addr.no
FORMAT PrettyCompactMonoBlock
┌─province──┬─addr.no─┬─addr.city───────────────────────────┬─addr.area──────────────────────┐
│ Shanghai │ 0 │ [] │ [] │
│ Hubei │ 1 │ ['wuhan','wuhan','wuhan'] │ ['wuchang','hankou','hanyang'] │
│ Hubei │ 2 │ ['wuhan','wuhan','wuhan'] │ ['wuchang','hankou','hanyang'] │
│ Hubei │ 3 │ ['wuhan','wuhan','wuhan'] │ ['wuchang','hankou','hanyang'] │
│ Guangdong │ 10 │ ['guangzhou','shenzhen','shenzhen'] │ ['baiyun','nanshan','baoan'] │
│ Guangdong │ 20 │ ['guangzhou','shenzhen','shenzhen'] │ ['baiyun','nanshan','baoan'] │
│ Guangdong │ 30 │ ['guangzhou','shenzhen','shenzhen'] │ ['baiyun','nanshan','baoan'] │
└───────────┴─────────┴─────────────────────────────────────┴────────────────────────────────┘
7 rows in set. Elapsed: 0.002 sec.
在此种情形下只有array join的数组才会被展开。
4.在嵌套类型时候可以通过别名的方式访问原始数组:
Clickhouse> select province,addr.no,addr.city,addr.area,a.no,a.city,a.area from t_nest left array join addr as a FORMAT PrettyCompactMonoBlock;
SELECT
province,
addr.no,
addr.city,
addr.area,
a.no,
a.city,
a.area
FROM t_nest
LEFT ARRAY JOIN addr AS a
FORMAT PrettyCompactMonoBlock
┌─province──┬─addr.no────┬─addr.city───────────────────────────┬─addr.area──────────────────────┬─a.no─┬─a.city────┬─a.area──┐
│ Guangdong │ [10,20,30] │ ['guangzhou','shenzhen','shenzhen'] │ ['baiyun','nanshan','baoan'] │ 10 │ guangzhou │ baiyun │
│ Guangdong │ [10,20,30] │ ['guangzhou','shenzhen','shenzhen'] │ ['baiyun','nanshan','baoan'] │ 20 │ shenzhen │ nanshan │
│ Guangdong │ [10,20,30] │ ['guangzhou','shenzhen','shenzhen'] │ ['baiyun','nanshan','baoan'] │ 30 │ shenzhen │ baoan │
│ Shanghai │ [] │ [] │ [] │ 0 │ │ │
│ Hubei │ [1,2,3] │ ['wuhan','wuhan','wuhan'] │ ['wuchang','hankou','hanyang'] │ 1 │ wuhan │ wuchang │
│ Hubei │ [1,2,3] │ ['wuhan','wuhan','wuhan'] │ ['wuchang','hankou','hanyang'] │ 2 │ wuhan │ hankou │
│ Hubei │ [1,2,3] │ ['wuhan','wuhan','wuhan'] │ ['wuchang','hankou','hanyang'] │ 3 │ wuhan │ hanyang │
└───────────┴────────────┴─────────────────────────────────────┴────────────────────────────────┴──────┴───────────┴─────────┘
7 rows in set. Elapsed: 0.004 sec.