备注:测试数据库版本为MySQL 8.0
测试数据:
create table it_research(deptno int, ename varchar(20));
insert into it_research values (100,'HOPKINS');
insert into it_research values (100,'JONES');
insert into it_research values (100,'TONEY');
insert into it_research values (200,'MORALES');
insert into it_research values (200,'P.WHITAKER');
insert into it_research values (200,'MARCIANO');
insert into it_research values (200,'ROBINSON');
insert into it_research values (300,'LACY');
insert into it_research values (300,'WRIGHT');
insert into it_research values (300,'J.TAYLOR');
CREATE TABLE IT_APPS (deptno int,ename varchar(20));
insert into it_apps values (400,'CORRALES');
insert into it_apps values (400,'MAYWEATHER');
insert into it_apps values (400,'CASTILLO');
insert into it_apps values (400,'MARQUEZ');
insert into it_apps values (400,'MOSLEY');
insert into it_apps values (500,'GATTI');
insert into it_apps values (500,'CALZAGHE');
insert into it_apps values (600,'LAMOTTA');
insert into it_apps values (600,'HAGLER');
insert into it_apps values (600,'HEARNS');
insert into it_apps values (600,'FRAZIER');
insert into it_apps values (700,'GUINN');
insert into it_apps values (700,'JUDAH');
insert into it_apps values (700,'MARGARITO');
一.需求
把两个结果集叠在一起,然后把他们转置为两列,另外,还要为每列加一个"标题"。
例如,有两个表,它们包含公司中有关员工的信息,这些员工在不同地区从事开发工作(也即研究和应用):
mysql> select * from it_research;
±-------±-----------+
| deptno | ename |
±-------±-----------+
| 100 | HOPKINS |
| 100 | JONES |
| 100 | TONEY |
| 200 | MORALES |
| 200 | P.WHITAKER |
| 200 | MARCIANO |
| 200 | ROBINSON |
| 300 | LACY |
| 300 | WRIGHT |
| 300 | J.TAYLOR |
±-------±-----------+
10 rows in set (0.00 sec)
mysql> select * from it_apps;
±-------±-----------+
| deptno | ename |
±-------±-----------+
| 400 | CORRALES |
| 400 | MAYWEATHER |
| 400 | CASTILLO |
| 400 | MARQUEZ |
| 400 | MOSLEY |
| 500 | GATTI |
| 500 | CALZAGHE |
| 600 | LAMOTTA |
| 600 | HAGLER |
| 600 | HEARNS |
| 600 | FRAZIER |
| 700 | GUINN |
| 700 | JUDAH |
| 700 | MARGARITO |
±-------±-----------+
14 rows in set (0.00 sec)
要创建一个报表,它分两栏列出两个表中的员工。
对于每一列,都返回deptno和ename。
最后,返回下列结果集:
二.解决方案
本解决方案只需要一个简单的堆叠及转置(合并后转置)并且再"拧"一次:deptno 一定在每个员工的ename之前。
这里的技巧采用了笛卡尔积为每个deptno生产附加行,这样才有足够的行显示所有员工和deptno。
with tmp1 as
(
select deptno,
ename,
count(*) over (partition by deptno) cnt
from it_apps
order by deptno,ename
),
tmp2 as
(
select 1 id union select 2
),
tmp3 as
(
select deptno,
ename,
cnt,
row_number() over w1 as 'rn'
from tmp1,tmp2
window w1 as (partition by deptno order by id,ename)
),
tmp4 as
(
select 1 flag1,
1 flag2,
case when rn = 1 then deptno else concat(' ',ename) end it_dept
from tmp3
where rn <= cnt + 1
)
,
tmp5 as
(
select deptno,
ename,
count(*) over (partition by deptno) cnt
from it_research
order by deptno,ename
),
tmp6 as
(
select deptno,
ename,
cnt,
row_number() over w2 as 'rn'
from tmp5,tmp2
window w2 as (partition by deptno order by id,ename)
),
tmp7 as
(
select 1 flag1,0 flag2,case when rn = 1 then deptno else concat(' ',ename) end it_dept
from tmp6
where rn <= cnt + 1
),
tmp8 as
(
select flag1,
flag2,
it_dept
from tmp7
union all
select flag1,
flag2,
it_dept
from tmp4
),
tmp9 AS
(
select flag1,
flag2,
it_dept,
row_number() over w3 as 'rn2'
from tmp8
window w3 as ()
),
tmp10 as
(
select sum(flag1) over (partition by flag2 order by flag1,rn2) flag,
flag2,
it_dept
from tmp9
)
select max(case when flag2 = 0 then it_dept end) research,
max(case when flag2 = 1 then it_dept end) apps
from tmp10
group by flag
order by flag
测试记录:
mysql> with tmp1 as
-> (
-> select deptno,
-> ename,
-> count(*) over (partition by deptno) cnt
-> from it_apps
-> order by deptno,ename
-> ),
-> tmp2 as
-> (
-> select 1 id union select 2
-> ),
-> tmp3 as
-> (
-> select deptno,
-> ename,
-> cnt,
-> row_number() over w1 as 'rn'
-> from tmp1,tmp2
-> window w1 as (partition by deptno order by id,ename)
-> ),
-> tmp4 as
-> (
-> select 1 flag1,
-> 1 flag2,
-> case when rn = 1 then deptno else concat(' ',ename) end it_dept
-> from tmp3
-> where rn <= cnt + 1
-> )
-> ,
-> tmp5 as
-> (
-> select deptno,
-> ename,
-> count(*) over (partition by deptno) cnt
-> from it_research
-> order by deptno,ename
-> ),
-> tmp6 as
-> (
-> select deptno,
-> ename,
-> cnt,
-> row_number() over w2 as 'rn'
-> from tmp5,tmp2
-> window w2 as (partition by deptno order by id,ename)
-> ),
-> tmp7 as
-> (
-> select 1 flag1,0 flag2,case when rn = 1 then deptno else concat(' ',ename) end it_dept
-> from tmp6
-> where rn <= cnt + 1
-> ),
-> tmp8 as
-> (
-> select flag1,
-> flag2,
-> it_dept
-> from tmp7
-> union all
-> select flag1,
-> flag2,
-> it_dept
-> from tmp4
-> ),
-> tmp9 AS
-> (
-> select flag1,
-> flag2,
-> it_dept,
-> row_number() over w3 as 'rn2'
-> from tmp8
-> window w3 as ()
-> ),
-> tmp10 as
-> (
-> select sum(flag1) over (partition by flag2 order by flag1,rn2) flag,
-> flag2,
-> it_dept
-> from tmp9
-> )
-> select max(case when flag2 = 0 then it_dept end) research,
-> max(case when flag2 = 1 then it_dept end) apps
-> from tmp10
-> group by flag
-> order by flag ;
+--------------+--------------+
| research | apps |
+--------------+--------------+
| 100 | 400 |
| JONES | CORRALES |
| TONEY | MARQUEZ |
| HOPKINS | MAYWEATHER |
| 200 | MOSLEY |
| MORALES | CASTILLO |
| P.WHITAKER | 500 |
| ROBINSON | GATTI |
| MARCIANO | CALZAGHE |
| 300 | 600 |
| LACY | HAGLER |
| WRIGHT | HEARNS |
| J.TAYLOR | LAMOTTA |
| NULL | FRAZIER |
| NULL | 700 |
| NULL | JUDAH |
| NULL | MARGARITO |
| NULL | GUINN |
+--------------+--------------+
18 rows in set (0.00 sec)
这样看起来临时表都10个临时表了,逻辑看起来太复杂了,下面我们拆开来讲解
2.1 分解求出it_apps各部门及员工
我们首先来看看tmp4的结果集:
mysql> with tmp1 as
-> (
-> select deptno,
-> ename,
-> count(*) over (partition by deptno) cnt
-> from it_apps
-> order by deptno,ename
-> ),
-> tmp2 as
-> (
-> select 1 id union select 2
-> ),
-> tmp3 as
-> (
-> select deptno,
-> ename,
-> cnt,
-> row_number() over w1 as 'rn'
-> from tmp1,tmp2
-> window w1 as (partition by deptno order by id,ename)
-> ),
-> tmp4 as
-> (
-> select 1 flag1,
-> 1 flag2,
-> case when rn = 1 then deptno else concat(' ',ename) end it_dept
-> from tmp3
-> where rn <= cnt + 1
-> )
-> select * from tmp4;
+-------+-------+--------------+
| flag1 | flag2 | it_dept |
+-------+-------+--------------+
| 1 | 1 | 400 |
| 1 | 1 | CORRALES |
| 1 | 1 | MARQUEZ |
| 1 | 1 | MAYWEATHER |
| 1 | 1 | MOSLEY |
| 1 | 1 | CASTILLO |
| 1 | 1 | 500 |
| 1 | 1 | GATTI |
| 1 | 1 | CALZAGHE |
| 1 | 1 | 600 |
| 1 | 1 | HAGLER |
| 1 | 1 | HEARNS |
| 1 | 1 | LAMOTTA |
| 1 | 1 | FRAZIER |
| 1 | 1 | 700 |
| 1 | 1 | JUDAH |
| 1 | 1 | MARGARITO |
| 1 | 1 | GUINN |
+-------+-------+--------------+
18 rows in set (0.00 sec)
那么tmp4的结果集是怎么来的呢?
我们来看看tmp1和tmp2以及tmp3
这个地方为什么要有tmp2,一个id为1 和 2 的表,其实因为第一行要显示deptno,后面的才是ename,所以此时应该是n+1,如果只是n,这个地方deptno带不出来。
tmp3就是在tmp1和tmp2的基础上进行了加工,记住order by这个地方一定要正确,不然数据会不准确
tmp3加工完成后,根据每个deptno rn为1的则显示deptno,从2到n+1遍历ename,根据rn <= cnt + 1来进行控制
mysql> select deptno,
-> ename,
-> count(*) over (partition by deptno) cnt
-> from it_apps
-> order by deptno,ename ;
+--------+------------+-----+
| deptno | ename | cnt |
+--------+------------+-----+
| 400 | CASTILLO | 5 |
| 400 | CORRALES | 5 |
| 400 | MARQUEZ | 5 |
| 400 | MAYWEATHER | 5 |
| 400 | MOSLEY | 5 |
| 500 | CALZAGHE | 2 |
| 500 | GATTI | 2 |
| 600 | FRAZIER | 4 |
| 600 | HAGLER | 4 |
| 600 | HEARNS | 4 |
| 600 | LAMOTTA | 4 |
| 700 | GUINN | 3 |
| 700 | JUDAH | 3 |
| 700 | MARGARITO | 3 |
+--------+------------+-----+
14 rows in set (0.00 sec)
mysql> select 1 id union select 2 ;
+----+
| id |
+----+
| 1 |
| 2 |
+----+
2 rows in set (0.00 sec)
mysql> with tmp1 as
-> (
-> select deptno,
-> ename,
-> count(*) over (partition by deptno) cnt
-> from it_apps
-> order by deptno,ename
-> ),
-> tmp2 as
-> (
-> select 1 id union select 2
-> )
-> select deptno,
-> ename,
-> cnt,
-> row_number() over w1 as 'rn'
-> from tmp1,tmp2
-> window w1 as (partition by deptno order by id,ename)
-> ;
+--------+------------+-----+----+
| deptno | ename | cnt | rn |
+--------+------------+-----+----+
| 400 | CASTILLO | 5 | 1 |
| 400 | CORRALES | 5 | 2 |
| 400 | MARQUEZ | 5 | 3 |
| 400 | MAYWEATHER | 5 | 4 |
| 400 | MOSLEY | 5 | 5 |
| 400 | CASTILLO | 5 | 6 |
| 400 | CORRALES | 5 | 7 |
| 400 | MARQUEZ | 5 | 8 |
| 400 | MAYWEATHER | 5 | 9 |
| 400 | MOSLEY | 5 | 10 |
| 500 | CALZAGHE | 2 | 1 |
| 500 | GATTI | 2 | 2 |
| 500 | CALZAGHE | 2 | 3 |
| 500 | GATTI | 2 | 4 |
| 600 | FRAZIER | 4 | 1 |
| 600 | HAGLER | 4 | 2 |
| 600 | HEARNS | 4 | 3 |
| 600 | LAMOTTA | 4 | 4 |
| 600 | FRAZIER | 4 | 5 |
| 600 | HAGLER | 4 | 6 |
| 600 | HEARNS | 4 | 7 |
| 600 | LAMOTTA | 4 | 8 |
| 700 | GUINN | 3 | 1 |
| 700 | JUDAH | 3 | 2 |
| 700 | MARGARITO | 3 | 3 |
| 700 | GUINN | 3 | 4 |
| 700 | JUDAH | 3 | 5 |
| 700 | MARGARITO | 3 | 6 |
+--------+------------+-----+----+
28 rows in set (0.00 sec)
2.2 it_apps与it_research进行拼接
tmp9得到如下结果:
此时就可以知道 flag1和flag2的作用了,flag2用来区别是 apps还是research,flag1根据flag2的区别进行累计求和,然后根据新的flag来进行group by,这样就可以将apps和research进行展示了
tmp10返回如下结果:
有了tmp10的结果,就很可以根据flag直接分组,通过max函数进行求求值。