SQL 累计和
计算逻辑:
-- Running total of newly onlined merchants per calendar month (Hive / Spark SQL).
--
-- Output, one row per calendar month from 2018-01 through the month that
-- contains "yesterday":
--   year_month  : month key built as yyyyMM
--   month_frist : first day of the month (alias spelling kept so downstream
--                 consumers of this column name keep working)
--   shopnum     : running total of merchants onlined from the first calendar
--                 month up to and including this month
--   shopnum_up  : merchants onlined in this month only
--
-- The CTEs were previously named `date` and `end`; both are reserved keywords
-- in Hive, so they are renamed here, and the physical `date` column / `DATE`
-- table are backtick-quoted.
--
-- NOTE(review): the calendar starts at 2018-01-01 and drives the left join, so
-- merchants whose online_time is before 2018-01 never enter the running total.
-- Confirm that this is the intended baseline.

-- Calendar months: one row per month found in the daily calendar table.
-- Assumes `date` is a 'yyyy-MM-dd' string (it is compared with '2018-01-01'
-- and sliced by position) -- TODO confirm against the table definition.
with month_calendar as (
    select distinct
        concat(substr(`date`, 1, 4), substr(`date`, 6, 2)) as year_month
        ,trunc(`date`, 'MM') as month_frist  -- first day of the month
    from AAAA.`DATE`  -- daily calendar table
    -- Upper bound is the day before the templated run date (i.e. yesterday);
    -- the ${...} placeholder is substituted before the SQL is executed.
    where `date` between '2018-01-01'
                     and date_sub('${zdt.format("yyyy-MM-dd")}', 1)
)
-- Merchants newly onlined in each month (count(shop) skips NULL shop values).
,monthly_new_shops as (
    select
        concat(substr(online_time, 1, 4), substr(online_time, 6, 2)) as year_month
        ,count(shop) as shopnum
    from BBBB.SHOP_TABLE  -- merchant online table
    group by concat(substr(online_time, 1, 4), substr(online_time, 6, 2))
)
-- Every calendar month paired with its new-merchant count; months without any
-- new merchant get 0 so the running total below has no gaps.
,monthly_filled as (
    select
        cal.year_month
        ,cal.month_frist
        ,coalesce(new_shops.shopnum, 0) as shopnum
    from month_calendar as cal
    left join monthly_new_shops as new_shops
        on cal.year_month = new_shops.year_month
)
-- sum() over (...) with an explicit ROWS frame accumulates month by month.
select
    year_month
    ,month_frist
    ,sum(shopnum) over (
        order by year_month
        rows between unbounded preceding and current row
    ) as shopnum  -- cumulative merchant count
    ,shopnum as shopnum_up  -- merchants added in this month
from monthly_filled
;
sum开窗函数使用:
-- sum() as a window function: the same aggregate under different frames.
-- The day-number examples below assume one row per day per cookieid, starting
-- on the 10th (sample data from the original notes, not shown here).
select
    cookieid
    ,createtime
    ,pv

    -- pv1: running total inside each cookieid, from the first row of the
    --      partition to the current row.
    --      e.g. 11th = 10th + 11th; 12th = 10th + 11th + 12th.
    --   * partition by ...   : may be omitted when no grouping is needed.
    --   * order by createtime: sorts the rows inside the window.
    --   * rows between unbounded preceding and current row: frame start/end.
    --     `unbounded preceding` starts at the first row; `current row` is the
    --     default end point, so the clause is equivalent to
    --     `rows unbounded preceding`.
    --   * N preceding = N rows before the current row;
    --     N following = N rows after the current row.
    ,sum(pv) over (
        partition by cookieid
        order by createtime
        rows between unbounded preceding and current row
    ) as pv1

    -- pv2: no explicit frame. With an ORDER BY the default frame is
    --      RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, so pv2 equals pv1
    --      as long as createtime is unique within a cookieid; rows that tie on
    --      createtime all receive the same total.
    ,sum(pv) over (
        partition by cookieid
        order by createtime
    ) as pv2

    -- pv3: no ORDER BY and no frame -> total pv of the whole cookieid partition.
    ,sum(pv) over (
        partition by cookieid
    ) as pv3

    -- pv4: current row plus the 3 rows before it.
    --      e.g. 11th = 10th + 11th; 12th = 10th..12th; 13th = 10th..13th;
    --           14th = 11th + 12th + 13th + 14th.
    ,sum(pv) over (
        partition by cookieid
        order by createtime
        rows between 3 preceding and current row
    ) as pv4

    -- pv5: current row plus the 3 rows before it and the 1 row after it.
    --      e.g. 14th = 11th + 12th + 13th + 14th + 15th = 5 + 7 + 3 + 2 + 4 = 21
    --      (pv values taken from the original notes' sample data).
    ,sum(pv) over (
        partition by cookieid
        order by createtime
        rows between 3 preceding and 1 following
    ) as pv5

    -- pv6: current row through the last row of the partition.
    ,sum(pv) over (
        partition by cookieid
        order by createtime
        rows between current row and unbounded following
    ) as pv6
from CCCC.test06