1.with as 使用 + 求占比
with sale as(
select 'a' as department_id,'001' as item_id,100 as money union all
select 'a' as department_id,'002' as item_id,200 as money union all
select 'b' as department_id,'003' as item_id,300 as money union all
select 'b' as department_id,'004' as item_id,400 as money)
--求分组后占分组的占比
select *,round(money/cast(sum(money)over(partition by department_id) as float),2) as p from sale;
result:
sale.department_id sale.item_id sale.money p
a 001 100 0.33
a 002 200 0.67
b 004 400 0.57
b 003 300 0.43
--求分组后占所有的占比
select *,round(money/cast(sum(money)over() as float),2) as p from sale;
result:
sale.department_id sale.item_id sale.money p
a 001 100 0.1
b 003 300 0.3
a 002 200 0.2
b 004 400 0.4
2. hive将行结果转成列形式+求占比
SELECT day,app_version,category,
count(*) as `总用户数`,
sum(`count`) as `总记录数`,
floor(sum(`count`)/count(*)) as `人均记录数`,
sum(case when isvalid=1 then 1 else 0 end) as `有效用户数`,
sum(case when isvalid=0 then 1 else 0 end) as `无效用户数`,
round(sum(case when isvalid=0 then 1 else 0 end)/count(*),4) as `无效用户数占比`
from test
WHERE app_version='3.5.0'
GROUP BY day,app_version,category ORDER BY day,app_version,category
3.普通行结果求占比
select day,app_version,category,connect_time,cnt,cnt/sum(cnt) over(partition by day,app_version,category) as p
from (
SELECT day,app_version,category,floor(connect_time/10)*10 as connect_time,
count(*) as cnt
from test
WHERE isvalid=1 and keeptimes>24*3600*1000
GROUP BY day,app_version,category,floor(connect_time/10)*10
) t
group by day,app_version,category,connect_time,cnt
ORDER BY day,app_version,category,connect_time,cnt