hive skills

1. WITH AS 的使用 + 求占比

-- Inline demo data: one row per (department_id, item_id) with its money amount.
WITH sale AS (
    SELECT 'a' AS department_id, '001' AS item_id, 100 AS money UNION ALL
    SELECT 'a' AS department_id, '002' AS item_id, 200 AS money UNION ALL
    SELECT 'b' AS department_id, '003' AS item_id, 300 AS money UNION ALL
    SELECT 'b' AS department_id, '004' AS item_id, 400 AS money
)
-- Share of each row's money within its own department.
-- SUM(money) OVER (PARTITION BY department_id) is the department total,
-- repeated on every row of that department.
-- The total is cast to DOUBLE rather than FLOAT: a 32-bit float cannot
-- represent large totals exactly, which would skew the ratio.
SELECT
    *,
    ROUND(
        money / CAST(SUM(money) OVER (PARTITION BY department_id) AS DOUBLE),
        2
    ) AS p
FROM sale;
result:
sale.department_id	sale.item_id	sale.money	p
a	001	100	0.33
a	002	200	0.67
b	004	400	0.57
b	003	300	0.43
-- Share of each row's money in the grand total over all departments.
-- A CTE is visible only to the single statement it is attached to, so the
-- demo data is declared again here to keep this query runnable on its own.
WITH sale AS (
    SELECT 'a' AS department_id, '001' AS item_id, 100 AS money UNION ALL
    SELECT 'a' AS department_id, '002' AS item_id, 200 AS money UNION ALL
    SELECT 'b' AS department_id, '003' AS item_id, 300 AS money UNION ALL
    SELECT 'b' AS department_id, '004' AS item_id, 400 AS money
)
-- SUM(money) OVER () has no PARTITION BY, so the window is the whole result
-- set. The total is cast to DOUBLE rather than FLOAT so that large totals
-- keep their precision.
SELECT
    *,
    ROUND(money / CAST(SUM(money) OVER () AS DOUBLE), 2) AS p
FROM sale;
result:
sale.department_id	sale.item_id	sale.money	p
a	001	100	0.1
b	003	300	0.3
a	002	200	0.2
b	004	400	0.4

2. hive将行结果转成列形式+求占比

-- Per (day, app_version, category) statistics for app version 3.5.0.
-- The isvalid flag is pivoted from rows into columns with conditional sums,
-- and the invalid share is derived from the same aggregates.
-- NOTE(review): the aliases call COUNT(*) "total users", which presumes one
-- row per user in `test` -- confirm against the table definition.
SELECT
    day,
    app_version,
    category,
    -- "total users": number of rows in the group
    COUNT(*) AS `总用户数`,
    -- "total records": sum of the per-row `count` column
    SUM(`count`) AS `总记录数`,
    -- "records per user": average, rounded down
    FLOOR(SUM(`count`) / COUNT(*)) AS `人均记录数`,
    -- "valid users": rows with isvalid = 1
    SUM(CASE WHEN isvalid = 1 THEN 1 ELSE 0 END) AS `有效用户数`,
    -- "invalid users": rows with isvalid = 0
    SUM(CASE WHEN isvalid = 0 THEN 1 ELSE 0 END) AS `无效用户数`,
    -- "invalid user share": invalid rows / all rows, 4 decimal places
    ROUND(SUM(CASE WHEN isvalid = 0 THEN 1 ELSE 0 END) / COUNT(*), 4) AS `无效用户数占比`
FROM test
WHERE app_version = '3.5.0'
GROUP BY
    day,
    app_version,
    category
ORDER BY
    day,
    app_version,
    category

3. 普通行结果求占比

-- Distribution of rows over connect_time buckets: for every
-- (day, app_version, category), the row count per bucket (cnt) and that
-- bucket's share (p) of the group's total count.
WITH bucketed AS (
    -- One row per (day, app_version, category, bucket).
    -- FLOOR(connect_time / 10) * 10 rounds connect_time down to a multiple
    -- of 10, i.e. buckets of width 10.
    SELECT
        day,
        app_version,
        category,
        FLOOR(connect_time / 10) * 10 AS connect_time,
        COUNT(*) AS cnt
    FROM test
    WHERE isvalid = 1
      -- 24 * 3600 * 1000 = 86,400,000; presumably 24 hours in milliseconds
      -- -- TODO confirm the unit of keeptimes.
      AND keeptimes > 24 * 3600 * 1000
    GROUP BY
        day,
        app_version,
        category,
        FLOOR(connect_time / 10) * 10
)
-- The CTE already yields exactly one row per output row, so no outer
-- GROUP BY is needed; the window sum supplies the per-group total.
SELECT
    day,
    app_version,
    category,
    connect_time,
    cnt,
    cnt / SUM(cnt) OVER (PARTITION BY day, app_version, category) AS p
FROM bucketed
-- connect_time is unique within each (day, app_version, category), so no
-- further tie-breaker is required for a deterministic order.
ORDER BY
    day,
    app_version,
    category,
    connect_time

猜你喜欢

转载自blog.csdn.net/zhouyan8603/article/details/83592057
今日推荐