连续N天不登录的数据获取(hive)

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/BabyFish13/article/details/83897138

此处利用临时表方便操作,也使思路更加清晰。

-- Step 1: build the base cohort.
-- One row per uid that has at least one type-1 or type-3 row dated
-- 2018-03-01 .. 2018-03-15 (inclusive). For each uid keep the smallest type
-- and the earliest pt_day in that range; the latter is exposed as reg_day
-- (presumably the registration day -- confirm against the source table).
-- Note: min(type) and min(pt_day) are computed independently, so they need
-- not come from the same source row.
drop table if exists xxyl1110_uid_basic;
create table xxyl1110_uid_basic as
select
    uid,
    min(type)   as type,
    min(pt_day) as reg_day
from oss_bi_type_of_all_user
where type in (1, 3)
  and pt_day >= '2018-03-01'
  and pt_day <= '2018-03-15'
group by uid;
-- Step 2: line up every activity day with the following one using lead().
-- For each cohort uid, take every row dated 2018-03-01 .. 2018-09-16
-- (inclusive) with type 1-4 and attach the next pt_day of the same uid
-- as next1_pt_day. type and reg_day are carried over from the cohort table.
--
-- The lead() default is '2018-09-17', i.e. the day AFTER the observation
-- window ends. Because rows dated 2018-09-16 are themselves included, using
-- 2018-09-16 as the default would leave the final window day uncounted: a
-- consumer computing datediff(next1_pt_day, pt_day) - 1 would get -1 for a
-- uid whose last row is on 2018-09-16, and one day too few for every other
-- uid's trailing gap. With '2018-09-17' the trailing gap is counted through
-- 2018-09-16 inclusive, and next1_pt_day = '2018-09-17' unambiguously means
-- "no later row inside the window".
drop table if exists xxyl1110_user_login_lead;
create table xxyl1110_user_login_lead as
select
    a1.uid,
    a2.type,
    a2.reg_day,
    a1.pt_day,
    lead(a1.pt_day, 1, '2018-09-17') over (partition by a1.uid order by a1.pt_day) as next1_pt_day
from oss_bi_type_of_all_user a1
inner join xxyl1110_uid_basic a2
    on a1.uid = a2.uid
where a1.pt_day between '2018-03-01' and '2018-09-16'
  and a1.type in (1, 2, 3, 4);
-- Step 3: measure the gap between consecutive activity days.
-- continuity_nologin_days is the number of calendar days strictly between
-- pt_day and next1_pt_day (hence the "- 1"); two adjacent days give 0.
drop table if exists xxyl1110_user_continuity_nologin_days;
create table xxyl1110_user_continuity_nologin_days as
select
    uid,
    type,
    reg_day,
    pt_day,
    next1_pt_day,
    datediff(next1_pt_day, pt_day) - 1 as continuity_nologin_days
from xxyl1110_user_login_lead;
-- Step 4: keep, per user, the first gap of 14 or more inactive days.
-- Only rows with continuity_nologin_days >= 14 are ranked; within each
-- (uid, type, reg_day) group they are numbered by pt_day ascending and the
-- earliest one is kept.
--   lost_day                = pt_day immediately before the gap
--   afterlost_first_log_day = next1_pt_day recorded for that gap
drop table if exists xxyl1110_user_lost_day;
create table xxyl1110_user_lost_day as
select
    long_gaps.uid,
    long_gaps.type,
    long_gaps.reg_day,
    long_gaps.pt_day       as lost_day,
    long_gaps.next1_pt_day as afterlost_first_log_day
from (
    select
        uid,
        type,
        reg_day,
        pt_day,
        next1_pt_day,
        row_number() over (partition by uid, type, reg_day order by pt_day asc) as gap_seq
    from xxyl1110_user_continuity_nologin_days
    where continuity_nologin_days >= 14
) long_gaps
where long_gaps.gap_seq = 1;

猜你喜欢

转载自blog.csdn.net/BabyFish13/article/details/83897138