Hive留存计算

准备表

CREATE EXTERNAL TABLE ods_app_log (
    sdk_ver string
    ,time_zone string
    ,commit_id string
    ,commit_time string
    ,pid string
    ,app_token string
    ,app_id string
    ,device_id string
    ,device_id_type string
    ,release_channel string
    ,app_ver_name string
    ,app_ver_code string
    ,os_name string
    ,os_ver string
    ,LANGUAGE string
    ,country string
    ,manufacture string
    ,device_model string
    ,resolution string
    ,net_type string
    ,account string
    ,app_device_id string
    ,mac string
    ,android_id string
    ,imei string
    ,cid_sn string
    ,build_num string
    ,mobile_data_type string
    ,promotion_channel string
    ,carrier string
    ,city string
    ,user_id string
    ) partitioned BY (
    day string
    ,os string
    ) row format delimited fields terminated BY '\001' location '/app-log-data/clean';

ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-21',os = 'android') location '/app-log-data/clean/2017-09-21/android';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-21',os = 'ios') location '/app-log-data/clean/2017-09-21/ios';


ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-22',os = 'android') location '/app-log-data/clean/2017-09-22/android';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-22',os = 'ios') location '/app-log-data/clean/2017-09-22/ios';

ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-23',os = 'android') location '/app-log-data/clean/2017-09-23/android';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-23',os = 'ios') location '/app-log-data/clean/2017-09-23/ios';

ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-24',os = 'android') location '/app-log-data/clean/2017-09-24/android';
ALTER TABLE ods_app_log ADD PARTITION (day = '2017-09-25',os = 'ios') location '/app-log-data/clean/2017-09-25/ios';

--新增用户表
CREATE EXTERNAL TABLE etl_user_new_day (
    sdk_ver string
    ,time_zone string
    ,commit_id string
    ,commit_time string
    ,pid string
    ,app_token string
    ,app_id string
    ,device_id string
    ,device_id_type string
    ,release_channel string
    ,app_ver_name string
    ,app_ver_code string
    ,os_name string
    ,os_ver string
    ,LANGUAGE string
    ,country string
    ,manufacture string
    ,device_model string
    ,resolution string
    ,net_type string
    ,account string
    ,app_device_id string
    ,mac string
    ,android_id string
    ,imei string
    ,cid_sn string
    ,build_num string
    ,mobile_data_type string
    ,promotion_channel string
    ,carrier string
    ,city string
    ,user_id string
    ) partitioned BY (
    day string
    ) row format delimited fields terminated BY '\001';
    
-- 1 历史用户表
create table etl_user_history(user_id string);
insert into etl_user_history(user_id)
select distinct user_id from ods_app_log;


-- 2 当日新增用户表:存所有字段(每个人时间最早的一条),带有一个分区字段:day string;
create table etl_user_new_day like etl_user_active_day;

中间处理

当日活跃-历史用户表 --> 新增用户表的当日分区
insert  into etl_user_new_day partition(day='2017-09-21')
SELECT sdk_ver
    ,time_zone
    ,commit_id
    ,commit_time
    ,pid
    ,app_token
    ,app_id
    ,device_id
    ,device_id_type
    ,release_channel
    ,app_ver_name
    ,app_ver_code
    ,os_name
    ,os_ver
    ,LANGUAGE
    ,country
    ,manufacture
    ,device_model
    ,resolution
    ,net_type
    ,account
    ,app_device_id
    ,mac
    ,android_id
    ,imei
    ,cid_sn
    ,build_num
    ,mobile_data_type
    ,promotion_channel
    ,carrier
    ,city
    ,a.user_id
from  ods_app_log a left join  etl_user_history b on a.user_id = b.user_id
where a.day='2017-09-21' and b.user_id is null;

-- 2 将当日新增用户的user_id追加到历史表
insert into table etl_user_history
select distinct user_id from etl_user_new_day where day='2017-09-21';

-- 1 日新维度统计报表--数据建模
create table dim_user_new_day(os_name string,city string,release_channel string,app_ver_name string,cnts int)
partitioned by (day string, dim string);

计算留存

--次日留存用户统计
--昨天的数据跟今天的数据关联,yestoday=today-1天
select yestoday.day
,count(distinct yestoday.user_id) cnt1
,count(distinct today.user_id) cnt2 
from
(select * from ods_app_log where day>='2017-09-21' and day<='2017-09-24')  as yestoday
left  join 
(select * from  ods_app_log where day>='2017-09-21' and day<='2017-09-24')  as today 
on yestoday.day =date_add(today.day,-1) and yestoday.user_id=today.user_id
group by yestoday.day;


select yestoday.day
,count(distinct yestoday.user_id) as cnt1
,count(distinct today.user_id) as cnt2 
from
(select user_id,day from ods_app_log where day='2017-09-21')  as yestoday
left  join 
(select user_id,day from  ods_app_log where day='2017-09-22')  as today 
on yestoday.user_id=today.user_id
group by yestoday.day;
发布了35 篇原创文章 · 获赞 6 · 访问量 1万+

猜你喜欢

转载自blog.csdn.net/colby_chenlun/article/details/100578235