六、数据仓库电商项目——ADS层、APP层

数仓搭建-ADS层

设备主题

活跃设备数主题(日、周、月)

需求定义:

  • 日活:当日活跃的设备数
  • 周活:当周活跃的设备数
  • 月活:当月活跃的设备数
 -- ADS table for active-device counts (day, week, month) per statistics date,
-- with Y/N flags marking whether that date closes the week or the month.
-- Stored as tab-delimited text under the gmall warehouse path.
DROP TABLE IF EXISTS ads_uv_count;
CREATE EXTERNAL TABLE ads_uv_count (
    `dt`          STRING COMMENT '统计日期',
    `day_count`   BIGINT COMMENT '当日用户数量',
    `wk_count`    BIGINT COMMENT '当周用户数量',
    `mn_count`    BIGINT COMMENT '当月用户数量',
    `is_weekend`  STRING COMMENT 'Y,N是否是周末,用于得到本周最终结果',
    `is_monthend` STRING COMMENT 'Y,N是否是月末,用于得到本月最终结果'
)
COMMENT '活跃设备数'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_uv_count/';


 
-- Append the active-device figures for 2020-03-29 to ads_uv_count.
-- Three single-row aggregates over dwt_uv_topic (day, week, month) are
-- stitched together on their constant dt column.
INSERT INTO TABLE ads_uv_count
SELECT
    '2020-03-29' AS dt,
    day_uv.ct,
    week_uv.ct,
    month_uv.ct,
    -- next_day(d, MO) minus one day is the Sunday that closes the week of d
    IF(date_add(next_day('2020-03-29', 'MO'), -1) = '2020-03-29', 'Y', 'N'),
    IF(last_day('2020-03-29') = '2020-03-29', 'Y', 'N')
FROM (
    -- devices whose last login is the statistics date
    SELECT
        '2020-03-29' AS dt,
        COUNT(*) AS ct
    FROM dwt_uv_topic
    WHERE login_date_last = '2020-03-29'
) day_uv
JOIN (
    -- devices whose last login falls in the Monday..Sunday week of the date
    SELECT
        '2020-03-29' AS dt,
        COUNT(*) AS ct
    FROM dwt_uv_topic
    WHERE login_date_last >= date_add(next_day('2020-03-29', 'MO'), -7)
      AND login_date_last <= date_add(next_day('2020-03-29', 'MO'), -1)
) week_uv
    ON day_uv.dt = week_uv.dt
JOIN (
    -- devices whose last login falls in the calendar month of the date
    SELECT
        '2020-03-29' AS dt,
        COUNT(*) AS ct
    FROM dwt_uv_topic
    WHERE date_format(login_date_last, 'yyyy-MM') = date_format('2020-03-29', 'yyyy-MM')
) month_uv
    ON day_uv.dt = month_uv.dt;

用户主题

用户主题信息

 

 -- ADS table for the member summary per statistics date: member counts plus
-- three derived ratios (activity, payment, freshness).
-- NOTE(review): the count columns are declared STRING although they hold
-- counts - confirm whether downstream consumers rely on that type.
DROP TABLE IF EXISTS ads_user_topic;
CREATE EXTERNAL TABLE ads_user_topic (
    `dt`                    STRING        COMMENT '统计日期',
    `day_users`             STRING        COMMENT '活跃会员数',
    `day_new_users`         STRING        COMMENT '新增会员数',
    `day_new_payment_users` STRING        COMMENT '新增消费会员数',
    `payment_users`         STRING        COMMENT '总付费会员数',
    `users`                 STRING        COMMENT '总会员数',
    `day_users2users`       DECIMAL(10,2) COMMENT '会员活跃率',
    `payment_users2users`   DECIMAL(10,2) COMMENT '总会员付费率',
    `day_new_users2users`   DECIMAL(10,2) COMMENT '会员新鲜度'
)
COMMENT '会员主题信息表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_user_topic';


 -- Append the member summary for 2020-03-29 to ads_user_topic.
-- Every figure is a conditional count over all rows of dwt_user_topic:
--   active members      : login_date_last    equals the statistics date
--   new members         : login_date_first   equals the statistics date
--   new paying members  : payment_date_first equals the statistics date
--   paying members      : payment_count greater than zero
--   total members       : all rows
-- followed by the activity ratio, the payment ratio and the freshness ratio
-- (new members divided by active members).
-- NOTE(review): the freshness ratio divides by the active-member count -
-- confirm the desired output for a date on which no member was active.
INSERT INTO TABLE ads_user_topic
SELECT
    '2020-03-29',
    SUM(IF(login_date_last = '2020-03-29', 1, 0)),
    SUM(IF(login_date_first = '2020-03-29', 1, 0)),
    SUM(IF(payment_date_first = '2020-03-29', 1, 0)),
    SUM(IF(payment_count > 0, 1, 0)),
    COUNT(*),
    SUM(IF(login_date_last = '2020-03-29', 1, 0)) / COUNT(*),
    SUM(IF(payment_count > 0, 1, 0)) / COUNT(*),
    SUM(IF(login_date_first = '2020-03-29', 1, 0)) / SUM(IF(login_date_last = '2020-03-29', 1, 0))
FROM dwt_user_topic;

漏斗分析,统计“浏览->购物车->下单->支付”的转化率

统计“浏览->购物车->下单->支付”的转化率
思路:先统计各个行为(访问、加入购物车、下单、支付)的人数,再用后一环节的人数除以前一环节的人数,得到相邻环节之间的转化率。

 -- ADS table for the daily behaviour funnel: head-counts for visit, cart,
-- order and payment, each followed by the conversion ratio from the
-- preceding step.
DROP TABLE IF EXISTS ads_user_action_convert_day;
CREATE EXTERNAL TABLE ads_user_action_convert_day (
    `dt`                          STRING        COMMENT '统计日期',
    `total_visitor_m_count`       BIGINT        COMMENT '总访问人数',
    `cart_u_count`                BIGINT        COMMENT '加入购物车的人数',
    `visitor2cart_convert_ratio`  DECIMAL(10,2) COMMENT '访问到加入购物车转化率',
    `order_u_count`               BIGINT        COMMENT '下单人数',
    `cart2order_convert_ratio`    DECIMAL(10,2) COMMENT '加入购物车到下单转化率',
    `payment_u_count`             BIGINT        COMMENT '支付人数',
    `order2payment_convert_ratio` DECIMAL(10,2) COMMENT '下单到支付的转化率'
)
COMMENT '用户行为漏斗分析'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_user_action_convert_day/';


 -- Append the behaviour funnel for 2020-03-29 to ads_user_action_convert_day.
-- The visitor count is read from ads_uv_count.day_count. The cart, order and
-- payment head-counts are the number of rows in dws_user_action_daycount for
-- that date whose respective counter is greater than zero.
-- Each ratio is the head-count of a step divided by that of the step before.
-- NOTE(review): the join assumes ads_uv_count holds exactly one row per dt -
-- TODO confirm, since that table is loaded with INSERT INTO (append).
INSERT INTO TABLE ads_user_action_convert_day
SELECT
    '2020-03-29',
    uv.day_count,
    ua.cart_count,
    CAST(ua.cart_count / uv.day_count AS DECIMAL(10,2)) AS visitor2cart_convert_ratio,
    ua.order_count,
    -- order over cart: alias now matches the target column name
    CAST(ua.order_count / ua.cart_count AS DECIMAL(10,2)) AS cart2order_convert_ratio,
    ua.payment_count,
    CAST(ua.payment_count / ua.order_count AS DECIMAL(10,2)) AS order2payment_convert_ratio
FROM (
    SELECT
        dt,
        SUM(IF(cart_count > 0, 1, 0))    AS cart_count,
        SUM(IF(order_count > 0, 1, 0))   AS order_count,
        SUM(IF(payment_count > 0, 1, 0)) AS payment_count
    FROM dws_user_action_daycount
    WHERE dt = '2020-03-29'
    GROUP BY dt
) ua
JOIN ads_uv_count uv
    ON uv.dt = ua.dt;

商品主题

商品个数信息

商品销量排名 

-- ADS table for the product sales ranking: statistics date, SKU id and the
-- sales figure used for ranking.
-- The payment_num identifier is written without the stray leading space that
-- was previously inside the backticks.
DROP TABLE IF EXISTS ads_product_sale_top10;
CREATE EXTERNAL TABLE ads_product_sale_top10 (
    `dt`          STRING COMMENT '统计日期',
    `sku_id`      STRING COMMENT '商品ID',
    `payment_num` BIGINT COMMENT '销量'
)
COMMENT '商品个数信息'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_product_sale_top10';

 
-- Append the ten SKUs with the highest payment_amount on 2020-03-29, read
-- from dws_sku_action_daycount.
-- The target is ads_product_sale_top10 so that it matches the table created
-- by the DDL of this section.
-- NOTE(review): the target column is documented as sales volume while the
-- value loaded and ranked here is payment_amount - confirm which measure is
-- intended.
INSERT INTO TABLE ads_product_sale_top10
SELECT
    '2020-03-29' AS dt,
    sku_id,
    payment_amount
FROM dws_sku_action_daycount
WHERE dt = '2020-03-29'
ORDER BY payment_amount DESC
LIMIT 10;

 商品退款率排名(最近30天)

 -- ADS table for the product refund-ratio ranking: statistics date, SKU id
-- and refund ratio.
DROP TABLE IF EXISTS ads_product_refund_topN;
CREATE EXTERNAL TABLE ads_product_refund_topN (
    `dt`           STRING        COMMENT '统计日期',
    `sku_id`       STRING        COMMENT '商品ID',
    `refund_ratio` DECIMAL(10,2) COMMENT '退款率'
)
COMMENT '商品退款率TopN'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_product_refund_topN';


 
-- Append the ten SKUs with the highest refund ratio, stamped 2020-03-29.
-- The ratio is refund_last_30d_count over payment_last_30d_count from
-- dwt_sku_topic, expressed as a percentage.
INSERT INTO TABLE ads_product_refund_topN
SELECT
    '2020-03-29',
    sku_id,
    refund_last_30d_count / payment_last_30d_count * 100 AS refund_ratio
FROM dwt_sku_topic
ORDER BY refund_ratio DESC
LIMIT 10;

营销主题(用户+商品+购买行为)

需求分析:统计每日下单数,下单金额及下单用户数。
 

 -- ADS table for daily order totals: number of orders, order amount and
-- number of ordering users per statistics date.
-- NOTE(review): order_amount is BIGINT - confirm that whole-number amounts
-- are intended.
DROP TABLE IF EXISTS ads_order_daycount;
CREATE EXTERNAL TABLE ads_order_daycount (
    dt           STRING COMMENT '统计日期',
    order_count  BIGINT COMMENT '单日下单笔数',
    order_amount BIGINT COMMENT '单日下单金额',
    order_users  BIGINT COMMENT '单日下单用户数'
)
COMMENT '每日订单总计表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_order_daycount';

 
-- Append the order totals for 2020-03-29 to ads_order_daycount.
-- Order count and amount are summed over dws_sku_action_daycount. The number
-- of ordering users is the count of rows in dws_user_action_daycount with
-- order_count greater than zero. The two single-row results are joined on
-- their constant dt column.
INSERT INTO TABLE ads_order_daycount
SELECT
    '2020-03-29' AS dt,
    sku_totals.order_count,
    sku_totals.order_amount,
    user_totals.order_users
FROM (
    SELECT
        '2020-03-29'      AS dt,
        SUM(order_count)  AS order_count,
        SUM(order_amount) AS order_amount
    FROM dws_sku_action_daycount
    WHERE dt = '2020-03-29'
) sku_totals
JOIN (
    SELECT
        '2020-03-29'                   AS dt,
        SUM(IF(order_count > 0, 1, 0)) AS order_users
    FROM dws_user_action_daycount
    WHERE dt = '2020-03-29'
) user_totals
    ON sku_totals.dt = user_totals.dt;

支付信息统计

每日支付金额、支付人数、支付商品数、支付笔数以及下单到支付的平均时长(取自DWD)

 -- ADS table for daily payment statistics: number of payments, payment
-- amount, paying users, paid SKUs and the average order-to-payment time in
-- minutes.
-- The table comment now describes payments. It previously repeated the
-- comment of the daily order totals table.
-- NOTE(review): the order_count and order_amount columns hold payment
-- figures (see their comments). The names are kept for compatibility.
DROP TABLE IF EXISTS ads_payment_daycount;
CREATE EXTERNAL TABLE ads_payment_daycount (
    dt                 STRING COMMENT '统计日期',
    order_count        BIGINT COMMENT '单日支付笔数',
    order_amount       BIGINT COMMENT '单日支付金额',
    payment_user_count BIGINT COMMENT '单日支付人数',
    payment_sku_count  BIGINT COMMENT '单日支付商品数',
    payment_avg_time   DOUBLE COMMENT '下单到支付的平均时长,取分钟数'
)
COMMENT '每日支付统计表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_payment_daycount';


-- Append the payment statistics for 2020-03-15 to ads_payment_daycount.
-- Run with the gmall database selected.
-- Three single-row aggregates are joined on their constant dt column:
--   user_pay : payment count, payment amount and paying users from
--              dws_user_action_daycount
--   sku_pay  : number of SKUs with at least one payment from
--              dws_sku_action_daycount
--   pay_time : average seconds between create_time and payment_time of paid
--              orders in dwd_fact_order_info, divided by 60 to get minutes
INSERT INTO TABLE ads_payment_daycount
SELECT
    user_pay.dt,
    user_pay.payment_count,
    user_pay.payment_amount,
    user_pay.payment_user_count,
    sku_pay.payment_sku_count,
    pay_time.payment_avg_time
FROM (
    SELECT
        '2020-03-15'                     AS dt,
        SUM(payment_count)               AS payment_count,
        SUM(payment_amount)              AS payment_amount,
        SUM(IF(payment_count > 0, 1, 0)) AS payment_user_count
    FROM dws_user_action_daycount
    WHERE dt = '2020-03-15'
) user_pay
JOIN (
    SELECT
        '2020-03-15'                     AS dt,
        SUM(IF(payment_count > 0, 1, 0)) AS payment_sku_count
    FROM dws_sku_action_daycount
    WHERE dt = '2020-03-15'
) sku_pay
    ON user_pay.dt = sku_pay.dt
JOIN (
    SELECT
        '2020-03-15' AS dt,
        SUM(unix_timestamp(payment_time) - unix_timestamp(create_time)) / COUNT(*) / 60 AS payment_avg_time
    FROM dwd_fact_order_info
    WHERE dt = '2020-03-15'
      AND payment_time IS NOT NULL
) pay_time
    ON user_pay.dt = pay_time.dt;

复购率

 

 -- ADS table for the monthly repurchase statistics per brand and level-1
-- category: buyer count, buyers with two or more and with three or more
-- purchases, and the corresponding ratios.
-- IF EXISTS keeps the script re-runnable and matches the other DDL
-- statements in this file.
DROP TABLE IF EXISTS ads_sale_tm_category1_stat_mn;
CREATE EXTERNAL TABLE ads_sale_tm_category1_stat_mn (
    tm_id                 STRING        COMMENT '品牌id',
    category1_id          STRING        COMMENT '1级品类id ',
    category1_name        STRING        COMMENT '1级品类名称 ',
    buycount              BIGINT        COMMENT '购买人数',
    buy_twice_last        BIGINT        COMMENT '两次以上购买人数',
    buy_twice_last_ratio  DECIMAL(10,2) COMMENT '单次复购率',
    buy_3times_last       BIGINT        COMMENT '三次以上购买人数',
    buy_3times_last_ratio DECIMAL(10,2) COMMENT '多次复购率',
    stat_mn               STRING        COMMENT '统计月份',
    stat_date             STRING        COMMENT '统计日期'
)
COMMENT '复购率统计'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_sale_tm_category1_stat_mn/';

 
-- Append the repurchase statistics for the month of 2019-02-10.
-- Inner query: order_count summed per user, brand and level-1 category over
-- all dws_sale_detail_daycount rows whose dt lies in that month.
-- Outer query: per brand and category, count the users with at least one,
-- two and three orders, and divide the latter two by the first.
INSERT INTO TABLE ads_sale_tm_category1_stat_mn
SELECT
    per_user.sku_tm_id,
    per_user.sku_category1_id,
    per_user.sku_category1_name,
    SUM(IF(per_user.order_count >= 1, 1, 0)) AS buycount,
    SUM(IF(per_user.order_count >= 2, 1, 0)) AS buy_twice_last,
    SUM(IF(per_user.order_count >= 2, 1, 0))
        / SUM(IF(per_user.order_count >= 1, 1, 0)) AS buy_twice_last_ratio,
    SUM(IF(per_user.order_count >= 3, 1, 0)) AS buy_3times_last,
    SUM(IF(per_user.order_count >= 3, 1, 0))
        / SUM(IF(per_user.order_count >= 1, 1, 0)) AS buy_3times_last_ratio,
    date_format('2019-02-10', 'yyyy-MM') AS stat_mn,
    '2019-02-10' AS stat_date
FROM (
    SELECT
        user_id,
        sd.sku_tm_id,
        sd.sku_category1_id,
        sd.sku_category1_name,
        SUM(order_count) AS order_count
    FROM dws_sale_detail_daycount sd
    WHERE date_format(dt, 'yyyy-MM') = date_format('2019-02-10', 'yyyy-MM')
    GROUP BY
        user_id,
        sd.sku_tm_id,
        sd.sku_category1_id,
        sd.sku_category1_name
) per_user
GROUP BY
    per_user.sku_tm_id,
    per_user.sku_category1_id,
    per_user.sku_category1_name;

猜你喜欢

转载自blog.csdn.net/qq_22473611/article/details/116798729
今日推荐