五、数据仓库电商项目——DWT层

DWT层 

DWT层将DWS层每日聚合的数据进行积累,DWT层不是分区表,是一个累积型全量表,并且数据来源与DWS层。

累积性全量表:查询要改动的旧数据,查询新增和变化的新数据,新旧关联,以新换旧,导入覆盖。

DWT设备主题宽表

在这里插入图片描述

用户主题宽表

 

drop table if exists dwt_user_topic;
create external table dwt_user_topic
(
    user_id string  comment '用户id',
    login_date_first string  comment '首次登录时间',
    login_date_last string  comment '末次登录时间',
    login_count bigint comment '累积登录天数',
    login_last_30d_count bigint comment '最近30日登录天数',
    order_date_first string  comment '首次下单时间',
    order_date_last string  comment '末次下单时间',
    order_count bigint comment '累积下单次数',
    order_amount decimal(16,2) comment '累积下单金额',
    order_last_30d_count bigint comment '最近30日下单次数',
    order_last_30d_amount bigint comment '最近30日下单金额',
    payment_date_first string  comment '首次支付时间',
    payment_date_last string  comment '末次支付时间',
    payment_count decimal(16,2) comment '累积支付次数',
    payment_amount decimal(16,2) comment '累积支付金额',
    payment_last_30d_count decimal(16,2) comment '最近30日支付次数',
    payment_last_30d_amount decimal(16,2) comment '最近30日支付金额'
 )COMMENT '用户主题宽表'
stored as parquet
location '/warehouse/gmall/dwt/dwt_user_topic/'
tblproperties ("parquet.compression"="lzo");



 insert overwrite table dwt_user_topic
select
    nvl(new.user_id,old.user_id),
    if(old.login_date_first is null and new.login_count>0,'2020-03-29',old.login_date_first),
    if(new.login_count>0,'2020-03-29',old.login_date_last),
    nvl(old.login_count,0)+if(new.login_count>0,1,0),
    nvl(new.login_last_30d_count,0),
    if(old.order_date_first is null and new.order_count>0,'2020-03-29',old.order_date_first),
    if(new.order_count>0,'2020-03-29',old.order_date_last),
    nvl(old.order_count,0)+nvl(new.order_count,0),
    nvl(old.order_amount,0)+nvl(new.order_amount,0),
    nvl(new.order_last_30d_count,0),
    nvl(new.order_last_30d_amount,0),
    if(old.payment_date_first is null and new.payment_count>0,'2020-03-29',old.payment_date_first),
    if(new.payment_count>0,'2020-03-29',old.payment_date_last),
    nvl(old.payment_count,0)+nvl(new.payment_count,0),
    nvl(old.payment_amount,0)+nvl(new.payment_amount,0),
    nvl(new.payment_last_30d_count,0),
    nvl(new.payment_last_30d_amount,0)
from
dwt_user_topic old
full outer join
(
    select
        user_id,
        sum(if(dt='2020-03-29',login_count,0)) login_count,
        sum(if(dt='2020-03-29',order_count,0)) order_count,
        sum(if(dt='2020-03-29',order_amount,0)) order_amount,
        sum(if(dt='2020-03-29',payment_count,0)) payment_count,
        sum(if(dt='2020-03-29',payment_amount,0)) payment_amount,
        sum(if(login_count>0,1,0)) login_last_30d_count,
        sum(order_count) order_last_30d_count,
        sum(order_amount) order_last_30d_amount,
        sum(payment_count) payment_last_30d_count,
        sum(payment_amount) payment_last_30d_amount
    from dws_user_action_daycount
    where dt>=date_add( '2020-03-29',-30)
    group by user_id
)new
on old.user_id=new.user_id;

商品主题宽表

hive (gmall)> 
drop table if exists dwt_sku_topic;
create external table dwt_sku_topic
(
    sku_id string comment 'sku_id',`在这里插入代码片`
    spu_id string comment 'spu_id',
    order_last_30d_count bigint comment '最近30日被下单次数',
    order_last_30d_num bigint comment '最近30日被下单件数',
    order_last_30d_amount decimal(16,2)  comment '最近30日被下单金额',
    order_count bigint comment '累积被下单次数',
    order_num bigint comment '累积被下单件数',
    order_amount decimal(16,2) comment '累积被下单金额',
    payment_last_30d_count   bigint  comment '最近30日被支付次数',
    payment_last_30d_num bigint comment '最近30日被支付件数',
    payment_last_30d_amount  decimal(16,2) comment '最近30日被支付金额',
    payment_count   bigint  comment '累积被支付次数',
    payment_num bigint comment '累积被支付件数',
    payment_amount  decimal(16,2) comment '累积被支付金额',
    refund_last_30d_count bigint comment '最近三十日退款次数',
    refund_last_30d_num bigint comment '最近三十日退款件数',
    refund_last_30d_amount decimal(10,2) comment '最近三十日退款金额',
    refund_count bigint comment '累积退款次数',
    refund_num bigint comment '累积退款件数',
    refund_amount decimal(10,2) comment '累积退款金额',
    cart_last_30d_count bigint comment '最近30日被加入购物车次数',
    cart_last_30d_num bigint comment '最近30日被加入购物车件数',
    cart_count bigint comment '累积被加入购物车次数',
    cart_num bigint comment '累积被加入购物车件数',
    favor_last_30d_count bigint comment '最近30日被收藏次数',
    favor_count bigint comment '累积被收藏次数',
    appraise_last_30d_good_count bigint comment '最近30日好评数',
    appraise_last_30d_mid_count bigint comment '最近30日中评数',
    appraise_last_30d_bad_count bigint comment '最近30日差评数',
    appraise_last_30d_default_count bigint comment '最近30日默认评价数',
    appraise_good_count bigint comment '累积好评数',
    appraise_mid_count bigint comment '累积中评数',
    appraise_bad_count bigint comment '累积差评数',
    appraise_default_count bigint comment '累积默认评价数'
 )COMMENT '商品主题宽表'
stored as parquet
location '/warehouse/gmall/dwt/dwt_sku_topic/'
tblproperties ("parquet.compression"="lzo");
INSERT OVERWRITE INTO dwt_sku_topic
SELECT nvl(new.sku_id, old.sku_id), dwd_dim_sku_info.spu_id
	, nvl(new.order_count30, 0)
	, nvl(new.order_num30, 0)
	, nvl(new.order_amount30, 0)
	, nvl(old.order_count, 0) + nvl(new.order_count, 0)
	, nvl(old.order_num, 0) + nvl(new.order_num, 0)
	, nvl(old.order_amount, 0) + nvl(new.order_amount, 0)
	, nvl(new.payment_count30, 0)
	, nvl(new.payment_num30, 0)
	, nvl(new.payment_amount30, 0)
	, nvl(old.payment_count, 0) + nvl(new.payment_count, 0)
	, nvl(old.payment_num, 0) + nvl(new.payment_count, 0)
	, nvl(old.payment_amount, 0) + nvl(new.payment_count, 0)
	, nvl(new.refund_count30, 0)
	, nvl(new.refund_num30, 0)
	, nvl(new.refund_amount30, 0)
	, nvl(old.refund_count, 0) + nvl(new.refund_count, 0)
	, nvl(old.refund_num, 0) + nvl(new.refund_num, 0)
	, nvl(old.refund_amount, 0) + nvl(new.refund_amount, 0)
	, nvl(new.cart_count30, 0)
	, nvl(new.cart_num30, 0)
	, nvl(old.cart_count, 0) + nvl(new.cart_count, 0)
	, nvl(old.cart_num, 0) + nvl(new.cart_num, 0)
	, nvl(new.favor_count30, 0)
	, nvl(old.favor_count, 0) + nvl(new.favor_count, 0)
	, nvl(new.appraise_good_count30, 0)
	, nvl(new.appraise_mid_count30, 0)
	, nvl(new.appraise_bad_count30, 0)
	, nvl(new.appraise_default_count30, 0)
	, nvl(old.appraise_good_count, 0) + nvl(new.appraise_good_count, 0)
	, nvl(old.appraise_mid_count, 0) + nvl(new.appraise_mid_count, 0)
	, nvl(old.appraise_bad_count, 0) + nvl(new.appraise_bad_count, 0)
	, nvl(old.appraise_default_count, 0) + nvl(new.appraise_default_count, 0)
FROM (
	SELECT sku_id, spu_id, order_last_30d_count, order_last_30d_num, order_last_30d_amount
		, order_count, order_num, order_amount, payment_last_30d_count, payment_last_30d_num
		, payment_last_30d_amount, payment_count, payment_num, payment_amount, refund_last_30d_count
		, refund_last_30d_num, refund_last_30d_amount, refund_count, refund_num, refund_amount
		, cart_last_30d_count, cart_last_30d_num, cart_count, cart_num, favor_last_30d_count
		, favor_count, appraise_last_30d_good_count, appraise_last_30d_mid_count, appraise_last_30d_bad_count, appraise_last_30d_default_count
		, appraise_good_count, appraise_mid_count, appraise_bad_count, appraise_default_count
	FROM dwt_sku_topic
) old
	FULL JOIN (
		SELECT sku_id
			, sum(if(dt = '2020-03-29', order_count, 0)) AS order_count
			, sum(if(dt = '2020-03-29', order_num, 0)) AS order_num
			, sum(if(dt = '2020-03-29', order_amount, 0)) AS order_amount
			, sum(if(dt = '2020-03-29', payment_count, 0)) AS payment_count
			, sum(if(dt = '2020-03-29', payment_num, 0)) AS payment_num
			, sum(if(dt = '2020-03-29', payment_amount, 0)) AS payment_amount
			, sum(if(dt = '2020-03-29', refund_count, 0)) AS refund_count
			, sum(if(dt = '2020-03-29', refund_num, 0)) AS refund_num
			, sum(if(dt = '2020-03-29', refund_amount, 0)) AS refund_amount
			, sum(if(dt = '2020-03-29', cart_count, 0)) AS cart_count
			, sum(if(dt = '2020-03-29', cart_num, 0)) AS cart_num
			, sum(if(dt = '2020-03-29', favor_count, 0)) AS favor_count
			, sum(if(dt = '2020-03-29', appraise_good_count, 0)) AS appraise_good_count
			, sum(if(dt = '2020-03-29', appraise_mid_count, 0)) AS appraise_mid_count
			, sum(if(dt = '2020-03-29', appraise_bad_count, 0)) AS appraise_bad_count
			, sum(if(dt = '2020-03-29', appraise_default_count, 0)) AS appraise_default_count
			, sum(order_count) AS order_count30, sum(order_num) AS order_num30
			, sum(order_amount) AS order_amount30, sum(payment_count) AS payment_count30
			, sum(payment_num) AS payment_num30, sum(payment_amount) AS payment_amount30
			, sum(refund_count) AS refund_count30, sum(refund_num) AS refund_num30
			, sum(refund_amount) AS refund_amount30, sum(cart_count) AS cart_count30
			, sum(cart_num) AS cart_num30, sum(favor_count) AS favor_count30
			, sum(appraise_good_count) AS appraise_good_count30, sum(appraise_mid_count) AS appraise_mid_count30
			, sum(appraise_bad_count) AS appraise_bad_count30, sum(appraise_default_count) AS appraise_default_count30
		FROM dws_sku_action_daycount
		WHERE dt >= date_add('2020-03-29', -30)
		GROUP BY sku_id
	) new
	ON new.sku_id = old.sku_id
	LEFT JOIN dwd_dim_sku_info ON new.sku_id = dwd_dim_sku_info.id;

 优惠券主题宽表(预留)

在这里插入图片描述

活动主题宽表(预留) 

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/qq_22473611/article/details/116797791
今日推荐