电商数仓---(脚本模板+写的SQL规律+Sqoop导表模板)

脚本模板

#!/bin/bash

#!2. Define variables

#!3. Resolve the processing date

#!4. Build the SQL

#!5. Execute the SQL

声明
gmall是数据库名
看见时间加 $do_date 看见表名加${APP}
例子

#!/bin/bash
# Moves one day's order_info export from HDFS into the matching dt partition
# of the Hive table ${APP}.ods_order_info.
# $1 (optional): the date to load; when empty, yesterday (date +%F) is used.

#!2. Define variables
APP=gmall
hive=/export/servers/hive/bin/hive

#!3. Resolve the processing date
do_date=$1
if [[ -z "$do_date" ]]; then
  do_date=$(date -d "-1 day" +%F)
fi

#!4. Build the SQL
sql="
load data inpath '/origin_data/$APP/db/order_info/$do_date' OVERWRITE into table
${APP}.ods_order_info partition(dt='$do_date');
"

#!5. Execute the SQL
"$hive" -e "$sql"

写SQL的规律

写SQL的规律:
1、先找到目标表
2、分析一下,需要哪些表能满足目标表的所有字段
  即准备所有输入表。
3、写逻辑
    3.1 insert overwrite table 目标表名称
        观察目标表是否需要考虑分区
    3.2固定格式,如果需要多表join,
       先把整体大框写出,再具体去写一个一个子查询
      select
      
      from
      (
      
      )b1
       join
       (
         
        )b2
        on
3.3遇到统计什么次数用count
3.4遇到统计什么金额用sum
3.5如果是累积表,获取旧表(目标表)数据,
    再获取新表(输入表)数据
3.6遇到统计累积值,旧的+新的
3.7累积表中获取首次时间
   旧的时间为null,取当前时间,否则取旧的
3.8累积表中获取末次时间(最近时间)
   新的id不为空,取当前时间,否则取旧的
3.9天数和次数的转换。
    if(new.login_count>0,1,0)
3.10使用group by时要注意:
    查询的字段:1.分组里面有;2.常量;3.聚合函数
3.11累积30天等指标
3.12如果涉及的表比较多可以采用with tmp的方法

导表模板

导表分三类:
#!/bin/bash

时间的处理:默认T+1模式(取前一天的日期),也支持通过脚本的第二个参数手动指定日期

sqoop导入的基本函数($1=表名   $2=查询SQL,必须自带where条件,函数会在其后拼接 and $CONDITIONS)

全量:select * from 表 where 1 = 1
增量:select * from 表 where createtime 或 paytime = 当前时间
新增和变化:select * from 表 where createtime or operatetime = 当前时间

脚本输入的第一个参数   是表名字或者first all
单表导入

first 23个表(包含地区表 base_province、base_region)
all 21个表(不含地区表)

样例有点长

#! /bin/bash
# Location of the sqoop executable used by import_data.
sqoop=/export/servers/sqoop/bin/sqoop

# Import date: the second script argument when it is non-empty,
# otherwise yesterday in YYYY-MM-DD form.
do_date=${2:-$(date -d '-1 day' +%F)}
#######################################
# Imports the result of one MySQL query into HDFS as LZO-compressed,
# tab-separated text, then builds the LZO index for the output directory.
# Globals:
#   sqoop   (read) path of the sqoop executable
#   do_date (read) date used as the last component of the target directory
# Arguments:
#   $1 - table name; output goes to /origin_data/gmall/db/$1/$do_date
#   $2 - complete SELECT statement that already ends in a WHERE clause,
#        because " and $CONDITIONS" is appended to it for Sqoop
# Returns:
#   Sqoop's exit status when the import fails (indexing is then skipped),
#   otherwise the exit status of the LZO indexer job.
#######################################
import_data() {
  local table=$1
  local query=$2
  local target_dir="/origin_data/gmall/db/${table}/${do_date}"

  # NOTE(review): the database password is hard-coded and passed on the
  # command line, where it is visible in the process list; consider moving
  # it to Sqoop's --password-file option.
  # Stop here if the import fails: the target directory was deleted by
  # --delete-target-dir, so there would be nothing valid to index and the
  # indexer's status would otherwise hide the import failure.
  "$sqoop" import \
    --connect jdbc:mysql://hadoop12:3306/gmall \
    --username root \
    --password yy8266603 \
    --target-dir "$target_dir" \
    --delete-target-dir \
    --query "$query and \$CONDITIONS" \
    --num-mappers 1 \
    --fields-terminated-by '\t' \
    --compress \
    --compression-codec lzop \
    --null-string '\\N' \
    --null-non-string '\\N' || return

  hadoop jar /export/servers/hadoop-2.7.7/share/hadoop/common/hadoop-lzo-0.4.20.jar \
    com.hadoop.compression.lzo.DistributedLzoIndexer "$target_dir"
}
# Imports order_info rows whose create_time or operate_time falls on $do_date.
import_order_info() {
  local query="select
		id,
		final_total_amount,
		order_status,
		user_id,
		out_trade_no,
		create_time,
		operate_time,
		province_id,
		benefit_reduce_amount,
		original_total_amount,
		feight_fee
		from order_info
		where (date_format(create_time,'%Y-%m-%d')='$do_date'
		or date_format(operate_time,'%Y-%m-%d')='$do_date')"
  import_data order_info "$query"
}
# Imports coupon_use rows whose get_time, using_time or used_time falls on
# $do_date.
import_coupon_use() {
  local query="select
		id,
		coupon_id,
		user_id,
		order_id,
		coupon_status,
		get_time,
		using_time,
		used_time
		from coupon_use
		where (date_format(get_time,'%Y-%m-%d')='$do_date'
		or date_format(using_time,'%Y-%m-%d')='$do_date'
		or date_format(used_time,'%Y-%m-%d')='$do_date')"
  import_data coupon_use "$query"
}
# Imports order_status_log rows whose operate_time falls on $do_date.
import_order_status_log() {
  local query="select
		id,
		order_id,
		order_status,
		operate_time
		from order_status_log
		where
		date_format(operate_time,'%Y-%m-%d')='$do_date'"
  import_data order_status_log "$query"
}
# Imports activity_order rows whose create_time falls on $do_date.
import_activity_order() {
  local query="select
		id,
		activity_id,
		order_id,
		create_time
		from activity_order
		where
		date_format(create_time,'%Y-%m-%d')='$do_date'"
  import_data activity_order "$query"
}
# Imports user_info rows whose create_time or operate_time falls on $do_date.
import_user_info() {
  local query="select
		id,
		name,
		birthday,
		gender,
		email,
		user_level,
		create_time,
		operate_time
		from user_info
		where (DATE_FORMAT(create_time,'%Y-%m-%d')='$do_date'
		or DATE_FORMAT(operate_time,'%Y-%m-%d')='$do_date')"
  import_data user_info "$query"
}
# Imports order_detail rows created on $do_date, joined to order_info on
# od.order_id=oi.id.
import_order_detail() {
  local query="select
		od.id,
		order_id,
		user_id,
		sku_id,
		sku_name,
		order_price,
		sku_num,
		od.create_time
		from order_detail od
		join order_info oi
		on od.order_id=oi.id
		where
		DATE_FORMAT(od.create_time,'%Y-%m-%d')='$do_date'"
  import_data order_detail "$query"
}
# Imports payment_info rows whose payment_time falls on $do_date.
import_payment_info() {
  local query="select
		id,
		out_trade_no,
		order_id,
		user_id,
		alipay_trade_no,
		total_amount,
		subject,
		payment_type,
		payment_time
		from payment_info
		where
		DATE_FORMAT(payment_time,'%Y-%m-%d')='$do_date'"
  import_data payment_info "$query"
}
# Imports comment_info rows whose create_time falls on $do_date.
import_comment_info() {
  local query="select
		id,
		user_id,
		sku_id,
		spu_id,
		order_id,
		appraise,
		comment_txt,
		create_time
		from comment_info
		where date_format(create_time,'%Y-%m-%d')='$do_date'"
  import_data comment_info "$query"
}
# Imports order_refund_info rows whose create_time falls on $do_date.
import_order_refund_info() {
  local query="select
		id,
		user_id,
		order_id,
		sku_id,
		refund_type,
		refund_num,
		refund_amount,
		refund_reason_type,
		create_time
		from order_refund_info
		where
		date_format(create_time,'%Y-%m-%d')='$do_date'"
  import_data order_refund_info "$query"
}
# Imports every row of sku_info (the filter is the always-true 1=1).
import_sku_info() {
  local query="select
		id,
		spu_id,
		price,
		sku_name,
		sku_desc,
		weight,
		tm_id,
		category3_id,
		create_time
		from sku_info where 1=1"
  import_data sku_info "$query"
}
# Imports every row of base_category1 (the filter is the always-true 1=1).
import_base_category1() {
  local query="select
		id,
		name
		from base_category1 where 1=1"
  import_data base_category1 "$query"
}
# Imports every row of base_category2 (the filter is the always-true 1=1).
import_base_category2() {
  local query="select
		id,
		name,
		category1_id
		from base_category2 where 1=1"
  import_data base_category2 "$query"
}
# Imports every row of base_category3 (the filter is the always-true 1=1).
import_base_category3() {
  local query="select
		id,
		name,
		category2_id
		from base_category3 where 1=1"
  import_data base_category3 "$query"
}
# Imports every row of base_province (the filter is the always-true 1=1).
import_base_province() {
  local query="select
		id,
		name,
		region_id,
		area_code,
		iso_code
		from base_province
		where 1=1"
  import_data base_province "$query"
}
# Imports every row of base_region (the filter is the always-true 1=1).
import_base_region() {
  local query="select
		id,
		region_name
		from base_region
		where 1=1"
  import_data base_region "$query"
}
# Imports every row of base_trademark (the filter is the always-true 1=1).
import_base_trademark() {
  local query="select
		tm_id,
		tm_name
		from base_trademark
		where 1=1"
  import_data base_trademark "$query"
}
# Imports every row of spu_info (the filter is the always-true 1=1).
import_spu_info() {
  local query="select
		id,
		spu_name,
		category3_id,
		tm_id
		from spu_info
		where 1=1"
  import_data spu_info "$query"
}
# Imports every row of favor_info (the filter is the always-true 1=1).
import_favor_info() {
  local query="select
		id,
		user_id,
		sku_id,
		spu_id,
		is_cancel,
		create_time,
		cancel_time
		from favor_info
		where 1=1"
  import_data favor_info "$query"
}
# Imports every row of cart_info (the filter is the always-true 1=1).
import_cart_info() {
  local query="select
		id,
		user_id,
		sku_id,
		cart_price,
		sku_num,
		sku_name,
		create_time,
		operate_time,
		is_ordered,
		order_time
		from cart_info
		where 1=1"
  import_data cart_info "$query"
}
# Imports every row of coupon_info (the filter is the always-true 1=1).
import_coupon_info() {
  local query="select
		id,
		coupon_name,
		coupon_type,
		condition_amount,
		condition_num,
		activity_id,
		benefit_amount,
		benefit_discount,
		create_time,
		range_type,
		spu_id,
		tm_id,
		category3_id,
		limit_num,
		operate_time,
		expire_time
		from coupon_info
		where 1=1"
  import_data coupon_info "$query"
}
# Imports every row of activity_info (the filter is the always-true 1=1).
import_activity_info() {
  local query="select
		id,
		activity_name,
		activity_type,
		start_time,
		end_time,
		create_time
		from activity_info
		where 1=1"
  import_data activity_info "$query"
}
# Imports every row of activity_rule (the filter is the always-true 1=1).
import_activity_rule() {
  local query="select
		id,
		activity_id,
		condition_amount,
		condition_num,
		benefit_amount,
		benefit_discount,
		benefit_level
		from activity_rule
		where 1=1"
  import_data activity_rule "$query"
}
# Imports every row of base_dic (the filter is the always-true 1=1).
import_base_dic() {
  local query="select
		dic_code,
		dic_name,
		parent_code,
		create_time,
		operate_time
		from base_dic
		where 1=1"
  import_data base_dic "$query"
}
#######################################
# Dispatches on the first script argument and runs the matching import(s).
# Arguments:
#   $1 - a single table name, "first" (all 23 tables, including
#        base_province and base_region) or "all" (the same list without
#        those two tables)
# Outputs:
#   A usage message on stderr when $1 is missing or not recognised.
# Returns:
#   1 for a missing/unknown $1; otherwise the status of the last import run.
#######################################
main() {
  local table

  case "$1" in
    order_info | base_category1 | base_category2 | base_category3 | \
      order_detail | sku_info | user_info | payment_info | base_province | \
      base_region | base_trademark | activity_info | activity_order | \
      cart_info | comment_info | coupon_info | coupon_use | favor_info | \
      order_refund_info | order_status_log | spu_info | activity_rule | \
      base_dic)
      # $1 matched the fixed list above, so the function name is known to exist.
      "import_$1"
      ;;
    first)
      for table in \
        base_category1 base_category2 base_category3 order_info \
        order_detail sku_info user_info payment_info base_province \
        base_region base_trademark activity_info activity_order cart_info \
        comment_info coupon_use coupon_info favor_info order_refund_info \
        order_status_log spu_info activity_rule base_dic; do
        "import_${table}"
      done
      ;;
    all)
      for table in \
        base_category1 base_category2 base_category3 order_info \
        order_detail sku_info user_info payment_info base_trademark \
        activity_info activity_order cart_info comment_info coupon_use \
        coupon_info favor_info order_refund_info order_status_log spu_info \
        activity_rule base_dic; do
        "import_${table}"
      done
      ;;
    *)
      # Previously an unknown or missing name did nothing and exited 0.
      printf 'Usage: %s {<table_name>|first|all} [date]\n' "${0##*/}" >&2
      printf 'Unknown import target: "%s"\n' "$1" >&2
      return 1
      ;;
  esac
}

main "$@"

猜你喜欢

转载自blog.csdn.net/qq_46548855/article/details/107673649