电商数仓-(业务数据导入HDFS)

在这里插入图片描述
脚本编写
1.在/usr/local/bin目录下创建

vim mysql_to_hdfs.sh

添加如下内容:

#! /bin/bash
sqoop=/export/servers/sqoop/bin/sqoop
do_date=`date -d '-1 day' +%F`
if [[ -n "$2" ]]; then
do_date=$2
fi
import_data(){
$sqoop import \
--connect jdbc:mysql://hadoop12:3306/gmall \
--username root \
--password yy8266603 \
--target-dir /origin_data/gmall/db/$1/$do_date \
--delete-target-dir \
--query "$2 and \$CONDITIONS" \
--num-mappers 1 \
--fields-terminated-by '\t' \
--compress \
--compression-codec lzop \
--null-string '\\N' \
--null-non-string '\\N'
hadoop jar /export/servers/hadoop-2.7.7/share/hadoop/common/hadoop-lzo-0.4.20.jar com.hadoop.compression.lzo.DistributedLzoIndexer /origin_data/gmall/db/$1/$do_date
}
import_order_info(){
import_data order_info "select
		id,
		final_total_amount,
		order_status,
		user_id,
		out_trade_no,
		create_time,
		operate_time,
		province_id,
		benefit_reduce_amount,
		original_total_amount,
		feight_fee
		from order_info
		where (date_format(create_time,'%Y-%m-%d')='$do_date'
		or date_format(operate_time,'%Y-%m-%d')='$do_date')"
}
import_coupon_use(){
import_data coupon_use "select
		id,
		coupon_id,
		user_id,
		order_id,
		coupon_status,
		get_time,
		using_time,
		used_time
		from coupon_use
		where (date_format(get_time,'%Y-%m-%d')='$do_date'
		or date_format(using_time,'%Y-%m-%d')='$do_date'
		or date_format(used_time,'%Y-%m-%d')='$do_date')"
}
import_order_status_log(){
import_data order_status_log "select
		id,
		order_id,
		order_status,
		operate_time
		from order_status_log
		where
		date_format(operate_time,'%Y-%m-%d')='$do_date'"
}
import_activity_order(){
import_data activity_order "select
		id,
		activity_id,
		order_id,
		create_time
		from activity_order
		where
		date_format(create_time,'%Y-%m-%d')='$do_date'"
}
import_user_info(){
import_data "user_info" "select
		id,
		name,
		birthday,
		gender,
		email,
		user_level,
		create_time,
		operate_time
		from user_info
		where (DATE_FORMAT(create_time,'%Y-%m-%d')='$do_date'
		or DATE_FORMAT(operate_time,'%Y-%m-%d')='$do_date')"
}
import_order_detail(){
import_data order_detail "select
		od.id,
		order_id,
		user_id,
		sku_id,
		sku_name,
		order_price,
		sku_num,
		od.create_time
		from order_detail od
		join order_info oi
		on od.order_id=oi.id
		where
		DATE_FORMAT(od.create_time,'%Y-%m-%d')='$do_date'"
}
import_payment_info(){
import_data "payment_info" "select
		id,
		out_trade_no,
		order_id,
		user_id,
		alipay_trade_no,
		total_amount,
		subject,
		payment_type,
		payment_time
		from payment_info
		where
		DATE_FORMAT(payment_time,'%Y-%m-%d')='$do_date'"
}
import_comment_info(){
import_data comment_info "select
		id,
		user_id,
		sku_id,
		spu_id,
		order_id,
		appraise,
		comment_txt,
		create_time
		from comment_info
		where date_format(create_time,'%Y-%m-%d')='$do_date'"
}
import_order_refund_info(){
import_data order_refund_info "select
		id,
		user_id,
		order_id,
		sku_id,
		refund_type,
		refund_num,
		refund_amount,
		refund_reason_type,
		create_time
		from order_refund_info
		where
		date_format(create_time,'%Y-%m-%d')='$do_date'"
}
import_sku_info(){
import_data sku_info "select
		id,
		spu_id,
		price,
		sku_name,
		sku_desc,
		weight,
		tm_id,
		category3_id,
		create_time
		from sku_info where 1=1"
}
import_base_category1(){
import_data "base_category1" "select
		id,
		name
		from base_category1 where 1=1"
}
import_base_category2(){
import_data "base_category2" "select
		id,
		name,
		category1_id
		from base_category2 where 1=1"
}
import_base_category3(){
import_data "base_category3" "select
		id,
		name,
		category2_id
		from base_category3 where 1=1"
}
import_base_province(){
import_data base_province "select
		id,
		name,
		region_id,
		area_code,
		iso_code
		from base_province
		where 1=1"
}
import_base_region(){
import_data base_region "select
		id,
		region_name
		from base_region
		where 1=1"
}
import_base_trademark(){
import_data base_trademark "select
		tm_id,
		tm_name
		from base_trademark
		where 1=1"
}
import_spu_info(){
import_data spu_info "select
		id,
		spu_name,
		category3_id,
		tm_id
		from spu_info
		where 1=1"
}
import_favor_info(){
import_data favor_info "select
		id,
		user_id,
		sku_id,
		spu_id,
		is_cancel,
		create_time,
		cancel_time
		from favor_info
		where 1=1"
}
import_cart_info(){
import_data cart_info "select
		id,
		user_id,
		sku_id,
		cart_price,
		sku_num,
		sku_name,
		create_time,
		operate_time,
		is_ordered,
		order_time
		from cart_info
		where 1=1"
}
import_coupon_info(){
import_data coupon_info "select
		id,
		coupon_name,
		coupon_type,
		condition_amount,
		condition_num,
		activity_id,
		benefit_amount,
		benefit_discount,
		create_time,
		range_type,
		spu_id,
		tm_id,
		category3_id,
		limit_num,
		operate_time,
		expire_time
		from coupon_info
		where 1=1"
}
import_activity_info(){
import_data activity_info "select
		id,
		activity_name,
		activity_type,
		start_time,
		end_time,
		create_time
		from activity_info
		where 1=1"
}
import_activity_rule(){
import_data activity_rule "select
		id,
		activity_id,
		condition_amount,
		condition_num,
		benefit_amount,
		benefit_discount,
		benefit_level
		from activity_rule
		where 1=1"
}
import_base_dic(){
import_data base_dic "select
		dic_code,
		dic_name,
		parent_code,
		create_time,
		operate_time
		from base_dic
		where 1=1"
}
case $1 in
"order_info")
  import_order_info
;;
"base_category1")
  import_base_category1
 ;;
"base_category2")
  import_base_category2
;;
"base_category3")
  import_base_category3
;;
"order_detail")
  import_order_detail
;;
"sku_info")
  import_sku_info
;;
"user_info")
  import_user_info
;;
"payment_info")
  import_payment_info
;;
"base_province")
  import_base_province
;;
"base_region")
  import_base_region
;;
"base_trademark")
  import_base_trademark
;;
"activity_info")
  import_activity_info
;;
"activity_order")
  import_activity_order
;;
"cart_info")
  import_cart_info
;;
"comment_info")
  import_comment_info
;;
"coupon_info")
  import_coupon_info
;;
"coupon_use")
  import_coupon_use
;;
"favor_info")
  import_favor_info
;;
"order_refund_info")
  import_order_refund_info
;;
"order_status_log")
  import_order_status_log
;;
"spu_info")
  import_spu_info
;;
"activity_rule")
  import_activity_rule
;;
"base_dic")
  import_base_dic
;;
"first")
	import_base_category1
	import_base_category2
	import_base_category3
	import_order_info
	import_order_detail
	import_sku_info
	import_user_info
	import_payment_info
	import_base_province
	import_base_region
	import_base_trademark
	import_activity_info
	import_activity_order
	import_cart_info
	import_comment_info
	import_coupon_use
	import_coupon_info
	import_favor_info
	import_order_refund_info
	import_order_status_log
	import_spu_info
	import_activity_rule
	import_base_dic
;;
"all")
	import_base_category1
	import_base_category2
	import_base_category3
	import_order_info
	import_order_detail
	import_sku_info
	import_user_info
	import_payment_info
	import_base_trademark
	import_activity_info
	import_activity_order
	import_cart_info
	import_comment_info
	import_coupon_use
	import_coupon_info
	import_favor_info
	import_order_refund_info
	import_order_status_log
	import_spu_info
	import_activity_rule
	import_base_dic
;;
esac

修改脚本权限

 chmod 777 mysql_to_hdfs.sh

初次导入

mysql_to_hdfs.sh first 2020-03-10

每日导入

mysql_to_hdfs.sh all 2020-03-11

项目经验
Hive 中的 Null 在底层是以“\N”来存储,而 MySQL 中的 Null 在底层就是 Null,为了保证数据两端的一致性。在导出数据时采用–input-null-string 和–input-null-non-string 两个参数。导入数据时采用–null-string 和–null-non-string。

猜你喜欢

转载自blog.csdn.net/qq_46548855/article/details/107599799