大数据数仓项目--知行教育_访问咨询主题_全量流程

4.6 全量流程

OLTP原始数据(MySQL)→ 数据采集(ODS)→ 清洗转换(DWD)→ 统计分析(DWS)→ 导出至OLAP(MySQL),如图:
在这里插入图片描述

4.6.1 数据采集

4.6.1.1 web_chat_ems表
4.6.1.1.1 SQL:

-- Extract query for the ODS load of web_chat_ems: reads the listed columns from
-- web_chat_ems_2019_07, exposes `user` as user_match, and appends the constant
-- "2019-07-01" as starts_time.
SELECT
    src.id,
    src.create_date_time,
    src.session_id,
    src.sid,
    src.create_time,
    src.seo_source,
    src.seo_keywords,
    src.ip,
    src.area,
    src.country,
    src.province,
    src.city,
    src.origin_channel,
    src.user AS user_match,
    src.manual_time,
    src.begin_time,
    src.end_time,
    src.last_customer_msg_time_stamp,
    src.last_agent_msg_time_stamp,
    src.reply_msg_count,
    src.msg_count,
    src.browser_name,
    src.os_info,
    "2019-07-01" AS starts_time
FROM web_chat_ems_2019_07 AS src;

4.6.1.1.2 Sqoop:

# Import web_chat_ems_2019_07 from MySQL into the Hive table web_chat_ems via HCatalog.
# The target database is itcast_ods (not itcast_ods_test): the DWD insert later in this
# document reads itcast_ods.web_chat_ems, so the rows must land there.
# -m 100 asks for 100 parallel map tasks; --split-by id divides the rows among them by id.
sqoop import \
--connect jdbc:mysql://192.168.52.150:3306/nev \
--username root \
--password 123456 \
--query 'select id, create_date_time, session_id, sid, create_time, seo_source, seo_keywords, ip, area, country, province, city, origin_channel, user as user_match, manual_time, begin_time, end_time, last_customer_msg_time_stamp, last_agent_msg_time_stamp, reply_msg_count, msg_count, browser_name, os_info, "2019-07-01" as starts_time from web_chat_ems_2019_07 where $CONDITIONS' \
--hcatalog-database itcast_ods \
--hcatalog-table web_chat_ems \
-m 100 \
--split-by id


# Same web_chat_ems import, run through bin/sqoop against the MySQL host 192.168.10.10.
# No --password option is passed in this variant.
# The target database is itcast_ods (not itcast_ods_test): the DWD insert later in this
# document reads itcast_ods.web_chat_ems, so the rows must land there.
bin/sqoop import \
--connect jdbc:mysql://192.168.10.10:3306/nev \
--username root \
--query 'select id, create_date_time, session_id, sid, create_time, seo_source, seo_keywords, ip, area, country, province, city, origin_channel, user as user_match, manual_time, begin_time, end_time, last_customer_msg_time_stamp, last_agent_msg_time_stamp, reply_msg_count, msg_count, browser_name, os_info, "2019-07-01" as starts_time from web_chat_ems_2019_07 where $CONDITIONS' \
--hcatalog-database itcast_ods \
--hcatalog-table web_chat_ems \
-m 100 \
--split-by id

-m 100,指的是启动100个Map任务(mapper)并行导入数据;
而--split-by参数,用于指定按哪个字段对数据进行切分,每个Map任务负责导入其中的一段。

4.6.1.2 web_chat_text_ems表
4.6.1.2.1 SQL

-- Extract query for the ODS load of web_chat_text_ems: reads the listed columns from
-- web_chat_text_ems_2019_07 and appends the constant "2019-07-01" as start_time.
SELECT
    src.id,
    src.referrer,
    src.from_url,
    src.landing_page_url,
    src.url_title,
    src.platform_description,
    src.other_params,
    src.history,
    "2019-07-01" AS start_time
FROM web_chat_text_ems_2019_07 AS src;

4.6.1.2.2 Sqoop

# Import web_chat_text_ems_2019_07 from MySQL (192.168.52.150) into the Hive table
# itcast_ods.web_chat_text_ems via HCatalog, using 100 map tasks split on id.
sqoop import \
--connect jdbc:mysql://192.168.52.150:3306/nev \
--username root \
--password 123456 \
--hcatalog-database itcast_ods \
--hcatalog-table web_chat_text_ems \
--split-by id \
--num-mappers 100 \
--query 'select id,referrer,from_url,landing_page_url,url_title,platform_description,other_params,history, "2019-07-01" as start_time from web_chat_text_ems_2019_07 where $CONDITIONS'

# Same web_chat_text_ems import, run through bin/sqoop against the MySQL host
# 192.168.10.10 (no --password option is passed in this variant).
bin/sqoop import \
--connect jdbc:mysql://192.168.10.10:3306/nev \
--username root \
--hcatalog-database itcast_ods \
--hcatalog-table web_chat_text_ems \
--split-by id \
--num-mappers 100 \
--query 'select id,referrer,from_url,landing_page_url,url_title,platform_description,other_params,history, "2019-07-01" as start_time from web_chat_text_ems_2019_07 where $CONDITIONS'

4.6.2.4 代码

-- Dynamic partition settings: the insert below derives its partition values from the data.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
-- Hive compression for intermediate and final output.
set hive.exec.compress.intermediate=true;
set hive.exec.compress.output=true;
-- Make compression take effect when writing.
set hive.exec.orc.compression.strategy=COMPRESSION;


-- Build the DWD detail table by joining the two ODS tables on id.
--   * create_time is converted from a 'yyyy-MM-dd HH:mm:ss.SSS' string with unix_timestamp.
--   * a NULL msg_count is replaced by 0 and cast to int.
--   * hourinfo / yearinfo / monthinfo / dayinfo are cut out of the create_time string by
--     position; quarterinfo comes from quarter().
--   * the last three select columns feed the dynamic partitions (yearinfo, monthinfo, dayinfo).
insert into table itcast_dwd.visit_consult_dwd partition (yearinfo, monthinfo, dayinfo)
select
    wce.session_id,
    wce.sid,
    unix_timestamp(wce.create_time, 'yyyy-MM-dd HH:mm:ss.SSS') as create_time,
    wce.seo_source,
    wce.ip,
    wce.area,
    cast(coalesce(wce.msg_count, 0) as int) as msg_count,
    -- origin_channel is read from this table by every DWS "来源渠道" query, so it has to be
    -- loaded here. NOTE(review): Hive maps insert columns by position; placing it right after
    -- msg_count assumes that is its position in the visit_consult_dwd DDL — confirm.
    wce.origin_channel,
    wcte.referrer,
    wcte.from_url,
    wcte.landing_page_url,
    wcte.url_title,
    wcte.platform_description,
    wcte.other_params,
    wcte.history,
    substr(wce.create_time, 12, 2) as hourinfo,
    quarter(wce.create_time) as quarterinfo,
    substr(wce.create_time, 1, 4) as yearinfo,
    substr(wce.create_time, 6, 2) as monthinfo,
    substr(wce.create_time, 9, 2) as dayinfo
from itcast_ods.web_chat_ems wce inner join itcast_ods.web_chat_text_ems wcte
on wce.id = wcte.id;

4.6.3 统计分析

4.6.3.1 分析
DWD层之后是DWM中间层和DWS业务层。回顾建模分析阶段,我们已经得到了指标相关的维度:年、季度、月、天、小时、地区、搜索来源、来源渠道、页面。分两大类:
时间维度:年、季度、月、天、小时
业务属性维度:地区、搜索来源、来源渠道、页面、总访问量。
在DWS层按照不同维度使用count+distinct来统计指标,形成宽表。
空值处理
事实表中的维度关联键不能存在空值,关联的维度信息必须用代理键(-1)而不是空值表示未知的条件。
4.6.3.2 代码
我们的维度一共有两大类:时间维度和产品属性维度,在DWS层我们可以产出一个宽表,将所有维度的数据都生成出来,供APP层和OLAP应用来使用。
4.6.3.2.1 地区分组
统计地区维度时,需要设置产品属性类型groupType为1(地区),同时将其他产品属性设置为-1(搜索来源、来源渠道、会话来源页面),便于团队理解,减少自己和团队出错率的同时也降低了沟通成本。
在insert SQL语句中,尽量为查询出的每个字段加上别名,特别是字段较多的表,便于识别各字段的含义。
小时维度:

-- Partitioning: enable nonstrict dynamic partitions and raise the limits on
-- dynamic partitions per node, dynamic partitions in total, and created files.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
SET hive.exec.max.dynamic.partitions.pernode=10000;
SET hive.exec.max.dynamic.partitions=100000;
SET hive.exec.max.created.files=150000;
-- Hive compression for intermediate and final output.
SET hive.exec.compress.intermediate=true;
SET hive.exec.compress.output=true;
-- Make compression take effect when writing.
SET hive.exec.orc.compression.strategy=COMPRESSION;

-- Area grouping (grouptype '1') at hour grain (time_type '1').
-- Distinct sid / session_id / ip counts per area and hour; dimensions not used by this
-- grouping carry the placeholder '-1'. The last three columns feed the dynamic partitions.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    v.area                       AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    v.hourinfo                   AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo, '-', v.dayinfo, ' ', v.hourinfo) AS time_str,
    '-1'                         AS from_url,
    '1'                          AS grouptype,
    '1'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    v.dayinfo                    AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.area,
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo,
    v.dayinfo,
    v.hourinfo;

天维度:

-- Area grouping (grouptype '1') at day grain (time_type '2').
-- Distinct sid / session_id / ip counts per area and day; unused dimensions and the
-- hour are set to the placeholder '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    v.area                       AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo, '-', v.dayinfo) AS time_str,
    '-1'                         AS from_url,
    '1'                          AS grouptype,
    '2'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    v.dayinfo                    AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.area,
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo,
    v.dayinfo;

月维度:

-- Area grouping (grouptype '1') at month grain (time_type '3').
-- Distinct sid / session_id / ip counts per area and month; rows go to the
-- dayinfo = '-1' partition of their year and month.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    v.area                       AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo) AS time_str,
    '-1'                         AS from_url,
    '1'                          AS grouptype,
    '3'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.area,
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo;

季度维度:

-- Area grouping (grouptype '1') at quarter grain (time_type '4').
-- Distinct sid / session_id / ip counts per area and quarter; rows go to the
-- monthinfo = '-1', dayinfo = '-1' partition of their year.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    v.area                       AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-Q', v.quarterinfo) AS time_str,
    '-1'                         AS from_url,
    '1'                          AS grouptype,
    '4'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    '-1'                         AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.area,
    v.yearinfo,
    v.quarterinfo;

年维度:

-- Area grouping (groupType '1') at year grain (time_type '5').
-- Distinct sid / session_id / ip counts per area and year; time_str is the year itself and
-- every finer time field is the placeholder '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS sessionid_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    v.area                       AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    '-1'                         AS quarterinfo,
    v.yearinfo                   AS time_str,
    '-1'                         AS from_url,
    '1'                          AS groupType,
    '5'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    '-1'                         AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.area,
    v.yearinfo;

4.6.3.2.2 搜索来源分组

小时维度:

-- Search-source grouping (grouptype '2') at hour grain (time_type '1').
-- Distinct sid / session_id / ip counts per seo_source and hour; unused dimensions
-- carry the placeholder '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    v.seo_source                 AS seo_source,
    '-1'                         AS origin_channel,
    v.hourinfo                   AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo, '-', v.dayinfo, ' ', v.hourinfo) AS time_str,
    '-1'                         AS from_url,
    '2'                          AS grouptype,
    '1'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    v.dayinfo                    AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.seo_source,
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo,
    v.dayinfo,
    v.hourinfo;

天维度:

-- Search-source grouping (grouptype '2') at day grain (time_type '2').
-- Distinct sid / session_id / ip counts per seo_source and day.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    v.seo_source                 AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo, '-', v.dayinfo) AS time_str,
    '-1'                         AS from_url,
    '2'                          AS grouptype,
    '2'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    v.dayinfo                    AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.seo_source,
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo,
    v.dayinfo;

月维度:

-- Search-source grouping (grouptype '2') at month grain (time_type '3').
-- Distinct sid / session_id / ip counts per seo_source and month; dayinfo is '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    v.seo_source                 AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo) AS time_str,
    '-1'                         AS from_url,
    '2'                          AS grouptype,
    '3'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.seo_source,
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo;

季度维度:

-- Search-source grouping (grouptype '2') at quarter grain (time_type '4').
-- Distinct sid / session_id / ip counts per seo_source and quarter; monthinfo and
-- dayinfo are '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    v.seo_source                 AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-Q', v.quarterinfo) AS time_str,
    '-1'                         AS from_url,
    '2'                          AS grouptype,
    '4'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    '-1'                         AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.seo_source,
    v.yearinfo,
    v.quarterinfo;

年维度:

-- Search-source grouping (groupType '2') at year grain (time_type '5').
-- Distinct sid / session_id / ip counts per seo_source and year; time_str is the year.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS sessionid_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    v.seo_source                 AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    '-1'                         AS quarterinfo,
    v.yearinfo                   AS time_str,
    '-1'                         AS from_url,
    '2'                          AS groupType,
    '5'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    '-1'                         AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.seo_source,
    v.yearinfo;

4.6.3.2.3 来源渠道分组

小时维度:

-- Origin-channel grouping (grouptype '3') at hour grain (time_type '1').
-- Distinct sid / session_id / ip counts per origin_channel and hour; unused dimensions
-- carry the placeholder '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    v.origin_channel             AS origin_channel,
    v.hourinfo                   AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo, '-', v.dayinfo, ' ', v.hourinfo) AS time_str,
    '-1'                         AS from_url,
    '3'                          AS grouptype,
    '1'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    v.dayinfo                    AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.origin_channel,
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo,
    v.dayinfo,
    v.hourinfo;

天维度:

-- Origin-channel grouping (grouptype '3') at day grain (time_type '2').
-- Distinct sid / session_id / ip counts per origin_channel and day.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    v.origin_channel             AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo, '-', v.dayinfo) AS time_str,
    '-1'                         AS from_url,
    '3'                          AS grouptype,
    '2'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    v.dayinfo                    AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.origin_channel,
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo,
    v.dayinfo;

月维度:

-- Origin-channel grouping (grouptype '3') at month grain (time_type '3').
-- Distinct sid / session_id / ip counts per origin_channel and month; dayinfo is '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    v.origin_channel             AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo) AS time_str,
    '-1'                         AS from_url,
    '3'                          AS grouptype,
    '3'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.origin_channel,
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo;

季度维度:

-- Origin-channel grouping (grouptype '3') at quarter grain (time_type '4').
-- Distinct sid / session_id / ip counts per origin_channel and quarter; monthinfo and
-- dayinfo are '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    v.origin_channel             AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-Q', v.quarterinfo) AS time_str,
    '-1'                         AS from_url,
    '3'                          AS grouptype,
    '4'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    '-1'                         AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.origin_channel,
    v.yearinfo,
    v.quarterinfo;

年维度:

-- Origin-channel grouping (groupType '3') at year grain (time_type '5').
-- Distinct sid / session_id / ip counts per origin_channel and year; time_str is the year.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS sessionid_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    v.origin_channel             AS origin_channel,
    '-1'                         AS hourinfo,
    '-1'                         AS quarterinfo,
    v.yearinfo                   AS time_str,
    '-1'                         AS from_url,
    '3'                          AS groupType,
    '5'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    '-1'                         AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.origin_channel,
    v.yearinfo;

4.6.3.2.4 会话来源页面分组

小时维度:

-- Session-source-page grouping (grouptype '4') at hour grain (time_type '1').
-- Distinct sid / session_id / ip counts per from_url and hour; unused dimensions
-- carry the placeholder '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    v.hourinfo                   AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo, '-', v.dayinfo, ' ', v.hourinfo) AS time_str,
    v.from_url                   AS from_url,
    '4'                          AS grouptype,
    '1'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    v.dayinfo                    AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.from_url,
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo,
    v.dayinfo,
    v.hourinfo;

天维度:

-- Session-source-page grouping (grouptype '4') at day grain (time_type '2').
-- Distinct sid / session_id / ip counts per from_url and day.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo, '-', v.dayinfo) AS time_str,
    v.from_url                   AS from_url,
    '4'                          AS grouptype,
    '2'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    v.dayinfo                    AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.from_url,
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo,
    v.dayinfo;

月维度:

-- Session-source-page grouping (grouptype '4') at month grain (time_type '3').
-- Distinct sid / session_id / ip counts per from_url and month; dayinfo is '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo) AS time_str,
    v.from_url                   AS from_url,
    '4'                          AS grouptype,
    '3'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.from_url,
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo;

季度维度:

-- Session-source-page grouping (grouptype '4') at quarter grain (time_type '4').
-- Distinct sid / session_id / ip counts per from_url and quarter; monthinfo and
-- dayinfo are '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-Q', v.quarterinfo) AS time_str,
    v.from_url                   AS from_url,
    '4'                          AS grouptype,
    '4'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    '-1'                         AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.from_url,
    v.yearinfo,
    v.quarterinfo;

年维度:

-- Session-source-page grouping (groupType '4') at year grain (time_type '5').
-- Distinct sid / session_id / ip counts per from_url and year; time_str is the year.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS sessionid_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    '-1'                         AS quarterinfo,
    v.yearinfo                   AS time_str,
    v.from_url                   AS from_url,
    '4'                          AS groupType,
    '5'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    '-1'                         AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.from_url,
    v.yearinfo;

4.6.3.2.5 总访问量

小时(小时段区间的基础数据)
因为小时段数据可以直接sum求和,因此OLAP应用可以在小时数据基础上,进行简单的sum操作以获取到区间小时段数据。

-- Total visits (grouptype '5') at hour grain (time_type '1').
-- Distinct sid / session_id / ip counts per hour with no business dimension; area,
-- seo_source, origin_channel and from_url are all the placeholder '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    v.hourinfo                   AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo, '-', v.dayinfo, ' ', v.hourinfo) AS time_str,
    '-1'                         AS from_url,
    '5'                          AS grouptype,
    '1'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    v.dayinfo                    AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo,
    v.dayinfo,
    v.hourinfo;

-- Total visits (grouptype '5') at day grain (time_type '2').
-- Distinct sid / session_id / ip counts per day with no business dimension.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo, '-', v.dayinfo) AS time_str,
    '-1'                         AS from_url,
    '5'                          AS grouptype,
    '2'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    v.dayinfo                    AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo,
    v.dayinfo;

-- Total visits (grouptype '5') at month grain (time_type '3').
-- Distinct sid / session_id / ip counts per month with no business dimension; dayinfo is '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-', v.monthinfo) AS time_str,
    '-1'                         AS from_url,
    '5'                          AS grouptype,
    '3'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    v.monthinfo                  AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.yearinfo,
    v.quarterinfo,
    v.monthinfo;

季度

-- Total visits (grouptype '5') at quarter grain (time_type '4').
-- Distinct sid / session_id / ip counts per quarter with no business dimension;
-- monthinfo and dayinfo are '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS session_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    v.quarterinfo                AS quarterinfo,
    CONCAT(v.yearinfo, '-Q', v.quarterinfo) AS time_str,
    '-1'                         AS from_url,
    '5'                          AS grouptype,
    '4'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    '-1'                         AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.yearinfo,
    v.quarterinfo;

-- Total visits (groupType '5') at year grain (time_type '5').
-- Distinct sid / session_id / ip counts per year with no business dimension; time_str is
-- the year and every finer time field is '-1'.
INSERT INTO TABLE itcast_dws.visit_dws PARTITION (yearinfo, monthinfo, dayinfo)
SELECT
    COUNT(DISTINCT v.sid)        AS sid_total,
    COUNT(DISTINCT v.session_id) AS sessionid_total,
    COUNT(DISTINCT v.ip)         AS ip_total,
    '-1'                         AS area,
    '-1'                         AS seo_source,
    '-1'                         AS origin_channel,
    '-1'                         AS hourinfo,
    '-1'                         AS quarterinfo,
    v.yearinfo                   AS time_str,
    '-1'                         AS from_url,
    '5'                          AS groupType,
    '5'                          AS time_type,
    v.yearinfo                   AS yearinfo,
    '-1'                         AS monthinfo,
    '-1'                         AS dayinfo
FROM itcast_dwd.visit_consult_dwd v
GROUP BY
    v.yearinfo;

4.6.4 导出数据

4.6.4.1 创建mysql表

-- MySQL target for the Sqoop export of itcast_dws.visit_dws.
-- IF NOT EXISTS keeps both statements safe to re-run.
create database if not exists scrm_bi default character set utf8mb4 collate utf8mb4_general_ci;

-- The table name is qualified with scrm_bi so it is created in the database the export
-- job connects to, regardless of the session's current database.
-- from_url is TEXT rather than varchar(32): it stores the session source page URL, and
-- URLs are commonly longer than 32 characters.
CREATE TABLE IF NOT EXISTS `scrm_bi`.`itcast_visit` (
  sid_total int(11) COMMENT '根据sid去重求count',
  sessionid_total int(11) COMMENT '根据sessionid去重求count',
  ip_total int(11) COMMENT '根据IP去重求count',
  area varchar(32) COMMENT '区域信息',
  seo_source varchar(32) COMMENT '搜索来源',
  origin_channel varchar(32) COMMENT '来源渠道',
  hourinfo varchar(32) COMMENT '小时信息',
  quarterinfo varchar(32) COMMENT '季度',
  time_str varchar(32) COMMENT '时间明细',
  from_url text comment '会话来源页面',
  groupType varchar(32) COMMENT '产品属性类型:1.地区;2.搜索来源;3.来源渠道;4.会话来源页面;5.总访问量',
  time_type varchar(32) COMMENT '时间聚合类型:1、按小时聚合;2、按天聚合;3、按月聚合;4、按季度聚合;5、按年聚合;',
  yearinfo varchar(32) COMMENT '年信息',
  monthinfo varchar(32) COMMENT '月信息',
  dayinfo varchar(32) COMMENT '日信息'
);

4.6.4.2 执行sqoop导出脚本

# Export the Hive table itcast_dws.visit_dws (read through HCatalog) into the MySQL table
# scrm_bi.itcast_visit, using 100 map tasks and a UTF-8 JDBC connection.
sqoop export \
--connect "jdbc:mysql://192.168.52.150:3306/scrm_bi?useUnicode=true&characterEncoding=utf-8" \
--username root \
--password '123456' \
--hcatalog-database itcast_dws \
--hcatalog-table visit_dws \
--table itcast_visit \
--num-mappers 100

猜你喜欢

转载自blog.csdn.net/xianyu120/article/details/111686465