Hive工作笔记

1.like得到想要的字段,然后插入分区。
insert overwrite table sgv.shenxinfu2 partition(area='gz',dt='${time}') select applog.xmlpath(s.result,"url") as url from shenxfdb.action as s
where s.dt='${time}' and s.area='${area}' and (applog.xmlpath(s.result,"url") like 'static.adwo.com%' or applog.xmlpath(s.result,"url") like '%lnk0.com%' or
applog.xmlpath(s.result,"url") like 'mi.gdt.qq.com%' or applog.xmlpath(s.result,"url") like 'et.w.inmobi.com%' or applog.xmlpath(s.result,"url") like '%doubleclick.net%' or
applog.xmlpath(s.result,"url") like '%clickc.admaster.com.cn%' or applog.xmlpath(s.result,"url")
like 'track.dmrtb.com%' or applog.xmlpath(s.result,"url") like '%e.cn.miaozhen.com%' );

2.case when 切割字符串

select syslog.`datetime`,CASE WHEN   
substr(syslog.`datetime`,0,10)=='2017-11-08'
THEN  hour(syslog.`datetime`)
ELSE  substr(syslog.`datetime`,8,2)
END  AS syslog_hour

from syslog where dt='2017110822' limit 100


3.加载分区

insert overwrite table applog.union_syslog partition(dt='20171103') select  s.sta_mac, s.area from applog.syslog as s  where s.sta_mac!='blank' and substr(s.dt , 1 , 8)='20171103';


4.左关联    存在a表,不存在b表中的数据

select distinct  trap.sta_mac  from
(
select  lower(concat_ws('.',substr(regexp_replace(s.user_mac,':','' ) ,0,4) ,substr(regexp_replace(s.user_mac,':','' ) ,5,4),substr(regexp_replace(s.user_mac,':','' ) ,9,4))) as sta_mac,
s.area as area,s.user_ac_type as type,s.ap_mac as apmac from applog.trapsource as s  where s.user_mac <> 'blank' and  s.user_ac_type!=0 and s.dt = '20171103'
)
trap left outer join applog.union_syslog as syslog on trap.sta_mac=syslog.sta_mac where syslog.sta_mac is null and trap.type!=0 and trap.area='km'



5.创建分区表

CREATE EXTERNAL TABLE IF NOT EXISTS applog.union_syslog(
sta_mac STRING,
area STRING
)
PARTITIONED BY (dt STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY "\001"

扫描二维码关注公众号,回复: 3163470 查看本文章


6.删除分区表


drop table applog.union_trap



猜你喜欢

转载自blog.csdn.net/qq_25954159/article/details/78490031