left semi join VS left join思考:
建表
-- Demo table kv1: two string columns (k1, v1) stored as plain text,
-- with tab as the field delimiter and newline as the line delimiter.
CREATE TABLE `kv1` (
    `k1` string,
    `v1` string
)
ROW FORMAT SERDE
    'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
    'field.delim'='\t',
    'line.delim'='\n',
    'serialization.format'='\t'
)
STORED AS
    INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';

-- Demo table kv2: same layout as kv1, with columns named k2 and v2.
CREATE TABLE `kv2` (
    `k2` string,
    `v2` string
)
ROW FORMAT SERDE
    'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
    'field.delim'='\t',
    'line.delim'='\n',
    'serialization.format'='\t'
)
STORED AS
    INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
插入数据
-- Seed kv1 with keys 1, 2, 3.
-- Keys are written as string literals because k1 is declared `string`;
-- integer literals would only work through an implicit int-to-string cast.
INSERT INTO TABLE kv1
SELECT "1", "a1"
UNION ALL
SELECT "2", "b1"
UNION ALL
SELECT "3", "c1";

-- Seed kv2 with keys 4, 2, 3.
-- Keys 2 and 3 also exist in kv1; key 4 exists only in kv2.
INSERT INTO TABLE kv2
SELECT "4", "a2"
UNION ALL
SELECT "2", "b2"
UNION ALL
SELECT "3", "c2";
left semi join:可以理解为"只返回左表列的 inner join"——只有当 kv2 中存在匹配的 k2 时才保留 kv1 的行(相当于 IN / EXISTS 子查询),并且左表的每一行最多输出一次:
-- LEFT SEMI JOIN: keep the kv1 rows whose k1 has a matching kv2.k2.
-- SELECT * is deliberate: the result shows that only kv1's columns come back.
SELECT *
FROM kv1
LEFT SEMI JOIN kv2
    ON kv1.k1 = kv2.k2;
-- Result:
-- +---------+---------+--+
-- | kv1.k1  | kv1.v1  |
-- +---------+---------+--+
-- | 2       | b1      |
-- | 3       | c1      |
-- +---------+---------+--+
left join:
-- LEFT JOIN: keep every kv1 row; kv2's columns are NULL where no k2 matches k1.
-- SELECT * is deliberate: the result shows the columns of both tables.
SELECT *
FROM kv1
LEFT JOIN kv2
    ON kv1.k1 = kv2.k2;
-- Result:
-- +---------+---------+---------+---------+--+
-- | kv1.k1  | kv1.v1  | kv2.k2  | kv2.v2  |
-- +---------+---------+---------+---------+--+
-- | 1       | a1      | NULL    | NULL    |
-- | 2       | b1      | 2       | b2      |
-- | 3       | c1      | 3       | c2      |
-- +---------+---------+---------+---------+--+