left semi join VS left join

关于 left semi join 与 left join 区别的思考：

建表

 -- Demo table kv1: two string columns (k1, v1), stored as tab-delimited text
 -- with newline-terminated rows.
 CREATE TABLE kv1 (
     k1 STRING,
     v1 STRING
 )
 ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
 WITH SERDEPROPERTIES (
     'field.delim' = '\t',
     'line.delim' = '\n',
     'serialization.format' = '\t'
 )
 STORED AS
     INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
     OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';

-- Demo table kv2: two string columns (k2, v2), stored as tab-delimited text
-- with newline-terminated rows.
CREATE TABLE kv2 (
    k2 STRING,
    v2 STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
    'field.delim' = '\t',
    'line.delim' = '\n',
    'serialization.format' = '\t'
)
STORED AS
    INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';

插入数据

-- Seed kv1 with three rows (keys 1, 2, 3). The keys are integer literals
-- written into the string column k1.
INSERT INTO TABLE kv1
SELECT 1, "a1"
UNION ALL
SELECT 2, "b1"
UNION ALL
SELECT 3, "c1";
   
   
-- Seed kv2 with three rows (keys 4, 2, 3). Only keys 2 and 3 also exist in
-- kv1; key 4 has no counterpart there.
INSERT INTO TABLE kv2
SELECT 4, "a2"
UNION ALL
SELECT 2, "b2"
UNION ALL
SELECT 3, "c2";

left semi join：可以理解为 left semi + inner join——只返回左表 kv1 中能在右表 kv2 找到匹配键的行，并且结果只包含左表的列：

-- LEFT SEMI JOIN: keeps the kv1 rows whose key has a match in kv2.
-- SELECT * is kept on purpose: as the result below shows, only kv1's
-- columns come back.
SELECT *
FROM kv1
LEFT SEMI JOIN kv2
    ON kv1.k1 = kv2.k2;
+---------+---------+--+
| kv1.k1  | kv1.v1  |
+---------+---------+--+
| 2       | b1      |
| 3       | c1      |
+---------+---------+--+

left join:

-- LEFT JOIN: keeps every kv1 row; kv2's columns are NULL where no key
-- matches (k1 = 1 in the result below).
SELECT *
FROM kv1
LEFT JOIN kv2
    ON kv1.k1 = kv2.k2;
+---------+---------+---------+---------+--+
| kv1.k1  | kv1.v1  | kv2.k2  | kv2.v2  |
+---------+---------+---------+---------+--+
| 1       | a1      | NULL    | NULL    |
| 2       | b1      | 2       | b2      |
| 3       | c1      | 3       | c2      |
+---------+---------+---------+---------+--+

猜你喜欢

转载自www.cnblogs.com/wqbin/p/11048796.html