Pig 基本语法——join

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/wild46cat/article/details/82728502

主要参考:

https://book.itxueyuan.com/3b7D/PDLO

1、基础数据:

==============================================

[root@cdh1 data]# cat demodata

xiaoxiao,12,12.1f

aaa,13,1.1f

kjkj,12,12.1f

ddf,19,12.8f

youyou,89,12.3f

[root@cdh1 data]# cat demodata2

xiaoxiao,99,aaaaaaaaaaaa

aaa,88,bbbbbbbbbbb

kjkj,77,ccccccccccc

ddf,66,dddddddddd

xuexue,11,sdfsdfsdfsdf

==============================================

2、inner join(内连接)

grunt> A = load '/root/xytest/pig/data/demodata' using PigStorage(',') as (name:chararray,age:int,gpa:float);

grunt> B = load '/root/xytest/pig/data/demodata2' using PigStorage(',') as (name:chararray,score:int,address:chararray);

grunt> C = join A by name,B by name;

grunt> dump C;

输出结果(内连接只保留 name 在 demodata 和 demodata2 中都存在的记录,因此 youyou 和 xuexue 不会出现):

(aaa,13,1.1,aaa,88,bbbbbbbbbbb)

(ddf,19,12.8,ddf,66,dddddddddd)

(kjkj,12,12.1,kjkj,77,ccccccccccc)

(xiaoxiao,12,12.1,xiaoxiao,99,aaaaaaaaaaaa)

3、left outer join

grunt> A = load '/root/xytest/pig/data/demodata' using PigStorage(',') as (name:chararray,age:int,gpa:float);

grunt> B = load '/root/xytest/pig/data/demodata2' using PigStorage(',') as (name:chararray,score:int,address:chararray);

grunt> D = join A by name left,B by name;

grunt> dump D;

输出结果(左外连接保留 A 的全部记录;youyou 在 demodata2 中没有匹配记录,因此对应的 B 字段为空):

(aaa,13,1.1,aaa,88,bbbbbbbbbbb)

(ddf,19,12.8,ddf,66,dddddddddd)

(kjkj,12,12.1,kjkj,77,ccccccccccc)

(youyou,89,12.3,,,)

(xiaoxiao,12,12.1,xiaoxiao,99,aaaaaaaaaaaa)

4、right outer join

grunt> A = load '/root/xytest/pig/data/demodata' using PigStorage(',') as (name:chararray,age:int,gpa:float);

grunt> B = load '/root/xytest/pig/data/demodata2' using PigStorage(',') as (name:chararray,score:int,address:chararray);

grunt> E = join A by name right,B by name;

grunt> dump E;

输出结果(右外连接保留 B 的全部记录;xuexue 在 demodata 中没有匹配记录,因此对应的 A 字段为空):

(aaa,13,1.1,aaa,88,bbbbbbbbbbb)

(ddf,19,12.8,ddf,66,dddddddddd)

(kjkj,12,12.1,kjkj,77,ccccccccccc)

(,,,xuexue,11,sdfsdfsdfsdf)

(xiaoxiao,12,12.1,xiaoxiao,99,aaaaaaaaaaaa)

猜你喜欢

转载自blog.csdn.net/wild46cat/article/details/82728502
pig