1、创建分区表
-- Single-level partitioned table: data columns (id, name), partition key `month`.
-- Rows are stored as delimited text with tab-separated fields.
create table partition1 (
    id   int,
    name string
)
partitioned by (month string)
row format delimited
    fields terminated by '\t';
(注意:分区字段不是数据文件中实际存储的列,而是对应 HDFS 上的分区目录;查询时可以像普通列一样使用)
2、加载数据
-- Load a local file into the month='201901' partition of the target table.
-- '/opt/module/...' and 数据库名.表名 (database.table) are placeholders to fill in.
load data local inpath '/opt/module/...'
into table 数据库名.表名
partition (month='201901');
3、创建二级分区表并加载数据
-- Create the two-level partitioned table: data columns (id, name),
-- partition keys `month` then `day`; fields in the data files are tab-separated.
create table partition2 (
    id   int,
    name string
)
partitioned by (
    month string,
    day   string
)
row format delimited
    fields terminated by '\t';
-- Load data into the partitions, one statement per (month, day) pair.
-- '/opt/module/...' and 数据库名.表名 (database.table) are placeholders to fill in.

-- Partition month='201901', day='01'
load data local inpath '/opt/module/...'
into table 数据库名.表名
partition (month='201901', day='01');

-- Partition month='201901', day='02'
load data local inpath '/opt/module/...'
into table 数据库名.表名
partition (month='201901', day='02');

-- Partition month='201901', day='03'
load data local inpath '/opt/module/...'
into table 数据库名.表名
partition (month='201901', day='03');

-- Partition month='201902', day='01'
load data local inpath '/opt/module/...'
into table 数据库名.表名
partition (month='201902', day='01');

-- Partition month='201902', day='02'
load data local inpath '/opt/module/...'
into table 数据库名.表名
partition (month='201902', day='02');

-- Partition month='201902', day='03'
load data local inpath '/opt/module/...'
into table 数据库名.表名
partition (month='201902', day='03');
4、使用联合查询(union)查询多个分区的数据
-- Combine the rows of three day-partitions of month '201901'.
-- The query targets partition2 (partitioned by month and day); partition1 is
-- partitioned by month only, so filtering it on `day` cannot work.
-- NOTE: plain `union` also removes duplicate rows; use `union all` to keep them.
select * from partition2 where month='201901' and day='01'
union
select * from partition2 where month='201901' and day='02'
union
select * from partition2 where month='201901' and day='03';
5、增加/删除分区(可以一次操作多个分区:增加时多个分区之间用空格分隔,删除时用逗号分隔)
-- 表名 is a placeholder for the table name; fill in the partition values between the quotes.
-- Add several partitions in one statement: partition specs are separated by whitespace.
alter table 表名 add partition(month=' ') partition(month=' ');
-- Drop several partitions in one statement: partition specs are separated by commas.
alter table 表名 drop partition(month=' '), partition(month=' ');
6、查看分区表有多少分区
-- List the partitions of a partitioned table (分区表名 is a placeholder for the table name).
show partitions 分区表名;
7、把数据直接上传到分区目录,让分区表和数据产生关联
创建目录->上传数据->执行修复->查询数据
-- The directory layout month=.../day=... matches the two-level table partition2,
-- so every step below targets partition2 (partition1 has no `day` partition key).
-- Create the partition directory on HDFS
hive (default)> dfs -mkdir -p /user/.../partition2/month=202005/day=1;
-- Upload the data file into that directory
hive (default)> dfs -put /opt/datas/dept.txt /user/.../partition2/month=202005/day=1;
-- Repair the table so the metastore registers the new directory as a partition
hive> msck repair table partition2;
-- Query the data through the newly registered partition
hive> select * from partition2 where month='202005' and day='1';