hive里面的类型转换
select '1'+2 , cast( '1' as int ) +2;
OK
3.0 3
-------------------------------------------------------------------------------------------
显示数据库
show databases;
OK
default
mydatabase
postman
product_postman
-------------------------------------------------------------------------------------------
查看数据库详情
hive> desc database product_postman;
OK
product_postman hdfs://jsptpd/apps/hive/warehouse/product_postman.db hive USER
Time taken: 0.227 seconds, Fetched: 1 row(s)
hive> set hive.cli.print.current.db=true
> ;
hive (default)> set hive.cli.print.header=true;
hive (default)> desc database product_postman;
OK
db_name comment location owner_name owner_type parameters
product_postman hdfs://jsptpd/apps/hive/warehouse/product_postman.db hive USER
Time taken: 0.225 seconds, Fetched: 1 row(s)
-------------------------------------------------------------------------------------------
hive (default)> desc database extended product_postman;
OK
db_name comment location owner_name owner_type parameters
product_postman hdfs://jsptpd/apps/hive/warehouse/product_postman.db hive USER
Time taken: 0.231 seconds, Fetched: 1 row(s)
-------------------------------------------------------------------------------------------
切换数据库
hive (default)> use product_postman;
OK
Time taken: 0.436 seconds
-------------------------------------------------------------------------------------------
修改数据库
Time taken: 0.436 seconds
hive (product_postman)> alter database product_postman set dbproperties('updatetime'='20170830');
OK
Time taken: 0.404 seconds
hive (product_postman)> desc database extended product_postman;
OK
db_name comment location owner_name owner_type parameters
product_postman hdfs://jsptpd/apps/hive/warehouse/product_postman.db hive USER {updatetime=20170830}
Time taken: 0.223 seconds, Fetched: 1 row(s)
-------------------------------------------------------------------------------------------
删除有表的库
hive (hive_db2)> drop database hive_db2;
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. InvalidOperationException(message:Database hive_db2 is not empty. One or more tables exist.)
强制删除 切不可用
current_user(
hive (hive_db2)> drop database hive_db2 cascade;
OK
Time taken: 1.047 seconds
-------------------------------------------------------------------------------------------
普通创建表:
create table if not exists student2(
id int, name string
)
row format delimited fields terminated by '\t'
stored as textfile
location '/user/hive/warehouse/student2';
------------------------------------------------------------------------------------------
hive (hive_db2)> desc formatted student2;
OK
col_name data_type comment
# col_name data_type comment
Table Type: MANAGED_TABLE
--------------------------------------------------------------------------------------------
hive (hive_db2)> create external table if not exists student2(
> id int, name string
> )
> row format delimited fields terminated by '\t'
> stored as textfile
> location '/user/hive/warehouse/student2';
OK
Time taken: 0.288 seconds
hive (hive_db2)> drop table student2;
OK
Time taken: 0.486 seconds
hive (hive_db2)> dfs -ls /user/hive/warehouse/student2;
hive (hive_db2)> dfs -ls /user/hive/warehouse/;
Found 1 items
drwxr-xr-x - hive hdfs 0 2020-02-15 19:36 /user/hive/warehouse/student2
-------------------------------------------------------------------------------------------
desc formatted student2;
OK
col_name data_type comment
# col_name data_type comment
Table Type: EXTERNAL_TABLE
Time taken: 0.431 seconds, Fetched: 31 row(s)
------------------------------------
如果是外表,则不删除表,如是管理表,则删除数据
---------------------------------------------------------------------------------------------
管理表与外部表的互相转换
desc formatted students;
hive (hive_db2)> alter table student2 set tblproperties('EXTERNAL'='TRUE');
OK
Time taken: 0.356 seconds
hive (hive_db2)> desc formatted student2;
OK
Table Type: EXTERNAL_TABLE
hive (hive_db2)>
1.引入分区表(需要根据日期对日志进行管理)
/user/hive/warehouse/log_partition/20170702/20170702.log
/user/hive/warehouse/log_partition/20170703/20170703.log
/user/hive/warehouse/log_partition/20170704/20170704.log
创建分区表语法
hive (default)> create table dept_partition(
deptno int, dname string, loc string
)
partitioned by (month string)
row format delimited fields terminated by '\t';
注意:分区字段不能是表中已经存在的数据,可以将分区字段看作表的伪列。
3.加载数据到分区表中
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table default.dept_partition partition(month='201709');
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table default.dept_partition partition(month='201708');
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table default.dept_partition partition(month='201707’);
注意:分区表加载数据时,必须指定分区
4.查询分区表中数据
单分区查询
hive (default)> select * from dept_partition where month='201709';
多分区联合查询
hive (default)> select * from dept_partition where month='201709'
union
select * from dept_partition where month='201708'
union
select * from dept_partition where month='201707';
增加分区
创建单个分区
hive (default)> alter table dept_partition add partition(month='201706') ;
同时创建多个分区
hive (default)> alter table dept_partition add partition(month='201705') partition(month='201704');
6.删除分区
删除单个分区
hive (default)> alter table dept_partition drop partition (month='201704');
同时删除多个分区
hive (default)> alter table dept_partition drop partition (month='201705'), partition (month='201706');
.查看分区表有多少分区
hive> show partitions dept_partition;
8.查看分区表结构
hive> desc formatted dept_partition;
# Partition Information
# col_name data_type comment
month string
创建二级分区表
hive (default)> create table dept_partition2(
deptno int, dname string, loc string
)
partitioned by (month string, day string)
row format delimited fields terminated by '\t';
正常的加载数据
(1)加载数据到二级分区表中
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table
default.dept_partition2 partition(month='201709', day='13');
(2)查询分区数据
hive (default)> select * from dept_partition2 where month='201709' and day='13';
把数据直接上传到分区目录上,让分区表和数据产生关联的三种方式
(1)方式一:上传数据后修复
上传数据
hive (default)> dfs -mkdir -p
/user/hive/warehouse/dept_partition2/month=201709/day=12;
hive (default)> dfs -put /opt/module/datas/dept.txt /user/hive/warehouse/dept_partition2/month=201709/day=12;
查询数据(查询不到刚上传的数据)
hive (default)> select * from dept_partition2 where month='201709' and day='12';
执行修复命令
hive> msck repair table dept_partition2;
再次查询数据
hive (default)> select * from dept_partition2 where month='201709' and day='12';
方式二:上传数据后添加分区
上传数据
hive (default)> dfs -mkdir -p
/user/hive/warehouse/dept_partition2/month=201709/day=11;
hive (default)> dfs -put /opt/module/datas/dept.txt /user/hive/warehouse/dept_partition2/month=201709/day=11;
执行添加分区
hive (default)> alter table dept_partition2 add partition(month='201709',
day='11');
查询数据
hive (default)> select * from dept_partition2 where month='201709' and day='11';
(3)方式三:创建文件夹后load数据到分区
创建目录
hive (default)> dfs -mkdir -p
/user/hive/warehouse/dept_partition2/month=201709/day=10;
上传数据
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table
dept_partition2 partition(month='201709',day='10');
查询数据
hive (default)> select * from dept_partition2 where month='201709' and day='10';
修改表
4.7.1 重命名表
1.语法
ALTER TABLE table_name RENAME TO new_table_name
2.实操案例
hive (default)> alter table dept_partition2 rename to dept_partition3;
4.7.2 增加、修改和删除表分区
4.7.3 增加/修改/替换列信息
1.语法
更新列
ALTER TABLE table_name CHANGE [COLUMN] col_old_name col_new_name column_type [COMMENT col_comment] [FIRST|AFTER column_name]
增加和替换列
ALTER TABLE table_name ADD|REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...)
注:ADD是代表新增一字段,字段位置在所有列后面(partition列前),REPLACE则是表示替换表中所有字段。
(1)查询表结构
hive> desc dept_partition;
(2)添加列
hive (default)> alter table dept_partition add columns(deptdesc string);
(3)查询表结构
hive> desc dept_partition;
(4)更新列
hive (default)> alter table dept_partition change column deptdesc desc int;
(5)查询表结构
hive> desc dept_partition;
(6)替换列
hive (default)> alter table dept_partition replace columns(deptno string, dname
string, loc string);
(7)查询表结构
hive> desc dept_partition;
删除表
hive (default)> drop table dept_partition;
DML数据操作
数据导入
向表中装载数据(Load)
1.语法
hive> load data [local] inpath '/opt/module/datas/student.txt' [overwrite] into table student [partition (partcol1=val1,…)];
(1)load data:表示加载数据
(2)local:表示从本地加载数据到hive表;否则从HDFS加载数据到hive表
(3)inpath:表示加载数据的路径
(4)overwrite:表示覆盖表中已有数据,否则表示追加
(5)into table:表示加载到哪张表
(6)student:表示具体的表
(7)partition:表示上传到指定分区
实操案例
(0)创建一张表
hive (default)> create table student(id string, name string) row format delimited fields terminated by '\t';
(1)加载本地文件到hive
hive (default)> load data local inpath '/opt/module/datas/student.txt' into table default.student;
(2)加载HDFS文件到hive中
上传文件到HDFS
hive (default)> dfs -put /opt/module/datas/student.txt /user/atguigu/hive;
加载HDFS上数据
hive (default)> load data inpath '/user/atguigu/hive/student.txt' into table default.student;
(3)加载数据覆盖表中已有的数据
上传文件到HDFS
hive (default)> dfs -put /opt/module/datas/student.txt /user/atguigu/hive;
加载数据覆盖表中已有的数据
hive (default)> load data inpath '/user/atguigu/hive/student.txt' overwrite into table default.student;
1.创建一张分区表
hive (default)> create table student(id int, name string) partitioned by (month string) row format delimited fields terminated by '\t';
2.基本插入数据
hive (default)> insert into table student partition(month='201709') values(1,'wangwu'),(2,’zhaoliu’);
3.基本模式插入(根据单张表查询结果)
hive (default)> insert overwrite table student partition(month='201708')
select id, name from student where month='201709';
insert into:以追加数据的方式插入到表或分区,原有数据不会删除
insert overwrite:会覆盖表或分区中已存在的数据
注意:insert不支持插入部分字段
多表(多分区)插入模式(根据多张表查询结果)
hive (default)> from student
insert overwrite table student partition(month='201707')
select id, name where month='201709'
insert overwrite table student partition(month='201706')
select id, name where month='201709';
根据查询结果创建表(查询的结果会添加到新创建的表中)
create table if not exists student3
as select id, name from student;
1.上传数据到hdfs上
hive (default)> dfs -mkdir /student;
hive (default)> dfs -put /opt/module/datas/student.txt /student;
2. 创建表,并指定在hdfs上的位置
hive (default)> create external table if not exists student5(
id int, name string
)
row format delimited fields terminated by '\t'
location '/student;
Import数据到指定Hive表中
注意:先用export导出后,再将数据导入。
hive (default)> import table student2 partition(month='201709') from
'/user/hive/warehouse/export/student';
数据导出
5.2.1 Insert导出
1.将查询的结果导出到本地
hive (default)> insert overwrite local directory '/opt/module/datas/export/student'
select * from student;
2.将查询的结果格式化导出到本地
hive(default)>insert overwrite local directory '/opt/module/datas/export/student1'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' select * from student;
3.将查询的结果导出到HDFS上(没有local)
hive (default)> insert overwrite directory '/user/atguigu/student2'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
select * from student;
数据导出
5.2.1 Insert导出
1.将查询的结果导出到本地
hive (default)> insert overwrite local directory '/opt/module/datas/export/student'
select * from student;
2.将查询的结果格式化导出到本地
hive(default)>insert overwrite local directory '/opt/module/datas/export/student1'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' select * from student;
3.将查询的结果导出到HDFS上(没有local)
hive (default)> insert overwrite directory '/user/atguigu/student2'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
select * from student;
Hadoop命令导出到本地
hive (default)> dfs -get /user/hive/warehouse/student/month=201709/000000_0
/opt/module/datas/export/student3.txt;
5.2.3 Hive Shell 命令导出
基本语法:(hive -f/-e 执行语句或者脚本 > file)
[atguigu@hadoop102 hive]$ bin/hive -e 'select * from default.student;' >
/opt/module/datas/export/student4.txt;
5.2.4 Export导出到HDFS上
(defahiveult)> export table default.student to
'/user/hive/warehouse/export/student';
export和import主要用于两个Hadoop平台集群之间Hive表迁移。
清除表中数据(Truncate)
注意:Truncate只能删除管理表,不能删除外部表中数据
hive (default)> truncate table student;
查询
查询语句语法:
[WITH CommonTableExpression (, CommonTableExpression)*] (Note: Only available
starting with Hive 0.13.0)
SELECT [ALL | DISTINCT] select_expr, select_expr, ...
FROM table_reference
[WHERE where_condition]
[GROUP BY col_list]
[ORDER BY col_list]
[CLUSTER BY col_list
| [DISTRIBUTE BY col_list] [SORT BY col_list]
]
[LIMIT number]
全表和特定列查询
创建部门表
create table if not exists dept(
deptno int,
dname string,
loc int
)
row format delimited fields terminated by '\t';