大数据:hive3

hive里面的类型转换
select '1'+2 , cast( '1' as int ) +2;
OK
3.0     3
-------------------------------------------------------------------------------------------

显示数据库
 show databases;
OK
default
mydatabase
postman
product_postman
-------------------------------------------------------------------------------------------
查看数据库详情

hive> desc database product_postman;
OK
product_postman         hdfs://jsptpd/apps/hive/warehouse/product_postman.db    hive    USER
Time taken: 0.227 seconds, Fetched: 1 row(s)
hive> set  hive.cli.print.current.db=true
    > ;
hive (default)> set hive.cli.print.header=true;
hive (default)> desc database product_postman;
OK
db_name comment location        owner_name      owner_type      parameters
product_postman         hdfs://jsptpd/apps/hive/warehouse/product_postman.db    hive    USER
Time taken: 0.225 seconds, Fetched: 1 row(s)
-------------------------------------------------------------------------------------------
hive (default)>  desc database extended product_postman;
OK
db_name comment location        owner_name      owner_type      parameters
product_postman         hdfs://jsptpd/apps/hive/warehouse/product_postman.db    hive    USER
Time taken: 0.231 seconds, Fetched: 1 row(s)
-------------------------------------------------------------------------------------------
切换数据库
hive (default)> use product_postman;
OK
Time taken: 0.436 seconds
-------------------------------------------------------------------------------------------
修改数据库
Time taken: 0.436 seconds
hive (product_postman)> alter database product_postman  set dbproperties('updatetime'='20170830');
OK
Time taken: 0.404 seconds
hive (product_postman)>  desc database extended product_postman;
OK
db_name comment location        owner_name      owner_type      parameters
product_postman         hdfs://jsptpd/apps/hive/warehouse/product_postman.db    hive    USER    {updatetime=20170830}
Time taken: 0.223 seconds, Fetched: 1 row(s)
-------------------------------------------------------------------------------------------

删除有表的库
hive (hive_db2)> drop database hive_db2;
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. InvalidOperationException(message:Database hive_db2 is not empty. One or more tables exist.)
强制删除(cascade,会连同库中所有表一起删除,切不可轻易使用)
hive (hive_db2)> drop database hive_db2 cascade;
OK
Time taken: 1.047 seconds

-------------------------------------------------------------------------------------------
普通创建表:
create table if not exists student2(
id int, name string
)
row format delimited fields terminated by '\t'
stored as textfile
location '/user/hive/warehouse/student2';
 ------------------------------------------------------------------------------------------
hive (hive_db2)> desc formatted student2;
OK
col_name        data_type       comment
# col_name              data_type               comment             
                 
   
Table Type:             MANAGED_TABLE   

--------------------------------------------------------------------------------------------         
 
hive (hive_db2)> create external table if not exists student2(
               > id int, name string
               > )
               > row format delimited fields terminated by '\t'
               > stored as textfile
               > location '/user/hive/warehouse/student2';
OK
Time taken: 0.288 seconds
hive (hive_db2)> drop table student2;
OK
Time taken: 0.486 seconds
hive (hive_db2)> dfs -ls /user/hive/warehouse/student2;
hive (hive_db2)> dfs -ls /user/hive/warehouse/;
Found 1 items
drwxr-xr-x   - hive hdfs          0 2020-02-15 19:36 /user/hive/warehouse/student2
-------------------------------------------------------------------------------------------
desc formatted student2;
OK
col_name        data_type       comment
# col_name              data_type               comment             
                 
       
Table Type:             EXTERNAL_TABLE           
  
Time taken: 0.431 seconds, Fetched: 31 row(s)
------------------------------------
如果是外部表,删除表时只删除元数据,不删除 HDFS 上的数据;如果是管理表(内部表),删除表时会连数据一起删除
---------------------------------------------------------------------------------------------
管理表与外部表的互相转换
desc formatted student2;
hive (hive_db2)> alter  table student2 set tblproperties('EXTERNAL'='TRUE');
OK
Time taken: 0.356 seconds
hive (hive_db2)> desc formatted student2;
OK
   
Table Type:             EXTERNAL_TABLE           
 
hive (hive_db2)> 


1.引入分区表(需要根据日期对日志进行管理)
/user/hive/warehouse/log_partition/20170702/20170702.log
/user/hive/warehouse/log_partition/20170703/20170703.log
/user/hive/warehouse/log_partition/20170704/20170704.log
创建分区表语法
hive (default)> create table dept_partition(
deptno int, dname string, loc string
)
partitioned by (month string)
row format delimited fields terminated by '\t';
注意:分区字段不能是表中已经存在的数据,可以将分区字段看作表的伪列。
3.加载数据到分区表中
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table default.dept_partition partition(month='201709');
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table default.dept_partition partition(month='201708');
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table default.dept_partition partition(month='201707');
注意:分区表加载数据时,必须指定分区
4.查询分区表中数据
	单分区查询
hive (default)> select * from dept_partition where month='201709';
多分区联合查询
hive (default)> select * from dept_partition where month='201709'
              union
              select * from dept_partition where month='201708'
              union
              select * from dept_partition where month='201707';

5.增加分区
创建单个分区
hive (default)> alter table dept_partition add partition(month='201706') ;
	同时创建多个分区
hive (default)> alter table dept_partition add partition(month='201705') partition(month='201704');

6.删除分区
	删除单个分区
hive (default)> alter table dept_partition drop partition (month='201704');
同时删除多个分区
hive (default)> alter table dept_partition drop partition (month='201705'), partition (month='201706');

7.查看分区表有多少分区
hive> show partitions dept_partition;
8.查看分区表结构
hive> desc formatted dept_partition;

# Partition Information          
# col_name              data_type               comment             
month                   string   

创建二级分区表
hive (default)> create table dept_partition2(
               deptno int, dname string, loc string
               )
               partitioned by (month string, day string)
               row format delimited fields terminated by '\t';

正常的加载数据
(1)加载数据到二级分区表中
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table
 default.dept_partition2 partition(month='201709', day='13');
(2)查询分区数据
hive (default)> select * from dept_partition2 where month='201709' and day='13';
把数据直接上传到分区目录上,让分区表和数据产生关联的三种方式
(1)方式一:上传数据后修复
	上传数据
hive (default)> dfs -mkdir -p
 /user/hive/warehouse/dept_partition2/month=201709/day=12;
hive (default)> dfs -put /opt/module/datas/dept.txt  /user/hive/warehouse/dept_partition2/month=201709/day=12;
	查询数据(查询不到刚上传的数据)
hive (default)> select * from dept_partition2 where month='201709' and day='12';
执行修复命令
hive> msck repair table dept_partition2;
再次查询数据
hive (default)> select * from dept_partition2 where month='201709' and day='12';
方式二:上传数据后添加分区
	上传数据
hive (default)> dfs -mkdir -p
 /user/hive/warehouse/dept_partition2/month=201709/day=11;
hive (default)> dfs -put /opt/module/datas/dept.txt  /user/hive/warehouse/dept_partition2/month=201709/day=11;
	执行添加分区
	hive (default)> alter table dept_partition2 add partition(month='201709',
 day='11');
	查询数据
hive (default)> select * from dept_partition2 where month='201709' and day='11';
(3)方式三:创建文件夹后load数据到分区
		创建目录
hive (default)> dfs -mkdir -p
 /user/hive/warehouse/dept_partition2/month=201709/day=10;
上传数据
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table
 dept_partition2 partition(month='201709',day='10');
查询数据
hive (default)> select * from dept_partition2 where month='201709' and day='10';


修改表
4.7.1 重命名表
1.语法
ALTER TABLE table_name RENAME TO new_table_name
2.实操案例
hive (default)> alter table dept_partition2 rename to dept_partition3;
4.7.2 增加、修改和删除表分区


4.7.3 增加/修改/替换列信息
1.语法
	更新列
ALTER TABLE table_name CHANGE [COLUMN] col_old_name col_new_name column_type [COMMENT col_comment] [FIRST|AFTER column_name]
增加和替换列
ALTER TABLE table_name ADD|REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...) 
注:ADD是代表新增一字段,字段位置在所有列后面(partition列前),REPLACE则是表示替换表中所有字段。
(1)查询表结构
hive> desc dept_partition;
(2)添加列
hive (default)> alter table dept_partition add columns(deptdesc string);
(3)查询表结构
hive> desc dept_partition;
(4)更新列
hive (default)> alter table dept_partition change column deptdesc desc int;
(5)查询表结构
hive> desc dept_partition;
(6)替换列
hive (default)> alter table dept_partition replace columns(deptno string, dname
 string, loc string);
(7)查询表结构
hive> desc dept_partition;
删除表
hive (default)> drop table dept_partition;



 

DML数据操作

数据导入

 向表中装载数据(Load)
1.语法
hive> load data [local] inpath '/opt/module/datas/student.txt' [overwrite] into table student [partition (partcol1=val1,…)];
(1)load data:表示加载数据
(2)local:表示从本地加载数据到hive表;否则从HDFS加载数据到hive表
(3)inpath:表示加载数据的路径
(4)overwrite:表示覆盖表中已有数据,否则表示追加
(5)into table:表示加载到哪张表
(6)student:表示具体的表
(7)partition:表示上传到指定分区
实操案例
	(0)创建一张表
hive (default)> create table student(id string, name string) row format delimited fields terminated by '\t';
(1)加载本地文件到hive
hive (default)> load data local inpath '/opt/module/datas/student.txt' into table default.student;
(2)加载HDFS文件到hive中
	上传文件到HDFS
hive (default)> dfs -put /opt/module/datas/student.txt /user/atguigu/hive;
加载HDFS上数据
hive (default)> load data inpath '/user/atguigu/hive/student.txt' into table default.student;
(3)加载数据覆盖表中已有的数据
上传文件到HDFS
hive (default)> dfs -put /opt/module/datas/student.txt /user/atguigu/hive;
加载数据覆盖表中已有的数据
hive (default)> load data inpath '/user/atguigu/hive/student.txt' overwrite into table default.student;
1.创建一张分区表
hive (default)> create table student(id int, name string) partitioned by (month string) row format delimited fields terminated by '\t';
2.基本插入数据
hive (default)> insert into table student partition(month='201709') values(1,'wangwu'),(2,'zhaoliu');
3.基本模式插入(根据单张表查询结果)
hive (default)> insert overwrite table student partition(month='201708')
             select id, name from student where month='201709';
insert into:以追加数据的方式插入到表或分区,原有数据不会删除
insert overwrite:会覆盖表或分区中已存在的数据
注意:insert不支持插入部分字段
多表(多分区)插入模式(根据多张表查询结果)
hive (default)> from student
              insert overwrite table student partition(month='201707')
              select id, name where month='201709'
              insert overwrite table student partition(month='201706')
              select id, name where month='201709';
根据查询结果创建表(查询的结果会添加到新创建的表中)
create table if not exists student3
as select id, name from student;
1.上传数据到hdfs上
hive (default)> dfs -mkdir /student;
hive (default)> dfs -put /opt/module/datas/student.txt /student;
2.	创建表,并指定在hdfs上的位置
hive (default)> create external table if not exists student5(
              id int, name string
              )
              row format delimited fields terminated by '\t'
              location '/student';
 Import数据到指定Hive表中
注意:先用export导出后,再将数据导入。
hive (default)> import table student2 partition(month='201709') from
 '/user/hive/warehouse/export/student';
数据导出
5.2.1 Insert导出
1.将查询的结果导出到本地
hive (default)> insert overwrite local directory '/opt/module/datas/export/student'
            select * from student;
2.将查询的结果格式化导出到本地
hive(default)>insert overwrite local directory '/opt/module/datas/export/student1'
           ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'             select * from student;
3.将查询的结果导出到HDFS上(没有local)
hive (default)> insert overwrite directory '/user/atguigu/student2'
             ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' 
             select * from student;
数据导出
5.2.1 Insert导出
1.将查询的结果导出到本地
hive (default)> insert overwrite local directory '/opt/module/datas/export/student'
            select * from student;
2.将查询的结果格式化导出到本地
hive(default)>insert overwrite local directory '/opt/module/datas/export/student1'
           ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'             select * from student;
3.将查询的结果导出到HDFS上(没有local)
hive (default)> insert overwrite directory '/user/atguigu/student2'
             ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' 
             select * from student;
 Hadoop命令导出到本地
hive (default)> dfs -get /user/hive/warehouse/student/month=201709/000000_0
/opt/module/datas/export/student3.txt;
5.2.3 Hive Shell 命令导出
基本语法:(hive -f/-e 执行语句或者脚本 > file)
[atguigu@hadoop102 hive]$ bin/hive -e 'select * from default.student;' >
 /opt/module/datas/export/student4.txt;
5.2.4 Export导出到HDFS上
hive (default)> export table default.student to
 '/user/hive/warehouse/export/student';
export和import主要用于两个Hadoop平台集群之间Hive表迁移。
清除表中数据(Truncate)
注意:Truncate只能删除管理表,不能删除外部表中数据
hive (default)> truncate table student;

查询

查询语句语法:
[WITH CommonTableExpression (, CommonTableExpression)*]    (Note: Only available
 starting with Hive 0.13.0)
SELECT [ALL | DISTINCT] select_expr, select_expr, ...
  FROM table_reference
  [WHERE where_condition]
  [GROUP BY col_list]
  [ORDER BY col_list]
  [CLUSTER BY col_list
    | [DISTRIBUTE BY col_list] [SORT BY col_list]
  ]
 [LIMIT number]


全表和特定列查询
创建部门表
create table if not exists dept(
deptno int,
dname string,
loc int
)
row format delimited fields terminated by '\t';


发布了131 篇原创文章 · 获赞 27 · 访问量 32万+

猜你喜欢

转载自blog.csdn.net/wangjunji34478/article/details/104332549