大数据:hive3

hive里面的类型转换
select '1'+2 , cast( '1' as int ) +2;
OK
3.0     3
-------------------------------------------------------------------------------------------

显示数据库
 show databases;
OK
default
mydatabase
postman
product_postman
-------------------------------------------------------------------------------------------
查看数据库详情

hive> desc database product_postman;
OK
product_postman         hdfs://jsptpd/apps/hive/warehouse/product_postman.db    hive    USER
Time taken: 0.227 seconds, Fetched: 1 row(s)
hive> set  hive.cli.print.current.db=true
    > ;
hive (default)> set hive.cli.print.header=true;
hive (default)> desc database product_postman;
OK
db_name comment location        owner_name      owner_type      parameters
product_postman         hdfs://jsptpd/apps/hive/warehouse/product_postman.db    hive    USER
Time taken: 0.225 seconds, Fetched: 1 row(s)
-------------------------------------------------------------------------------------------
hive (default)>  desc database extended product_postman;
OK
db_name comment location        owner_name      owner_type      parameters
product_postman         hdfs://jsptpd/apps/hive/warehouse/product_postman.db    hive    USER
Time taken: 0.231 seconds, Fetched: 1 row(s)
-------------------------------------------------------------------------------------------
切换数据库
hive (default)> use product_postman;
OK
Time taken: 0.436 seconds
-------------------------------------------------------------------------------------------
修改数据库
Time taken: 0.436 seconds
hive (product_postman)> alter database product_postman  set dbproperties('updatetime'='20170830');
OK
Time taken: 0.404 seconds
hive (product_postman)>  desc database extended product_postman;
OK
db_name comment location        owner_name      owner_type      parameters
product_postman         hdfs://jsptpd/apps/hive/warehouse/product_postman.db    hive    USER    {updatetime=20170830}
Time taken: 0.223 seconds, Fetched: 1 row(s)
-------------------------------------------------------------------------------------------

删除有表的库
hive (hive_db2)> drop database hive_db2;
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. InvalidOperationException(message:Database hive_db2 is not empty. One or more tables exist.)
强制删除(cascade,会连同库中所有表一起删除,切不可轻易使用)
hive (hive_db2)> drop database hive_db2 cascade;
OK
Time taken: 1.047 seconds

-------------------------------------------------------------------------------------------
普通创建表:
create table if not exists student2(
id int, name string
)
row format delimited fields terminated by '\t'
stored as textfile
location '/user/hive/warehouse/student2';
 ------------------------------------------------------------------------------------------
hive (hive_db2)> desc formatted student2;
OK
col_name        data_type       comment
# col_name              data_type               comment             
                 
   
Table Type:             MANAGED_TABLE   

--------------------------------------------------------------------------------------------         
 
hive (hive_db2)> create external table if not exists student2(
               > id int, name string
               > )
               > row format delimited fields terminated by '\t'
               > stored as textfile
               > location '/user/hive/warehouse/student2';
OK
Time taken: 0.288 seconds
hive (hive_db2)> drop table student2;
OK
Time taken: 0.486 seconds
hive (hive_db2)> dfs -ls /user/hive/warehouse/student2;
hive (hive_db2)> dfs -ls /user/hive/warehouse/;
Found 1 items
drwxr-xr-x   - hive hdfs          0 2020-02-15 19:36 /user/hive/warehouse/student2
-------------------------------------------------------------------------------------------
desc formatted student2;
OK
col_name        data_type       comment
# col_name              data_type               comment             
                 
       
Table Type:             EXTERNAL_TABLE           
  
Time taken: 0.431 seconds, Fetched: 31 row(s)
------------------------------------
如果是外部表,删除表时只删除元数据,不删除 HDFS 上的数据;如果是管理表(内部表),删除表时会连数据一起删除
---------------------------------------------------------------------------------------------
管理表与外部表的互相转换
desc formatted student2;
hive (hive_db2)> alter  table student2 set tblproperties('EXTERNAL'='TRUE');
OK
Time taken: 0.356 seconds
hive (hive_db2)> desc formatted student2;
OK
   
Table Type:             EXTERNAL_TABLE           
 
hive (hive_db2)> 


1.引入分区表(需要根据日期对日志进行管理)
/user/hive/warehouse/log_partition/20170702/20170702.log
/user/hive/warehouse/log_partition/20170703/20170703.log
/user/hive/warehouse/log_partition/20170704/20170704.log
创建分区表语法
hive (default)> create table dept_partition(
deptno int, dname string, loc string
)
partitioned by (month string)
row format delimited fields terminated by '\t';
注意:分区字段不能是表中已经存在的数据,可以将分区字段看作表的伪列。
3.加载数据到分区表中
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table default.dept_partition partition(month='201709');
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table default.dept_partition partition(month='201708');
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table default.dept_partition partition(month='201707');
注意:分区表加载数据时,必须指定分区
4.查询分区表中数据
	单分区查询
hive (default)> select * from dept_partition where month='201709';
多分区联合查询
hive (default)> select * from dept_partition where month='201709'
              union
              select * from dept_partition where month='201708'
              union
              select * from dept_partition where month='201707';

5.增加分区
创建单个分区
hive (default)> alter table dept_partition add partition(month='201706') ;
	同时创建多个分区
hive (default)> alter table dept_partition add partition(month='201705') partition(month='201704');

6.删除分区
	删除单个分区
hive (default)> alter table dept_partition drop partition (month='201704');
同时删除多个分区
hive (default)> alter table dept_partition drop partition (month='201705'), partition (month='201706');

7.查看分区表有多少分区
hive> show partitions dept_partition;
8.查看分区表结构
hive> desc formatted dept_partition;

# Partition Information          
# col_name              data_type               comment             
month                   string   

创建二级分区表
hive (default)> create table dept_partition2(
               deptno int, dname string, loc string
               )
               partitioned by (month string, day string)
               row format delimited fields terminated by '\t';

正常的加载数据
(1)加载数据到二级分区表中
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table
 default.dept_partition2 partition(month='201709', day='13');
(2)查询分区数据
hive (default)> select * from dept_partition2 where month='201709' and day='13';
把数据直接上传到分区目录上,让分区表和数据产生关联的三种方式
(1)方式一:上传数据后修复
	上传数据
hive (default)> dfs -mkdir -p
 /user/hive/warehouse/dept_partition2/month=201709/day=12;
hive (default)> dfs -put /opt/module/datas/dept.txt  /user/hive/warehouse/dept_partition2/month=201709/day=12;
	查询数据(查询不到刚上传的数据)
hive (default)> select * from dept_partition2 where month='201709' and day='12';
执行修复命令
hive> msck repair table dept_partition2;
再次查询数据
hive (default)> select * from dept_partition2 where month='201709' and day='12';
方式二:上传数据后添加分区
	上传数据
hive (default)> dfs -mkdir -p
 /user/hive/warehouse/dept_partition2/month=201709/day=11;
hive (default)> dfs -put /opt/module/datas/dept.txt  /user/hive/warehouse/dept_partition2/month=201709/day=11;
	执行添加分区
	hive (default)> alter table dept_partition2 add partition(month='201709',
 day='11');
	查询数据
hive (default)> select * from dept_partition2 where month='201709' and day='11';
(3)方式三:创建文件夹后load数据到分区
		创建目录
hive (default)> dfs -mkdir -p
 /user/hive/warehouse/dept_partition2/month=201709/day=10;
上传数据
hive (default)> load data local inpath '/opt/module/datas/dept.txt' into table
 dept_partition2 partition(month='201709',day='10');
查询数据
hive (default)> select * from dept_partition2 where month='201709' and day='10';


修改表
4.7.1 重命名表
1.语法
ALTER TABLE table_name RENAME TO new_table_name
2.实操案例
hive (default)> alter table dept_partition2 rename to dept_partition3;
4.7.2 增加、修改和删除表分区


4.7.3 增加/修改/替换列信息
1.语法
	更新列
ALTER TABLE table_name CHANGE [COLUMN] col_old_name col_new_name column_type [COMMENT col_comment] [FIRST|AFTER column_name]
增加和替换列
ALTER TABLE table_name ADD|REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...) 
注:ADD是代表新增一字段,字段位置在所有列后面(partition列前),REPLACE则是表示替换表中所有字段。
(1)查询表结构
hive> desc dept_partition;
(2)添加列
hive (default)> alter table dept_partition add columns(deptdesc string);
(3)查询表结构
hive> desc dept_partition;
(4)更新列
hive (default)> alter table dept_partition change column deptdesc desc int;
(5)查询表结构
hive> desc dept_partition;
(6)替换列
hive (default)> alter table dept_partition replace columns(deptno string, dname
 string, loc string);
(7)查询表结构
hive> desc dept_partition;
删除表
hive (default)> drop table dept_partition;



 

DML数据操作

数据导入

 向表中装载数据(Load)
1.语法
hive> load data [local] inpath '/opt/module/datas/student.txt' [overwrite] into table student [partition (partcol1=val1,…)];
(1)load data:表示加载数据
(2)local:表示从本地加载数据到hive表;否则从HDFS加载数据到hive表
(3)inpath:表示加载数据的路径
(4)overwrite:表示覆盖表中已有数据,否则表示追加
(5)into table:表示加载到哪张表
(6)student:表示具体的表
(7)partition:表示上传到指定分区
实操案例
	(0)创建一张表
hive (default)> create table student(id string, name string) row format delimited fields terminated by '\t';
(1)加载本地文件到hive
hive (default)> load data local inpath '/opt/module/datas/student.txt' into table default.student;
(2)加载HDFS文件到hive中
	上传文件到HDFS
hive (default)> dfs -put /opt/module/datas/student.txt /user/atguigu/hive;
加载HDFS上数据
hive (default)> load data inpath '/user/atguigu/hive/student.txt' into table default.student;
(3)加载数据覆盖表中已有的数据
上传文件到HDFS
hive (default)> dfs -put /opt/module/datas/student.txt /user/atguigu/hive;
加载数据覆盖表中已有的数据
hive (default)> load data inpath '/user/atguigu/hive/student.txt' overwrite into table default.student;
1.创建一张分区表
hive (default)> create table student(id int, name string) partitioned by (month string) row format delimited fields terminated by '\t';
2.基本插入数据
hive (default)> insert into table student partition(month='201709') values(1,'wangwu'),(2,'zhaoliu');
3.基本模式插入(根据单张表查询结果)
hive (default)> insert overwrite table student partition(month='201708')
             select id, name from student where month='201709';
insert into:以追加数据的方式插入到表或分区,原有数据不会删除
insert overwrite:会覆盖表或分区中已存在的数据
注意:insert不支持插入部分字段
多表(多分区)插入模式(根据多张表查询结果)
hive (default)> from student
              insert overwrite table student partition(month='201707')
              select id, name where month='201709'
              insert overwrite table student partition(month='201706')
              select id, name where month='201709';
根据查询结果创建表(查询的结果会添加到新创建的表中)
create table if not exists student3
as select id, name from student;
1.上传数据到hdfs上
hive (default)> dfs -mkdir /student;
hive (default)> dfs -put /opt/module/datas/student.txt /student;
2.	创建表,并指定在hdfs上的位置
hive (default)> create external table if not exists student5(
              id int, name string
              )
              row format delimited fields terminated by '\t'
              location '/student';
 Import数据到指定Hive表中
注意:先用export导出后,再将数据导入。
hive (default)> import table student2 partition(month='201709') from
 '/user/hive/warehouse/export/student';
数据导出
5.2.1 Insert导出
1.将查询的结果导出到本地
hive (default)> insert overwrite local directory '/opt/module/datas/export/student'
            select * from student;
2.将查询的结果格式化导出到本地
hive(default)>insert overwrite local directory '/opt/module/datas/export/student1'
           ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'             select * from student;
3.将查询的结果导出到HDFS上(没有local)
hive (default)> insert overwrite directory '/user/atguigu/student2'
             ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' 
             select * from student;
数据导出
5.2.1 Insert导出
1.将查询的结果导出到本地
hive (default)> insert overwrite local directory '/opt/module/datas/export/student'
            select * from student;
2.将查询的结果格式化导出到本地
hive(default)>insert overwrite local directory '/opt/module/datas/export/student1'
           ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'             select * from student;
3.将查询的结果导出到HDFS上(没有local)
hive (default)> insert overwrite directory '/user/atguigu/student2'
             ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' 
             select * from student;
 Hadoop命令导出到本地
hive (default)> dfs -get /user/hive/warehouse/student/month=201709/000000_0
/opt/module/datas/export/student3.txt;
5.2.3 Hive Shell 命令导出
基本语法:(hive -f/-e 执行语句或者脚本 > file)
[atguigu@hadoop102 hive]$ bin/hive -e 'select * from default.student;' >
 /opt/module/datas/export/student4.txt;
5.2.4 Export导出到HDFS上
hive (default)> export table default.student to
 '/user/hive/warehouse/export/student';
export和import主要用于两个Hadoop平台集群之间Hive表迁移。
清除表中数据(Truncate)
注意:Truncate只能删除管理表,不能删除外部表中数据
hive (default)> truncate table student;

查询

查询语句语法:
[WITH CommonTableExpression (, CommonTableExpression)*]    (Note: Only available
 starting with Hive 0.13.0)
SELECT [ALL | DISTINCT] select_expr, select_expr, ...
  FROM table_reference
  [WHERE where_condition]
  [GROUP BY col_list]
  [ORDER BY col_list]
  [CLUSTER BY col_list
    | [DISTRIBUTE BY col_list] [SORT BY col_list]
  ]
 [LIMIT number]


全表和特定列查询
创建部门表
create table if not exists dept(
deptno int,
dname string,
loc int
)
row format delimited fields terminated by '\t';


发布了131 篇原创文章 · 获赞 27 · 访问量 32万+

猜你喜欢

转载自blog.csdn.net/wangjunji34478/article/details/104332549