Hive操作演示

在usr下，mkdir /wang

hadoop fs -mkdir /upload  
hadoop fs -chmod g+w /upload

上传数据

[root@master wang]# hadoop fs -put emp.csv /upload
[root@master wang]# hadoop fs -put dept.csv /upload

服务端：hiveserver2 &

客户端：

使用anonymous登录

beeline -u jdbc:hive2://master:10000/default

****使用root登录，我们用root用户登录，不然没有写的权限

beeline -u jdbc:hive2://master:10000/default -n root

创建员工表，使用创建表的语句来指明输入文件间隔符，然后load data到这个表

create table emp001(empno int,ename string,job string,mgr int,hiredate string,sal int,comm int,deptno int) row format delimited fields terminated by ',';   # 字段分割用逗号

创建部门表

create table dept001(deptno int,dname string,loc string) row format delimited fields terminated by ',';

导入数据

load data inpath '/upload/emp.csv' into table emp001;
load data inpath '/upload/dept.csv' into table dept001;

根据员工的部门号创建分区

create table emp_part001(empno int,ename string,job string,mgr int,hiredate string,sal int,comm int) partitioned by (deptno int) row format delimited fields terminated by ',';

往分区表中插入数据：指明导入的数据的分区（通过子查询导入数据）

insert into table emp_part001 partition(deptno=10) select empno,ename,job,mgr,hiredate,sal,comm from emp001 where deptno=10;
insert into table emp_part001 partition(deptno=20) select empno,ename,job,mgr,hiredate,sal,comm from emp001 where deptno=20;
insert into table emp_part001 partition(deptno=30) select empno,ename,job,mgr,hiredate,sal,comm from emp001 where deptno=30;

创建一个桶表，根据员工的职位（job）进行分桶

create table emp_bucket001 (empno int,ename string,job string,mgr int,hiredate string,sal int,comm int,deptno int) clustered by (job) into 4 buckets row format delimited fields terminated by ',';

通过子查询插入数据

insert into emp_bucket001 select * from emp001;

查询员工信息：员工号姓名薪水

select empno,ename,sal from emp001;

多表连接查询

select dept001.dname,emp001.ename from emp001,dept001 where emp001.deptno=dept001.deptno;

员工总数

select count(empno) as emp_num from emp001;

去除重复值

select distinct deptno from emp001;

查询公司共有多少种职位

select count(distinct job) from emp001;

统计雇佣时间为1981年的总人数

select count(hiredate) as result from emp001 where hiredate like '%1981%';

统计各部门的薪水总和

select deptno,sum(sal) from emp001 group by deptno;

统计每个职位有多少个员工

select job, count(*) as emp_num from emp001 group by job order by emp_num asc;

查询入职时间最早的员工

select ename,hiredate from emp001
join
(select min(hiredate) as min_hiredate from emp001) t1
where hiredate=t1.min_hiredate;

判断薪水级别

select ename,empno,sal,
case when sal<2000 then 'low' when sal >=2000 
and sal <3000 then 'middle' 
else 'high' 
end as level 
from emp001 
order by sal desc;

根据职位给员工涨工资，把涨前、涨后的薪水显示出来

select empno,ename,job,sal,
case job when 'PRESIDENT' then sal+1000
when 'MANAGER' then sal+800 
else sal+400
end
from emp001;

统计上半年入职员工最多的地区

# cast用于转换数据类型
# substr用于截取字符串
select t1.loc,count(*)as emp_count
from 
(select dept001.loc,emp001.ename,
cast(substr(emp001.hiredate,6,2) as int) as hire_month
from dept001 join emp001 
on dept001.deptno=emp001.deptno) t1
where t1.hire_month<=6
group by t1.loc
order by emp_count desc
limit 1;

hive实例操作

Hive操作演示

猜你喜欢