hadoop@vm2:~/apache-hive-0.14.0-bin$ ll
total 400
drwxrwxr-x 8 hadoop hadoop 4096 Apr 16 04:45 ./
drwxr-xr-x 28 hadoop hadoop 4096 Apr 16 07:04 ../
drwxrwxr-x 3 hadoop hadoop 4096 Apr 16 04:45 bin/
drwxrwxr-x 2 hadoop hadoop 4096 Apr 16 07:02 conf/
drwxrwxr-x 4 hadoop hadoop 4096 Apr 16 04:45 examples/
drwxrwxr-x 7 hadoop hadoop 4096 Apr 16 04:45 hcatalog/
drwxrwxr-x 4 hadoop hadoop 4096 Apr 16 06:52 lib/
-rw-r--r-- 1 hadoop hadoop 23828 Nov 8 2014 LICENSE
-rw-r--r-- 1 hadoop hadoop 277 Nov 8 2014 NOTICE
-rw-r--r-- 1 hadoop hadoop 4048 Nov 8 2014 README.txt
-rw-r--r-- 1 hadoop hadoop 340611 Nov 8 2014 RELEASE_NOTES.txt
drwxrwxr-x 3 hadoop hadoop 4096 Apr 16 04:45 scripts/
hadoop@vm2:~/apache-hive-0.14.0-bin$
hadoop配置参考: https://blog.csdn.net/qq_26437925/article/details/78945077
环境变量的设置
# Environment variables for the Hadoop/Hive stack (append to ~/.bashrc).
# Fixes vs. original: $HADOOP_HOME/bin was added to PATH twice; the Hive
# lib path in CLASSPATH was hardcoded instead of reusing $HIVE_HOME.

# --- Java ---
export JAVA_HOME=/home/hadoop/java/jdk1.8.0_151
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH

# --- Hadoop 2.7.4 ---
export HADOOP_HOME=/home/hadoop/hadoop/hadoop-2.7.4
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"

# --- ZooKeeper ---
export ZOOKEEPER_HOME=/home/hadoop/zookeeper/zookeeper-3.4.10
export PATH=$ZOOKEEPER_HOME/bin:$PATH

# --- Hive 0.14 ---
export HIVE_HOME=/home/hadoop/apache-hive-0.14.0-bin
export PATH=$PATH:$HIVE_HOME/bin
# '.' keeps the current directory on the classpath, as in the original setup.
export CLASSPATH=$CLASSPATH:$HIVE_HOME/lib/*:.
mysql-connector-java-5.1.22-bin.jar 拷贝到 /home/hadoop/apache-hive-0.14.0-bin/lib
/home/hadoop/apache-hive-0.14.0-bin/conf
* hive-env.sh
export HADOOP_HOME=/home/hadoop/hadoop/hadoop-2.7.4
* hive-site.xml, 修改元数据存储相关的
<!-- Metastore backing store: MySQL database "hive" on localhost;
     createDatabaseIfNotExist=true lets MySQL create the schema DB on first use. -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
<!-- If MySQL and Hive run on different nodes, replace localhost with the
     MySQL host name (the original note used hadoop02 as the example host). -->
</property>
<!-- Requires mysql-connector-java-*.jar in $HIVE_HOME/lib. -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>username to use against metastore database</description>
</property>
<!-- NOTE(review): plaintext root password in config — fine for a lab VM,
     but use a dedicated low-privilege MySQL user for anything shared. -->
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
<description>password to use against metastore database</description>
</property>
* 数据类型
整型
字符串类型
时间戳
日期
小数
联合类型
文字
Null 值
复杂类型 : 数组,映射,结构体
* 数据库表的操作
-- Tab-delimited plain-text table holding basic employee records.
CREATE TABLE IF NOT EXISTS employee (
  eid         int,
  name        String,
  salary      String,
  destination String
)
COMMENT 'Employee details'
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
  LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
hive> LOAD DATA LOCAL INPATH '/home/hadoop/sample.txt' OVERWRITE INTO TABLE employee;
Loading data to table userdb.employee
Table userdb.employee stats: [numFiles=1, numRows=0, totalSize=161, rawDataSize=0]
OK
Time taken: 1.49 seconds
hive>
HDFS查看存储
分区
https://cwiki.apache.org/confluence/display/Hive/DynamicPartitions
静态分区:
* 可以根据 PARTITIONED BY 子句创建分区表,一个表可以拥有一个或者多个分区,每个分区以文件夹的形式单独存在表文件夹的目录下。
* 分区是以字段的形式在表结构中存在,通过describe table命令可以查看到字段存在,但是该字段不存放实际的数据内容,仅仅是分区的表示。
* 分区建表分为2种,一种是单分区,也就是说在表文件夹目录下只有一级文件夹目录。另外一种是多分区,表文件夹下出现多文件夹嵌套模式。
https://www.cnblogs.com/one--way/p/7550795.html
-- Recreate the static-partition demo table, partitioned by month.
DROP TABLE IF EXISTS tb_partition;

CREATE TABLE tb_partition (
  id   int,
  name String
)
PARTITIONED BY (date_month string)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
  LINES TERMINATED BY '\n';

-- Static partition load: every row of the file lands in partition 201709.
LOAD DATA LOCAL INPATH '/home/hadoop/nameinfo.txt' OVERWRITE INTO TABLE tb_partition PARTITION (date_month='201709');

-- List the partitions that now exist on the table.
SHOW PARTITIONS tb_partition;
nameinfo.txt
1 Lily 201708
2 Andy 201708
3 Tom 201708
1 Lily 201709
2 Andy 201709
3 Tom 201709
1 Lily 201710
2 Andy 201710
3 Tom 201710
-- Empty the table's data (HiveQL comments use '--', not '//').
TRUNCATE TABLE tb_partition;
-- Then re-run the LOAD above; make sure the field delimiter in the file
-- matches the table DDL and the text file is saved as UTF-8.
分区查询
hive> select id,name,date_month from tb_partition where date_month='201708';
OK
1 lily 201708
2 sun 201708
Time taken: 0.372 seconds, Fetched: 2 row(s)
hive>