假设我们有宾馆近10年的数据,格式如下:
每列的意义
| hotel | | |
| | h_id | id |
| | h_region | 旅馆行政区划 |
| | h_hname | 旅馆名称 |
| | h_address | 旅馆地址 |
| | h_uname | 姓名 |
| | h_code | 证件号码 |
| | h_start | 开房时间 |
| | h_end | 退房时间 |
| | h_start_m | 开房时间时间戳 |
| | h_end_m | 退房时间时间戳 |
| | h_homecode | 入住房号 |
数据案例:
1,2,宾馆642,杭州市下城区xx484路373号,姓名65,U104,2006/06/23 00:00:00,2006/06/23 00:03:00,1150992000000,1150992180000,495
2,8,宾馆29,杭州市余杭区xx866路927号,姓名64,U193,2006/06/24 00:00:00,2006/06/24 00:16:00,1151078400000,1151079360000,376
3,8,宾馆190,杭州市余杭区xx81路801号,姓名56,U149,2006/06/24 00:00:00,2006/06/24 00:10:00,1151078400000,1151079000000,67
4,8,宾馆771,杭州市余杭区xx570路341号,姓名60,U870,2006/06/25 00:00:00,2006/06/25 00:06:00,1151164800000,1151165160000,761
5,1,宾馆584,杭州市上城区xx177路847号,姓名13,U552,2006/06/26 00:00:00,2006/06/26 00:09:00,1151251200000,1151251740000,583
6,5,宾馆375,杭州市西湖区xx532路372号,姓名93,U362,2006/06/27 00:00:00,2006/06/27 00:01:00,1151337600000,1151337660000,470
410,1,宾馆212,杭州市上城区xx67路444号,姓名35,U295,2007/01/09 00:00:00,2007/01/09 00:03:00,1168272000000,1168272180000,6
411,0,宾馆732,杭州市其他区xx447路975号,姓名71,U546,2007/01/09 00:00:00,2007/01/09 00:09:00,1168272000000,1168272540000,912
412,0,宾馆502,杭州市其他区xx700路944号,姓名46,U580,2007/01/09 00:00:00,2007/01/09 00:01:00,1168272000000,1168272060000,742
413,8,宾馆907,杭州市余杭区xx817路444号,姓名60,U653,2007/01/10 00:00:00,2007/01/10 00:21:00,1168358400000,1168359660000,629
414,7,宾馆426,杭州市萧山区xx529路190号,姓名25,U425,2007/01/11 00:00:00,2007/01/11 00:16:00,1168444800000,1168445760000,985
415,3,宾馆227,杭州市拱墅区xx572路114号,姓名12,U453,2007/01/11 00:00:00,2007/01/11 00:11:00,1168444800000,1168445460000,734
416,7,宾馆804,杭州市萧山区xx879路358号,姓名4,U234,2007/01/11 00:00:00,2007/01/11 00:12:00,1168444800000,1168445520000,156
-- Hotel records table, partitioned by the string key ds.
-- Column legend (taken from the field list in these notes):
--   h_id        record id
--   h_region    administrative region of the hotel
--   h_hname     hotel name
--   h_address   hotel address
--   h_uname     name
--   h_code      ID document number
--   h_start     check-in time, kept as text such as 2006/06/23 00:00:00
--   h_end       check-out time, same text format
--   h_start_m   check-in time as a numeric timestamp (samples look like epoch milliseconds, confirm)
--   h_end_m     check-out time as a numeric timestamp
--   h_homecode  room number
-- The input files are comma-separated plain text.
create table hotel (
    h_id        bigint,
    h_region    string,
    h_hname     string,
    h_address   string,
    h_uname     string,
    h_code      string,
    h_start     string,
    h_end       string,
    h_start_m   bigint,
    h_end_m     bigint,
    h_homecode  string
)
partitioned by (ds string)
row format delimited
    fields terminated by '\,'
stored as textfile;
-- Stage the raw file once in an unpartitioned table. LOAD DATA only copies the
-- file into the target location and never filters rows, so loading the same
-- hotel_data.txt directly into both partitions would put every row (2006 and
-- 2007 alike) into each of them.
create table if not exists hotel_staging (
    h_id        bigint,
    h_region    string,
    h_hname     string,
    h_address   string,
    h_uname     string,
    h_code      string,
    h_start     string,
    h_end       string,
    h_start_m   bigint,
    h_end_m     bigint,
    h_homecode  string
)
row format delimited
    fields terminated by ','
stored as textfile;

load data local inpath '/home/hadoop/data/hotel_data.txt'
    overwrite into table hotel_staging;

-- Fill each month partition with only the rows whose check-in month matches.
-- h_start is text shaped like yyyy/MM/dd HH:mm:ss, so its first 7 characters
-- identify the month.
insert overwrite table hotel partition (ds = '2006-06')
select
    h_id,
    h_region,
    h_hname,
    h_address,
    h_uname,
    h_code,
    h_start,
    h_end,
    h_start_m,
    h_end_m,
    h_homecode
from hotel_staging
where substr(h_start, 1, 7) = '2006/06';

insert overwrite table hotel partition (ds = '2007-01')
select
    h_id,
    h_region,
    h_hname,
    h_address,
    h_uname,
    h_code,
    h_start,
    h_end,
    h_start_m,
    h_end_m,
    h_homecode
from hotel_staging
where substr(h_start, 1, 7) = '2007/01';

-- Each query now returns only the rows that belong to that month.
select * from hotel where ds = '2006-06';
select * from hotel where ds = '2007-01';
-- Replace the contents of prob and cateb with the local data files.
-- The definitions of both tables are not part of this section.
load data local inpath '/home/hadoop/data/pro100000.txt'
    overwrite into table prob;
load data local inpath '/home/hadoop/data/cate1000.txt'
    overwrite into table cateb;
-- Small demo table for partition behaviour: two columns plus the partition key ds.
create table invites (
    id    int,
    name  string
)
partitioned by (ds string)
row format delimited
    fields terminated by '\,'
stored as textfile;

-- One local file per day partition. Each load replaces that partition only.
load data local inpath '/home/hadoop/data/aa.txt'
    overwrite into table invites partition (ds='2013-08-16');
load data local inpath '/home/hadoop/data/a2.txt'
    overwrite into table invites partition (ds='2013-08-17');
hive (default)> select * from invites where ds ='2013-08-16';
OK
invites.id invites.name invites.ds
1 aa 2013-08-16
2 bb 2013-08-16
3 cc 2013-08-16
4 ee 2013-08-16
5 ff 2013-08-16
Time taken: 0.125 seconds, Fetched: 5 row(s)
hive (default)> select * from invites where ds ='2013-08-17';
OK
invites.id invites.name invites.ds
6 aa1 2013-08-17
7 bb2 2013-08-17
8 cc3 2013-08-17
9 ee4 2013-08-17
1 ff5 2013-08-17
Time taken: 0.084 seconds, Fetched: 5 row(s)
hive (default)> select * from invites ;
OK
invites.id invites.name invites.ds
1 aa 2013-08-16
2 bb 2013-08-16
3 cc 2013-08-16
4 ee 2013-08-16
5 ff 2013-08-16
6 aa1 2013-08-17
7 bb2 2013-08-17
8 cc3 2013-08-17
9 ee4 2013-08-17
1 ff5 2013-08-17
Time taken: 0.055 seconds, Fetched: 10 row(s)
hive (default)> select * from invites where ds ='2013-08-12';
OK
invites.id invites.name invites.ds
Time taken: 0.069 seconds