Hive1.2.1<常用操作示例>

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/Gpwner/article/details/74080105

版本:apache-hive-1.2.1
使用到的数据:链接:http://pan.baidu.com/s/1c2xmk1M 密码:uucs

1. train 0

1.创建10min数据托管表。
create table TenminData_tg (  
`DATASOURCE` string ,
`FAN_NO` string ,
`DATA_DATE` string,
`FAN_STATUS` double,
`WIND_SPEED` double comment '风速',
`ROTOR_RS` double comment '电机转速',
`RS` double comment '桨叶转速',
`WIND_DIRECTION` double comment '风向',
`YAW_ANGLE` double comment '偏航角度',
`BOX_T` double comment '齿轮箱油温',
`BOX_BEARING_T` double comment '齿轮箱轴承油温',
`ENVIRON_T` double comment '环境温度',
`WT_T` double comment '机舱温度',
`ROTOR_GROUP_T` double  comment '发电机温度',
`A_PHASE_C` double comment 'A相电流',
`B_PHASE_C` double comment 'B相电流',
`C_PHASE_C` double comment 'C相电流',
`A_PHASE_V` double comment 'A相电压',
`B_PHASE_V` double comment 'B相电压',
`C_PHASE_V` double comment 'C相电压',
`MACHINE_FREQUENCY` double comment '电机频率',
`REACTIVE_POWER` double comment '无功功率',
`POWER` double comment '有功功率',
`POWER_FACTOR` double comment '功率因素',
`TOTAL_POWER` double comment '总发电量',
`TOTAL_GEN_TIME` double comment '总发电时间',
`DOWN_TIME` double comment '故障时间',
`STANDBY_TIME` double comment '备用时间',
`REMARK` double comment '备注')
row format delimited fields terminated by ','

2.向托管表中导入数据。
load data local inpath 'usr/local/data/fc=1_all/*' into table tenmindata1;

3.创建10min数据外部表。
create external table TenminData_external (  
`DATASOURCE` string ,
`FAN_NO` string ,
`DATA_DATE` string,
`FAN_STATUS` double,
`WIND_SPEED` double comment '风速',
`ROTOR_RS` double comment '电机转速',
`RS` double comment '桨叶转速',
`WIND_DIRECTION` double comment '风向',
`YAW_ANGLE` double comment '偏航角度',
`BOX_T` double comment '齿轮箱油温',
`BOX_BEARING_T` double comment '齿轮箱轴承油温',
`ENVIRON_T` double comment '环境温度',
`WT_T` double comment '机舱温度',
`ROTOR_GROUP_T` double  comment '发电机温度',
`A_PHASE_C` double comment 'A相电流',
`B_PHASE_C` double comment 'B相电流',
`C_PHASE_C` double comment 'C相电流',
`A_PHASE_V` double comment 'A相电压',
`B_PHASE_V` double comment 'B相电压',
`C_PHASE_V` double comment 'C相电压',
`MACHINE_FREQUENCY` double comment '电机频率',
`REACTIVE_POWER` double comment '无功功率',
`POWER` double comment '有功功率',
`POWER_FACTOR` double comment '功率因素',
`TOTAL_POWER` double comment '总发电量',
`TOTAL_GEN_TIME` double comment '总发电时间',
`DOWN_TIME` double comment '故障时间',
`STANDBY_TIME` double comment '备用时间',
`REMARK` double comment '备注')
row format delimited fields terminated by ','
location '/dataAnalysis/duliming/fc=1_all/';

4.删除托管表。
drop table tenmindata_tg;
5.删除外部表。
drop table tenmindata_external;

2.train 1

1.创建10min数据表,以风场和分机作为分区字段。
create table TenminData (  
`DATASOURCE` string ,
`FAN_NO` string ,
`DATA_DATE` string,
`FAN_STATUS` double,
`WIND_SPEED` double comment '风速',
`ROTOR_RS` double comment '电机转速',
`RS` double comment '桨叶转速',
`WIND_DIRECTION` double comment '风向',
`YAW_ANGLE` double comment '偏航角度',
`BOX_T` double comment '齿轮箱油温',
`BOX_BEARING_T` double comment '齿轮箱轴承油温',
`ENVIRON_T` double comment '环境温度',
`WT_T` double comment '机舱温度',
`ROTOR_GROUP_T` double  comment '发电机温度',
`A_PHASE_C` double comment 'A相电流',
`B_PHASE_C` double comment 'B相电流',
`C_PHASE_C` double comment 'C相电流',
`A_PHASE_V` double comment 'A相电压',
`B_PHASE_V` double comment 'B相电压',
`C_PHASE_V` double comment 'C相电压',
`MACHINE_FREQUENCY` double comment '电机频率',
`REACTIVE_POWER` double comment '无功功率',
`POWER` double comment '有功功率',
`POWER_FACTOR` double comment '功率因素',
`TOTAL_POWER` double comment '总发电量',
`TOTAL_GEN_TIME` double comment '总发电时间',
`DOWN_TIME` double comment '故障时间',
`STANDBY_TIME` double comment '备用时间',
`REMARK` double comment '备注')
PARTITIONED BY ( fc string,fj string)
row format delimited fields terminated by ','

2.创建风场=1,风机=WT02287的分区。
alter table tenmindata add partition(fc=1,fj='WT02287');

4.导入数据到十分钟数据的指定分区中。
load data local inpath '/usr/local/data/fc=1/fj=WT02287/*.bz2' into table tenmindata partition(fc=1,fj='WT02287');

5.查看tenmindata下面有哪些分区。
show partitions tenmindata;

6.创建未分区的tenmindata表。
create table TenminData1 (  
`DATASOURCE` string ,
`FAN_NO` string ,
`DATA_DATE` string,
`FAN_STATUS` double,
`WIND_SPEED` double comment '风速',
`ROTOR_RS` double comment '电机转速',
`RS` double comment '桨叶转速',
`WIND_DIRECTION` double comment '风向',
`YAW_ANGLE` double comment '偏航角度',
`BOX_T` double comment '齿轮箱油温',
`BOX_BEARING_T` double comment '齿轮箱轴承油温',
`ENVIRON_T` double comment '环境温度',
`WT_T` double comment '机舱温度',
`ROTOR_GROUP_T` double  comment '发电机温度',
`A_PHASE_C` double comment 'A相电流',
`B_PHASE_C` double comment 'B相电流',
`C_PHASE_C` double comment 'C相电流',
`A_PHASE_V` double comment 'A相电压',
`B_PHASE_V` double comment 'B相电压',
`C_PHASE_V` double comment 'C相电压',
`MACHINE_FREQUENCY` double comment '电机频率',
`REACTIVE_POWER` double comment '无功功率',
`POWER` double comment '有功功率',
`POWER_FACTOR` double comment '功率因素',
`TOTAL_POWER` double comment '总发电量',
`TOTAL_GEN_TIME` double comment '总发电时间',
`DOWN_TIME` double comment '故障时间',
`STANDBY_TIME` double comment '备用时间',
`REMARK` double comment '备注')
row format delimited fields terminated by ',';

7.向未分区hive表导入数据。
load data local inpath 'usr/local/data/fc=1_all/*' into table tenmindata1;

8.查询未分区hive表。
select * from tenmindata where fan_code='WT02287' limit 10;


9.对比分区和未分区的查询效率

//删除已有分区
alter table tenmindata drop partition(fc=1,fj='WT02287');
hadoop fs -copyFromLocal /usr/local/data/fc\=1 /user/hive/warehouse/tenmindata/
//自动恢复分区
MSCK REPAIR TABLE tenmindata;
show partitions tenmindata;


select * from tenmindata where fc=1 and fj='WT02287' limit 10;
select * from tenmindata1 where fan_no='WT02287' limit 10;

3.修改表

1.修改表名。
ALTER TABLE tenmindata RENAME TO 10min;
2.修改表注释。
ALTER TABLE 10min SET TBLPROPERTIES('comment'='10min数据');
3.修改表的分隔符。
ALTER TABLE 10min SET SERDEPROPERTIES('field.delim'=',');
4.修改表的SERDE。
ALTER TABLE 10min SET SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES(
   "separatorChar" = ",",
   "quoteChar"     = "'",
   "escapeChar"    = "\n"
)

5.删除表分区
alter table 10min drop if exists partition(fc=1,fj='WT02316');
6.新建分区
alter table 10min add if not exists partition(fc=1,fj='WT02316')
location '/data/fc=1/fj=WT02316';

select * from 10min whre fc=1 and fj='WT02316';

ALTER TABLE 10min PARTITION(FC=1,FJ='WT02316') SET SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES(
   "separatorChar" = ",",
   "quoteChar"     = "'",
   "escapeChar"    = "\n"
)



7.重命名分区
alter table 10min partition(fc=1,fj='WT02316') RENAME TO PARTITION(fc=1,fj='WT02317');
select * from 10min where fj='WT02316' limit 10;
select * from 10min where fj='WT02317' limit 10;

8.自动恢复分区。
msck repair table 10min;
ALTER TABLE table_name RECOVER PARTITIONS;

9.修改列名称
alter table 10min change wind_speed ws string;
show create table 10min;

// 修改表字段名称,确定列位置
alter table 10min change ws wind_speed string after rs;

alter table 10min change wind_speed ws string first;

alter table 10min change ws wind_speed string comment '风速' after fan_status;

10.添加列
alter table 10min add columns (position string);

11.删除列
alter table 10min replace columns (position string);

desc formatted 10min;
查看到表的serDe为OpenCSVSerDe,不支持replaceALTER TABLE 10min SET SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';

alter table 10min replace columns (`DATASOURCE` string ,
`FAN_NO` string ,
`DATA_DATE` string,
`FAN_STATUS` double,
`WIND_SPEED` double comment '风速',
`ROTOR_RS` double comment '电机转速',
`RS` double comment '桨叶转速',
`WIND_DIRECTION` double comment '风向',
`YAW_ANGLE` double comment '偏航角度',
`BOX_T` double comment '齿轮箱油温',
`BOX_BEARING_T` double comment '齿轮箱轴承油温',
`ENVIRON_T` double comment '环境温度',
`WT_T` double comment '机舱温度',
`ROTOR_GROUP_T` double  comment '发电机温度',
`A_PHASE_C` double comment 'A相电流',
`B_PHASE_C` double comment 'B相电流',
`C_PHASE_C` double comment 'C相电流',
`A_PHASE_V` double comment 'A相电压',
`B_PHASE_V` double comment 'B相电压',
`C_PHASE_V` double comment 'C相电压',
`MACHINE_FREQUENCY` double comment '电机频率',
`REACTIVE_POWER` double comment '无功功率',
`POWER` double comment '有功功率',
`POWER_FACTOR` double comment '功率因素',
`TOTAL_POWER` double comment '总发电量',
`TOTAL_GEN_TIME` double comment '总发电时间',
`DOWN_TIME` double comment '故障时间',
`STANDBY_TIME` double comment '备用时间',
`REMARK` double comment '备注');

4.视图

1.创建视图。
create view if not exists normal_ws_view (fanCode,windspeed,rs,power)
as select fan_no,wind_speed,rs,power from 10min where wind_speed>4 and wind_speed <=10;

select * from normal_ws_view limit 100;

2.修改视图
alter view normal_ws_view as select fan_no,wind_speed,rs,power from 10min where wind_speed>3 and wind_speed<=8;
select * from normal_ws_view limit 100;

3.删除视图
drop view normal_ws_view;

5.索引

1.创建索引
create index 10min_index on table 10min (FAN_NO, WIND_SPEED)
as 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler'
WITH DEFERRED REBUILD
row format delimited fields terminated by ',';

2.填充索引表的索引数据。
alter index 10min_index on 10min rebuild;

3.查看索引表创建的索引。
show indexes on 10min;

4.删除索引表和索引数据。
drop index 10min_index on 10min;

5.使用索引的时候需要开启两个参数。
hive.optimize.index.filter
hive.optimize.index.groupby

6.train 2

1.创建10min数据表,按照风场分区,按照风机分桶,按照时间排序。
create table TenminData3 (  
`DATASOURCE` string ,
`FAN_NO` string ,
`DATA_DATE` string,
`FAN_STATUS` double,
`WIND_SPEED` double comment '风速',
`ROTOR_RS` double comment '电机转速',
`RS` double comment '桨叶转速',
`WIND_DIRECTION` double comment '风向',
`YAW_ANGLE` double comment '偏航角度',
`BOX_T` double comment '齿轮箱油温',
`BOX_BEARING_T` double comment '齿轮箱轴承油温',
`ENVIRON_T` double comment '环境温度',
`WT_T` double comment '机舱温度',
`ROTOR_GROUP_T` double  comment '发电机温度',
`A_PHASE_C` double comment 'A相电流',
`B_PHASE_C` double comment 'B相电流',
`C_PHASE_C` double comment 'C相电流',
`A_PHASE_V` double comment 'A相电压',
`B_PHASE_V` double comment 'B相电压',
`C_PHASE_V` double comment 'C相电压',
`MACHINE_FREQUENCY` double comment '电机频率',
`REACTIVE_POWER` double comment '无功功率',
`POWER` double comment '有功功率',
`POWER_FACTOR` double comment '功率因素',
`TOTAL_POWER` double comment '总发电量',
`TOTAL_GEN_TIME` double comment '总发电时间',
`DOWN_TIME` double comment '故障时间',
`STANDBY_TIME` double comment '备用时间',
`REMARK` double comment '备注')
PARTITIONED BY ( fc string)
CLUSTERED BY (FAN_NO) SORTED BY (DATA_DATE ASC) INTO 5 BUCKETS
row format delimited fields terminated by ',';


2.向10min数据表填充数据。
set hive.enforce.bucketing=true;
INSERT OVERWRITE TABLE tenmindata3 partition(fc=1) 
SELECT DATASOURCE,FAN_NO,DATA_DATE,FAN_STATUS,WIND_SPEED,ROTOR_RS,RS,WIND_DIRECTION,YAW_ANGLE,BOX_T,BOX_BEARING_T,ENVIRON_T,WT_T,ROTOR_GROUP_T,A_PHASE_C,B_PHASE_C,C_PHASE_C,A_PHASE_V,B_PHASE_V,C_PHASE_V,MACHINE_FREQUENCY,REACTIVE_POWER,POWER,POWER_FACTOR,TOTAL_POWER,TOTAL_GEN_TIME,DOWN_TIME,STANDBY_TIME,REMARK 
FROM 10min WHERE FC=1 and fj>='WT02287' AND fj<='WT02291';


3.查看10min数据表下文件分布情况,并查看文件内容。
hadoop fs -cat /apps/hive/warehouse/hivetest.db/tenmindata3/fc=1/000000_0
4.查询10min数据表中指定风场,风机某段时间内的数据。
select * from tenmindata3 where fc=1 and FAN_NO='WT02287' LIMIT 100;
5.使用TABLESAMPLE子句对表进行取样。
SELECT * FROM TENMINDATA3 TABLESAMPLE(BUCKET 1 OUT OF 2 ON FAN_NO);

7.train 3

1.创建10min数据表。rowformat=avro。
参照:https://cwiki.apache.org/confluence/display/Hive/AvroSerDe

create table TenminData_avro (  
`DATASOURCE` string ,
`FAN_NO` string ,
`DATA_DATE` string,
`FAN_STATUS` double,
`WIND_SPEED` double comment '风速',
`ROTOR_RS` double comment '电机转速',
`RS` double comment '桨叶转速',
`WIND_DIRECTION` double comment '风向',
`YAW_ANGLE` double comment '偏航角度',
`BOX_T` double comment '齿轮箱油温',
`BOX_BEARING_T` double comment '齿轮箱轴承油温',
`ENVIRON_T` double comment '环境温度',
`WT_T` double comment '机舱温度',
`ROTOR_GROUP_T` double  comment '发电机温度',
`A_PHASE_C` double comment 'A相电流',
`B_PHASE_C` double comment 'B相电流',
`C_PHASE_C` double comment 'C相电流',
`A_PHASE_V` double comment 'A相电压',
`B_PHASE_V` double comment 'B相电压',
`C_PHASE_V` double comment 'C相电压',
`MACHINE_FREQUENCY` double comment '电机频率',
`REACTIVE_POWER` double comment '无功功率',
`POWER` double comment '有功功率',
`POWER_FACTOR` double comment '功率因素',
`TOTAL_POWER` double comment '总发电量',
`TOTAL_GEN_TIME` double comment '总发电时间',
`DOWN_TIME` double comment '故障时间',
`STANDBY_TIME` double comment '备用时间',
`REMARK` double comment '备注')
PARTITIONED BY ( fc string)
CLUSTERED BY (FAN_NO) SORTED BY (DATA_DATE ASC) INTO 5 BUCKETS
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED as INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';

2.向表中灌入数据。
INSERT OVERWRITE TABLE tenmindata_avro partition(fc=1) SELECT DATASOURCE,FAN_NO,DATA_DATE,FAN_STATUS,WIND_SPEED,ROTOR_RS,RS,WIND_DIRECTION,YAW_ANGLE,BOX_T,BOX_BEARING_T,ENVIRON_T,WT_T,ROTOR_GROUP_T,A_PHASE_C,B_PHASE_C,C_PHASE_C,A_PHASE_V,B_PHASE_V,C_PHASE_V,MACHINE_FREQUENCY,REACTIVE_POWER,POWER,POWER_FACTOR,TOTAL_POWER,TOTAL_GEN_TIME,DOWN_TIME,STANDBY_TIME,REMARK FROM TENMINDATA WHERE FC=1;


3.1.创建10min数据表。rowformat=orc
create table TenminData_orc (  
`DATASOURCE` string ,
`FAN_NO` string ,
`DATA_DATE` string,
`FAN_STATUS` double,
`WIND_SPEED` double comment '风速',
`ROTOR_RS` double comment '电机转速',
`RS` double comment '桨叶转速',
`WIND_DIRECTION` double comment '风向',
`YAW_ANGLE` double comment '偏航角度',
`BOX_T` double comment '齿轮箱油温',
`BOX_BEARING_T` double comment '齿轮箱轴承油温',
`ENVIRON_T` double comment '环境温度',
`WT_T` double comment '机舱温度',
`ROTOR_GROUP_T` double  comment '发电机温度',
`A_PHASE_C` double comment 'A相电流',
`B_PHASE_C` double comment 'B相电流',
`C_PHASE_C` double comment 'C相电流',
`A_PHASE_V` double comment 'A相电压',
`B_PHASE_V` double comment 'B相电压',
`C_PHASE_V` double comment 'C相电压',
`MACHINE_FREQUENCY` double comment '电机频率',
`REACTIVE_POWER` double comment '无功功率',
`POWER` double comment '有功功率',
`POWER_FACTOR` double comment '功率因素',
`TOTAL_POWER` double comment '总发电量',
`TOTAL_GEN_TIME` double comment '总发电时间',
`DOWN_TIME` double comment '故障时间',
`STANDBY_TIME` double comment '备用时间',
`REMARK` double comment '备注')
PARTITIONED BY ( fc string)
CLUSTERED BY (FAN_NO) SORTED BY (DATA_DATE ASC) INTO 5 BUCKETS
STORED AS ORC

等价于:
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
  STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
  OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'

4.向表中灌入数据。
INSERT OVERWRITE TABLE tenmindata_orc partition(fc=1) SELECT DATASOURCE,FAN_NO,DATA_DATE,FAN_STATUS,WIND_SPEED,ROTOR_RS,RS,WIND_DIRECTION,YAW_ANGLE,BOX_T,BOX_BEARING_T,ENVIRON_T,WT_T,ROTOR_GROUP_T,A_PHASE_C,B_PHASE_C,C_PHASE_C,A_PHASE_V,B_PHASE_V,C_PHASE_V,MACHINE_FREQUENCY,REACTIVE_POWER,POWER,POWER_FACTOR,TOTAL_POWER,TOTAL_GEN_TIME,DOWN_TIME,STANDBY_TIME,REMARK FROM TENMINDATA WHERE FC=1;

5.parquet
STORED AS PARQUET /
STORED AS PARQUETFILE
等价于
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
  STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
  OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'


6.RCFILE

STORED AS RCFILE
等价于
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.RCFileInputFormat'
  OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.RCFileOutputFormat'

7.SEQUENCEFILE
STORED AS SEQUENCEFILE
等价于
STORED AS INPUTFORMAT
  'org.apache.hadoop.mapred.SequenceFileInputFormat'
  OUTPUTFORMAT
  'org.apache.hadoop.mapred.SequenceFileOutputFormat'

8.TEXTFILE
STORED AS TEXTFILE

STORED AS INPUTFORMAT
  'org.apache.hadoop.mapred.TextInputFormat'
  OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'

8.train 4

1.创建10min数据表
create table TenminData4 (  
`DATASOURCE` string ,
`FAN_NO` string ,
`DATA_DATE` string,
`FAN_STATUS` double,
`WIND_SPEED` double comment '风速',
`ROTOR_RS` double comment '电机转速',
`RS` double comment '桨叶转速',
`WIND_DIRECTION` double comment '风向',
`YAW_ANGLE` double comment '偏航角度',
`BOX_T` double comment '齿轮箱油温',
`BOX_BEARING_T` double comment '齿轮箱轴承油温',
`ENVIRON_T` double comment '环境温度',
`WT_T` double comment '机舱温度',
`ROTOR_GROUP_T` double  comment '发电机温度',
`A_PHASE_C` double comment 'A相电流',
`B_PHASE_C` double comment 'B相电流',
`C_PHASE_C` double comment 'C相电流',
`A_PHASE_V` double comment 'A相电压',
`B_PHASE_V` double comment 'B相电压',
`C_PHASE_V` double comment 'C相电压',
`MACHINE_FREQUENCY` double comment '电机频率',
`REACTIVE_POWER` double comment '无功功率',
`POWER` double comment '有功功率',
`POWER_FACTOR` double comment '功率因素',
`TOTAL_POWER` double comment '总发电量',
`TOTAL_GEN_TIME` double comment '总发电时间',
`DOWN_TIME` double comment '故障时间',
`STANDBY_TIME` double comment '备用时间',
`REMARK` double comment '备注')
PARTITIONED BY ( fc string,fj string)
row format delimited fields terminated by ','

2.使用insert overwrite select注入数据。
INSERT OVERWRITE TABLE tenmindata4 partition(fc=1,fj='WT02287') SELECT DATASOURCE,FAN_NO,DATA_DATE,FAN_STATUS,WIND_SPEED,ROTOR_RS,RS,WIND_DIRECTION,YAW_ANGLE,BOX_T,BOX_BEARING_T,ENVIRON_T,WT_T,ROTOR_GROUP_T,A_PHASE_C,B_PHASE_C,C_PHASE_C,A_PHASE_V,B_PHASE_V,C_PHASE_V,MACHINE_FREQUENCY,REACTIVE_POWER,POWER,POWER_FACTOR,TOTAL_POWER,TOTAL_GEN_TIME,DOWN_TIME,STANDBY_TIME,REMARK FROM TENMINDATA WHERE FC=1 AND FJ='WT02287';

3.使用From的方式插入数据到tenmindata4.
FROM TENMINDATA INSERT OVERWRITE TABLE tenmindata4 partition(fc=1,fj='WT02288')
SELECT DATASOURCE,FAN_NO,DATA_DATE,FAN_STATUS,WIND_SPEED,ROTOR_RS,RS,WIND_DIRECTION,YAW_ANGLE,BOX_T,BOX_BEARING_T,ENVIRON_T,WT_T,ROTOR_GROUP_T,A_PHASE_C,B_PHASE_C,C_PHASE_C,A_PHASE_V,B_PHASE_V,C_PHASE_V,MACHINE_FREQUENCY,REACTIVE_POWER,POWER,POWER_FACTOR,TOTAL_POWER,TOTAL_GEN_TIME,DOWN_TIME,STANDBY_TIME,REMARK
WHERE FC=1 AND FJ='WT02288';


FROM TENMINDATA
INSERT OVERWRITE TABLE tenmindata4 partition(fc=1,fj='WT02289')
SELECT DATASOURCE,FAN_NO,DATA_DATE,FAN_STATUS,WIND_SPEED,ROTOR_RS,RS,WIND_DIRECTION,YAW_ANGLE,BOX_T,BOX_BEARING_T,ENVIRON_T,WT_T,ROTOR_GROUP_T,A_PHASE_C,B_PHASE_C,C_PHASE_C,A_PHASE_V,B_PHASE_V,C_PHASE_V,MACHINE_FREQUENCY,REACTIVE_POWER,POWER,POWER_FACTOR,TOTAL_POWER,TOTAL_GEN_TIME,DOWN_TIME,STANDBY_TIME,REMARK
WHERE FC=1 AND FJ='WT02289'
INSERT OVERWRITE TABLE tenmindata4 partition(fc=1,fj='WT02290')
SELECT DATASOURCE,FAN_NO,DATA_DATE,FAN_STATUS,WIND_SPEED,ROTOR_RS,RS,WIND_DIRECTION,YAW_ANGLE,BOX_T,BOX_BEARING_T,ENVIRON_T,WT_T,ROTOR_GROUP_T,A_PHASE_C,B_PHASE_C,C_PHASE_C,A_PHASE_V,B_PHASE_V,C_PHASE_V,MACHINE_FREQUENCY,REACTIVE_POWER,POWER,POWER_FACTOR,TOTAL_POWER,TOTAL_GEN_TIME,DOWN_TIME,STANDBY_TIME,REMARK
WHERE FC=1 AND FJ='WT02290';


4.使用动态分区插入数据。
INSERT OVERWRITE TABLE tenmindata4 partition(fc=1,fj) 
SELECT DATASOURCE,FAN_NO ,DATA_DATE,FAN_STATUS,WIND_SPEED,ROTOR_RS,RS,WIND_DIRECTION,YAW_ANGLE,BOX_T,BOX_BEARING_T,ENVIRON_T,WT_T,ROTOR_GROUP_T,A_PHASE_C,B_PHASE_C,C_PHASE_C,A_PHASE_V,B_PHASE_V,C_PHASE_V,MACHINE_FREQUENCY,REACTIVE_POWER,POWER,POWER_FACTOR,TOTAL_POWER,TOTAL_GEN_TIME,DOWN_TIME,STANDBY_TIME,REMARK,FAN_NO AS fj FROM TENMINDATA
WHERE FC=1 AND FJ>='WT02291' AND FJ<='WT02293';

FROM TENMINDATA INSERT OVERWRITE TABLE tenmindata4 partition(fc,fj)
SELECT DATASOURCE,FAN_NO,DATA_DATE,FAN_STATUS,WIND_SPEED,ROTOR_RS,RS,WIND_DIRECTION,YAW_ANGLE,BOX_T,BOX_BEARING_T,ENVIRON_T,WT_T,ROTOR_GROUP_T,A_PHASE_C,B_PHASE_C,C_PHASE_C,A_PHASE_V,B_PHASE_V,C_PHASE_V,MACHINE_FREQUENCY,REACTIVE_POWER,POWER,POWER_FACTOR,TOTAL_POWER,TOTAL_GEN_TIME,DOWN_TIME,STANDBY_TIME,REMARK,FAN_STATUS as fc,FAN_NO as fj
WHERE FC=1 AND FJ>='WT02287';

全部动态如何实现?即:风场为动态,风机为动态插入。
FROM TENMINDATA INSERT OVERWRITE TABLE tenmindata4 partition(fc,fj)
SELECT DATASOURCE,FAN_NO,DATA_DATE,FAN_STATUS,WIND_SPEED,ROTOR_RS,RS,WIND_DIRECTION,YAW_ANGLE,BOX_T,BOX_BEARING_T,ENVIRON_T,WT_T,ROTOR_GROUP_T,
A_PHASE_C,B_PHASE_C,C_PHASE_C,A_PHASE_V,B_PHASE_V,C_PHASE_V,MACHINE_FREQUENCY,REACTIVE_POWER,POWER,POWER_FACTOR,TOTAL_POWER,TOTAL_GEN_TIME,DOWN_TIME,STANDBY_TIME,REMARK,FAN_STATUS as fc,FAN_NO as fj
WHERE FC=1 AND FJ>='WT02287';



5.使用CATS创建tenmindata5;

CREATE TABLE tenmindata5 as select * from tenmindata where fc=1 and fj='WT02287'

创建表的时候指定表的SerDe为sequence,并且插入的数据按照时间排序。

6.使用CREATE TABLE LIKE创建tenmindata6;
create table tenmindata6 like tenmindata;
show tables;
show create table tenmindata6;


猜你喜欢

转载自blog.csdn.net/Gpwner/article/details/74080105