hdfs 存储测试

hdfs存储测试对比

数据格式
| 存储格式 | TEXTFILE | SequenceFile | RCFile | Avro | Parquet | ORC |
| --- | --- | --- | --- | --- | --- | --- |
| 数据大小 | 65G | 67G | 61.5G | 68.2G | 28.9G | 8.3G |
| 导入 Hive 耗时(秒) | 286.319 | 118.45 | 106.212 | 163.988 | 136.663 | 130.186 |
-- Baseline table for the storage-format comparison: rows are kept as plain
-- text, one record per line, with tab-separated fields.
-- No collection-item delimiter is declared, so the array<...> columns rely on
-- Hive's default item separator.
CREATE TABLE IF NOT EXISTS prod_purchased_txt (
    uid                      STRING,
    event_time               BIGINT,
    touch_point_id           STRING,
    et_city                  STRING,
    et_city_tier             STRING,
    et_has_gifts             BOOLEAN,
    et_has_points            BOOLEAN,
    et_order_discount        FLOAT,
    et_order_list_value      FLOAT,
    et_order_quantity        FLOAT,
    et_order_type            STRING,
    et_order_value           FLOAT,
    et_pay_mode              STRING,
    et_prod_brand_list       ARRAY<STRING>,
    et_prod_cate_name_list   ARRAY<STRING>,
    et_prod_id_list          ARRAY<STRING>,
    et_prod_list_price_list  ARRAY<FLOAT>,
    et_prod_name_list        ARRAY<STRING>,
    et_prod_quantity_list    ARRAY<FLOAT>,
    et_province              STRING,
    et_purchase_date_type    STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
-- ORC variant of the purchase-event table, partitioned by process_date.
-- The column list matches prod_purchased_txt so rows can be copied across
-- with INSERT ... SELECT.
-- Fix: the statement previously began with "REATE", which is a syntax error.
CREATE TABLE IF NOT EXISTS prod_purchased_orc (
    uid                      STRING,
    event_time               BIGINT,
    touch_point_id           STRING,
    et_city                  STRING,
    et_city_tier             STRING,
    et_has_gifts             BOOLEAN,
    et_has_points            BOOLEAN,
    et_order_discount        FLOAT,
    et_order_list_value      FLOAT,
    et_order_quantity        FLOAT,
    et_order_type            STRING,
    et_order_value           FLOAT,
    et_pay_mode              STRING,
    et_prod_brand_list       ARRAY<STRING>,
    et_prod_cate_name_list   ARRAY<STRING>,
    et_prod_id_list          ARRAY<STRING>,
    et_prod_list_price_list  ARRAY<FLOAT>,
    et_prod_name_list        ARRAY<STRING>,
    et_prod_quantity_list    ARRAY<FLOAT>,
    et_province              STRING,
    et_purchase_date_type    STRING
)
PARTITIONED BY (process_date STRING)
-- NOTE(review): these delimiters describe a text layout; with STORED AS ORC
-- they are presumably ignored. Kept to preserve the original DDL; confirm
-- and drop if unnecessary.
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
STORED AS ORC;

======================================
-- Storage-format clauses for the other variants tested. These are fragments,
-- not standalone statements; presumably each one replaces the final STORED AS
-- line of the CREATE TABLE above to build that variant (confirm).
STORED AS SequenceFile;
STORED AS RCfile ;
STORED AS Avro ;
STORED AS parquetfile ;
STORED AS ORC ;

只有 TEXTFILE 格式的表可以直接用 LOAD DATA 加载文本文件;其他存储格式的表需要通过 INSERT INTO ... SELECT 从文本表导入数据。

-- Load the local text export into the TEXTFILE table.
LOAD DATA LOCAL INPATH '/home/hadoop/prod_purchased.txt'
INTO TABLE prod_purchased_txt;
-- Copy every row of the text table into the 2019-06-01 partition of the
-- target table. Columns are listed explicitly, in prod_purchased_txt's declared
-- order, instead of SELECT * so the column mapping does not silently depend on
-- the source table's column order.
-- NOTE(review): prod_purchased_sq is not created in this file; presumably it is
-- the SequenceFile variant built from the partitioned DDL. Confirm.
INSERT INTO TABLE prod_purchased_sq PARTITION (process_date = '2019-06-01')
SELECT
    uid,
    event_time,
    touch_point_id,
    et_city,
    et_city_tier,
    et_has_gifts,
    et_has_points,
    et_order_discount,
    et_order_list_value,
    et_order_quantity,
    et_order_type,
    et_order_value,
    et_pay_mode,
    et_prod_brand_list,
    et_prod_cate_name_list,
    et_prod_id_list,
    et_prod_list_price_list,
    et_prod_name_list,
    et_prod_quantity_list,
    et_province,
    et_purchase_date_type
FROM prod_purchased_txt;

猜你喜欢

转载自blog.csdn.net/lunhuasxk/article/details/90767147
今日推荐