一、执行hive分区表添加字段命令(背景)
-- Add the new column to the table schema; CASCADE propagates the change
-- to the metadata of every existing partition as well.
ALTER TABLE db_name.staging_user_log_minutes ADD COLUMNS (
    `ext_main_business_line` STRING COMMENT '主业务线'
) CASCADE;
因为历史分区数量太大,执行该命令花了很长时间;期间又有新的调度任务写入新分区,导致部分新分区没有添加上新的字段。
二、解决问题路径(通过HIVE元数据查找)
1、通过表名 找到 TBL_ID
mysql> select * from TBLS where TBL_NAME='staging_user_log_minutes' limit 3 \G
*************************** 1. row ***************************
TBL_ID: 521582
CREATE_TIME: 1555483202
DB_ID: 86
LAST_ACCESS_TIME: 0
OWNER: pcsjob
RETENTION: 0
SD_ID: 1491906
TBL_NAME: staging_user_log_minutes
TBL_TYPE: EXTERNAL_TABLE
VIEW_EXPANDED_TEXT: NULL
VIEW_ORIGINAL_TEXT: NULL
LINK_TARGET_ID: NULL
2、通过 TBL_ID 在 PARTITIONS 表中找到各分区对应的 SD_ID
mysql> select * from PARTITIONS where TBL_ID='521582'
and PART_NAME like 'dt=2019-05-20/hh=11%' \G
*************************** 1. row ***************************
PART_ID: 1234906
CREATE_TIME: 1558321954
LAST_ACCESS_TIME: 0
PART_NAME: dt=2019-05-20/hh=11/mm=00
SD_ID: 1767457
TBL_ID: 521582
LINK_TARGET_ID: NULL
*************************** 2. row ***************************
PART_ID: 1234920
CREATE_TIME: 1558322569
LAST_ACCESS_TIME: 0
PART_NAME: dt=2019-05-20/hh=11/mm=10
SD_ID: 1767476
TBL_ID: 521582
LINK_TARGET_ID: NULL
*************************** 3. row ***************************
PART_ID: 1234933
CREATE_TIME: 1558323222
LAST_ACCESS_TIME: 0
PART_NAME: dt=2019-05-20/hh=11/mm=20
SD_ID: 1767480
TBL_ID: 521582
LINK_TARGET_ID: NULL
3、通过SD_ID 找到 CD_ID
mysql> select * from SDS where SD_ID in (1767457,1767476,1767480) \G
*************************** 1. row ***************************
SD_ID: 1767457
CD_ID: 574420
INPUT_FORMAT: org.apache.hadoop.mapred.TextInputFormat
IS_COMPRESSED:
IS_STOREDASSUBDIRECTORIES:
LOCATION: hdfs://tesla-cluster/apps/hive/warehouse/staging_kafka.db/staging_user_log_minutes/dt=2019-05-20/hh=11/mm=00
NUM_BUCKETS: -1
OUTPUT_FORMAT: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
SERDE_ID: 1767437
*************************** 2. row ***************************
SD_ID: 1767476
CD_ID: 537778
INPUT_FORMAT: org.apache.hadoop.mapred.TextInputFormat
IS_COMPRESSED:
IS_STOREDASSUBDIRECTORIES:
LOCATION: hdfs://tesla-cluster/apps/hive/warehouse/staging_kafka.db/staging_user_log_minutes/dt=2019-05-20/hh=11/mm=10
NUM_BUCKETS: -1
OUTPUT_FORMAT: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
SERDE_ID: 1767456
*************************** 3. row ***************************
SD_ID: 1767480
CD_ID: 574421
INPUT_FORMAT: org.apache.hadoop.mapred.TextInputFormat
IS_COMPRESSED:
IS_STOREDASSUBDIRECTORIES:
LOCATION: hdfs://tesla-cluster/apps/hive/warehouse/staging_kafka.db/staging_user_log_minutes/dt=2019-05-20/hh=11/mm=20
NUM_BUCKETS: -1
OUTPUT_FORMAT: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
SERDE_ID: 1767460
4、通过 CD_ID 在 COLUMNS_V2 表中查看对应分区是否已添加上新的字段
mysql> select * from COLUMNS_V2 where CD_ID='574421'
order by INTEGER_IDX desc limit 2 \G
*************************** 1. row ***************************
CD_ID: 574421
COMMENT: 主业务线
COLUMN_NAME: ext_main_business_line
TYPE_NAME: string
INTEGER_IDX: 146
*************************** 2. row ***************************
CD_ID: 574421
COMMENT: from deserializer
COLUMN_NAME: ext_is_member_name
TYPE_NAME: string
INTEGER_IDX: 145
mysql> select * from COLUMNS_V2 where CD_ID='537778'
order by INTEGER_IDX desc limit 2 \G
*************************** 1. row ***************************
CD_ID: 537778
COMMENT: from deserializer
COLUMN_NAME: ext_is_member_name
TYPE_NAME: string
INTEGER_IDX: 145
*************************** 2. row ***************************
CD_ID: 537778
COMMENT: from deserializer
COLUMN_NAME: ext_is_member
TYPE_NAME: int
INTEGER_IDX: 144
通过上面结果可以看到,CD_ID=537778(对应分区 dt=2019-05-20/hh=11/mm=10)没有添加上最新的字段,而 CD_ID=574421 已经添加成功。
三、解决办法
-- Repair one partition (CD_ID = 537778) by appending the missing column
-- row directly into the metastore's COLUMNS_V2 table.
--
-- Improvements over a plain hard-coded INSERT:
--   * INTEGER_IDX is computed as MAX(INTEGER_IDX) + 1 for this CD_ID instead
--     of being hard-coded, so the column always lands at the end regardless
--     of how many columns that partition descriptor actually has.
--   * The HAVING guard makes the statement idempotent: if the column already
--     exists for this CD_ID, no group survives and nothing is inserted
--     (COLUMNS_V2 has a primary key on (CD_ID, COLUMN_NAME), so a duplicate
--     insert would otherwise error out).
--   * COMMENT is backtick-quoted because it is a MySQL keyword.
--
-- NOTE(review): back up the metastore DB before running, and repeat with the
-- other missing CD_IDs if more than one partition was skipped by the CASCADE.
INSERT INTO COLUMNS_V2 (CD_ID, `COMMENT`, COLUMN_NAME, TYPE_NAME, INTEGER_IDX)
SELECT
    CD_ID,
    '主业务线',
    'ext_main_business_line',
    'string',
    MAX(INTEGER_IDX) + 1
FROM COLUMNS_V2
WHERE CD_ID = 537778
GROUP BY CD_ID
HAVING SUM(COLUMN_NAME = 'ext_main_business_line') = 0;