1.通用数据归档方法
#1. 创建归档表,一般在原表名后面添加_bak。
CREATE TABLE `ota_order_bak` (
`id` bigint(11) NOT NULL AUTO_INCREMENT COMMENT '主键',
`order_id` varchar(255) DEFAULT NULL COMMENT '订单id',
`ota_id` varchar(255) DEFAULT NULL COMMENT 'ota',
`check_in_date` varchar(255) DEFAULT NULL COMMENT '入住日期',
`check_out_date` varchar(255) DEFAULT NULL COMMENT '离店日期',
`hotel_id` varchar(255) DEFAULT NULL COMMENT '酒店ID',
`guest_name` varchar(255) DEFAULT NULL COMMENT '顾客',
`purcharse_time` timestamp NULL DEFAULT NULL COMMENT '购买时间',
`create_time` datetime DEFAULT NULL,
`update_time` datetime DEFAULT NULL,
`create_user` varchar(255) DEFAULT NULL,
`update_user` varchar(255) DEFAULT NULL,
`status` int(4) DEFAULT '1' COMMENT '状态 :1 正常 , 0 删除',
`hotel_name` varchar(255) DEFAULT NULL,
`price` decimal(10,0) DEFAULT NULL,
`remark` longtext,
PRIMARY KEY (`id`),
KEY `IDX_order_id` (`order_id`) USING BTREE,
KEY `hotel_name` (`hotel_name`) USING BTREE,
KEY `ota_id` (`ota_id`) USING BTREE,
KEY `IDX_purcharse_time` (`purcharse_time`) USING BTREE,
KEY `IDX_create_time` (`create_time`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8
PARTITION BY RANGE (to_days(create_time)) (
PARTITION p201808 VALUES LESS THAN (to_days('2018-09-01')),
PARTITION p201809 VALUES LESS THAN (to_days('2018-10-01')),
PARTITION p201810 VALUES LESS THAN (to_days('2018-11-01')),
PARTITION p201811 VALUES LESS THAN (to_days('2018-12-01')),
PARTITION p201812 VALUES LESS THAN (to_days('2019-01-01')),
PARTITION p201901 VALUES LESS THAN (to_days('2019-02-01')),
PARTITION p201902 VALUES LESS THAN (to_days('2019-03-01')),
PARTITION p201903 VALUES LESS THAN (to_days('2019-04-01')),
PARTITION p201904 VALUES LESS THAN (to_days('2019-05-01')),
PARTITION p201905 VALUES LESS THAN (to_days('2019-06-01')),
PARTITION p201906 VALUES LESS THAN (to_days('2019-07-01')),
PARTITION p201907 VALUES LESS THAN (to_days('2019-08-01')),
PARTITION p201908 VALUES LESS THAN (to_days('2019-09-01')),
PARTITION p201909 VALUES LESS THAN (to_days('2019-10-01')),
PARTITION p201910 VALUES LESS THAN (to_days('2019-11-01')),
PARTITION p201911 VALUES LESS THAN (to_days('2019-12-01')),
PARTITION p201912 VALUES LESS THAN (to_days('2020-01-01')));
#2. 插入原表中无效的数据(需要跟开发同学确认数据保留范围)
create table tbl_p201808 as select * from ota_order where create_time between '2018-08-01 00:00:00' and '2018-08-31 23:59:59';
#3. 跟归档表分区做分区交换
alter table ota_order_bak exchange partition p201808 with table tbl_p201808;
#4. 删除原表中已经规范的数据
delete from ota_order where create_time between '2018-08-01 00:00:00' and '2018-08-31 23:59:59' limit 3000;
2.优化后的归档方式
#1. 创建中间表
CREATE TABLE `ota_order_2020` (........) ENGINE=InnoDB DEFAULT CHARSET=utf8
PARTITION BY RANGE (to_days(create_time)) (
PARTITION p201808 VALUES LESS THAN (to_days('2018-09-01')),
PARTITION p201809 VALUES LESS THAN (to_days('2018-10-01')),
PARTITION p201810 VALUES LESS THAN (to_days('2018-11-01')),
PARTITION p201811 VALUES LESS THAN (to_days('2018-12-01')),
PARTITION p201812 VALUES LESS THAN (to_days('2019-01-01')),
PARTITION p201901 VALUES LESS THAN (to_days('2019-02-01')),
PARTITION p201902 VALUES LESS THAN (to_days('2019-03-01')),
PARTITION p201903 VALUES LESS THAN (to_days('2019-04-01')),
PARTITION p201904 VALUES LESS THAN (to_days('2019-05-01')),
PARTITION p201905 VALUES LESS THAN (to_days('2019-06-01')),
PARTITION p201906 VALUES LESS THAN (to_days('2019-07-01')),
PARTITION p201907 VALUES LESS THAN (to_days('2019-08-01')),
PARTITION p201908 VALUES LESS THAN (to_days('2019-09-01')),
PARTITION p201909 VALUES LESS THAN (to_days('2019-10-01')),
PARTITION p201910 VALUES LESS THAN (to_days('2019-11-01')),
PARTITION p201911 VALUES LESS THAN (to_days('2019-12-01')),
PARTITION p201912 VALUES LESS THAN (to_days('2020-01-01')));
#2. 插入原表中有效的数据,如果数据量在100W左右可以在业务低峰期直接插入,如果比较大,建议采用dataX来做,可以控制频率和大小,之前我这边用Go封装了dataX可以实现自动生成json文件,自定义大小去执行。
insert into ota_order_2020 select * from ota_order where create_time between '2020-08-01 00:00:00' and '2020-08-31 23:59:59';
#3. 表重命名
alter table ota_order rename to ota_order_bak;
alter table ota_order_2020 rename to ota_order;
#4. 插入差异数据
insert into ota_order select * from ota_order_bak a where not exists (select 1 from ota_order b where a.id = b.id);
#5. ota_order_bak改造成分区表,如果表比较大不建议直接改造,可以先创建好分区表,通过dataX把导入进去即可。
#6. 后续的归档方法
#创建中间普遍表
create table ota_order_mid like ota_order;
#交换原表无效数据分区到普通表
alter table ota_order exchange partition p201808 with table ota_order_mid;
##交换普通表数据到归档表的相应分区
alter table ota_order_bak exchange partition p201808 with table ota_order_mid;
这样原表和归档表都是按月的分区表,只需要创建一个中间普通表,在业务低峰期做两次分区交换,既可以删除无效数据,又能回收空,而且没有空间碎片,不会影响表上的索引及SQL的执行计划。