非分区表如何在线转分区表呢,怎样才能不影响业务?
使用pg_pathman可以直接做到,参考:PostgreSQL普通表转分区表
那么使用pg原生分区该如何实现呢?
做之前我们可以先想想在线转分区表的难点在哪里呢?
1、转分区表过程中,数据仍然在插入怎么办?
2、如何避免长时间锁表?
首先,避免数据实时插入,我们需要创建触发器来新插入的数据路由到新的表中。为了避免长时间锁表我们可以在后台进行数据迁移,分批进行迁移等等。
例子:
1、创建普通表并插入测试数据
bill=# create table orig_table
bill-# ( id serial not null,
bill(# data float default random()
bill(# );
CREATE TABLE
bill=# create index orig_data_index on orig_table(data);
CREATE INDEX
bill=# create index orig_id_index on orig_table(id);
CREATE INDEX
bill=# insert into orig_table (id)
bill-# select nextval('orig_table_id_seq')
bill-# from generate_series(1,100000);
INSERT 0 100000
2、创建原生分区表
该分区表是用来将我们的原普通表数据插入其中,然后对表进行rename。
bill=# create table part_table
bill-# (like orig_table including defaults including indexes including constraints)
bill-# partition by range(data);
CREATE TABLE
bill=# create table part_table_p1
bill-# partition of part_table
bill-# for values from (minvalue) to (0.25);
partition of part_table
for values from (0.5) to (0.75);
bill=# create table part_table_p2
bill-# partition of part_table
bill-# for values from (0.25) to (0.5);
CREATE TABLE
bill=#
bill=# create table part_table_p3
bill-# partition of part_table
bill-# for values from (0.5) to (0.75);
CREATE TABLE
bill=#
bill=# create table part_table_p4
bill-# partition of part_table
bill-# for values from (0.75) to (maxvalue);
CREATE TABLE
3、创建触发器函数
因为我们要确保在迁移数据的过程中,向原先表中插入数据不会影响,因此需要创建触发器来实现。
bill=# create or replace function part_v_trigger()
bill-# returns trigger
bill-# language plpgsql
bill-# as
bill-# $TRIG$
bill$#
bill$# begin
bill$# IF TG_OP = 'INSERT'
bill$# THEN
bill$# INSERT INTO part_table VALUES(NEW.id, NEW.data);
bill$# RETURN NEW;
bill$# ELSIF TG_OP = 'DELETE'
bill$# THEN
bill$# DELETE FROM part_table WHERE id = OLD.id;
bill$# DELETE FROM old_orig_table WHERE id = OLD.id;
bill$# RETURN OLD;
bill$# ELSE -- UPDATE
bill$# DELETE FROM old_orig_table WHERE id = OLD.id;
bill$# IF FOUND
bill$# THEN
bill$# INSERT INTO part_table VALUES(NEW.id, NEW.data);
bill$# ELSE
bill$# UPDATE part_table SET id = NEW.id, data = NEW.data
bill$# WHERE id = OLD.id;
bill$# END IF;
bill$# RETURN NEW;
bill$# END IF;
bill$# end
bill$#
bill$# $TRIG$;
CREATE FUNCTION
4、创建视图
我们可以在一个事务中创建一个视图包含这两张表。 由于我们不会再向旧的非分区表添加新的数据,所以我们禁用该表的vacuum。
bill=# BEGIN;
BEGIN
bill=*#
bill=*# ALTER TABLE orig_table RENAME TO old_orig_table;
ALTER TABLE
bill=*#
bill=*# ALTER TABLE old_orig_table SET(
bill(*# autovacuum_enabled = false, toast.autovacuum_enabled = false
bill(*# );
ALTER TABLE
bill=*#
bill=*# CREATE VIEW orig_table AS
bill-*# SELECT id, data FROM old_orig_table
bill-*# UNION ALL
bill-*# SELECT id, data FROM part_table
bill-*# ;
CREATE VIEW
bill=*#
bill=*# CREATE TRIGGER orig_table_part_trigger
bill-*# INSTEAD OF INSERT OR UPDATE OR DELETE on orig_table
bill-*# FOR EACH ROW
bill-*# EXECUTE FUNCTION part_v_trigger();
CREATE TRIGGER
bill=*#
bill=*# COMMIT;
COMMIT
5、数据迁移
使用CTE语句进行迁移,同时每1000条提交一次。
bill=# create or replace function f_move_data() returns void as $$
bill$# declare
bill$# t_count int;
bill$# begin
bill$# loop
bill$# EXECUTE 'with a as
bill$# (delete from only orig_table where id = any(array
bill$# (select id
bill$# from only orig_table limit 1000
bill$# for update skip locked)) returning *) insert into part_table
bill$# select * from a;';
bill$# select into t_count count(*) from old_orig_table;
bill$# exit when t_count = 0;
bill$# end loop;
bill$# end;
bill$# $$ language plpgsql;
CREATE FUNCTION
bill=# select f_move_data();
f_move_data
-------------
(1 row)
bill=# select count(*) from part_table;
count
--------
100000
(1 row)
6、表重命名
该步骤会短暂锁表,避免出现问题,我们可以加上锁超时参数。
BEGIN;
set lock_timeout ='3s';
DROP VIEW orig_table CASCADE;
DROP FUNCTION part_v_trigger();
ALTER SEQUENCE orig_table_id_seq OWNED BY part_table.id;
ALTER TABLE part_table RENAME TO orig_table;
COMMIT;
最后删除原旧表:
DROP TABLE old_orig_table;