二、扩容
2.1 扩容副本
对于副本节点的扩容,当ck集群新增副本节点后,ck会在zk的协调下自动将原副本中的数据同步至新增副本节点中(zk只负责元数据与复制任务的协调,实际数据由副本节点之间拉取完成)。
1、扩容副本的大致步骤
- 在扩容副本节点中修改配置,在集群配置中添加当前副本节点
- 启动扩容副本节点,并创建相关复制表(此时该副本节点查询请求可正常路由选择所有的副本节点,但原副本节点配置文件未刷新,只能路由选择原有副本节点)
- 原副本节点修改配置文件,在集群配置中添加新增副本节点信息
2、案例测试
1)扩容前配置
-- 配置文件
<clickhouse_remote_servers>
<!-- Test only shard config for testing distributed storage -->
<shard1_repl1>
<shard>
<!-- Optional. Shard weight when writing data. Default: 1. -->
<weight>1</weight>
<!-- Optional. Whether to write data to just one of the replicas. Default: false (write data to all replicas). -->
<internal_replication>true</internal_replication>
<replica>
<host>sdw1</host>
<port>9000</port>
</replica>
</shard>
</shard1_repl1>
</clickhouse_remote_servers>
-- sdw1集群信息
sdw1 :) select * from system.clusters;
SELECT *
FROM system.clusters
┌─cluster──────┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name─┬─host_address──┬─port─┬─is_local─┬─user────┬─default_database─┬─errors_count─┬─estimated_recovery_time─┐
│ shard1_repl1 │ 1 │ 1 │ 1 │ sdw1 │ 172.16.104.12 │ 9000 │ 1 │ default │ │ 0 │ 0 │
└──────────────┴───────────┴──────────────┴─────────────┴───────────┴───────────────┴──────┴──────────┴─────────┴──────────────────┴──────────────┴─────────────────────────┘
1 rows in set. Elapsed: 0.005 sec.
-- sdw1上复制表信息
sdw1 :) show tables;
SHOW TABLES
┌─name─┐
│ tt1 │
└──────┘
1 rows in set. Elapsed: 0.007 sec.
sdw1 :) select * from tt1 order by id;
SELECT *
FROM tt1
ORDER BY id ASC
┌─id─┬─name─┬─create_date─┐
│ 4 │ ww │ 2020-01-02 │
└────┴──────┴─────────────┘
┌─id─┬─name─┬─create_date─┐
│ 6 │ dsk │ 2020-07-20 │
└────┴──────┴─────────────┘
┌─id─┬─name─┬─create_date─┐
│ 19 │ bw │ 2021-02-18 │
└────┴──────┴─────────────┘
3 rows in set. Elapsed: 0.012 sec.
2)修改配置文件
<clickhouse_remote_servers>
<!-- Test only shard config for testing distributed storage -->
<shard1_repl1>
<shard>
<!-- Optional. Shard weight when writing data. Default: 1. -->
<weight>1</weight>
<!-- Optional. Whether to write data to just one of the replicas. Default: false (write data to all replicas). -->
<internal_replication>true</internal_replication>
<replica>
<host>sdw1</host>
<port>9000</port>
</replica>
<!-- 将新增的副本节点添加至集群配置中 -->
<replica>
<host>sdw2</host>
<port>9000</port>
</replica>
</shard>
</shard1_repl1>
</clickhouse_remote_servers>
<!-- 新增副本节点按照规律填写macros信息 -->
<macros>
<layer>01</layer>
<shard>01</shard>
<replica>cluster01-01-2</replica>
</macros>
3)修改sdw2节点配置信息后,启动新增副本节点ck服务,并手动创建相关表结构。此时对于sdw2节点来讲,集群的副本信息是完整的,可正常路由到任意节点,但是对于sdw1节点来讲,由于配置文件还未刷新,集群副本仍然还是只有sdw1一个。
-- 启动sdw2节点
# systemctl restart clickhouse-server
-- 在sdw2节点上手动创建表结构
sdw2 :) create table db1.tt1 (`id` Int32,`name` String,`create_date` Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/tt1', '{replica}') PARTITION BY toYYYYMM(create_date) ORDER BY id SETTINGS index_granularity = 8192;
sdw2 :) create table db1.tt2 on cluster shard1_repl1 (`id` Int32,`name` String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/tt2', '{replica}') ORDER BY id SETTINGS index_granularity = 8192;
-- sdw1上ck集群仍然还是1节点
sdw1 :) select * from system.clusters;
SELECT *
FROM system.clusters
┌─cluster──────┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name─┬─host_address──┬─port─┬─is_local─┬─user────┬─default_database─┬─errors_count─┬─estimated_recovery_time─┐
│ shard1_repl1 │ 1 │ 1 │ 1 │ sdw1 │ 172.16.104.12 │ 9000 │ 1 │ default │ │ 0 │ 0 │
└──────────────┴───────────┴──────────────┴─────────────┴───────────┴───────────────┴──────┴──────────┴─────────┴──────────────────┴──────────────┴─────────────────────────┘
1 rows in set. Elapsed: 0.006 sec.
-- sdw2上ck集群信息已经刷新为扩容之后的状态
sdw2 :) select * from system.clusters;
SELECT *
FROM system.clusters
┌─cluster──────┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name─┬─host_address──┬─port─┬─is_local─┬─user────┬─default_database─┬─errors_count─┬─estimated_recovery_time─┐
│ shard1_repl1 │ 1 │ 1 │ 1 │ sdw1 │ 172.16.104.12 │ 9000 │ 0 │ default │ │ 0 │ 0 │
│ shard1_repl1 │ 1 │ 1 │ 2 │ sdw2 │ 172.16.104.13 │ 9000 │ 1 │ default │ │ 0 │ 0 │
└──────────────┴───────────┴──────────────┴─────────────┴───────────┴───────────────┴──────┴──────────┴─────────┴──────────────────┴──────────────┴─────────────────────────┘
2 rows in set. Elapsed: 0.011 sec.
4)检查sdw2节点数据是否可正常复制
sdw2 :) select * from tt1 order by id;
SELECT *
FROM tt1
ORDER BY id ASC
┌─id─┬─name─┬─create_date─┐
│ 4 │ ww │ 2020-01-02 │
└────┴──────┴─────────────┘
┌─id─┬─name─┬─create_date─┐
│ 6 │ dsk │ 2020-07-20 │
└────┴──────┴─────────────┘
┌─id─┬─name─┬─create_date─┐
│ 19 │ bw │ 2021-02-18 │
└────┴──────┴─────────────┘
3 rows in set. Elapsed: 0.011 sec.
5)修改sdw1节点配置文件检查配置生效情况
-- 检查sdw1的ck服务集群信息,集群信息已经刷新为扩容后信息
sdw1 :) select * from system.clusters;
SELECT *
FROM system.clusters
┌─cluster──────┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name─┬─host_address──┬─port─┬─is_local─┬─user────┬─default_database─┬─errors_count─┬─estimated_recovery_time─┐
│ shard1_repl1 │ 1 │ 1 │ 1 │ sdw1 │ 172.16.104.12 │ 9000 │ 1 │ default │ │ 0 │ 0 │
│ shard1_repl1 │ 1 │ 1 │ 2 │ sdw2 │ 172.16.104.13 │ 9000 │ 0 │ default │ │ 0 │ 0 │
└──────────────┴───────────┴──────────────┴─────────────┴───────────┴───────────────┴──────┴──────────┴─────────┴──────────────────┴──────────────┴─────────────────────────┘
2 rows in set. Elapsed: 0.010 sec.
6)检查sdw1集群服务信息
-- sdw2正常写入数据后,在sdw2上查询
sdw2 :) select * from tt1 order by id;
SELECT *
FROM tt1
ORDER BY id ASC
┌─id─┬─name─┬─create_date─┐
│ 1 │ aa │ 2020-01-04 │
└────┴──────┴─────────────┘
┌─id─┬─name─┬─create_date─┐
│ 4 │ ww │ 2020-01-02 │
└────┴──────┴─────────────┘
┌─id─┬─name─┬─create_date─┐
│ 6 │ dsk │ 2020-07-20 │
└────┴──────┴─────────────┘
┌─id─┬─name─┬─create_date─┐
│ 19 │ bw │ 2021-02-18 │
└────┴──────┴─────────────┘
4 rows in set. Elapsed: 0.015 sec.
-- sdw1检查数据
sdw1 :) select * from tt1 order by id;
SELECT *
FROM tt1
ORDER BY id ASC
┌─id─┬─name─┬─create_date─┐
│ 1 │ aa │ 2020-01-04 │
└────┴──────┴─────────────┘
┌─id─┬─name─┬─create_date─┐
│ 4 │ ww │ 2020-01-02 │
└────┴──────┴─────────────┘
┌─id─┬─name─┬─create_date─┐
│ 6 │ dsk │ 2020-07-20 │
└────┴──────┴─────────────┘
┌─id─┬─name─┬─create_date─┐
│ 19 │ bw │ 2021-02-18 │
└────┴──────┴─────────────┘
4 rows in set. Elapsed: 0.015 sec.
2.2 扩容分片
1、扩容分片的大致步骤
方案一(历史数据重分布):
- 在原分片节点、新增分片节点中新增一个集群,新集群为我们扩容后所有节点的信息
- 在原分片节点上,创建与历史表结构一致的新表table_bak,需要留意engine、以及cluster的选择
- 通过快照表的方式,将原集群的分布式表数据备份迁移至新集群对应的分布式表中,该步骤会自动对表数据进行分片的路由选择
- 通过rename将新集群的本地表table_bak替换为table,并新建分布式表,完成数据的重分布
方案二(不迁移历史数据):
- 无论是新增分片节点还是原分片节点,直接在原集群中加入新增分片的信息
- 在新增分片节点中手动创建本地表、分布式表,在原分片节点重建分布式表语句
- 历史数据仍然保留在原分片节点中,扩容后新的数据写入正常路由至集群各个分片
不迁移历史数据的情况下,表尽量设置TTL,这样可以避免某一节点数据量倾斜巨大,导致单个节点负载过大。
2、案例测试(方案一:历史数据重分布)
1)扩容前环境检查
-- 配置文件
<clickhouse_remote_servers>
<!-- Test only shard config for testing distributed storage -->
<shard2_repl0>
<shard>
<replica>
<host>mdw</host>
<port>9000</port>
</replica>
</shard>
</shard2_repl0>
</clickhouse_remote_servers>
-- 集群信息
mdw :) select * from system.clusters;
SELECT *
FROM system.clusters
┌─cluster──────┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name─┬─host_address──┬─port─┬─is_local─┬─user────┬─default_database─┬─errors_count─┬─estimated_recovery_time─┐
│ shard2_repl0 │ 1 │ 1 │ 1 │ mdw │ 172.16.104.11 │ 9000 │ 1 │ default │ │ 0 │ 0 │
└──────────────┴───────────┴──────────────┴─────────────┴───────────┴───────────────┴──────┴──────────┴─────────┴──────────────────┴──────────────┴─────────────────────────┘
2)新增集群配置
<clickhouse_remote_servers>
<!-- 新增分片节点不需要再配置该集群信息 -->
<shard2_repl0>
<shard>
<replica>
<host>mdw</host>
<port>9000</port>
</replica>
</shard>
</shard2_repl0>
<!-- 新增集群配置 -->
<shard2_repl0_new>
<shard>
<replica>
<host>mdw</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>sdw3</host>
<port>9000</port>
</replica>
</shard>
</shard2_repl0_new>
</clickhouse_remote_servers>
3)修改mdw、sdw3节点配置文件后,检查配置文件生效情况
-- mdw集群信息
mdw :) select * from system.clusters;
SELECT *
FROM system.clusters
┌─cluster──────────┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name─┬─host_address──┬─port─┬─is_local─┬─user────┬─default_database─┬─errors_count─┬─estimated_recovery_time─┐
│ shard2_repl0 │ 1 │ 1 │ 1 │ mdw │ 172.16.104.11 │ 9000 │ 1 │ default │ │ 0 │ 0 │
│ shard2_repl0_new │ 1 │ 1 │ 1 │ mdw │ 172.16.104.11 │ 9000 │ 1 │ default │ │ 0 │ 0 │
│ shard2_repl0_new │ 2 │ 1 │ 1 │ sdw3 │ 172.16.104.14 │ 9000 │ 0 │ default │ │ 0 │ 0 │
└──────────────────┴───────────┴──────────────┴─────────────┴───────────┴───────────────┴──────┴──────────┴─────────┴──────────────────┴──────────────┴─────────────────────────┘
3 rows in set. Elapsed: 0.011 sec.
sdw3 :) select * from system.clusters;
SELECT *
FROM system.clusters
┌─cluster──────────┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name─┬─host_address──┬─port─┬─is_local─┬─user────┬─default_database─┬─errors_count─┬─estimated_recovery_time─┐
│ shard2_repl0_new │ 1 │ 1 │ 1 │ mdw │ 172.16.104.11 │ 9000 │ 0 │ default │ │ 0 │ 0 │
│ shard2_repl0_new │ 2 │ 1 │ 1 │ sdw3 │ 172.16.104.14 │ 9000 │ 1 │ default │ │ 0 │ 0 │
└──────────────────┴───────────┴──────────────┴─────────────┴───────────┴───────────────┴──────┴──────────┴─────────┴──────────────────┴──────────────┴─────────────────────────┘
2 rows in set. Elapsed: 0.006 sec.
4)通过快照表的方式在新的集群下创建表,并进行数据备份迁移
mdw :) create table db1.t2_new_local on cluster shard2_repl0_new as db1.t2_local ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/t2_new_local', '{replica}') ORDER BY id;
mdw :) create table db1.t8_new_local on cluster shard2_repl0_new as db1.t8_local ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/t8_new_local', '{replica}') ORDER BY id;
mdw :) create table db1.t2_new on cluster shard2_repl0_new as db1.t2 ENGINE = Distributed('shard2_repl0_new', 'db1', 't2_new_local', rand())
mdw :) create table db1.t8_new on cluster shard2_repl0_new as db1.t8 ENGINE = Distributed('shard2_repl0_new', 'db1', 't8_new_local', rand())
mdw :) insert into t2_new select * from t2;
mdw :) insert into t8_new select * from t8;
-- 检查数据分布
mdw :) select * from t2_new
SELECT *
FROM t2_new
┌─id─┬─name─┐
│ 1 │ aa │
│ 2 │ bb │
└────┴──────┘
┌─id─┬─name─┐
│ 3 │ cc │
│ 4 │ dd │
│ 5 │ ee │
└────┴──────┘
5 rows in set. Elapsed: 0.013 sec.
mdw :) select * from t2_new_local;
SELECT *
FROM t2_new_local
┌─id─┬─name─┐
│ 1 │ aa │
│ 2 │ bb │
└────┴──────┘
sdw3 :) select * from t2_new_local;
SELECT *
FROM t2_new_local
┌─id─┬─name─┐
│ 3 │ cc │
│ 4 │ dd │
│ 5 │ ee │
└────┴──────┘
3 rows in set. Elapsed: 0.006 sec.
5)通过rename对本地表进行名字替换,新建分布式表
mdw :) rename table db1.t2_local to db1.t2_bak_local,db1.t2_new_local to db1.t2_local;
mdw :) rename table db1.t2 to db1.t2_bak;
mdw :) rename table db1.t8_local to db1.t8_bak_local,db1.t8_new_local to db1.t8_local;
mdw :) rename table db1.t8 to db1.t8_bak;
sdw3 :) rename table db1.t2_local to db1.t2_bak_local,db1.t2_new_local to db1.t2_local;
sdw3 :) rename table db1.t2 to db1.t2_bak;
sdw3 :) rename table db1.t8_local to db1.t8_bak_local,db1.t8_new_local to db1.t8_local;
sdw3 :) rename table db1.t8 to db1.t8_bak;
mdw :) create table db1.t2(`id` Int32,`name` String) ENGINE = Distributed('shard2_repl0_new', 'db1', 't2_local', rand())
mdw :) create table db1.t8 on cluster shard2_repl0_new (`id` Int32,`name` String) ENGINE = Distributed('shard2_repl0_new', 'db1', 't8_local', rand())
mdw :) select * from t2;
SELECT *
FROM t2
┌─id─┬─name─┐
│ 1 │ aa │
│ 2 │ bb │
└────┴──────┘
┌─id─┬─name─┐
│ 3 │ cc │
│ 4 │ dd │
│ 5 │ ee │
└────┴──────┘
5 rows in set. Elapsed: 0.032 sec.
mdw :) select * from t2_local;
SELECT *
FROM t2_local
┌─id─┬─name─┐
│ 1 │ aa │
│ 2 │ bb │
└────┴──────┘
2 rows in set. Elapsed: 0.006 sec.
sdw3 :) select * from t2_local
SELECT *
FROM t2_local
┌─id─┬─name─┐
│ 3 │ cc │
│ 4 │ dd │
│ 5 │ ee │
└────┴──────┘
3 rows in set. Elapsed: 0.020 sec.
6)删除无效表
mdw :) drop table t2_bak_local on cluster shard2_repl0;
mdw :) drop table t2_bak on cluster shard2_repl0;
mdw :) drop table t8_bak_local on cluster shard2_repl0;
mdw :) drop table t8_bak on cluster shard2_repl0;
3、案例测试(方案二:不迁移历史数据)
1)扩容前环境检查
<clickhouse_remote_servers>
<shard2_repl0>
<shard>
<replica>
<host>mdw</host>
<port>9000</port>
</replica>
</shard>
</shard2_repl0>
</clickhouse_remote_servers>
mdw :) select * from system.clusters;
SELECT *
FROM system.clusters
┌─cluster──────┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name─┬─host_address──┬─port─┬─is_local─┬─user────┬─default_database─┬─errors_count─┬─estimated_recovery_time─┐
│ shard2_repl0 │ 1 │ 1 │ 1 │ mdw │ 172.16.104.11 │ 9000 │ 1 │ default │ │ 0 │ 0 │
└──────────────┴───────────┴──────────────┴─────────────┴───────────┴───────────────┴──────┴──────────┴─────────┴──────────────────┴──────────────┴─────────────────────────┘
1 rows in set. Elapsed: 0.008 sec.
2)所有分片节点修改配置信息,在原集群下添加新增分片节点信息
<clickhouse_remote_servers>
<shard2_repl0>
<shard>
<replica>
<host>mdw</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>sdw3</host>
<port>9000</port>
</replica>
</shard>
</shard2_repl0>
</clickhouse_remote_servers>
mdw :) select * from system.clusters;
SELECT *
FROM system.clusters
┌─cluster──────┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name─┬─host_address──┬─port─┬─is_local─┬─user────┬─default_database─┬─errors_count─┬─estimated_recovery_time─┐
│ shard2_repl0 │ 1 │ 1 │ 1 │ mdw │ 172.16.104.11 │ 9000 │ 1 │ default │ │ 0 │ 0 │
│ shard2_repl0 │ 2 │ 1 │ 1 │ sdw3 │ 172.16.104.14 │ 9000 │ 0 │ default │ │ 0 │ 0 │
└──────────────┴───────────┴──────────────┴─────────────┴───────────┴───────────────┴──────┴──────────┴─────────┴──────────────────┴──────────────┴─────────────────────────┘
2 rows in set. Elapsed: 0.006 sec.
sdw3 :) select * from system.clusters;
SELECT *
FROM system.clusters
┌─cluster──────┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name─┬─host_address──┬─port─┬─is_local─┬─user────┬─default_database─┬─errors_count─┬─estimated_recovery_time─┐
│ shard2_repl0 │ 1 │ 1 │ 1 │ mdw │ 172.16.104.11 │ 9000 │ 0 │ default │ │ 0 │ 0 │
│ shard2_repl0 │ 2 │ 1 │ 1 │ sdw3 │ 172.16.104.14 │ 9000 │ 1 │ default │ │ 0 │ 0 │
└──────────────┴───────────┴──────────────┴─────────────┴───────────┴───────────────┴──────┴──────────┴─────────┴──────────────────┴──────────────┴─────────────────────────┘
2 rows in set. Elapsed: 0.006 sec.
3)在新增的分片节点下手动新建本地表和分布式表
sdw3 :) create table db1.t2_aa_local on cluster shard2_repl0 (`id` Int32,`name` String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/t2_aa_local', '{replica}') ORDER BY id;
sdw3 :) create table t2_aa(`id` Int32,`name` String) ENGINE = Distributed('shard2_repl0', 'db1', 't2_aa_local', rand())
mdw :) select * from t2_aa
SELECT *
FROM t2_aa
┌─id─┬─name─┐
│ 1 │ aa │
│ 2 │ bb │
│ 3 │ cc │
│ 4 │ dd │
│ 5 │ ee │
└────┴──────┘
5 rows in set. Elapsed: 0.016 sec.
mdw :) select * from t2_aa_local
SELECT *
FROM t2_aa_local
┌─id─┬─name─┐
│ 1 │ aa │
│ 2 │ bb │
│ 3 │ cc │
│ 4 │ dd │
│ 5 │ ee │
└────┴──────┘
5 rows in set. Elapsed: 0.005 sec.
sdw3 :) select * from t2_aa
SELECT *
FROM t2_aa
┌─id─┬─name─┐
│ 1 │ aa │
│ 2 │ bb │
│ 3 │ cc │
│ 4 │ dd │
│ 5 │ ee │
└────┴──────┘
5 rows in set. Elapsed: 0.016 sec.
sdw3 :) select * from t2_aa_local
SELECT *
FROM t2_aa_local
Ok.
0 rows in set. Elapsed: 0.004 sec.
4)新增数据写入
sdw3 :) insert into t2_aa values(6,'ff'),(7,'gg');
INSERT INTO t2_aa VALUES
Ok.
2 rows in set. Elapsed: 0.050 sec.
sdw3 :) select * from t2_aa_local
SELECT *
FROM t2_aa_local
┌─id─┬─name─┐
│ 6 │ ff │
└────┴──────┘
mdw :) select * from t2_aa_local
SELECT *
FROM t2_aa_local
┌─id─┬─name─┐
│ 1 │ aa │
│ 2 │ bb │
│ 3 │ cc │
│ 4 │ dd │
│ 5 │ ee │
└────┴──────┘
┌─id─┬─name─┐
│ 7 │ gg │
└────┴──────┘
6 rows in set. Elapsed: 0.008 sec.