I. Planning
1. Server configuration
Property | Details |
---|---|
Server spec | 4C8G |
OS version | CentOS Linux release 7.8.2003 (Core) |
Kernel version | 3.10.0-1127.el7.x86_64 |
ZooKeeper version | ZooKeeper-3.6.2 |
ClickHouse version | ClickHouse-20.9.5.5 |
2. ZooKeeper
1) Server plan
Host | IP | Port |
---|---|---|
sdw1 | 172.16.104.12 | 2181 |
sdw2 | 172.16.104.13 | 2181 |
sdw3 | 172.16.104.14 | 2181 |
2) Directory plan
Item | Path |
---|---|
Installation directory | /usr/local/zookeeper |
Data directory | /data/zookeeper/data |
Configuration file | /usr/local/zookeeper/conf/zoo.cfg |
3. ClickHouse
1) Server plan
Host | IP | http_port | tcp_port | interserver_http_port | CK cluster |
---|---|---|---|---|---|
sdw1 | 172.16.104.12 | 8123 | 9000 | 9009 | cluster01-01-1 (shard1_repl1) |
sdw2 | 172.16.104.13 | 8123 | 9000 | 9009 | cluster01-01-2 (shard1_repl1) |
sdw1 | 172.16.104.12 | 8123 | 9000 | 9009 | cluster02-01 (shard2_repl0) |
sdw2 | 172.16.104.13 | 8123 | 9000 | 9009 | cluster02-02 (shard2_repl0) |
2) Directory plan
Item | Path |
---|---|
clickhouse-server package | /etc/clickhouse-server |
clickhouse-client package | /etc/clickhouse-client |
Configuration files | /etc/clickhouse-server |
Data directory | /data/clickhouse-server/data |
Log directory | /data/clickhouse-server/logs |
II. Install the JDK
# yum install java
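ZooKeeper 3.6 requires Java 8 or newer; a quick check that the installed JDK meets that requirement (a minimal sketch, not part of the original steps):
# java -version //should report version 1.8 or newer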
III. Install ZooKeeper
1. Download the package
ZooKeeper is distributed both as a source package and as a binary package; the steps below install the binary package:
# wget -c https://mirror.bit.edu.cn/apache/zookeeper/zookeeper-3.6.2/apache-zookeeper-3.6.2-bin.tar.gz
# tar xf apache-zookeeper-3.6.2-bin.tar.gz -C /usr/local/
# mv /usr/local/apache-zookeeper-3.6.2-bin /usr/local/zookeeper
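The data and log directories from the planning table do not exist yet; a hedged sketch for creating them, to be repeated on all three nodes:
# mkdir -p /data/zookeeper/{data,logs}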
2. Edit the configuration files
1) Edit zoo.cfg
# cd /usr/local/zookeeper/conf
# cp -r ./zoo_sample.cfg zoo.cfg
# cat zoo.cfg | grep -v "^#" //zoo.cfg after editing looks like this
tickTime=2000 //basic time unit (ms) used for heartbeats; session timeouts are derived from it
initLimit=10
syncLimit=5
dataDir=/data/zookeeper/data //directory for snapshot data
dataLogDir=/data/zookeeper/logs //directory for transaction logs
clientPort=2181 //TCP port the server listens on for client connections
server.1=sdw1:2888:3888 //format: server.${myid}=host:quorumPort:electionPort
server.2=sdw2:2888:3888
server.3=sdw3:2888:3888
# scp -r ./zoo.cfg sdw2:/usr/local/zookeeper/conf/zoo.cfg //copy the file to the other two nodes
# scp -r ./zoo.cfg sdw3:/usr/local/zookeeper/conf/zoo.cfg
2) Create the myid file
# vi /data/zookeeper/data/myid
1 //must match zoo.cfg: sdw1 is server.1, so this node's myid is 1
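The other two nodes need their own myid values matching zoo.cfg. A sketch assuming passwordless SSH between the hosts (otherwise log in to each node and edit the file directly):
# ssh sdw2 "echo 2 > /data/zookeeper/data/myid" //sdw2 is server.2
# ssh sdw3 "echo 3 > /data/zookeeper/data/myid" //sdw3 is server.3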
3) Configure environment variables
#Zookeeper Environment
export ZK_HOME=/usr/local/zookeeper
export PATH=$PATH:$ZK_HOME/bin
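These exports only apply to the current shell; to make them permanent they can be appended to /etc/profile (or ~/.bash_profile) on every node, for example:
# cat >> /etc/profile <<'EOF'
#Zookeeper Environment
export ZK_HOME=/usr/local/zookeeper
export PATH=$PATH:$ZK_HOME/bin
EOF
# source /etc/profile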
3. Start ZooKeeper
-- start ZooKeeper in the background
# zkServer.sh start
-- check ZooKeeper status
# zkServer.sh status
-- start ZooKeeper in the foreground to see errors directly (useful for troubleshooting startup failures)
# zkServer.sh start-foreground
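Once all three nodes are running, zkServer.sh status should report one leader and two followers. A simple connectivity check with the bundled CLI (a sketch; any of the three hosts works):
# zkCli.sh -server sdw1:2181 ls / //should list at least [zookeeper]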
IV. Install and deploy ClickHouse
1. Download the RPM packages
Reference: https://clickhouse.tech/docs/zh/getting-started/install/#from-rpm-packages
-- download
# wget -c https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/clickhouse-client-20.9.5.5-2.noarch.rpm
# wget -c https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/clickhouse-common-static-20.9.5.5-2.x86_64.rpm
# wget -c https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/clickhouse-common-static-dbg-20.9.5.5-2.x86_64.rpm
# wget -c https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/clickhouse-server-20.9.5.5-2.noarch.rpm
# wget -c https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/clickhouse-test-20.9.5.5-2.noarch.rpm
-- install
# rpm -ihv clickhouse-common-static-20.9.5.5-2.x86_64.rpm
# rpm -ihv clickhouse-common-static-dbg-20.9.5.5-2.x86_64.rpm
# rpm -ihv clickhouse-server-20.9.5.5-2.noarch.rpm
# rpm -ihv clickhouse-client-20.9.5.5-2.noarch.rpm
-- after a successful installation the following two directories exist
/etc/clickhouse-client/ /etc/clickhouse-server/
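To confirm the packages installed cleanly and the version matches the plan, a quick check (sketch only):
# rpm -qa | grep clickhouse //should list the installed clickhouse packages
# clickhouse-server --version //should report 20.9.5.5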
2. Create the data directories and grant ownership
# mkdir -p /data/clickhouse-server/{access,data,format_schemas,logs,tmp,user_files}
# chown -R clickhouse:clickhouse /data/clickhouse-server
3. Edit the configuration files
1) config.xml
<yandex>
<!-- Logging -->
<logger>
<!-- Possible levels: https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105 -->
<level>trace</level>
<log>/data/clickhouse-server/logs/clickhouse-server.log</log>
<errorlog>/data/clickhouse-server/logs/clickhouse-server.err.log</errorlog>
<size>1000M</size>
<count>10</count>
</logger>
<!-- Ports -->
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<interserver_http_port>9009</interserver_http_port>
<interserver_http_host>sdw1</interserver_http_host> <!-- the local hostname; change this on each node -->
<!-- Listen address -->
<listen_host>0.0.0.0</listen_host>
<!-- Connections -->
<max_connections>4096</max_connections>
<keep_alive_timeout>3</keep_alive_timeout>
<max_concurrent_queries>100</max_concurrent_queries>
<!-- Memory limits -->
<max_server_memory_usage>0</max_server_memory_usage>
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
<total_memory_profiler_step>4194304</total_memory_profiler_step>
<total_memory_tracker_sample_probability>0</total_memory_tracker_sample_probability>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<!-- Paths -->
<path>/data/clickhouse-server/data/</path>
<tmp_path>/data/clickhouse-server/tmp/</tmp_path>
<user_files_path>/data/clickhouse-server/user_files/</user_files_path>
<access_control_path>/data/clickhouse-server/access/</access_control_path>
<!-- Cluster configuration -->
<include_from>/etc/clickhouse-server/config.d/metrika.xml</include_from>
<remote_servers incl="clickhouse_remote_servers" />
<zookeeper incl="zookeeper-servers" optional="true" />
<macros incl="macros" optional="true" />
</yandex>
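config.xml must be present on every ClickHouse node. A hedged distribution sketch; after copying, interserver_http_host has to be changed to the local hostname of each target node:
# scp /etc/clickhouse-server/config.xml sdw2:/etc/clickhouse-server/config.xml
# ssh sdw2 "sed -i 's#<interserver_http_host>sdw1#<interserver_http_host>sdw2#' /etc/clickhouse-server/config.xml"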
2) metrika.xml (cluster configuration file)
<?xml version="1.0"?>
<yandex>
<!-- ZooKeeper settings -->
<zookeeper-servers> <!-- must match the incl name of the zookeeper element in config.xml -->
<node index="1"> <!-- ZooKeeper node -->
<host>sdw1</host> <!--zk_host-->
<port>2181</port> <!--zk_port-->
</node>
<node index="2">
<host>sdw2</host>
<port>2181</port>
</node>
<node index="3">
<host>sdw3</host>
<port>2181</port>
</node>
</zookeeper-servers>
<!-- ClickHouse cluster definitions -->
<clickhouse_remote_servers>
<!-- Test only shard config for testing distributed storage -->
<shard1_repl1> <!-- one shard replicated across two nodes -->
<shard>
<!-- Optional. Shard weight when writing data. Default: 1. -->
<weight>1</weight>
<!-- Optional. Whether to write data to just one of the replicas. Default: false (write data to all replicas). -->
<internal_replication>true</internal_replication>
<replica>
<host>sdw1</host> <!-- replica host -->
<port>9000</port> <!-- replica port -->
</replica>
<replica> <!-- add more replica entries if the shard has additional replicas -->
<host>sdw2</host>
<port>9000</port>
</replica>
</shard>
</shard1_repl1>
<shard2_repl0> <!-- two shards with no replicas -->
<shard>
<replica>
<host>sdw1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>sdw2</host>
<port>9000</port>
</replica>
</shard>
</shard2_repl0>
<!-- Example: two shards, each with two replicas (requires four nodes, sdw1 to sdw4). Uncomment and adapt if needed.
<shard2_repl1>
<shard>
<replica>
<host>sdw1</host>
<port>9000</port>
</replica>
<replica>
<host>sdw2</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>sdw3</host>
<port>9000</port>
</replica>
<replica>
<host>sdw4</host>
<port>9000</port>
</replica>
</shard>
</shard2_repl1>
-->
</clickhouse_remote_servers>
<!-- Replication macros; adjust to the actual deployment. Each server should keep a single <macros> block whose values identify that node; the blocks below show example values for the different cluster layouts. -->
<macros> <!-- for the shard1_repl1 layout -->
<layer>01</layer> <!-- cluster (layer) ID -->
<shard>01</shard> <!-- shard ID -->
<replica>cluster01-01-1</replica> <!-- replica ID -->
</macros>
<macros> <!-- for the shard2_repl0 layout -->
<layer>02</layer> <!-- cluster (layer) ID -->
<shard>01</shard> <!-- shard ID -->
<replica>cluster02-01-1</replica> <!-- replica ID; in a cluster with N shards and no replicas any consistent naming works, which keeps {replica} usable in later distributed DDL -->
</macros>
<macros> <!-- for a shard2_repl1 layout -->
<layer>03</layer> <!-- cluster (layer) ID -->
<shard>01</shard> <!-- shard ID -->
<replica>cluster03-01-1</replica> <!-- replica ID -->
</macros>
</yandex>
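metrika.xml must also exist at the include_from path on every node; only the <macros> values differ per server. A sketch (the config.d directory is assumed to exist from the RPM install; create it with mkdir -p otherwise), after which each node should keep just its own single <macros> block:
# scp /etc/clickhouse-server/config.d/metrika.xml sdw2:/etc/clickhouse-server/config.d/metrika.xml
# ssh sdw2 "grep -A 3 '<macros>' /etc/clickhouse-server/config.d/metrika.xml" //check that the replica value identifies sdw2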
3)users.xml
<?xml version="1.0"?>
<yandex>
<!-- Profiles of settings. -->
<profiles>
<!-- Default settings. -->
<default> <!-- default settings profile -->
<max_memory_usage>10000000000</max_memory_usage>
<use_uncompressed_cache>0</use_uncompressed_cache>
<load_balancing>random</load_balancing>
</default>
<rw_role> <!-- profile with full read-write privileges -->
<distributed_product_mode>allow</distributed_product_mode>
<constraints>
<max_memory_usage>
<min>5000000000</min>
<max>20000000000</max>
</max_memory_usage>
</constraints>
<readonly>0</readonly>
<allow_ddl>1</allow_ddl>
</rw_role>
<ro_role> <!-- read-only profile that still allows SET (readonly=2) -->
<distributed_product_mode>allow</distributed_product_mode>
<constraints>
<max_memory_usage>
<min>5000000000</min>
<max>10000000000</max>
</max_memory_usage>
</constraints>
<readonly>2</readonly>
<allow_ddl>1</allow_ddl>
</ro_role>
</profiles>
<!-- Users and ACL. -->
<users>
<!-- If user name was not specified, 'default' user is used. -->
<default> <!-- the default account -->
<password></password>
<networks incl="networks" replace="replace">
<ip>::/0</ip>
</networks>
<!-- Settings profile for user. -->
<profile>default</profile>
<!-- Quota for user. -->
<quota>default</quota>
<!-- User can create other users and grant rights to them. -->
<!-- <access_management>1</access_management> -->
</default>
<sansi_rw> <!-- read-write account -->
<!--echo -n 123 | openssl dgst -sha256-->
<password_sha256_hex>a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3</password_sha256_hex>
<networks incl="networks" replace="replace">
<ip>::/0</ip>
</networks>
<profile>rw_role</profile>
<quota>default</quota>
</sansi_rw>
<sansi_ro> <!-- read-only account that still allows SET -->
<!--echo -n 123 | openssl dgst -sha256-->
<password_sha256_hex>a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3</password_sha256_hex>
<networks incl="networks" replace="replace">
<ip>::/0</ip>
</networks>
<profile>ro_role</profile>
<quota>default</quota>
</sansi_ro>
</users>
<!-- Quotas. -->
<quotas>
<!-- Name of quota. -->
<default>
<!-- Limits for time interval. You could specify many intervals with different limits. -->
<interval>
<!-- Length of interval. -->
<duration>3600</duration>
<!-- No limits. Just calculate resource usage for time interval. -->
<queries>0</queries>
<errors>0</errors>
<result_rows>0</result_rows>
<read_rows>0</read_rows>
<execution_time>0</execution_time>
</interval>
</default>
</quotas>
</yandex>
4. Start the service
# systemctl start clickhouse-server //start the clickhouse-server service
# systemctl status clickhouse-server //check the service status
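It is also worth enabling the service at boot and watching the error log during the first start (paths follow the logger settings above); a sketch:
# systemctl enable clickhouse-server //start automatically after a reboot
# tail -n 50 /data/clickhouse-server/logs/clickhouse-server.err.log //should contain no startup errors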
5. Check basic cluster information
-- the output below reflects the layouts configured above: shard1_repl1 (one shard, two replicas) and shard2_repl0 (two shards, no replicas)
# clickhouse-client --host 172.16.104.12 --port 9000
ClickHouse client version 20.9.5.5 (official build).
Connecting to 172.16.104.12:9000 as user default.
Connected to ClickHouse server version 20.9.5 revision 54439.
sdw2 :) SELECT * FROM system.clusters;
SELECT *
FROM system.clusters
┌─cluster──────┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name─┬─host_address──┬─port─┬─is_local─┬─user────┬─default_database─┬─errors_count─┬─estimated_recovery_time─┐
│ shard1_repl1 │ 1 │ 1 │ 1 │ sdw1 │ 172.16.104.12 │ 9000 │ 0 │ default │ │ 0 │ 0 │
│ shard1_repl1 │ 1 │ 1 │ 2 │ sdw2 │ 172.16.104.13 │ 9000 │ 1 │ default │ │ 0 │ 0 │
│ shard2_repl0 │ 1 │ 1 │ 1 │ sdw1 │ 172.16.104.12 │ 9000 │ 0 │ default │ │ 0 │ 0 │
│ shard2_repl0 │ 2 │ 1 │ 1 │ sdw2 │ 172.16.104.13 │ 9000 │ 1 │ default │ │ 0 │ 0 │
└──────────────┴───────────┴──────────────┴─────────────┴───────────┴───────────────┴──────┴──────────┴─────────┴──────────────────┴──────────────┴─────────────────────────┘
4 rows in set. Elapsed: 0.009 sec.
sdw2 :) select * from system.zookeeper where path='/clickhouse'
SELECT *
FROM system.zookeeper
WHERE path = '/clickhouse'
┌─name───────┬─value─┬──────czxid─┬──────mzxid─┬───────────────ctime─┬───────────────mtime─┬─version─┬─cversion─┬─aversion─┬─ephemeralOwner─┬─dataLength─┬─numChildren─┬──────pzxid─┬─path────────┐
│ tables │ │ 8589938637 │ 8589938637 │ 2020-12-05 18:15:55 │ 2020-12-05 18:15:55 │ 0 │ 4 │ 0 │ 0 │ 0 │ 4 │ 8589939065 │ /clickhouse │
│ task_queue │ │ 8589934595 │ 8589934595 │ 2020-12-05 16:24:39 │ 2020-12-05 16:24:39 │ 0 │ 1 │ 0 │ 0 │ 0 │ 1 │ 8589934596 │ /clickhouse │
└────────────┴───────┴────────────┴────────────┴─────────────────────┴─────────────────────┴─────────┴──────────┴──────────┴────────────────┴────────────┴─────────────┴────────────┴─────────────┘
2 rows in set. Elapsed: 0.016 sec.
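Two further quick checks, sketched below: that the macros on the local node resolve as intended, and that the accounts from users.xml can authenticate (sansi_rw/123 is the example credential from the configuration above):
# clickhouse-client --host 172.16.104.12 --port 9000 --query "SELECT * FROM system.macros"
# clickhouse-client --host 172.16.104.12 --port 9000 --user sansi_rw --password 123 --query "SELECT currentUser()"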
V. Using the cluster for high availability
Replication in a ClickHouse cluster needs no extra configuration beyond the files above: the Replicated* table engines, working together with ZooKeeper, provide all of the replication functionality.
1. Basic syntax
1) ReplicatedMergeTree syntax
engine = ReplicatedMergeTree('zk_path','replica_name')
- zk_path : the ZooKeeper path under which the table's replication metadata is stored; a common template is /clickhouse/tables/${shard}/${table_name}, where ${shard} is the shard ID and ${table_name} is the table name
- replica_name : the replica name
- tables that are replicas of the same shard must define the same zk_path but different replica_name values
- tables on different shards must define different zk_path values and different replica_name values
2) Distributed table syntax
engine = Distributed(cluster,database,table [,sharding_key])
- cluster : the cluster name, as defined under <clickhouse_remote_servers> in metrika.xml
- database, table : the database and local table that the distributed table maps onto (the distributed table acts as a view over the local tables)
- sharding_key : the sharding key (optional); a combined sketch that ties both engines to the macros defined earlier follows after this list
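The macros defined in metrika.xml ({layer}, {shard}, {replica}) let the same DDL run on every node through ON CLUSTER instead of hand-writing zk_path and replica_name per host. A hedged sketch against the shard1_repl1 cluster; the table name t_demo is made up for illustration, and the db2 database is assumed to exist on both nodes (it is created in the examples below):
# clickhouse-client --host 172.16.104.12 --multiquery --query "
CREATE TABLE db2.t_demo ON CLUSTER shard1_repl1 (id Int32, name String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/t_demo', '{replica}') ORDER BY id;
CREATE TABLE db2.t_demo_all ON CLUSTER shard1_repl1 (id Int32, name String) ENGINE = Distributed(shard1_repl1, db2, t_demo, rand());"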
2. Examples
1) Single shard, multiple replicas: creating replicated tables
Table creation:
-- shard1-repl1: create the replicated table t1
-- /clickhouse/tables/01-01/t1 : 01-01 is derived from the cluster ID and the shard ID
-- cluster01-01-1 : cluster ID-shard ID-replica ID
sdw1 :) create database db2;
sdw1 :) CREATE TABLE t1(`id` Int32,`name` String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01-01/t1','cluster01-01-1') ORDER BY id ;
-- shard1-repl2: create the replicated table t1
sdw2 :) create database db2;
sdw2 :) CREATE TABLE t1(`id` Int32,`name` String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01-01/t1','cluster01-01-2') ORDER BY id ;
Writing data:
-- shard1-repl1
sdw1 :) insert into t1 values(1,'aa'),(2,'bb'),(3,'cc');
-- shard1-repl2
sdw2 :) insert into t1 values(4,'dd');
sdw2 :) insert into t1 values(5,'ee');
Querying data:
-- shard1-repl1
sdw1 :) select * from t1;
SELECT *
FROM t1
┌─id─┬─name─┐
│ 1 │ aa │
│ 2 │ bb │
│ 3 │ cc │
└────┴──────┘
┌─id─┬─name─┐
│ 4 │ dd │
└────┴──────┘
┌─id─┬─name─┐
│ 5 │ ee │
└────┴──────┘
5 rows in set. Elapsed: 0.009 sec.
-- shard1-repl2
sdw2 :) select * from t1;
SELECT *
FROM t1
┌─id─┬─name─┐
│ 5 │ ee │
└────┴──────┘
┌─id─┬─name─┐
│ 4 │ dd │
└────┴──────┘
┌─id─┬─name─┐
│ 1 │ aa │
│ 2 │ bb │
│ 3 │ cc │
└────┴──────┘
5 rows in set. Elapsed: 0.008 sec.
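To see the high-availability behaviour of the replicated table, one replica can be stopped while the other keeps serving traffic; after a restart the stopped replica catches up through ZooKeeper. A hedged sketch (adjust the database prefix to wherever t1 was created):
# ssh sdw2 "systemctl stop clickhouse-server"
# clickhouse-client --host 172.16.104.12 --query "insert into t1 values(6,'ff')" //sdw1 still accepts writes
# ssh sdw2 "systemctl start clickhouse-server"
# clickhouse-client --host 172.16.104.13 --query "select count() from t1" //should return 6 once replication has caught up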
2) Two shards, no replicas: creating a distributed table
Table creation:
-- shard1
sdw1 :) CREATE TABLE t3(`id` Int32,`name` String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/02-01/t3','cluster02-01') ORDER BY id ;
sdw1 :) CREATE TABLE t3_all(`id` Int32,`name` String) ENGINE = Distributed(shard2_repl0,db2,t3,rand())
-- shard2
sdw2 :) CREATE TABLE t3(`id` Int32,`name` String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/02-02/t3','cluster02-02') ORDER BY id ;
sdw2 :) CREATE TABLE t3_all(`id` Int32,`name` String) ENGINE = Distributed(shard2_repl0,db2,t3,rand())
Writing data:
-- shard1
sdw1 :) insert into t3 values(1,'aa'),(2,'bb'),(3,'cc');
-- shard2
sdw2 :) insert into t3 values(4,'dd'),(5,'ee'),(6,'ff');
Querying data:
-- shard1
sdw1 :) select * from t3;
SELECT *
FROM t3
┌─id─┬─name─┐
│ 1 │ aa │
│ 2 │ bb │
│ 3 │ cc │
└────┴──────┘
3 rows in set. Elapsed: 0.006 sec.
sdw1 :) select * from t3_all;
SELECT *
FROM t3_all
┌─id─┬─name─┐
│ 1 │ aa │
│ 2 │ bb │
│ 3 │ cc │
└────┴──────┘
┌─id─┬─name─┐
│ 4 │ dd │
│ 5 │ ee │
│ 6 │ ff │
└────┴──────┘
6 rows in set. Elapsed: 0.019 sec.
-- shard2
sdw2 :) select * from t3;
SELECT *
FROM t3
┌─id─┬─name─┐
│ 4 │ dd │
│ 5 │ ee │
│ 6 │ ff │
└────┴──────┘
3 rows in set. Elapsed: 0.006 sec.
sdw2 :) select * from t3_all;
SELECT *
FROM t3_all
┌─id─┬─name─┐
│ 4 │ dd │
│ 5 │ ee │
│ 6 │ ff │
└────┴──────┘
┌─id─┬─name─┐
│ 1 │ aa │
│ 2 │ bb │
│ 3 │ cc │
└────┴──────┘
6 rows in set. Elapsed: 0.017 sec.
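Writes can also go through the Distributed table itself: rows inserted into t3_all are routed to the shards according to the rand() sharding key. A hedged sketch (adjust the database prefix to wherever t3_all was created; distributed inserts are asynchronous by default, so the new rows may take a moment to show up on the remote shard):
# clickhouse-client --host 172.16.104.12 --query "insert into t3_all values(7,'gg'),(8,'hh')"
# clickhouse-client --host 172.16.104.12 --query "select count() from t3_all" //should eventually return 8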