2.1 mariadb部署 - All controller
#每台controller安装,这里以controller160为例
yum install -y mariadb mariadb-server python3-PyMySQL
#安装galera相关的插件,利用galera搭建集群
yum install -y mariadb-server-galera mariadb-galera-common galera xinetd rsync
#启动数据库服务,并将其配置开机启动:
systemctl enable mariadb.service
systemctl start mariadb.service
#通过运行mysql_secure_installation脚本来重设密码,我这边设置的是devops:
mysql_secure_installation
#创建并编辑/etc/my.cnf.d/openstack.cnf文件(如果需要,备份/etc/my.cnf.d/中的现有配置文件),绑定地址设置为控制节点的管理IP地址,以允许其他节点通过管理网络进行访问:
[mysqld]
# Row-based binary logging is required for Galera replication.
binlog_format = ROW
# Listen on this controller's management IP so peer nodes can connect.
bind-address = 172.16.1.160
default-storage-engine = innodb
# One tablespace file per InnoDB table (easier space reclamation).
innodb_file_per_table = on
max_connections = 4096
collation-server = utf8_general_ci
character-set-server = utf8
[galera]
bind-address = 172.16.1.160
# Path to the Galera replication plugin library.
wsrep_provider = /usr/lib64/galera/libgalera_smm.so
# All cluster members; hostnames must resolve (via /etc/hosts or DNS).
wsrep_cluster_address ="gcomm://controller160,controller161,controller162"
wsrep_cluster_name = openstack-ussuri-cluster
# Per-node identity: name and address must differ on each controller.
wsrep_node_name = controller160
wsrep_node_address = 172.16.1.160
wsrep_on=ON
# Number of parallel applier threads for replicated write-sets.
wsrep_slave_threads=4
# State snapshot transfer via rsync; NOTE(review): rsync SST does not
# use wsrep_sst_auth credentials — presumably kept for a future switch
# to mysqldump/xtrabackup SST; verify before relying on it.
wsrep_sst_method=rsync
wsrep_sst_auth=root:devops
default_storage_engine=InnoDB
#停止全部控制节点的mariadb服务,以controller160节点为例
[root@controller160 ~]# systemctl stop mariadb.service
#任选1个控制节点以如下方式启动mariadb服务,这里选择controller160节点
[root@controller160 ~]# /usr/libexec/mysqld --wsrep-new-cluster --user=root &
2020-06-16 14:36:28 0 [Note] /usr/libexec/mysqld (mysqld 10.3.17-MariaDB) starting as process 1785 ...
[root@controller160 ~]# ps -ef |grep mysql
root 553 132 0 16:05 pts/2 00:00:00 /usr/libexec/mysqld --wsrep-new-cluster --user=root
root 643 132 0 16:08 pts/2 00:00:00 grep --color=auto mysql
#其他控制节点加入mariadb集群,以controller161节点为例;
#启动后加入集群,controller161节点从controller160节点同步数据,也可同步查看mariadb日志/var/log/mariadb/mariadb.log
[root@controller161 ~]# systemctl start mariadb.service
[root@controller161 ~]# systemctl status mariadb.service
● mariadb.service - MariaDB 10.3 database server
Loaded: loaded (/usr/lib/systemd/system/mariadb.service; enabled; vendor preset: disabled)
Active: active (running) since Tue 2020-06-16 16:10:07 UTC; 9s ago
Docs: man:mysqld(8)
https://mariadb.com/kb/en/library/systemd/
Process: 803 ExecStartPost=/usr/libexec/mysql-check-upgrade (code=exited, status=0/SUCCESS)
Process: 587 ExecStartPre=/usr/libexec/mysql-prepare-db-dir mariadb.service (code=exited, status=0/SUCCESS)
Process: 563 ExecStartPre=/usr/libexec/mysql-check-socket (code=exited, status=0/SUCCESS)
Main PID: 624 (mysqld)
Status: "Taking your SQL requests now..."
Tasks: 38 (limit: 32767)
Memory: 200.5M
CGroup: /system.slice/mariadb.service
└─624 /usr/libexec/mysqld --basedir=/usr
Jun 16 16:10:05 controller161 rsyncd[738]: connect from controller160 (172.16.1.160)
Jun 16 16:10:05 controller161 rsyncd[738]: rsync to rsync_sst/./performance_schema from controller160 (172.16.1.160)
Jun 16 16:10:05 controller161 rsyncd[738]: receiving file list
Jun 16 16:10:05 controller161 rsyncd[738]: sent 48 bytes received 229 bytes total size 61
Jun 16 16:10:05 controller161 rsyncd[740]: connect from controller160 (172.16.1.160)
Jun 16 16:10:05 controller161 rsyncd[740]: rsync to rsync_sst/ from controller160 (172.16.1.160)
Jun 16 16:10:05 controller161 rsyncd[740]: receiving file list
Jun 16 16:10:05 controller161 rsyncd[740]: sent 48 bytes received 185 bytes total size 41
Jun 16 16:10:05 controller161 rsyncd[674]: sent 0 bytes received 0 bytes total size 0
Jun 16 16:10:07 controller161 systemd[1]: Started MariaDB 10.3 database server.
#重新启动controller160节点;
#启动前删除controller160节点的数据
[root@controller160 ~]# pkill -9 mysql
[root@controller160 ~]# rm -rf /var/lib/mysql/*
#注意以system unit方式启动mariadb服务时的权限
[root@controller160 ~]# chown mysql:mysql /var/run/mariadb/mariadb.pid
#启动后查看节点所在服务状态,controller160节点从controller162节点同步数据
[root@controller160 ~]# systemctl start mariadb.service
[root@controller160 run]# systemctl status mariadb.service
● mariadb.service - MariaDB 10.3 database server
Loaded: loaded (/usr/lib/systemd/system/mariadb.service; enabled; vendor preset: disabled)
Active: active (running) since Fri 2020-06-19 00:55:22 CST; 11s ago
Docs: man:mysqld(8)
https://mariadb.com/kb/en/library/systemd/
Process: 21791 ExecStartPost=/usr/libexec/mysql-check-upgrade (code=exited, status=0/SUCCESS)
Process: 21305 ExecStartPre=/usr/libexec/mysql-prepare-db-dir mariadb.service (code=exited, status=0/SUCCESS)
Process: 21281 ExecStartPre=/usr/libexec/mysql-check-socket (code=exited, status=0/SUCCESS)
Main PID: 21489 (mysqld)
Status: "Taking your SQL requests now..."
Tasks: 38 (limit: 23978)
Memory: 302.0M
CGroup: /system.slice/mariadb.service
└─21489 /usr/libexec/mysqld --basedir=/usr
Jun 19 00:55:16 controller160 rsyncd[21645]: sent 618 bytes received 2111179 bytes total size 2108561
Jun 19 00:55:16 controller160 rsyncd[21705]: connect from controller162 (172.16.1.162)
Jun 19 00:55:16 controller160 rsyncd[21705]: rsync to rsync_sst/./placement from controller162 (172.16.1.162)
Jun 19 00:55:16 controller160 rsyncd[21705]: receiving file list
Jun 19 00:55:16 controller160 rsyncd[21705]: sent 48 bytes received 216 bytes total size 61
Jun 19 00:55:19 controller160 rsyncd[21715]: connect from controller162 (172.16.1.162)
Jun 19 00:55:19 controller160 rsyncd[21715]: rsync to rsync_sst/ from controller162 (172.16.1.162)
Jun 19 00:55:19 controller160 rsyncd[21715]: receiving file list
Jun 19 00:55:19 controller160 rsyncd[21715]: sent 48 bytes received 186 bytes total size 43
Jun 19 00:55:22 controller160 systemd[1]: Started MariaDB 10.3 database server.
#查看集群状态
[root@controller160 ~]# mysql -uroot -p
Enter password:
MariaDB [(none)]> show status like "wsrep_cluster_size";
+--------------------+-------+
| Variable_name | Value |
+--------------------+-------+
| wsrep_cluster_size | 3 |
+--------------------+-------+
1 row in set (0.001 sec)
MariaDB [(none)]> SHOW status LIKE 'wsrep_ready';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| wsrep_ready | ON |
+---------------+-------+
1 row in set (0.001 sec)
#设置心跳检测clustercheck
#vim /usr/bin/clustercheck
#在任意控制节点创建clustercheckuser用户并赋权;
#注意账号/密码与脚本中的账号/密码对应,这里采用的是脚本默认的账号/密码,否则需要修改clustercheck脚本文件
[root@controller160 ~]# mysql -uroot -p
MariaDB [(none)]> GRANT PROCESS ON *.* TO 'clustercheckuser'@'localhost' IDENTIFIED BY 'clustercheckpassword!';
MariaDB [(none)]> FLUSH PRIVILEGES;
#检测配置文件
# 在全部控制节点新增心跳检测服务配置文件/etc/xinetd.d/mysqlchk,以controller160节点为例
[root@controller160 ~]# wget https://codeload.github.com/olafz/percona-clustercheck/zip/master
[root@controller160 ~]# unzip master
[root@controller160 ~]# cd percona-clustercheck-master/
[root@controller160 percona-clustercheck-master]# cp clustercheck /usr/bin/
[root@controller160 percona-clustercheck-master]# touch /etc/xinetd.d/mysqlchk
[root@controller160 ~]# vim /etc/xinetd.d/mysqlchk
# default: on
# description: mysqlchk
# xinetd service definition for the Galera health-check endpoint.
# Each TCP connection on port 9200 runs /usr/bin/clustercheck, which
# answers with HTTP 200 when the local node is synced (consumed by a
# load balancer such as HAProxy).
service mysqlchk
{
port = 9200
disable = no
socket_type = stream
protocol = tcp
# wait = no: xinetd handles connections concurrently.
wait = no
user = root
group = root
groups = yes
server = /usr/bin/clustercheck
# UNLISTED: the service name need not pre-exist in /etc/services.
type = UNLISTED
# No per-source connection limit for the health checker.
per_source = UNLIMITED
log_on_success =
log_on_failure = HOST
flags = REUSE
}
#启动心跳检测服务
#修改/etc/services,变更tcp9200端口用途,以controller160节点为例
[root@controller160 ~]# vim /etc/services
#wap-wsp 9200/tcp # WAP connectionless session service
mysqlchk 9200/tcp # mysqlchk
# 启动xinetd服务,以controller160节点为例
[root@controller160 ~]# systemctl daemon-reload
[root@controller160 ~]# systemctl enable xinetd
[root@controller160 ~]# systemctl start xinetd
#测试心跳检测脚本
[root@controller160 ~]# /usr/bin/clustercheck
HTTP/1.1 200 OK
Content-Type: text/plain
Connection: close
Content-Length: 40
Percona XtraDB Cluster Node is synced.
2.2 RabbitMQ部署 - All controller
#OpenStack使用消息队列来协调服务之间的操作和状态信息。消息队列服务通常在控制节点上运行。OpenStack支持多个消息队列服务,包括RabbitMQ、Qpid和ZeroMQ。如果您希望实现不同的消息队列服务,请参考与之相关的文档。
#添加rabbitmq-erlang源
#vim /etc/yum.repos.d/rabbitmq-erlang.repo
# Yum repo providing Erlang 22 builds for EL8 (RabbitMQ dependency).
# NOTE(review): dl.bintray.com was shut down in May 2021, so this
# baseurl likely no longer resolves — verify and migrate to the current
# upstream RabbitMQ/Erlang repository if installation fails.
[rabbitmq-erlang]
name=rabbitmq-erlang
baseurl=https://dl.bintray.com/rabbitmq-erlang/rpm/erlang/22/el/8/
gpgcheck=1
gpgkey=https://dl.bintray.com/rabbitmq/Keys/rabbitmq-release-signing-key.asc
repo_gpgcheck=0
enabled=1
#添加rabbitmq源
#vim /etc/yum.repos.d/rabbitmq.repo
# Yum repo providing RabbitMQ server 3.8.x for EL8.
# NOTE(review): dl.bintray.com was shut down in May 2021, so this
# baseurl likely no longer resolves — verify and migrate to the current
# upstream RabbitMQ repository if installation fails.
[bintray-rabbitmq-server]
name=bintray-rabbitmq-rpm
baseurl=https://dl.bintray.com/rabbitmq/rpm/rabbitmq-server/v3.8.x/el/8/
gpgcheck=0
repo_gpgcheck=0
enabled=1
#安装rabbitmq
yum install -y erlang rabbitmq-server
#以controller160为例,启动rabbitmq,并将其配置开机启动:
systemctl enable rabbitmq-server.service
systemctl start rabbitmq-server.service
systemctl status rabbitmq-server.service
#构建rabbitmq集群
#任选1个控制节点首先启动rabbitmq服务,这里选择controller160节点
[root@controller160 ~]#systemctl start rabbitmq-server.service
[root@controller160 ~]#rabbitmqctl cluster_status
#分发.erlang.cookie
[root@controller160 ~]#scp /var/lib/rabbitmq/.erlang.cookie [email protected]:/var/lib/rabbitmq/
[root@controller160 ~]#scp /var/lib/rabbitmq/.erlang.cookie [email protected]:/var/lib/rabbitmq/
#修改controller161/162节点.erlang.cookie文件的用户/组,以controller161节点为例
[root@controller161 ~]#chown rabbitmq:rabbitmq /var/lib/rabbitmq/.erlang.cookie
#注意修改全部控制节点.erlang.cookie文件的权限,默认即400权限,可不修改
[root@controller161 ~]#ll /var/lib/rabbitmq/.erlang.cookie
#启动controller161/162节点的rabbitmq服务
[root@controller161 ~]#systemctl restart rabbitmq-server
[root@controller162 ~]#systemctl restart rabbitmq-server
#构建集群,controller161/162节点以ram节点的形式加入集群
[root@controller161 ~]#rabbitmqctl stop_app
[root@controller161 ~]#rabbitmqctl join_cluster --ram rabbit@controller160
[root@controller161 ~]#rabbitmqctl start_app
#检查是否创建成功
[root@controller160 ~]# rabbitmqctl cluster_status
warning: the VM is running with native name encoding of latin1 which may cause Elixir to malfunction as it expects utf8. Please ensure your locale is set to UTF-8 (which can be verified by running "locale" in your shell)
Cluster status of node rabbit@controller160 ...
Basics
Cluster name: rabbit@controller160
Disk Nodes
rabbit@controller160
RAM Nodes
rabbit@controller161
rabbit@controller162
Running Nodes
rabbit@controller160
rabbit@controller161
rabbit@controller162
Versions
rabbit@controller160: RabbitMQ 3.8.5 on Erlang 22.3.4.1
rabbit@controller161: RabbitMQ 3.8.5 on Erlang 22.3.4.1
rabbit@controller162: RabbitMQ 3.8.5 on Erlang 22.3.4.1
Alarms
(none)
Network Partitions
(none)
Listeners
Node: rabbit@controller160, interface: [::], port: 25672, protocol: clustering, purpose: inter-node and CLI tool communication
Node: rabbit@controller160, interface: [::], port: 5672, protocol: amqp, purpose: AMQP 0-9-1 and AMQP 1.0
Node: rabbit@controller161, interface: [::], port: 25672, protocol: clustering, purpose: inter-node and CLI tool communication
Node: rabbit@controller161, interface: [::], port: 5672, protocol: amqp, purpose: AMQP 0-9-1 and AMQP 1.0
Node: rabbit@controller162, interface: [::], port: 25672, protocol: clustering, purpose: inter-node and CLI tool communication
Node: rabbit@controller162, interface: [::], port: 5672, protocol: amqp, purpose: AMQP 0-9-1 and AMQP 1.0
Feature flags
Flag: implicit_default_bindings, state: enabled
Flag: quorum_queue, state: enabled
Flag: virtual_host_metadata, state: enabled
#添加rabbitmq用户,并设置密码为rabbitmq.123:
rabbitmqctl add_user rabbitmq rabbitmq.123
rabbitmqctl set_user_tags rabbitmq administrator
#允许rabbitmq用户的配置、写和读访问:
rabbitmqctl set_permissions -p "/" rabbitmq ".*" ".*" ".*"
#查看账号
rabbitmqctl list_users
#镜像队列ha
# 设置镜像队列高可用
rabbitmqctl set_policy ha-all "^" '{"ha-mode":"all"}'
# 查看镜像队列策略
rabbitmqctl list_policies
#安装web管理插件
# 在全部控制节点安装web管理插件,以controller160节点为例
rabbitmq-plugins enable rabbitmq_management
访问任意节点,如:http://172.16.1.160:15672
2.3 Memcached部署 - All controller
#身份服务验证机制使用Memcached来缓存令牌。memcached服务通常在控制节点上运行。
yum install -y memcached python3-memcached
#将服务配置为控制节点的管理IP地址。这是为了允许其他节点通过管理网络访问:
#vim /etc/sysconfig/memcached
sed -i 's|127.0.0.1,::1|0.0.0.0|g' /etc/sysconfig/memcached
#启动Memcached服务,并配置为开机启动:
systemctl enable memcached.service
systemctl start memcached.service
systemctl status memcached.service
2.4 Etcd部署 - All controller
#OpenStack服务可以使用Etcd,这是一种分布式可靠的键值存储,用于分布式密钥锁定、存储配置、跟踪服务生存周期和其他场景
yum install -y etcd
#修改配置文件为控制节点的管理IP地址,使其他节点能够通过管理网络进行访问:
#vim /etc/etcd/etcd.conf
# Member name: unique per node and must match its entry in
# ETCD_INITIAL_CLUSTER (change on controller161/162 accordingly).
ETCD_NAME="controller160"
ETCD_DATA_DIR="/var/lib/etcd"
# "new" bootstraps a fresh cluster; use "existing" when joining later.
ETCD_INITIAL_CLUSTER_STATE="new"
# Shared token preventing cross-talk between distinct etcd clusters.
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster-01"
# All three controllers' peer URLs (port 2380 = peer traffic).
ETCD_INITIAL_CLUSTER="controller160=http://172.16.1.160:2380,controller161=http://172.16.1.161:2380,controller162=http://172.16.1.162:2380"
# URLs advertised to the other members; node-specific — adjust per host.
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://172.16.1.160:2380"
# Client URL advertised to clients (port 2379 = client traffic).
ETCD_ADVERTISE_CLIENT_URLS="http://172.16.1.160:2379"
ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380"
ETCD_LISTEN_CLIENT_URLS="http://172.16.1.160:2379,http://127.0.0.1:2379"
#修改 etcd.service
#vim /usr/lib/systemd/system/etcd.service
# [Service] section override: load the cluster settings from
# /etc/etcd/etcd.conf and pass them explicitly on the command line.
[Service]
Type=notify
WorkingDirectory=/var/lib/etcd/
# Leading '-' means: do not fail if the environment file is missing.
EnvironmentFile=-/etc/etcd/etcd.conf
User=etcd
# set GOMAXPROCS to number of processors
ExecStart=/bin/bash -c "GOMAXPROCS=$(nproc) /usr/bin/etcd \
--name=\"${ETCD_NAME}\" \
--data-dir=\"${ETCD_DATA_DIR}\" \
--listen-peer-urls=\"${ETCD_LISTEN_PEER_URLS}\" \
--listen-client-urls=\"${ETCD_LISTEN_CLIENT_URLS}\" \
--initial-advertise-peer-urls=\"${ETCD_INITIAL_ADVERTISE_PEER_URLS}\" \
--advertise-client-urls=\"${ETCD_ADVERTISE_CLIENT_URLS}\" \
--initial-cluster=\"${ETCD_INITIAL_CLUSTER}\" \
--initial-cluster-token=\"${ETCD_INITIAL_CLUSTER_TOKEN}\" \
--initial-cluster-state=\"${ETCD_INITIAL_CLUSTER_STATE}\""
# Restart etcd automatically if it exits abnormally.
Restart=on-failure
LimitNOFILE=65536
#依次启动每个节点的etcd服务,并配置为开机启动:
systemctl enable etcd
systemctl start etcd
systemctl status etcd
#验证etcd
[root@controller160 ~]# etcdctl cluster-health
member c8750a07a2008c34 is healthy: got healthy result from http://172.16.1.161:2379
member ddc8bcfe3f80cd5d is healthy: got healthy result from http://172.16.1.160:2379
member fc456eb6d491b837 is healthy: got healthy result from http://172.16.1.162:2379
cluster is healthy
[root@controller160 ~]# etcdctl member list
c8750a07a2008c34: name=controller161 peerURLs=http://172.16.1.161:2380 clientURLs=http://172.16.1.161:2379 isLeader=false
ddc8bcfe3f80cd5d: name=controller160 peerURLs=http://172.16.1.160:2380 clientURLs=http://172.16.1.160:2379 isLeader=true
fc456eb6d491b837: name=controller162 peerURLs=http://172.16.1.162:2380 clientURLs=http://172.16.1.162:2379 isLeader=false
至此,基础服务已部署完毕,如有问题请联系我改正,感激不尽!
X.过程中遇到的问题
eg.1.Error: Failed to synchronize cache for repo 'AppStream'
解决方案:dnf upgrade --releasever=8
eg.2.2020-06-16 13:58:50 0 [ERROR] mysqld: Server GSSAPI error (major 851968, minor 2529639093) : gss_acquire_cred failed -Unspecified GSS failure. Minor code may provide more information. Keytab FILE:/etc/krb5.keytab is nonexistent or empty.
2020-06-16 13:58:50 0 [ERROR] Plugin 'gssapi' init function returned error.
2020-06-16 13:58:50 0 [ERROR] Can't init tc log
2020-06-16 13:58:50 0 [ERROR] Aborting
rm -rf /var/lib/mysql/tc.log
eg3.异常处理:当机房突然停电,所有galera主机都非正常关机,来电后开机,会导致galera集群服务无法正常启动。如何处理?
第1步:开启galera集群的群主主机的mariadb服务。
第2步:开启galera集群的成员主机的mariadb服务。
异常处理:galera集群的群主主机和成员主机的mysql服务无法启动,如何处理?
解决方法一:第1步、删除garlera群主主机的/var/lib/mysql/grastate.dat状态文件
/bin/galera_new_cluster启动服务。启动正常。登录并查看wsrep状态。
第2步:删除galera成员主机中的/var/lib/mysql/grastate.dat状态文件
systemctl restart mariadb重启服务。启动正常。登录并查看wsrep状态。
解决方法二:第1步、修改garlera群主主机的/var/lib/mysql/grastate.dat状态文件中的0为1
/bin/galera_new_cluster启动服务。启动正常。登录并查看wsrep状态。
第2步:修改galera成员主机中的/var/lib/mysql/grastate.dat状态文件中的0为1
systemctl restart mariadb重启服务。启动正常。登录并查看wsrep状态。
eg4.[root@controller160 percona-clustercheck-master]# /usr/bin/clustercheck
HTTP/1.1 503 Service Unavailable
Content-Type: text/plain
Connection: close
Content-Length: 44
vim /usr/bin/clustercheck
MYSQL_USERNAME="${MYSQL_USERNAME:=-clustercheckuser}"
MYSQL_PASSWORD="${MYSQL_PASSWORD-clustercheckpassword!}"
修改成
MYSQL_USERNAME="${MYSQL_USERNAME:-clustercheckuser}"
MYSQL_PASSWORD="${MYSQL_PASSWORD:-clustercheckpassword!}"