Cluster operations
Starting the cluster
# Start the mon service
sudo service ceph-mon@ceph1 start
# Start the mgr service
sudo service ceph-mgr@ceph1 start
# Start a specific OSD service
sudo service ceph-osd@0 start
# Start all OSD services
sudo service ceph-osd@* start
# Start the MDS service
sudo service ceph-mds@ceph1 start
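These examples use the SysV service wrapper; on a systemd-managed host the same daemons can be started with systemctl directly. The unit names below assume the stock ceph-<daemon>@<id> units and the ceph-osd.target grouping shipped with the ceph packages:
# systemd equivalents (assuming the standard ceph systemd units)
sudo systemctl start ceph-mon@ceph1 ceph-mgr@ceph1 ceph-mds@ceph1
sudo systemctl start ceph-osd@0       # a single OSD
sudo systemctl start ceph-osd.target  # all OSDs on this host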
View the real-time status of the ceph cluster
ceph -w
  cluster:
    id:     46634c97-5c3c-424b-b2d9-653a15849c61
    health: HEALTH_OK

  services:
    mon: 1 daemons, quorum node1 (age 41m)
    mgr: node1(active, since 41m)
    osd: 3 osds: 3 up (since 41m), 3 in (since 3w)

  data:
    pools:   1 pools, 128 pgs
    objects: 94 objects, 275 MiB
    usage:   3.8 GiB used, 14 GiB / 18 GiB avail
    pgs:     128 active+clean
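ceph -w keeps streaming new cluster log entries after printing this summary; for a one-shot snapshot of the same status:
ceph -s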
Remove all ceph packages and data from a node
ceph-deploy purge node1     # remove all packages and data
ceph-deploy purgedata node1 # remove data only
View ceph storage usage
ceph df
RAW STORAGE:
    CLASS     SIZE       AVAIL      USED        RAW USED     %RAW USED
    hdd       18 GiB     14 GiB     824 MiB      3.8 GiB         21.15
    TOTAL     18 GiB     14 GiB     824 MiB      3.8 GiB         21.15

POOLS:
    POOL     ID     PGS     STORED      OBJECTS     USED        %USED     MAX AVAIL
    rbd       1     128     260 MiB          94     787 MiB      5.47       4.4 GiB
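For a more detailed per-pool breakdown (extra usage columns), use:
ceph df detail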
Create an admin user with a key and save the keyring under /etc/ceph
ceph auth get-or-create client.admin mds 'allow *' osd 'allow *' mon 'allow *' mgr 'allow *' \
-o /etc/ceph/ceph.client.admin.keyring
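To verify the key works, a command can be run explicitly as client.admin against the newly written keyring (the health check is just an illustrative call):
ceph -n client.admin --keyring /etc/ceph/ceph.client.admin.keyring health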
Create a user and key for the MDS (mds.ceph1)
ceph auth get-or-create mds.ceph1 mon 'allow profile mds' osd 'allow rwx' mds 'allow *' \
-o /var/lib/ceph/mds/ceph-node1/keyring
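The pattern for an OSD is the same; a sketch for osd.0, using the capability profile and keyring path from the upstream documentation (adjust the ID and path to your deployment):
ceph auth get-or-create osd.0 mon 'allow profile osd' osd 'allow *' \
    -o /var/lib/ceph/osd/ceph-0/keyring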
List the authenticated users in the cluster and their keys
ceph auth list
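To show a single entity's key and capabilities instead of the whole list:
ceph auth get client.admin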
View the detailed cluster configuration
ceph daemon mon.ceph1 config show | more
View detailed cluster health status
ceph health detail
HEALTH_OK # if there are faults or warnings, detailed messages are printed here
MON operations
View MON status
ceph mon stat
e1: 1 mons at {node1=[v2:10.0.0.131:3300/0,v1:10.0.0.131:6789/0]}, election epoch 21, leader 0 node1, quorum 0 node1
View the MON election status
ceph quorum_status
{
  "election_epoch": 21,
  "quorum": [0],
  "quorum_names": ["node1"],
  "quorum_leader_name": "node1",
  "quorum_age": 2619,
  "monmap": {
    "epoch": 1,
    "fsid": "46634c97-5c3c-424b-b2d9-653a15849c61",
    "modified": "2021-01-29 17:59:58.311910",
    "created": "2021-01-29 17:59:58.311910",
    "min_mon_release": 14,
    "min_mon_release_name": "nautilus",
    "features": {
      "persistent": ["kraken", "luminous", "mimic", "osdmap-prune", "nautilus"],
      "optional": []
    },
    "mons": [
      {
        "rank": 0,
        "name": "node1",
        "public_addrs": {
          "addrvec": [
            { "type": "v2", "addr": "10.0.0.131:3300", "nonce": 0 },
            { "type": "v1", "addr": "10.0.0.131:6789", "nonce": 0 }
          ]
        },
        "addr": "10.0.0.131:6789/0",
        "public_addr": "10.0.0.131:6789/0"
      }
    ]
  }
}
View the MON map
ceph mon dump
dumped monmap epoch 1
epoch 1
fsid 46634c97-5c3c-424b-b2d9-653a15849c61
last_changed 2021-01-29 17:59:58.311910
created 2021-01-29 17:59:58.311910
min_mon_release 14 (nautilus)
0: [v2:10.0.0.131:3300/0,v1:10.0.0.131:6789/0] mon.node1
Remove a MON node
ceph mon remove ceph1
# It can also be removed with ceph-deploy from the deploy node
ceph-deploy mon remove ceph1
Get the monmap of a running cluster and save it to a file
ceph mon getmap -o mon.txt
got monmap epoch 1
View the map obtained above
monmaptool --print mon.txt
monmaptool: monmap file mon.txt
epoch 1
fsid 0862c251-2970-4329-b171-53a77d52b2d4
last_changed 2020-05-07 02:16:50.749480
created 2020-05-07 02:16:50.749480
0: 172.31.5.182:6789/0 mon.ceph1
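If the node being added is not yet present in the map, monmaptool can append it before injection (the monitor name ceph2 and the address here are placeholders):
monmaptool --add ceph2 10.0.0.132:6789 mon.txt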
Inject the monmap obtained above into a newly added node (stop that mon daemon before injecting)
ceph-mon -i ceph1 --inject-monmap mon.txt
View the MON admin socket
ceph-conf --name mon.ceph1 --show-config-value admin_socket
/var/run/ceph/ceph-mon.ceph1.asok
View detailed MON status
ceph daemon mon.ceph1 mon_status
OSD operations
View ceph OSD status
ceph osd stat
3 osds: 3 up (since 45m), 3 in (since 3w); epoch: e96
View the OSD map
ceph osd dump
View the OSD tree
ceph osd tree
Remove an OSD
# 1. Mark the OSD down (stop the daemon first, e.g. sudo service ceph-osd@0 stop, otherwise it will report itself up again)
ceph osd down osd.0
# 2. Mark it out of the cluster
ceph osd out osd.0
# 3. Remove the OSD from the OSD map
ceph osd rm osd.0
# 4. Remove its authentication key
ceph auth rm osd.0
# 5. Remove it from the CRUSH map
ceph osd crush rm osd.0
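After the removal, the OSD should no longer appear in the CRUSH tree or the OSD map; a quick check:
ceph osd tree
ceph osd dump | grep osd.0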
Set the maximum number of OSDs
# Get the current max OSD count
ceph osd getmaxosd
# Set the max OSD count
ceph osd setmaxosd 10
Set the CRUSH weight of an OSD
ceph osd crush set 3 3.0 host=ceph2
set item id 3 name 'osd.3' weight 3 at location {host=ceph2} to crush map
View the configuration of a specific OSD
ceph --admin-daemon /var/run/ceph/ceph-osd.0.asok config show | less
# another way
ceph -n osd.0 --show-config | grep objectstore
Dynamically change OSD settings at runtime
# set a single OSD
ceph tell osd.0 injectargs "--osd_recovery_op_priority 63"
# set all OSDs (quote the target so the shell does not glob-expand the *)
ceph tell 'osd.*' injectargs "--osd_recovery_op_priority 63"
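To confirm the new value has taken effect on a running daemon (queried over its admin socket):
ceph daemon osd.0 config get osd_recovery_op_priority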
MDS operations
View MDS status
ceph mds stat
View the MDS map
ceph mds dump
Remove an MDS node
# remove the first MDS
ceph mds rm 0
mds gid 0 dne
Pool operations
List the pools in the cluster
ceph osd lspools
Create a pool
ceph osd pool create testpool 128 128 # 128 128 are pg_num and pgp_num
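A few checks and settings commonly run right after creating a pool (testpool is the pool created above; the replica count of 3 is only an example value):
ceph osd pool ls detail             # confirm the pool, its pg_num and other flags
ceph osd pool get testpool size     # current replica count
ceph osd pool set testpool size 3   # set the replica count (example value)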
Set a quota on a pool
ceph osd pool set-quota testpool max_objects 10000
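A size-based quota can be set the same way, and the current quotas reviewed; the 10 GiB value is only an example:
ceph osd pool set-quota testpool max_bytes 10737418240  # 10 GiB
ceph osd pool get-quota testpool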
Delete a pool from the cluster
# First, allow pool deletion in ceph.conf
mon_allow_pool_delete = true
# Then restart the MON daemon
sudo service ceph-mon@ceph1 restart
# Then delete the pool
ceph osd pool delete testpool testpool --yes-i-really-really-mean-it
PG operations
View PG status
ceph pg stat
View the PG map
ceph pg dump
View PGs stuck in a given state
ceph pg dump_stuck unclean
ceph pg dump_stuck inactive
ceph pg dump_stuck stale
Get pg_num / pgp_num
ceph osd pool get mytestpool pg_num
ceph osd pool get mytestpool pgp_num
Set pg_num / pgp_num
ceph osd pool set mytestpool pg_num 512
ceph osd pool set mytestpool pgp_num 512
Recover a lost PG
ceph pg {pg-id} mark_unfound_lost revert
Repair PG data
ceph pg crush repair {pg_id}
ceph pg repair {pg_id}
Show PGs in abnormal states
ceph pg dump_stuck inactive|unclean|stale