版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/zhonglinzhang/article/details/82907357
StatefulSet和Deployment的区别
Deployment部署无状态服务,StatefulSet部署有状态服务
官方给出的建议是,如果部署的应用满足以下一个或多个部署需求,则建议使用StatefulSet
- 稳定的、唯一的网络标识。
- 稳定的、持久的存储。
- 有序的、优雅的部署和伸缩。
- 有序的、优雅的删除和停止。
- 有序的、自动的滚动更新。
稳定主要是针对Pod发生re-schedule后仍然要保持之前的网络标识和持久化存储。网络标识包括hostname、集群内DNS中该Pod对应的域名,并不能保证Pod re-schedule之后IP不变。如果使用Deployment,需要考虑滚动更新的过程中的参数控制(maxSurge、maxUnavailable)、每个应用的IP池预留造成的IP浪费等等问题
0. 从这里开始啊
// NewControllerInitializers is a public map of named controller groups (you can start more than one in an init func)
// paired to their InitFunc. This allows for structured downstream composition and subdivision.
func NewControllerInitializers() map[string]InitFunc {
controllers := map[string]InitFunc{}
controllers["statefulset"] = startStatefulSetController
return controllers
}
0.1 startStatefulsetController函数
- 初始化一个
StatefulSetController
对象。最后启动该对象的Run时,进入循环处理流程
func startStatefulSetController(ctx ControllerContext) (bool, error) {
if !ctx.AvailableResources[schema.GroupVersionResource{Group: "apps", Version: "v1beta1", Resource: "statefulsets"}] {
return false, nil
}
go statefulset.NewStatefulSetController(
ctx.InformerFactory.Core().V1().Pods(),
ctx.InformerFactory.Apps().V1beta1().StatefulSets(),
ctx.InformerFactory.Core().V1().PersistentVolumeClaims(),
ctx.InformerFactory.Apps().V1beta1().ControllerRevisions(),
ctx.ClientBuilder.ClientOrDie("statefulset-controller"),
).Run(1, ctx.Stop)
return true, nil
}
1. NewStatefulSetController函数
路径: pkg/controller/statefulset/stateful_set.go,主要ListWatch Pod和StatefulSet
1.1 创建stattefulsetController结构体,包括queue
ssc := &StatefulSetController{
kubeClient: kubeClient,
control: NewDefaultStatefulSetControl(
NewRealStatefulPodControl(
kubeClient,
setInformer.Lister(),
podInformer.Lister(),
pvcInformer.Lister(),
recorder),
NewRealStatefulSetStatusUpdater(kubeClient, setInformer.Lister()),
history.NewHistory(kubeClient, revInformer.Lister()),
),
pvcListerSynced: pvcInformer.Informer().HasSynced,
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "statefulset"),
podControl: controller.RealPodControl{KubeClient: kubeClient, Recorder: recorder},
revListerSynced: revInformer.Informer().HasSynced,
}
1.2 PodInformer注册了add update delete EventHandler,会将Pod对应的StatefulSet加入到queue中
podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
// lookup the statefulset and enqueue
AddFunc: ssc.addPod,
// lookup current and old statefulset if labels changed
UpdateFunc: ssc.updatePod,
// lookup statefulset accounting for deletion tombstones
DeleteFunc: ssc.deletePod,
})
1.3 SetInformer注册了add update EventHandler,将StatefulSet加入到 queue中
setInformer.Informer().AddEventHandlerWithResyncPeriod(
cache.ResourceEventHandlerFuncs{
AddFunc: ssc.enqueueStatefulSet,
UpdateFunc: func(old, cur interface{}) {
oldPS := old.(*apps.StatefulSet)
curPS := cur.(*apps.StatefulSet)
if oldPS.Status.Replicas != curPS.Status.Replicas {
glog.V(4).Infof("Observed updated replica count for StatefulSet: %v, %d->%d", curPS.Name, oldPS.Status.Replicas, curPS.Status.Replicas)
}
ssc.enqueueStatefulSet(cur)
},
DeleteFunc: ssc.enqueueStatefulSet,
},
statefulSetResyncPeriod,
)
ssc.setLister = setInformer.Lister()
ssc.setListerSynced = setInformer.Informer().HasSynced
2. func (ssc *StatefulSetController) sync(key string) error
从 queue中pop一个StatefulSet对象,然后执行sync进行操作
2.1 调statefulset的Get API得到statefulSet对象
set, err := ssc.setLister.StatefulSets(namespace).Get(name)
if errors.IsNotFound(err) {
glog.Infof("StatefulSet has been deleted %v", key)
return nil
}
2.2 sync中
- 根据setLabel匹配出所有revisions、然后检查这些revisions中是否有OwnerReference为空的,如果有,那说明存在Orphaned的Revisions。
- 调用getPodsForStatefulSet获取StatefulSet应该管理的Pods
selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector)
if err != nil {
utilruntime.HandleError(fmt.Errorf("error converting StatefulSet %v selector: %v", key, err))
// This is a non-transient error, so don't retry.
return nil
}
if err := ssc.adoptOrphanRevisions(set); err != nil {
return err
}
pods, err := ssc.getPodsForStatefulSet(set, selector)
if err != nil {
return err
}
return ssc.syncStatefulSet(set, pods)
3.(ssc *defaultStatefulSetControl) UpdateStatefulSet(set *apps.StatefulSet, pods []*v1.Pod) error函数
3.1 获取currentRevision和updateRevision
// list all revisions and sort them
revisions, err := ssc.ListRevisions(set)
if err != nil {
return err
}
history.SortControllerRevisions(revisions)
// get the current, and update revisions
currentRevision, updateRevision, collisionCount, err := ssc.getStatefulSetRevisions(set, revisions)
if err != nil {
return err
}
3.2 updateStatefulSet函数
第四节讲解
// perform the main update function and get the status
status, err := ssc.updateStatefulSet(set, currentRevision, updateRevision, collisionCount, pods)
if err != nil {
return err
}
3.3 最终调用API更新statefulset状态
func (ssc *defaultStatefulSetControl) updateStatefulSetStatus(
set *apps.StatefulSet,
status *apps.StatefulSetStatus) error {
// complete any in progress rolling update if necessary
completeRollingUpdate(set, status)
// if the status is not inconsistent do not perform an update
if !inconsistentStatus(set, status) {
return nil
}
// copy set and update its status
set = set.DeepCopy()
if err := ssc.statusUpdater.UpdateStatefulSetStatus(set, status); err != nil {
return err
}
return nil
}
4. func (ssc *defaultStatefulSetControl) updateStatefulSet(
set *apps.StatefulSet,
currentRevision *apps.ControllerRevision,
updateRevision *apps.ControllerRevision,
collisionCount int32,
pods []*v1.Pod) (*apps.StatefulSetStatus, error)函数
4.1 获取currentRevision和updateRevision
并设置generation,currentRevision, updateRevision等信息到StatefulSet status
// get the current and update revisions of the set.
currentSet, err := ApplyRevision(set, currentRevision)
if err != nil {
return nil, err
}
updateSet, err := ApplyRevision(set, updateRevision)
if err != nil {
return nil, err
}
// set the generation, and revisions in the returned status
status := apps.StatefulSetStatus{}
status.ObservedGeneration = new(int64)
*status.ObservedGeneration = set.Generation
status.CurrentRevision = currentRevision.Name
status.UpdateRevision = updateRevision.Name
status.CollisionCount = new(int32)
*status.CollisionCount = collisionCount
4.2 pods分成两个slice
- replicas: 0 <= getOrdinal(pod) < set.Spec.Replicas
- condemned: set.Spec.Replicas <= getOrdinal(pod)
replicaCount := int(*set.Spec.Replicas)
// slice that will contain all Pods such that 0 <= getOrdinal(pod) < set.Spec.Replicas
replicas := make([]*v1.Pod, replicaCount)
// slice that will contain all Pods such that set.Spec.Replicas <= getOrdinal(pod)
condemned := make([]*v1.Pod, 0, len(pods))
unhealthy := 0
firstUnhealthyOrdinal := math.MaxInt32
4.3 将pod分为两个slice,replicas与condemned
// First we partition pods into two lists valid replicas and condemned Pods
for i := range pods {
status.Replicas++
// count the number of running and ready replicas
if isRunningAndReady(pods[i]) {
status.ReadyReplicas++
}
// count the number of current and update replicas
if isCreated(pods[i]) && !isTerminating(pods[i]) {
if getPodRevision(pods[i]) == currentRevision.Name {
status.CurrentReplicas++
} else if getPodRevision(pods[i]) == updateRevision.Name {
status.UpdatedReplicas++
}
}
if ord := getOrdinal(pods[i]); 0 <= ord && ord < replicaCount {
// if the ordinal of the pod is within the range of the current number of replicas,
// insert it at the indirection of its ordinal
replicas[ord] = pods[i]
} else if ord >= replicaCount {
// if the ordinal is greater than the number of replicas add it to the condemned list
condemned = append(condemned, pods[i])
}
// If the ordinal could not be parsed (ord < 0), ignore the Pod.
}
4.4 从repilcas和condemned中找出第一个unhealthy的Pod
ordinal最小的unhealth pod
// find the first unhealthy Pod
for i := range replicas {
if !isHealthy(replicas[i]) {
unhealthy++
if ord := getOrdinal(replicas[i]); ord < firstUnhealthyOrdinal {
firstUnhealthyOrdinal = ord
firstUnhealthyPod = replicas[i]
}
}
}
for i := range condemned {
if !isHealthy(condemned[i]) {
unhealthy++
if ord := getOrdinal(condemned[i]); ord < firstUnhealthyOrdinal {
firstUnhealthyOrdinal = ord
firstUnhealthyPod = condemned[i]
}
}
}
判断healthy:
4.5 DeletionTimestamp不空为删除的pod,直接返回,monotonic是否设置并行,为设置则为串行
// If the StatefulSet is being deleted, don't do anything other than updating
// status.
if set.DeletionTimestamp != nil {
return &status, nil
}
monotonic := !allowsBurst(set)
4.6 遍历valid replicas
- pod Failed (pod.Status.Phase = Failed), 删除并new这个pod object(注意revisions匹配)pod.Status.Phase = "",pod还没有recreate,则Create,ParallelPodManagement = OrderedReady,则直接返回当前status。ParallelPodManagement = Parallel,则for下一个
- pod正在删除且ParallelPodManagement = OrderedReady,返回status
- pod不是RunningAndReady状态,且ParallelPodManagement = OrderedReady,返回status
- pod与statefulset的identity和storage是否匹配,不匹配则调用apiserver Update Stateful Pod进行updateIdentity和updateStorage(创建PVC)
// Examine each replica with respect to its ordinal
for i := range replicas {
// delete and recreate failed pods
if isFailed(replicas[i]) {
if err := ssc.podControl.DeleteStatefulPod(set, replicas[i]); err != nil {
return &status, err
}
if getPodRevision(replicas[i]) == currentRevision.Name {
status.CurrentReplicas--
} else if getPodRevision(replicas[i]) == updateRevision.Name {
status.UpdatedReplicas--
}
status.Replicas--
replicas[i] = newVersionedStatefulSetPod(
currentSet,
updateSet,
currentRevision.Name,
updateRevision.Name,
i)
}
// If we find a Pod that has not been created we create the Pod
if !isCreated(replicas[i]) {
}
if isTerminating(replicas[i]) && monotonic {
}
if !isRunningAndReady(replicas[i]) && monotonic {
}
}
4.7 遍历condemned replicas,index由大到小都被删除
- 清理isTerminating状态pod.Status.Phase == v1.PodFailed
- DeleteStatefulPod调用API接口删除pod
for target := len(condemned) - 1; target >= 0; target-- {
// wait for terminating pods to expire
if isTerminating(condemned[target]) {
// block if we are in monotonic mode
if monotonic {
return &status, nil
}
continue
}
// if we are in monotonic mode and the condemned target is not the first unhealthy Pod block
if !isRunningAndReady(condemned[target]) && monotonic && condemned[target] != firstUnhealthyPod {
return &status, nil
}
if err := ssc.podControl.DeleteStatefulPod(set, condemned[target]); err != nil {
return &status, err
}
if getPodRevision(condemned[target]) == currentRevision.Name {
status.CurrentReplicas--
} else if getPodRevision(condemned[target]) == updateRevision.Name {
status.UpdatedReplicas--
}
if monotonic {
return &status, nil
}
}
4.8 UpdateStrategy Type是OnDelete
- 如果UpdateStrategy Type是OnDelete,只有Pods被手动删除后,才会触发Recreate
// for the OnDelete strategy we short circuit. Pods will be updated when they are manually deleted.
if set.Spec.UpdateStrategy.Type == apps.OnDeleteStatefulSetStrategyType {
return &status, nil
}
4.9
- RollingUpdate:Partition不设置就相当于0,相当于全部pods进行更新
- pod revision不是updateRevision且不是正在删除的,则删除这个pod
- pod不是healthy的,直接返回状态
// we compute the minimum ordinal of the target sequence for a destructive update based on the strategy.
updateMin := 0
if set.Spec.UpdateStrategy.RollingUpdate != nil {
updateMin = int(*set.Spec.UpdateStrategy.RollingUpdate.Partition)
}
// we terminate the Pod with the largest ordinal that does not match the update revision.
for target := len(replicas) - 1; target >= updateMin; target-- {
// delete the Pod if it is not already terminating and does not match the update revision.
if getPodRevision(replicas[target]) != updateRevision.Name && !isTerminating(replicas[target]) {
glog.V(4).Infof("StatefulSet %s/%s terminating Pod %s for update",
set.Namespace,
set.Name,
replicas[target].Name)
err := ssc.podControl.DeleteStatefulPod(set, replicas[target])
status.CurrentReplicas--
return &status, err
}
// wait for unhealthy Pods on update
if !isHealthy(replicas[target]) {
glog.V(4).Infof(
"StatefulSet %s/%s is waiting for Pod %s to update",
set.Namespace,
set.Name,
replicas[target].Name)
return &status, nil
}
}
5. newVersionedStatefulSetPod函数
- 更新策略是RollingUpdate且Partition为0或者ordinal < Partition,则使用currentRevision创建该Pod Object。
- 其余使用updateRevision
func newVersionedStatefulSetPod(currentSet, updateSet *apps.StatefulSet, currentRevision, updateRevision string, ordinal int) *v1.Pod {
if currentSet.Spec.UpdateStrategy.Type == apps.RollingUpdateStatefulSetStrategyType &&
(currentSet.Spec.UpdateStrategy.RollingUpdate == nil && ordinal < int(currentSet.Status.CurrentReplicas)) ||
(currentSet.Spec.UpdateStrategy.RollingUpdate != nil && ordinal < int(*currentSet.Spec.UpdateStrategy.RollingUpdate.Partition)) {
pod := newStatefulSetPod(currentSet, ordinal)
setPodRevision(pod, currentRevision)
return pod
}
pod := newStatefulSetPod(updateSet, ordinal)
setPodRevision(pod, updateRevision)
return pod
}