github: https://github.com/cri-o/cri-o
Compatibility matrix: CRI-O ⬄ Kubernetes
Version - Branch | Kubernetes branch/version | Maintenance status |
---|---|---|
CRI-O 1.12.x - release-1.12 | Kubernetes 1.12 branch, v1.12.x | = |
CRI-O 1.13.x - release-1.13 | Kubernetes 1.13 branch, v1.13.x | = |
CRI-O 1.14.x - release-1.14 | Kubernetes 1.14 branch, v1.14.x | = |
CRI-O 1.15.x - release-1.15 | Kubernetes 1.15 branch, v1.15.x | = |
CRI-O HEAD - master | Kubernetes master branch | ✓ |
参考: K8sMeetup社区
初始化 defaultStoreOptions
func init() { defaultStoreOptions.RunRoot = "/var/run/containers/storage" defaultStoreOptions.GraphRoot = "/var/lib/containers/storage" defaultStoreOptions.GraphDriverName = "" ReloadConfigurationFile(defaultConfigFile, &defaultStoreOptions) }
1. main
1.1 DefaultConfig 函数
路径: internal/lib/config/config.go。DefaultStoreOptions 函数在以 root 权限(uid=0(root) gid=0(root) 组=0(root))运行时,使用的配置文件为 /etc/containers/storage.conf,可以通过 man 5 containers-storage.conf 查看详细说明
// DefaultConfig returns the default configuration for crio.
func DefaultConfig() (*Config, error) {
storeOpts, err := storage.DefaultStoreOptions(rootless.IsRootless(), rootless.GetRootlessUID())
if err != nil {
return nil, err
}
1.1.1 结构体 Config
// Config represents the entire set of configuration values that can be set for
// the server. This is intended to be loaded from a toml-encoded config file.
//
// It consists solely of embedded sections, so the fields of every section are
// reachable directly on a Config value (for example c.Root or c.RunRoot).
type Config struct {
// RootConfig holds the storage-related settings (Root, RunRoot, Storage,
// StorageOptions) and the log directory.
RootConfig
// APIConfig holds the listen address, the stream address/port and the gRPC
// message size limits.
APIConfig
// RuntimeConfig holds the runtime handlers, conmon settings and the
// per-container defaults.
RuntimeConfig
// ImageConfig holds the image transport, pause image and registry settings.
ImageConfig
// NetworkConfig holds the CNI configuration directory and plugin directories.
NetworkConfig
}
结构体包括 RootConfig,RunRoot 默认路径 /var/run/containers/storage,Root 默认路径 /var/lib/containers/storage
APIConfig,Listen 为 GRPC server 的监听地址,默认为 /var/run/crio/crio.sock
默认 GRPCMaxSendMsgSize:16777216, GRPCMaxRecvMsgSize:16777216
return &Config{
RootConfig: RootConfig{
Root: storeOpts.GraphRoot,
RunRoot: storeOpts.RunRoot,
Storage: storeOpts.GraphDriverName,
StorageOptions: storeOpts.GraphDriverOptions,
LogDir: "/var/log/crio/pods",
},
APIConfig: APIConfig{
Listen: CrioSocketPath,
StreamAddress: "127.0.0.1",
StreamPort: "0",
GRPCMaxSendMsgSize: defaultGRPCMaxMsgSize,
GRPCMaxRecvMsgSize: defaultGRPCMaxMsgSize,
},
1.1.2 RuntimeConfig 配置
默认的 runtime 为 runc,
RuntimeConfig: RuntimeConfig{
DefaultRuntime: defaultRuntime,
Runtimes: Runtimes{
defaultRuntime: {
RuntimePath: "",
RuntimeType: defaultRuntimeType,
RuntimeRoot: DefaultRuntimeRoot,
},
},
Conmon: "",
ConmonEnv: []string{
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
},
ConmonCgroup: "pod",
SELinux: selinuxEnabled(),
SeccompProfile: "",
ApparmorProfile: DefaultApparmorProfile,
CgroupManager: cgroupManager,
DefaultMountsFile: "",
PidsLimit: DefaultPidsLimit,
ContainerExitsDir: containerExitsDir,
ContainerAttachSocketDir: ContainerAttachSocketDir,
LogSizeMax: DefaultLogSizeMax,
LogToJournald: DefaultLogToJournald,
DefaultCapabilities: DefaultCapabilities,
LogLevel: "error",
DefaultSysctls: []string{},
DefaultUlimits: []string{},
AdditionalDevices: []string{},
},
1.1.3 ImageConfig 配置
以下默认值可以通过配置文件 /etc/crio/crio.conf 覆盖:默认的传输方式(DefaultTransport)为 docker://,默认的 pause 镜像为 k8s.gcr.io/pause:3.1
ImageConfig: ImageConfig{
DefaultTransport: defaultTransport,
GlobalAuthFile: "",
PauseImage: pauseImage,
PauseImageAuthFile: "",
PauseCommand: pauseCommand,
SignaturePolicyPath: "",
ImageVolumes: ImageVolumesMkdir,
Registries: []string{},
InsecureRegistries: []string{},
},
1.1.4 NetworkConfig 配置
配置 cni 默认路径为 /etc/cni/net.d/,默认的二进制可执行文件路径为 /opt/cni/bin/
NetworkConfig: NetworkConfig{
NetworkDir: cniConfigDir,
PluginDirs: []string{cniBinDir},
},
1.2 建立 GRPC server,默认的监听地址为 /var/run/crio/crio.sock
lis, err := server.Listen("unix", config.Listen)
if err != nil {
logrus.Fatalf("failed to listen: %v", err)
}
grpcServer := grpc.NewServer(
grpc.UnaryInterceptor(log.UnaryInterceptor()),
grpc.StreamInterceptor(log.StreamInterceptor()),
grpc.MaxSendMsgSize(config.GRPCMaxSendMsgSize),
grpc.MaxRecvMsgSize(config.GRPCMaxRecvMsgSize),
)
1.3 server.New 函数实例化 Server
service, err := server.New(ctx, systemContext, configPath, config)
if err != nil {
logrus.Fatal(err)
}
2. New 函数
路径 server/server.go
// New creates a new Server with the provided context, systemContext,
// configPath and configuration
func New(
ctx context.Context,
systemContext *types.SystemContext,
configPath string,
configIface libconfig.Iface,
) (*Server, error) {
if configIface == nil || configIface.GetData() == nil {
return nil, fmt.Errorf("provided configuration interface or its data is nil")
}
config := configIface.GetData()
2.1 创建目录 ContainerAttachSocketDir ContainerExitsDir
ContainerAttachSocketDir: /var/run/crio
ContainerExitsDir:/var/run/crio/exits
if err := os.MkdirAll(config.ContainerAttachSocketDir, 0755); err != nil {
return nil, err
}
// This is used to monitor container exits using inotify
if err := os.MkdirAll(config.ContainerExitsDir, 0755); err != nil {
return nil, err
}
2.2 实例化 ContainerServer
第 3 章节讲解
containerServer, err := lib.New(ctx, systemContext, configIface)
if err != nil {
return nil, err
}
2.3 InitCNI 函数
调用 initCNI 初始化 cni 网络插件,initCNI 函数使用默认的 DefaultExec 执行命令,newWatcher 函数监控配置目录,如果有变更则重新加载网络配置
// InitCNI creates a CNIPlugin from a CNI configuration directory (confDir)
// and one or more directories holding CNI plugin binaries (binDirs).
//
// Network requests fail until at least one valid CNI config file is present
// in confDir. When defaultNetName is non-empty, the config carrying that
// network name is used as the default CNI network, and container network
// operations fail until that config is present and valid.
//
// The call is forwarded to initCNI with a nil first argument and an empty
// second argument.
func InitCNI(defaultNetName string, confDir string, binDirs ...string) (CNIPlugin, error) {
	plugin, err := initCNI(nil, "", defaultNetName, confDir, binDirs...)
	return plugin, err
}
3. New 函数
路径 internal/lib/container_server.go
// New creates a new ContainerServer with options provided
func New(ctx context.Context, systemContext *types.SystemContext, configIface libconfig.Iface) (*ContainerServer, error) {
if configIface == nil {
return nil, fmt.Errorf("provided config is nil")
}
store, err := configIface.GetStore()
if err != nil {
return nil, err
}
config := configIface.GetData()
3.1 configIface 接口
实现在 internal/lib/config/config.go
// GetStore opens the container storage that corresponds to this
// configuration. The store options are taken from the RunRoot, Root, Storage
// and StorageOptions settings of c.
func (c *Config) GetStore() (cstorage.Store, error) {
	var opts cstorage.StoreOptions
	opts.RunRoot = c.RunRoot
	opts.GraphRoot = c.Root
	opts.GraphDriverName = c.Storage
	opts.GraphDriverOptions = c.StorageOptions
	return cstorage.GetStore(opts)
}
// GetData returns the Config behind an Iface. For *Config this is the
// receiver itself, so callers get the same instance rather than a copy.
func (c *Config) GetData() *Config {
return c
}
3.2 GetImageService 函数
实例化 imageService
// GetImageService returns an ImageServer that uses the passed-in store, and
// which will prepend the passed-in defaultTransport value to an image name if
// a name that's passed to its PullImage() method can't be resolved to an image
// in the store and can't be resolved to a source on its own.
func GetImageService(ctx context.Context, sc *types.SystemContext, store storage.Store, defaultTransport string, insecureRegistries, registries []string) (ImageServer, error) {
if store == nil {
var err error
storeOpts, err := storage.DefaultStoreOptions(rootless.IsRootless(), rootless.GetRootlessUID())
if err != nil {
return nil, err
}
store, err = storage.GetStore(storeOpts)
if err != nil {
return nil, err
}
}
is := &imageService{
store: store,
defaultTransport: defaultTransport,
indexConfigs: make(map[string]*indexInfo),
insecureRegistryCIDRs: make([]*net.IPNet, 0),
imageCache: make(map[string]imageCacheItem),
ctx: ctx,
}
3.3 设置 registries 配置
registries = [
'docker.io'
]
if len(registries) != 0 {
seenRegistries := make(map[string]bool, len(registries))
cleanRegistries := []string{}
for _, r := range registries {
if seenRegistries[r] {
continue
}
cleanRegistries = append(cleanRegistries, r)
seenRegistries[r] = true
}
is.unqualifiedSearchRegistries = cleanRegistries
} else {
systemRegistries, err := sysregistriesv2.UnqualifiedSearchRegistries(sc)
if err != nil {
return nil, err
}
is.unqualifiedSearchRegistries = systemRegistries
}
3.4 实例化 runtimeService,使用 image 服务
实现了接口 runtimeServer
// GetRuntimeService builds a RuntimeServer on top of the given image service.
// The returned service keeps both the image service and ctx for later use.
func GetRuntimeService(ctx context.Context, storageImageServer ImageServer) RuntimeServer {
	svc := new(runtimeService)
	svc.storageImageServer = storageImageServer
	svc.ctx = ctx
	return svc
}
3.4.1 包裹了 runtime 接口
// RuntimeServer wraps up various CRI-related activities into a reusable // implementation. type RuntimeServer interface { // CreatePodSandbox creates a pod infrastructure container, using the // specified PodID for the infrastructure container's ID. In the CRI // view of things, a sandbox is distinct from its containers, including // its infrastructure container, but at this level the sandbox is // essentially the same as its infrastructure container, with a // container's membership in a pod being signified by it listing the // same pod ID in its metadata that the pod's other members do, and // with the pod's infrastructure container having the same value for // both its pod's ID and its container ID. // Pointer arguments can be nil. Either the image name or ID can be // omitted, but not both. All other arguments are required. CreatePodSandbox(systemContext *types.SystemContext, podName, podID, imageName, imageAuthFile, imageID, containerName, metadataName, uid, namespace string, attempt uint32, idMappings *idtools.IDMappings, labelOptions []string) (ContainerInfo, error) // RemovePodSandbox deletes a pod sandbox's infrastructure container. // The CRI expects that a sandbox can't be removed unless its only // container is its infrastructure container, but we don't enforce that // here, since we're just keeping track of it for higher level APIs. RemovePodSandbox(idOrName string) error // GetContainerMetadata returns the metadata we've stored for a container. GetContainerMetadata(idOrName string) (RuntimeContainerMetadata, error) // SetContainerMetadata updates the metadata we've stored for a container. SetContainerMetadata(idOrName string, metadata *RuntimeContainerMetadata) error // CreateContainer creates a container with the specified ID. // Pointer arguments can be nil. Either the image name or ID can be // omitted, but not both. All other arguments are required. 
CreateContainer(systemContext *types.SystemContext, podName, podID, imageName, imageID, containerName, containerID, metadataName string, attempt uint32, idMappings *idtools.IDMappings, labelOptions []string) (ContainerInfo, error) // DeleteContainer deletes a container, unmounting it first if need be. DeleteContainer(idOrName string) error // StartContainer makes sure a container's filesystem is mounted, and // returns the location of its root filesystem, which is not guaranteed // by lower-level drivers to never change. StartContainer(idOrName string) (string, error) // StopContainer attempts to unmount a container's root filesystem, // freeing up any kernel resources which may be limited. StopContainer(idOrName string) error // GetWorkDir returns the path of a nonvolatile directory on the // filesystem (somewhere under the Store's Root directory) which can be // used to store arbitrary data that is specific to the container. It // will be removed automatically when the container is deleted. GetWorkDir(id string) (string, error) // GetRunDir returns the path of a volatile directory (does not survive // the host rebooting, somewhere under the Store's RunRoot directory) // on the filesystem which can be used to store arbitrary data that is // specific to the container. It will be removed automatically when // the container is deleted. GetRunDir(id string) (string, error) }
以上是比较繁杂的启动流程,启动前的其余配置初始化工作在此略过
第4 章节开始分析创建一个 pod ,从 CRI 发送 GRPC RunPodSandbox 请求
// RunPodSandbox creates and runs a pod-level sandbox. func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) { // platform dependent call return s.runPodSandbox(ctx, req) }
4. runPodSandbox 函数
路径 server/sandbox_run_linux.go
func (s *Server) runPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) {
const operation = "run_pod_sandbox"
defer func() {
recordOperation(operation, time.Now())
recordError(operation, err)
}()
4.1 CreatePodSandbox 创建 pod 的 sandbox
runtimeService 实现了 RuntimeServer 接口,实现路径为 internal/pkg/storage/runtime.go
podContainer, err := s.StorageRuntimeServer().CreatePodSandbox(s.systemContext,
name, id,
s.config.PauseImage,
s.config.PauseImageAuthFile,
"",
containerName,
req.GetConfig().GetMetadata().GetName(),
req.GetConfig().GetMetadata().GetUid(),
namespace,
attempt,
s.defaultIDMappings,
labelOptions)
4.1.1 createContainerOrPodSandbox 函数
校验参数:pod 名和 pod ID 均不能为空,镜像名和镜像 ID 不能同时为空,容器名不能为空;metadataName 为空时使用容器名代替
func (r *runtimeService) createContainerOrPodSandbox(systemContext *types.SystemContext, podName, podID, imageName, imageAuthFile, imageID, containerName, containerID, metadataName, uid, namespace string, attempt uint32, idMappings *idtools.IDMappings, labelOptions []string, isPauseImage bool) (ContainerInfo, error) {
var ref types.ImageReference
if podName == "" || podID == "" {
return ContainerInfo{}, ErrInvalidPodName
}
if imageName == "" && imageID == "" {
return ContainerInfo{}, ErrInvalidImageName
}
if containerName == "" {
return ContainerInfo{}, ErrInvalidContainerName
}
if metadataName == "" {
metadataName = containerName
}
4.1.2 CreateContainer 函数创建容器的结构体信息
实现为 github.com/containers/storage/store.go
rlstore.Create 创建一个 top 层,一个读写层
containerStore Create 方法创建容器的结构体信息 container *Container
container, err := r.storageImageServer.GetStore().CreateContainer(containerID, names, img.ID, "", string(mdata), &coptions)
if err != nil {
if metadata.Pod {
logrus.Debugf("failed to create pod sandbox %s(%s): %v", metadata.PodName, metadata.PodID, err)
} else {
logrus.Debugf("failed to create container %s(%s): %v", metadata.ContainerName, containerID, err)
}
return ContainerInfo{}, err
}
4.2 New 函数实例化 sandbox
// New builds a pod sandbox from the supplied values.
// A freshly created sandbox has an empty in-memory container store and its
// creation time set to the moment of the call; no infra container is set
// here. An infra container must be attached before the sandbox is added to
// the state. The returned error is always nil.
func New(id, namespace, name, kubeName, logDir string, labels, annotations map[string]string, processLabel, mountLabel string, metadata *pb.PodSandboxMetadata, shmPath, cgroupParent string, privileged bool, runtimeHandler, resolvPath, hostname string, portMappings []*hostport.PortMapping, hostNetwork bool) (*Sandbox, error) {
	sandbox := &Sandbox{
		id:             id,
		namespace:      namespace,
		name:           name,
		kubeName:       kubeName,
		logDir:         logDir,
		labels:         labels,
		annotations:    annotations,
		containers:     oci.NewMemoryStore(),
		processLabel:   processLabel,
		mountLabel:     mountLabel,
		metadata:       metadata,
		shmPath:        shmPath,
		cgroupParent:   cgroupParent,
		privileged:     privileged,
		runtimeHandler: runtimeHandler,
		resolvPath:     resolvPath,
		hostname:       hostname,
		portMappings:   portMappings,
		createdAt:      time.Now(),
		hostNetwork:    hostNetwork,
	}
	return sandbox, nil
}
4.3 使用 host network 模式
// set up namespaces
if hostNetwork {
err = g.RemoveLinuxNamespace(string(runtimespec.NetworkNamespace))
if err != nil {
return nil, err
}
}
4.4 为 sandbox 创建网络 namespace
else if s.config.ManageNetworkNSLifecycle {
// Create the sandbox network namespace
if err := sb.NetNsCreate(nil); err != nil {
return nil, err
}
defer func() {
if err == nil {
return
}
if netnsErr := sb.NetNsRemove(); netnsErr != nil {
log.Warnf(ctx, "Failed to remove networking namespace: %v", netnsErr)
}
}()
// Pass the created namespace path to the runtime
err = g.AddOrReplaceLinuxNamespace(string(runtimespec.NetworkNamespace), sb.NetNsPath())
if err != nil {
return nil, err
}
}
5. CreateContainer 函数
// createContainerPlatform runs the platform-specific preparation and then
// delegates to the runtime's CreateContainer.
//
// When the server has non-empty default ID mappings, the container's bundle
// path and mount point are chowned to the mapped root UID/GID and made
// accessible to that pair. The first failure aborts the operation and is
// returned wrapped with the offending path. infraContainer is not used here.
func (s *Server) createContainerPlatform(container, infraContainer *oci.Container, cgroupParent string) error {
	if mappings := s.defaultIDMappings; mappings != nil && !mappings.Empty() {
		root := mappings.RootPair()
		targets := []string{container.BundlePath(), container.MountPoint()}
		for _, target := range targets {
			if chownErr := os.Chown(target, root.UID, root.GID); chownErr != nil {
				return errors.Wrapf(chownErr, "cannot chown %s to %d:%d", target, root.UID, root.GID)
			}
			if accessErr := makeAccessible(target, root.UID, root.GID); accessErr != nil {
				return errors.Wrapf(accessErr, "cannot make %s accessible to %d:%d", target, root.UID, root.GID)
			}
		}
	}

	return s.Runtime().CreateContainer(container, cgroupParent)
}
5.1 newRuntimeImpl 函数
默认使用 OCI 类型的 runtime 实现;当 runtime 类型(RuntimeType)设置为 vm 时,则使用 VM 类型的实现
// newRuntimeImpl selects the RuntimeImpl to use for container c.
//
// The handler registered under the configured default runtime is the starting
// point. If the container names its own runtime handler, that handler is
// validated and used instead; a validation failure is returned to the caller.
// A handler of type RuntimeTypeVM yields the VM implementation, every other
// type falls back to the OCI implementation.
func (r *Runtime) newRuntimeImpl(c *Container) (RuntimeImpl, error) {
	handler := r.config.Runtimes[r.config.DefaultRuntime]

	// A container-specific handler takes precedence over the default one.
	if name := c.runtimeHandler; name != "" {
		override, err := r.ValidateRuntimeHandler(name)
		if err != nil {
			return nil, err
		}
		handler = override
	}

	switch handler.RuntimeType {
	case RuntimeTypeVM:
		return newRuntimeVM(handler.RuntimePath), nil
	default:
		// Anything that is not "vm" is served by the OCI implementation.
		return newRuntimeOCI(r, handler), nil
	}
}
5.1.1 runtime 类型为 vm 的情况,需要实例化 runtimeVM
runtimeVM 实现了 RuntimeImpl 接口,定义在 internal/oci/runtime_vm.go 中
// newRuntimeVM returns a runtimeVM for the runtime binary at path.
// Before constructing the instance it registers the type URLs of the
// runtime-spec types listed below. The returned value starts with a
// background context and an empty container map.
func newRuntimeVM(path string) RuntimeImpl {
	logrus.Debug("oci.newRuntimeVM() start")
	defer logrus.Debug("oci.newRuntimeVM() end")

	// FIXME: We need to register those types for now, but this should be
	// defined as a specific package that would be shared both by CRI-O and
	// containerd. This would allow shim implementation to import a single
	// package to do the proper registration.
	const (
		prefix   = "types.containerd.io"
		specRepo = "opencontainers/runtime-spec"
	)
	major := strconv.Itoa(rspec.VersionMajor)

	// Type URLs for commonly marshaled external types, registered in order.
	registrations := []struct {
		value interface{}
		name  string
	}{
		{&rspec.Spec{}, "Spec"},
		{&rspec.Process{}, "Process"},
		{&rspec.LinuxResources{}, "LinuxResources"},
		{&rspec.WindowsResources{}, "WindowsResources"},
	}
	for _, reg := range registrations {
		typeurl.Register(reg.value, prefix, specRepo, major, reg.name)
	}

	vm := &runtimeVM{
		path: path,
		ctx:  context.Background(),
		ctrs: make(map[string]containerInfo),
	}
	return vm
}
5.1.2 实例化默认的 runc runtime
// newRuntimeOCI returns a runtimeOCI bound to r and to the given handler's
// runtime path. The runtime root comes from the handler; when the handler
// leaves it empty, config.DefaultRuntimeRoot is used instead.
func newRuntimeOCI(r *Runtime, handler *config.RuntimeHandler) RuntimeImpl {
	root := handler.RuntimeRoot
	if root == "" {
		root = config.DefaultRuntimeRoot
	}

	impl := &runtimeOCI{
		Runtime: r,
		path:    handler.RuntimePath,
		root:    root,
	}
	return impl
}
5.2 CreateContainer runtime 为 vm
// CreateContainer creates a container.
func (r *runtimeVM) CreateContainer(c *Container, cgroupParent string) (err error) {
logrus.Debug("runtimeVM.createContainer() start")
defer logrus.Debug("runtimeVM.createContainer() end")
// Lock the container
c.opLock.Lock()
defer c.opLock.Unlock()
5.2.1 startRuntimeDaemon 函数启动 runtime daemon 服务
func (r *runtimeVM) startRuntimeDaemon(c *Container) error {
logrus.Debug("runtimeVM.startRuntimeDaemon() start")
defer logrus.Debug("runtimeVM.startRuntimeDaemon() end")
// Prepare the command to run
args := []string{"-id", c.ID()}
if logrus.GetLevel() == logrus.DebugLevel {
args = append(args, "-debug")
}
args = append(args, "start")
5.3 runtimeOCI 的 CreateContainer 方法
启动的命令为 /usr/libexec/crio/conmon --syslog
-c 48f06b6121b1482b80bb8d2bbd69058ca207091cdd4833a5f28eea74897ed8dd 容器ID
-n k8s_POD_mysql-hostpath-7d97c48d75-czprl_default_91f6b4a4-bd99-11e9-9aea-3497f6d36b76_0
-u 48f06b6121b1482b80bb8d2bbd69058ca207091cdd4833a5f28eea74897ed8dd 容器UUID
-r /usr/sbin/runc runtime 运行路径
-b /var/run/containers/storage/overlay-containers/48f06b6121b1482b80bb8d2bbd69058ca207091cdd4833a5f28eea74897ed8dd/userdata
-p /var/run/containers/storage/overlay-containers/48f06b6121b1482b80bb8d2bbd69058ca207091cdd4833a5f28eea74897ed8dd/userdata/pidfile 容器PID文件
-l /var/log/pods/91f6b4a4-bd99-11e9-9aea-3497f6d36b76/48f06b6121b1482b80bb8d2bbd69058ca207091cdd4833a5f28eea74897ed8dd.log
--exit-dir /var/run/crio/exits --socket-dir-path /var/run/crio --log-level info --runtime-arg --root=/run/runc
// CreateContainer creates a container.
func (r *runtimeOCI) CreateContainer(c *Container, cgroupParent string) (err error) {
var stderrBuf bytes.Buffer
parentPipe, childPipe, err := newPipe()
childStartPipe, parentStartPipe, err := newPipe()
if err != nil {
return fmt.Errorf("error creating socket pair: %v", err)
}
defer parentPipe.Close()
defer parentStartPipe.Close()
6. PullImage 函数
func (svc *imageService) PullImage(systemContext *types.SystemContext, imageName string, inputOptions *copy.Options) (types.ImageReference, error) {
policy, err := signature.DefaultPolicy(systemContext)
if err != nil {
return nil, err
}
policyContext, err := signature.NewPolicyContext(policy)
if err != nil {
return nil, err
}
总结:
本文分析了 crio 服务的启动和配置加载过程,服务主要包括 runtime 和 image 两部分;由于内容较多,部分细节从略
CreatePodSandbox 创建容器的配置信息
主要是生成配置文件,调用 conmon 启动命令,包括 runtime,主要逻辑在 kata-runtime 中