CPU Steal time

CPU Steal time 是指虚拟机vm需要执行某个命令但是没有抢到物理cpu来执行的时间.
它的实现位于 arch/arm64/kernel/paravirt.c 中。
这个特性对应一个kconfig CONFIG_PARAVIRT,开了这个能提高系统性能.
开了这个config后，这个特性就使能了，但是这个特性应该只在guest kernel中使用，host的kernel中则可以关掉这个特性
/*
 * Defaults to true. pv_time_init() only enables the
 * paravirt_steal_rq_enabled static key while this flag is still set.
 */
static bool steal_acc = true;
/*
 * Early-parameter handler for "no-steal-acc": clears steal_acc.
 * The argument string is ignored and the handler always returns 0.
 */
static int __init parse_no_stealacc(char *arg)
{
	steal_acc = false;
	return 0;
}

/* Register the handler for the "no-steal-acc" command-line option. */
early_param("no-steal-acc", parse_no_stealacc);
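在内核命令行中加上 no-steal-acc 会把 steal_acc 置为 false。注意从下面的 pv_time_init 可以看到，这样只是不再使能 paravirt_steal_rq_enabled，paravirt_steal_enabled 仍然会被使能。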
这个特性的入口函数是pv_time_init
/*
 * Entry point that sets up paravirtualized steal-time support.
 *
 * Returns 0 without changing anything when has_pv_steal_clock() reports
 * false, returns the error from pv_time_init_stolen_time() when that call
 * fails, and returns 0 after installing the steal-clock callback and
 * enabling the static keys otherwise.
 */
int __init pv_time_init(void)
{
	int ret;

	/* Check whether the hypervisor layer supports this feature. */
	if (!has_pv_steal_clock())
		return 0;

	ret = pv_time_init_stolen_time();
	if (ret)
		return ret;

	/* The key step: install the callback used to read the steal clock. */
	pv_ops.time.steal_clock = pv_steal_clock;

	static_key_slow_inc(&paravirt_steal_enabled);
	/* Skipped when "no-steal-acc" was given on the command line. */
	if (steal_acc)
		static_key_slow_inc(&paravirt_steal_rq_enabled);

	pr_info("using stolen time PV\n");

	return 0;
}
设置回调函数，kernel中就会调用paravirt_steal_clock 来计算steal time

/*
 * Return the steal-clock reading for @cpu by calling through the
 * pv_ops.time.steal_clock function pointer, which pv_time_init() sets to
 * pv_steal_clock.
 */
static inline u64 paravirt_steal_clock(int cpu)
{
	return pv_ops.time.steal_clock(cpu);
}


最终是调用steal_account_process_time 来计算steal time
/*
 * Account the steal time that has accumulated since the last call,
 * clamped to @maxtime, and return the amount that was accounted.
 *
 * Always returns 0 when CONFIG_PARAVIRT is not set or when the
 * paravirt_steal_enabled static key is off.
 */
static __always_inline u64 steal_account_process_time(u64 maxtime)
{
#ifdef CONFIG_PARAVIRT
	u64 delta;

	if (!static_key_false(&paravirt_steal_enabled))
		return 0;

	/* Current steal clock minus what this run queue already accounted. */
	delta = paravirt_steal_clock(smp_processor_id());
	delta -= this_rq()->prev_steal_time;

	/* Never account more than the caller allows in one go. */
	if (delta > maxtime)
		delta = maxtime;

	account_steal_time(delta);
	/* Remember only the accounted part; any excess is left for later calls. */
	this_rq()->prev_steal_time += delta;

	return delta;
#else
	return 0;
#endif
}

调用点在 account_process_tick 中：它先调用 steal_account_process_time 得到 steal time，
然后从本次 tick 的 cputime 中减去 steal time，再把剩余的时间交给 account_user_time/account_system_time/account_idle_time 去统计。
/*
 * Account one tick's worth of CPU time for task @p.
 *
 * @p:         the task the tick is charged to
 * @user_tick: non-zero when the tick should be charged as user time
 *
 * Nothing is accounted here when vtime_accounting_enabled_this_cpu()
 * returns true. When sched_clock_irqtime is set the work is handed to
 * irqtime_account_process_tick(). Otherwise steal time is accounted first
 * and only the remainder of the tick is charged as user, system or idle
 * time.
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
	struct rq *rq = this_rq();
	u64 tick_ns = TICK_NSEC;
	u64 stolen;

	if (vtime_accounting_enabled_this_cpu())
		return;

	if (sched_clock_irqtime) {
		irqtime_account_process_tick(p, user_tick, rq, 1);
		return;
	}

	stolen = steal_account_process_time(ULONG_MAX);

	/* The whole tick (or more) was stolen: nothing is left to charge. */
	if (stolen >= tick_ns)
		return;

	tick_ns -= stolen;

	if (user_tick) {
		account_user_time(p, tick_ns);
		return;
	}

	/* Idle only if the idle task is running and irq_count() equals HARDIRQ_OFFSET. */
	if ((p == rq->idle) && (irq_count() == HARDIRQ_OFFSET))
		account_idle_time(tick_ns);
	else
		account_system_time(p, HARDIRQ_OFFSET, tick_ns);
}
这里减去 steal time 后，就可以通过 top 看到更真实的 cpu 占用率，因为统计到 user/system/idle 的时间里已经不包含 steal time 了。

最后看看hyper层是怎么计算steal time的
这个代码在virt/kvm/arm/pvtime.c中
/*
 * Update the accumulated stolen time of @vcpu and write it into the guest.
 *
 * Does nothing when the vCPU's steal base address is GPA_INVALID.
 * Otherwise the growth of current->sched_info.run_delay since the previous
 * update is added to the running total, and the total is stored as a
 * little-endian 64-bit value in the stolen_time field of the
 * struct pvclock_vcpu_stolen_time located at that base address.
 */
void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	u64 steal;
	__le64 steal_le;
	u64 offset;
	int idx;
	u64 base = vcpu->arch.steal.base;

	if (base == GPA_INVALID)
		return;

	/* Let's do the local bookkeeping */
	steal = vcpu->arch.steal.steal;
	/*
	 * The steal-time formula: add the run_delay accumulated since the
	 * last update (presumably the time this thread spent waiting to
	 * run; confirm against the sched_info definition).
	 */
	steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal;
	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
	vcpu->arch.steal.steal = steal;

	steal_le = cpu_to_le64(steal);
	/* The guest write is done inside an SRCU read-side section on kvm->srcu. */
	idx = srcu_read_lock(&kvm->srcu);
	offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
	kvm_put_guest(kvm, base + offset, steal_le, u64);
	srcu_read_unlock(&kvm->srcu, idx);
}
tiantao2012
原创文章 1453 获赞 74 访问量 151万+
关注他的留言板
猜你喜欢