Linux kernel中ktime_get()方法获取的当前时间比之前的时间早的debug code

在Linux kernel的测试过程中，我们发现ktime_get()获得的当前时间比之前获得的时间还要早（即时间出现了回退），因此我们需要做一些debug。下面介绍一下debug过程中遇到的cpu同步问题。
Linux kernel 中ktime_get()的实现如下：

/*
 * ktime_get - return tkr_mono.base plus the nanoseconds reported by
 * timekeeping_get_ns() for tk_core.timekeeper, combined with ktime_add_ns().
 *
 * Emits a WARN_ON if timekeeping_suspended is set at the time of the call.
 */
ktime_t ktime_get(void)
{
        struct timekeeper *tk = &tk_core.timekeeper;
        unsigned int seq;
        ktime_t base;
        u64 nsecs;

        WARN_ON(timekeeping_suspended);

        /*
         * Sample base and nsecs inside a read section on tk_core.seq. The
         * loop repeats for as long as read_seqcount_retry() asks for another
         * pass, so the pair used below comes from a pass it accepted.
         */
        do {
                seq = read_seqcount_begin(&tk_core.seq);
                base = tk->tkr_mono.base;
                nsecs = timekeeping_get_ns(&tk->tkr_mono);

        } while (read_seqcount_retry(&tk_core.seq, seq));

        return ktime_add_ns(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get);

为了debug这个问题，我们对ktime_get()做如下修改：

/*
 * Value (in ns, as returned by ktime_to_ns()) of the most recent timestamp
 * handed out by the debug ktime_get() below. One plain variable shared by
 * every caller.
 */
static u64 last_ktime;

/*
 * ktime_get - debug variant that BUG()s when the timestamp it is about to
 * return is not greater than the previously recorded one.
 *
 * Returns tkr_mono.base plus timekeeping_get_ns(), like the unmodified
 * function, and records that value in last_ktime.
 *
 * NOTE: last_ktime is read for the comparison and written afterwards without
 * any locking or atomic access, so callers running at the same time can
 * interleave between the clock read and the comparison. That behaviour is
 * intentionally left untouched here.
 */
ktime_t ktime_get(void)
{
        struct timekeeper *tk = &tk_core.timekeeper;
        unsigned int seq;
        ktime_t base, cur_ktime;
        s64 nsecs;
        u64 tmp;

        WARN_ON(timekeeping_suspended);

        /* Retry until read_seqcount_retry() accepts the sampled pair. */
        do {
                seq = read_seqcount_begin(&tk_core.seq);
                base = tk->tkr_mono.base;
                nsecs = timekeeping_get_ns(&tk->tkr_mono);

        } while (read_seqcount_retry(&tk_core.seq, seq));

        /* Base part alone, reported in the failure message as "tmp". */
        tmp = ktime_to_ns(base);
        cur_ktime = ktime_add_ns(base, nsecs);
        if (unlikely(last_ktime >= ktime_to_ns(cur_ktime))) {
                printk("failed to get ktime, last ktime is %llu, "
                        "current ktime is %llu, nsecs is %lld, "
                        "timekeeping_suspended is %d, tmp is %llu\n", last_ktime,
                         ktime_to_ns(cur_ktime), nsecs, timekeeping_suspended,
                        tmp);
                BUG();
        }
        last_ktime = ktime_to_ns(cur_ktime);

        return cur_ktime;
}
EXPORT_SYMBOL_GPL(ktime_get);

则我们可能会引起Linux kernel panic，并得到如下信息：

[   41.502091] failed to get ktime, last ktime is 41502064202, current ktime is 41502064150

原因：
last_ktime是全局变量，所有的cpu都是对同一个last_ktime进行操作。
假如有两个CPU：cpu0, cpu1，cpu0正在执行：

if (unlikely(last_ktime >= ktime_to_ns(cur_ktime))) {

期望cur_ktime的值与上一次cpu0取到的last_ktime的值进行比较，而cpu1刚好执行完：

last_ktime = ktime_to_ns(cur_ktime);

则全局变量last_ktime此时被最新的值更新了，因此就出现了last_ktime 大于 cur_ktime的情况。
因此我们又对ktime_get()方法做了如下修改（将last_ktime改为percpu变量）：

/*
 * Per-CPU record of the last timestamp (in ns, as returned by ktime_to_ns())
 * handed out by the debug ktime_get() below.
 */
static DEFINE_PER_CPU(u64, last_ktime);

/*
 * ktime_get - debug variant that compares each new timestamp against the
 * value previously recorded in this CPU's last_ktime slot and BUG()s when
 * the new one is smaller.
 *
 * The comparison is only performed once the new timestamp exceeds
 * 20000000000 ns. Returns tkr_mono.base plus timekeeping_get_ns(), like the
 * unmodified function.
 *
 * NOTE(review): this_cpu_ptr() is used without disabling preemption or
 * migration in this function; confirm that every caller context makes that
 * safe, or switch to get_cpu_ptr()/put_cpu_ptr().
 */
ktime_t ktime_get(void)
{
        struct timekeeper *tk = &tk_core.timekeeper;
        unsigned int seq;
        ktime_t base, cur_ktime;
        s64 nsecs;
        u64 *last_ktime_tmp, tmp = 0;

        WARN_ON(timekeeping_suspended);

        last_ktime_tmp = this_cpu_ptr(&last_ktime);

        /* Retry until read_seqcount_retry() accepts the sampled pair. */
        do {
                seq = read_seqcount_begin(&tk_core.seq);
                base = tk->tkr_mono.base;
                nsecs = timekeeping_get_ns(&tk->tkr_mono);

        } while (read_seqcount_retry(&tk_core.seq, seq));

        /* Base part alone, reported in the failure message as "tmp". */
        tmp = ktime_to_ns(base);
        cur_ktime = ktime_add_ns(base, nsecs);

        /* Both conditions in one braced test; same logic as the nested ifs. */
        if (ktime_to_ns(cur_ktime) > 20000000000LL &&
            unlikely((*last_ktime_tmp) > ktime_to_ns(cur_ktime))) {
                printk("failed to get ktime, last ktime is %llu, "
                        "current ktime is %llu, nsecs is %lld, "
                        "timekeeping_suspended is %d, tmp is %llu\n", *last_ktime_tmp,
                         ktime_to_ns(cur_ktime), nsecs, timekeeping_suspended,
                        tmp);
                BUG();
        }

        /*
         * Store the new value INTO the per-CPU slot. Assigning to the pointer
         * itself would leave the slot untouched, and taking the address of
         * the ktime_to_ns() result does not compile.
         */
        *last_ktime_tmp = ktime_to_ns(cur_ktime);

        return cur_ktime;
}
EXPORT_SYMBOL_GPL(ktime_get);

将last_ktime改为percpu变量，我们解决了CPU之间的同步问题，但是又遇到其他的问题：
我们依旧假设当前有两个CPU：cpu0, cpu1，假设它们调用ktime_get()的顺序如下：cpu0: ktime_get()—>cpu1:ktime_get()—>cpu0:ktime_get()，假设依次获得的last_ktime为：cpu0:last_ktime=41502064202—>cpu1:last_ktime=41502064302—>cpu0:last_ktime=41502064276。此时cpu0后获取到的时间（41502064276）比cpu1先获取到的时间（41502064302）要小，即时间发生了回退；但是因为last_ktime是percpu变量，cpu0只会与自己上一次记录的41502064202进行比较，因此我们并不能及时发现当前时间比之前的时间早的问题。
最终我们选择使用如下修改，该修改可以准确判断当前时间比之前的时间早的问题，同时又不会引发cpu同步问题：

/*
 * Last timestamp (in ns, as returned by ktime_to_ns()) stored by the debug
 * ktime_get() below; a single 64-bit atomic shared by every caller.
 */
static atomic64_t last_ktime;

/*
 * ktime_get - debug variant that snapshots last_ktime BEFORE sampling the
 * clock, then BUG()s if the freshly computed timestamp is smaller than that
 * snapshot. The new timestamp is stored back with atomic64_set().
 *
 * Returns tkr_mono.base plus timekeeping_get_ns(), like the unmodified
 * function.
 */
ktime_t ktime_get(void)
{
        struct timekeeper *tk = &tk_core.timekeeper;
        ktime_t base, cur_ktime;
        unsigned int seq;
        u64 prev_ns, base_ns = 0;
        s64 nsecs;
        s64 now_ns;

        WARN_ON(timekeeping_suspended);

        /*
         * Take the reference value first; barrier() keeps the compiler from
         * moving this load past the clock sampling that follows.
         */
        prev_ns = atomic64_read(&last_ktime);
        barrier();

        /* Retry until read_seqcount_retry() accepts the sampled pair. */
        do {
                seq = read_seqcount_begin(&tk_core.seq);
                base = tk->tkr_mono.base;
                nsecs = timekeeping_get_ns(&tk->tkr_mono);

        } while (read_seqcount_retry(&tk_core.seq, seq));

        base_ns = ktime_to_ns(base);
        cur_ktime = ktime_add_ns(base, nsecs);
        now_ns = ktime_to_ns(cur_ktime);

        if (unlikely(prev_ns > now_ns)) {
                printk("failed to get ktime, last ktime is %llu, "
                        "current ktime is %llu, nsecs is %lld, "
                        "timekeeping_suspended is %d, tmp is %llu\n",
                        prev_ns, now_ns, nsecs, timekeeping_suspended,
                        base_ns);
                BUG();
        }

        atomic64_set(&last_ktime, now_ns);

        return cur_ktime;
}
EXPORT_SYMBOL_GPL(ktime_get);

Linux kernel中ktime_get()方法获取的当前时间比之前的时间早的debug code

猜你喜欢