通过一个小实验认识Linux vDSO

这里不再解释vDSO的概念,而直接谈其意义:

  • vDSO类似一个信息公告板,用户可以直取所需,而无需为此办理任何手续。
  • vDSO相当于内核直接暴露出来的一个C库,作为GLIBC的补充。

类似gettimeofday之类的调用,每次都陷入内核去拿一个时间戳,显得有点昂贵了,不如内核把时间戳放在一个公共的可以暴露给任何用户的地方,用户自己去看就行了,这是vDSO的典型用例。

为了简单化描述,我们关闭ASLR:

[root@localhost ~]# sysctl -w kernel.randomize_va_space=0

随便打开一个ping程序,获取其/proc/pid/smaps中vdso的map区间:

7ffff7ffa000-7ffff7ffc000 r-xp 00000000 00:00 0                          [vdso]
Size:                  8 kB
...

我们将其dd出来:

[root@localhost ~]# dd if=/proc/3688/mem of=./vdso.dd obs=1 bs=1 skip=140737354113024 count=8192

随后我们看看它是什么:

[root@localhost ~]# file ./vdso.dd
./vdso.dd: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, BuildID[sha1]=09be88363f7ca8b05e2cb54a82d16bec2e840186, stripped

那么,接下来可以objdump了,就像对待普通的动态链接库一样:

[root@localhost ~]# objdump -T vdso.dd

vdso.dd:     文件格式 elf64-x86-64

DYNAMIC SYMBOL TABLE:
ffffffffff700354 l    d  .eh_frame_hdr	0000000000000000              .eh_frame_hdr
ffffffffff700700  w   DF .text	000000000000059d  LINUX_2.6   clock_gettime
0000000000000000 g    DO *ABS*	0000000000000000  LINUX_2.6   LINUX_2.6
ffffffffff700ca0 g    DF .text	00000000000002d5  LINUX_2.6   __vdso_gettimeofday
ffffffffff700fa0 g    DF .text	000000000000003d  LINUX_2.6   __vdso_getcpu
ffffffffff700ca0  w   DF .text	00000000000002d5  LINUX_2.6   gettimeofday
ffffffffff700f80  w   DF .text	0000000000000016  LINUX_2.6   time
ffffffffff700fa0  w   DF .text	000000000000003d  LINUX_2.6   getcpu
ffffffffff700700 g    DF .text	000000000000059d  LINUX_2.6   __vdso_clock_gettime
ffffffffff700f80 g    DF .text	0000000000000016  LINUX_2.6   __vdso_time

看看,看看,里面竟都是些什么东西,竟是一些时间公告函数啊,这意味着如果你想获取时间,调这里的函数就好了,我们看看最简单的time系统调用是如何来获取时间的,下面是对待vdso.dd文件的objdump -D的结果:

ffffffffff700f80 <__vdso_time@@LINUX_2.6>:
ffffffffff700f80:   55                      push   %rbp
ffffffffff700f81:   48 85 ff                test   %rdi,%rdi
ffffffffff700f84:   48 8b 04 25 a8 f0 5f    mov    0xffffffffff5ff0a8,%rax
ffffffffff700f8b:   ff
ffffffffff700f8c:   48 89 e5                mov    %rsp,%rbp
ffffffffff700f8f:   74 03                   je     ffffffffff700f94 <__vdso_time@@LINUX_2.6+0x14>
ffffffffff700f91:   48 89 07                mov    %rax,(%rdi)
ffffffffff700f94:   5d                      pop    %rbp
ffffffffff700f95:   c3                      retq

很显然,并没有调用任何系统调用,而是直接从地址0xffffffffff5ff0a8处拿到了时间,那么地址0xffffffffff5ff0a8一定就是内核映射到用户态的时间公告板的位置了。

记住地址0xffffffffff5ff0a8,用户态的分析到此告一段落,我们进入内核去看一看。

首先从/proc/kallsyms中查到vdso的位置:

ffffffff81941000 D vdso_start
ffffffff819424b0 D vdso_end

其次我们找到内核时间公告板vsyscall_gtod_data的位置:

ffffffff81a75080 D vsyscall_gtod_data

我们看一下该公告板的值:

crash> struct vsyscall_gtod_data.wall_time_sec ffffffff81a75080
  wall_time_sec = 1600912854
crash> struct vsyscall_gtod_data.wall_time_sec ffffffff81a75080
  wall_time_sec = 1600912856
crash> struct vsyscall_gtod_data.wall_time_sec ffffffff81a75080
  wall_time_sec = 1600912857

显然,公告板的wall_time_sec字段就是返回给time的值了。下面我们找到它的地址:

crash> struct vsyscall_gtod_data ffffffff81a75080 -o
struct vsyscall_gtod_data {
  [ffffffff81a75080] seqcount_t seq;
        struct {
            int vclock_mode;
            cycle_t cycle_last;
            cycle_t mask;
            u32 mult;
            u32 shift;
  [ffffffff81a75088] } clock;
  [ffffffff81a750a8] time_t wall_time_sec;
  [ffffffff81a750b0] u64 wall_time_snsec;
  [ffffffff81a750b8] u64 monotonic_time_snsec;
  [ffffffff81a750c0] time_t monotonic_time_sec;
  [ffffffff81a750c8] struct timezone sys_tz;
  [ffffffff81a750d0] struct timespec wall_time_coarse;
  [ffffffff81a750e0] struct timespec monotonic_time_coarse;
}

嗯,就是0xffffffff81a750a8了。它就是映射到0xffffffffff5ff0a8暴露给用户态的那个地址了。

我们接下来证实这一点:

  • 修改time函数读取的地址,让time调用返回0。

我们再看公告板:

crash> struct vsyscall_gtod_data ffffffff81a75080
...
  sys_tz = {
    tz_minuteswest = 0,
    tz_dsttime = 0
  },

让time函数改为读取sys_tz所在的地址怎样?这个值是一直为0的,因此我们期望的就是time返回0。

为此,我们首先拿到sys_tz和wall_time_sec之间的偏移:

crash> eval ffffffff81a750c8-ffffffff81a750a8
hexadecimal: 20
    decimal: 32
      octal: 40

因此,我们只要把vdso的time函数代码改掉即可:

ffffffffff700f84:   48 8b 04 25 a8 f0 5f    mov    0xffffffffff5ff0a8,%rax

改为:

ffffffffff700f84:   48 8b 04 25 c8 f0 5f    mov    0xffffffffff5ff0c8,%rax

即将time函数中偏移为8的字节(从0开始计数,也就是第9个字节)由0xa8改成0xc8即可。

通过模式匹配,可以拿到time函数在vdso页面的偏移:

     f80:   55                      push   rbp
     f81:   48 85 ff                test   rdi,rdi
     f84:   48 8b 04 25 a8 f0 5f    mov    rax,QWORD PTR ds:0xffffffffff5ff0a8
     f8b:   ff
     f8c:   48 89 e5                mov    rbp,rsp
     f8f:   74 03                   je     0xf94
     f91:   48 89 07                mov    QWORD PTR [rdi],rax
     f94:   5d                      pop    rbp
     f95:   c3                      ret

即0xf80.

那么用内核中vdso_start的地址0xffffffff81941000加上偏移0xf80,得到的0xffffffff81941f80便是time函数在内核中的地址了:

unsigned char *addr = (unsigned char *)0xffffffff81941f80;
addr[8] = 0xc8;

在修改之前,我们先编程验证:

#include <time.h>
#include <stdio.h>

typedef time_t  (*time_func)(time_t *);
int main(int argc, char *argv[])
{
    
    
    time_t tloc;
    // 直接从地址拿值
	unsigned long *p = (unsigned long *)0xffffffffff5ff0a8;
	// 通过函数拿值
    time_func func = (time_func)0x7ffff7ffaf80;

    func(&tloc);
    printf("%ld\n", tloc);
	printf("%lu\n", *p);
}

预期的结果应该是两种方式获取的是同一个值:

[root@localhost ~]# ./a.out
1600923922
1600923922
[root@localhost ~]# ./a.out
1600923923
1600923923
[root@localhost ~]#

下面将内核页面对应的指令修改之:

[root@localhost ~]# cat modtime.stp
#!/usr/local/bin/stap -g

/*
 * Overwrite a single byte of the vDSO time() code inside the kernel image.
 * val: the new byte value; only its low 8 bits are used.
 */
function modtime(val:long)
%{
	/* Kernel address of time(): vdso_start (0xffffffff81941000) + 0xf80. */
	unsigned char *time_code = (unsigned char *)0xffffffff81941f80;

	/* Offset 8 holds the low byte of the address operand of the mov. */
	time_code[8] = (unsigned char)STAP_ARG_val;
%}

# Runs once at script start: patch the byte passed as the first
# command-line argument, then terminate the script immediately.
probe begin
{
	modtime($1);
	exit();
}

执行之:

[root@localhost ~]# ./modtime.stp 0xc8
[root@localhost ~]# ./a.out
0
1600924228
[root@localhost ~]# ./a.out
0
1600924229
[root@localhost ~]# ./modtime.stp 0xa8
[root@localhost ~]# ./a.out
1600924238
1600924238
[root@localhost ~]#

当修改了vdso页面的指令后,所有调用time的进程都将异常,这是很显然的:

top - 08:00:00 up 42 min,  3 users,  load average: 0.00, 0.00, 0.00
Tasks: 114 total,   1 running, 113 sleeping,   0 stopped,   0 zombie
%Cpu(s):  0.0 us,  0.0 sy,  0.0 ni,100.0 id,  0.0 wa,  0.0 hi,  0.0 si,  0.0 st
KiB Mem :        0 total,        0 free,        0 used,        0 buff/cache
KiB Swap:        0 total,        0 free,        0 used.        0 avail Mem

  PID USER      PR  NI    VIRT    RES    SHR S %CPU %MEM     TIME+ COMMAND
    1 root      20   0   51696   3808   2492 S  0.0  inf   0:01.29 systemd
    2 root      20   0       0      0      0 S  0.0 -nan   0:00.00 kthreadd
    3 root      20   0       0      0      0 S  0.0 -nan   0:00.00 ksoftirqd/0
    7 root      rt   0       0      0      0 S  0.0 -nan   0:00.01 migration/0
    8 root      20   0       0      0      0 S  0.0 -nan   0:00.00 rcu_bh
    9 root      20   0       0      0      0 S  0.0 -nan   0:00.00 rcuob/0
   10 root      20   0       0      0      0 S  0.0 -nan   0:00.00 rcuob/1

值得一提的是,在vdso之前,vsyscall机制也是类似,只是说它仅仅提供了一种map,而没有抽象出动态链接的含义,因此也就无法享受ASLR带来的安全保护了。


浙江温州皮鞋湿,下雨进水不会胖。

猜你喜欢

转载自blog.csdn.net/dog250/article/details/108807183