内核中的alternative宏

通过 alternative() 宏,内核可以在运行时(启动阶段以及加载模块时)根据当前 CPU 是否支持某些 feature,对内核代码进行在线优化(不关机、不换内核的情况下改写某些内核指令),以达到加速内核执行的目的。下面以 x86 为例描述其大概的实现过程。
1)alternative() 宏定义

在 arch/x86/include/asm/alternative-asm.h 定义了 ALTERNATIVE() 宏,如下所示:

/*
 * ALTERNATIVE - assemble \oldinstr in place and register \newinstr as its
 * replacement.
 *
 * \oldinstr: instruction(s) emitted at the current location
 * \newinstr: instruction(s) emitted into the .altinstr_replacement section
 * \feature:  value passed through to the .altinstructions entry
 *
 * Local labels: 140/141 bracket \oldinstr, 142 marks the end of the padding
 * that follows it, and 143/144 bracket \newinstr.
 */
.macro ALTERNATIVE oldinstr, newinstr, feature
140:
    \oldinstr
141:
    /*
     * Pad with 0x90 bytes (the x86 one-byte NOP) when \newinstr is longer
     * than \oldinstr, so the patch site can hold the replacement. The
     * comparison evaluates to -1 when true in GNU as, hence the leading
     * minus; when it is false the skip count is 0.
     */
    .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
142:
    
    /*
     * Record the patch site: original location, replacement location,
     * feature, original length including padding, replacement length and
     * padding length. NOTE(review): this presumably mirrors the field order
     * of struct alt_instr; confirm against altinstruction_entry.
     */
    .pushsection .altinstructions,"a"
    altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
    .popsection
    
    /* The replacement bytes live out of line in their own section. */
    .pushsection .altinstr_replacement,"ax"
143:
    \newinstr
144:
    .popsection
.endm

由于是根据 feature 改写指令,因此这里需要有 oldinstr 和 newinstr,还有 feature。宏首先在原位置放置 oldinstr;如果 newinstr 比 oldinstr 长,则通过 .skip 用 0x90(NOP)补齐两者的长度差,保证原位置有足够的空间容纳替换后的指令。
ALTERNATIVE() 宏主要做了以下两项工作:
a、将一个 struct alt_instr 实例放入 vmlinux ELF 文件的 .altinstructions section,该结构体存储了指令改写所依赖的必要信息,如下所示:

/*
 * One entry of the .altinstructions table: describes a single patch site
 * and its replacement. Both offsets are relative to the address of the
 * offset field itself, which is how apply_alternatives() resolves them.
 */
struct alt_instr {
    s32 instr_offset;   /* offset from this field to the original instruction */
    s32 repl_offset;    /* offset from this field to the replacement instruction */
    u16 cpuid;      /* CPU feature bit that enables the replacement */
    u8  instrlen;       /* length of the original instruction */
    u8  replacementlen; /* length of the replacement instruction */
    u8  padlen;     /* length of the build-time padding */
} __packed;

b、将上面的 newinstr 放入 .altinstr_replacement section

2)alternative() 宏调用

定义 alternative_input() 宏(位于 arch/x86/include/asm/alternative.h;这里用到的 ALTERNATIVE() 是该头文件中供 C 内联汇编使用的字符串版本,作用与上面的汇编宏相同):

/*
 * Inline-asm wrapper around ALTERNATIVE() that also takes input operands.
 * No output operands or clobbers are declared. A dummy "i" (0) input
 * occupies operand 0, so the inputs supplied by the caller are numbered
 * from %1.
 */
#define alternative_input(oldinstr, newinstr, feature, input...)    \
    asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature)   \
        : : "i" (0), ## input)

在 ./arch/x86/include/asm/processor.h 文件的 prefetch() 函数中调用 alternative_input():

alternative_input(BASE_PREFETCH, "prefetchnta %P1", X86_FEATURE_XMM, "m" (*(const char *)x));

3)在线改写指令
在 arch/x86/kernel/alternative.c 文件的 alternative_instructions() 函数中调用了 apply_alternatives() 函数,它实现了指令的在线改写:

apply_alternatives(__alt_instructions, __alt_instructions_end);

其中,__alt_instructions 和 __alt_instructions_end 分别是 .altinstructions section 的起始和结束位置,它们的引用方式是:

extern struct alt_instr __alt_instructions[], __alt_instructions_end[];

它们定义在 arch/x86/kernel/vmlinux.lds.S,如下所示:

/*
 * struct alt_instr entries. From the header (alternative.h):
 * "Alternative instructions for different CPU types or capabilities"
 * Think locking instructions on spinlocks.
 *
 * __alt_instructions and __alt_instructions_end take the location counter
 * just before and just after the collected .altinstructions input sections,
 * so together they bracket the whole table.
 */
 . = ALIGN(8);
 .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
     __alt_instructions = .;
     *(.altinstructions)
     __alt_instructions_end = .;
  }

apply_alternatives() 的定义如下,大体流程是遍历 .altinstructions section 中的所有 struct alt_instr 实例,分别对其进行处理:

/*
 * apply_alternatives - patch every alternative site described by a table.
 * @start: first struct alt_instr entry to process
 * @end:   one past the last entry to process
 *
 * For each entry whose feature bit is set on the boot CPU, the replacement
 * bytes are copied into a scratch buffer, relative CALL/JMP displacements
 * are fixed up for the new location, the tail is padded up to the original
 * length, and the result is written over the original instruction. Entries
 * whose feature is absent are left alone, apart from an optional pass over
 * their padding.
 */
void __init_or_module noinline apply_alternatives(struct alt_instr *start,
                          struct alt_instr *end)
{
    struct alt_instr *a;
    u8 *instr, *replacement;
    u8 insn_buff[MAX_PATCH_LEN];
 
    DPRINTK("alt table %px, -> %px", start, end);
    /*   
     * The scan order should be from start to end. A later scanned
     * alternative code can overwrite previously scanned alternative code.
     * Some kernel functions (e.g. memcpy, memset, etc) use this order to
     * patch code.
     *
     * So be careful if you want to change the scan order to any other
     * order.
     */
    for (a = start; a < end; a++) {
        int insn_buff_sz = 0; 
 
        /* Both offsets are relative to the address of their own field. */
        instr = (u8 *)&a->instr_offset + a->instr_offset;
        replacement = (u8 *)&a->repl_offset + a->repl_offset;
        /* The patched site must fit in the scratch buffer. */
        BUG_ON(a->instrlen > sizeof(insn_buff));
        /* The feature number must be a valid capability/bug bit index. */
        BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); 
        if (!boot_cpu_has(a->cpuid)) {
            /*
             * Feature absent: keep the original instruction. Only the
             * build-time padding is handed to optimize_nops(), and only
             * when it is longer than one byte.
             */
            if (a->padlen > 1) 
                optimize_nops(a, instr);
 
            continue;
        }    
 
        DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
            a->cpuid >> 5,
            a->cpuid & 0x1f,
            instr, instr, a->instrlen,
            replacement, a->replacementlen, a->padlen);
 
        DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
        DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
 
        /* Build the final bytes in the scratch buffer before patching. */
        memcpy(insn_buff, replacement, a->replacementlen);
        insn_buff_sz = a->replacementlen;
        
        /*   
         * 0xe8 is the opcode of a relative CALL. Its 32-bit displacement
         * was assembled for the replacement's address, so adjust it by the
         * distance between the replacement and the patch site.
         *   
         * Instruction length is checked before the opcode to avoid
         * accessing uninitialized bytes for zero-length replacements.
         */  
        if (a->replacementlen == 5 && *insn_buff == 0xe8) {
            *(s32 *)(insn_buff + 1) += replacement - instr;
            DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
                *(s32 *)(insn_buff + 1),
                (unsigned long)instr + *(s32 *)(insn_buff + 1) + 5);
        }    
             
        /* A replacement that starts with a jump gets its own fix-up. */
        if (a->replacementlen && is_jmp(replacement[0]))
            recompute_jump(a, instr, replacement, insn_buff);
             
        /*
         * A shorter replacement leaves a tail of the original site; have
         * add_nops() fill it so the whole instrlen bytes are rewritten.
         */
        if (a->instrlen > a->replacementlen) {
            add_nops(insn_buff + a->replacementlen,
                 a->instrlen - a->replacementlen);
            insn_buff_sz += a->instrlen - a->replacementlen;
        }    
        DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
             
        /* Write the assembled bytes over the original instruction. */
        text_poke_early(instr, insn_buff, insn_buff_sz);
    }        
}            

猜你喜欢

转载自blog.csdn.net/choumin/article/details/115108813
今日推荐