linux 内存管理---伙伴系统(六)

start_kernel()调用mm_init()进行伙伴系统的初始化。
/*
 * mm_init - run the memory-management initialisers in a fixed order.
 *
 * mem_init() runs first among the allocator set-up calls; the remaining
 * initialisers follow it in the order listed below.
 */
static void __init mm_init(void)
{
    /*
     * page_cgroup requires contiguous pages as memmap
     * and it's bigger than MAX_ORDER unless SPARSEMEM.
     */
    page_cgroup_init_flatmem();
    mem_init();                // buddy system initialisation
    kmem_cache_init();    // slab initialisation (per the original annotation)
    percpu_init_late();
    pgtable_cache_init();
    vmalloc_init();
}

/*
 * mem_init - hand boot-time memory to the buddy allocator and report totals.
 *
 * Sets max_mapnr and high_memory, releases the free bootmem pages (and, with
 * CONFIG_HIGHMEM, every valid high-memory RAM page) to the buddy allocator,
 * updates totalram_pages / totalhigh_pages / num_physpages and prints the
 * "Memory:" summary line.
 */
void __init mem_init(void)
{
    unsigned long codesize, reservedpages, datasize, initsize;
    unsigned long tmp, ram;

#ifdef CONFIG_HIGHMEM
#ifdef CONFIG_DISCONTIGMEM
#error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet"
#endif
    /* Highest page frame number that can be mapped. */
    max_mapnr = highend_pfn ? highend_pfn : max_low_pfn;
#else
    max_mapnr = max_low_pfn;
#endif
    high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);

    /*
     * Release the free bootmem pages to the buddy allocator; the return
     * value is the number of page frames released. Per the original
     * annotation, bootmem only manages low memory.
     */
    totalram_pages += free_all_bootmem();
    /*
     * Setup zeroed pages; whatever setup_zero_pages() returns is removed
     * from the available total.
     * NOTE(review): per the original annotation these pages are allocated
     * from the buddy allocator and are not physical page 0, which stays
     * reserved in bootmem together with the kernel image - confirm against
     * setup_zero_pages().
     */
    totalram_pages -= setup_zero_pages();

    /* Count the low-memory RAM page frames and how many are reserved. */
    reservedpages = ram = 0;
    for (tmp = 0; tmp < max_low_pfn; tmp++)
        if (page_is_ram(tmp) && pfn_valid(tmp)) {
            ram++;
            if (PageReserved(pfn_to_page(tmp)))
                reservedpages++;
        }
    num_physpages = ram;

#ifdef CONFIG_HIGHMEM
    /*
     * Release high memory to the buddy allocator. This loop only exists
     * when CONFIG_HIGHMEM is set, so without that option high memory is
     * never given to the buddy allocator.
     */
    for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
        struct page *page;

        /* Validate the pfn before turning it into a struct page. */
        if (!pfn_valid(tmp))
            continue;

        page = pfn_to_page(tmp);
        if (!page_is_ram(tmp)) {
            SetPageReserved(page);
            continue;
        }
        ClearPageReserved(page);
        init_page_count(page);
        __free_page(page);            /* release the high page frame to the buddy allocator */
        totalhigh_pages++;            /* count high page frames */
    }
    totalram_pages += totalhigh_pages;    /* available page frames */
    num_physpages += totalhigh_pages;     /* physical page frames */
#endif

    codesize =  (unsigned long) &_etext - (unsigned long) &_text;              /* kernel image code size */
    datasize =  (unsigned long) &_edata - (unsigned long) &_etext;             /* kernel image data size */
    initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;   /* kernel image init section size */

    /*
     * The total is taken from num_physpages rather than from the
     * low-memory-only counter `ram`, so that it covers the same memory
     * (low + high) as the free-page figure printed next to it.
     * Per the original annotation, reserved pages mainly cover the kernel
     * image (code, data, init), memory below the kernel entry address and
     * the initrd.
     */
    printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, "
           "%luk reserved, %luk data, %luk init, %luk highmem)\n",
           nr_free_pages() << (PAGE_SHIFT-10),
           num_physpages << (PAGE_SHIFT-10),
           codesize >> 10,
           reservedpages << (PAGE_SHIFT-10),
           datasize >> 10,
           initsize >> 10,
           totalhigh_pages << (PAGE_SHIFT-10));
}

/*
 * free_all_bootmem - release the free pages of every bootmem node.
 *
 * Walks bdata_list, calls free_all_bootmem_core() on each entry and
 * returns the sum of the page counts those calls report.
 */
unsigned long __init free_all_bootmem(void)
{
    bootmem_data_t *node;
    unsigned long released = 0;

    list_for_each_entry(node, &bdata_list, list) {
        released += free_all_bootmem_core(node);
    }

    return released;
}

/*
 * free_all_bootmem_core - release one node's unreserved bootmem pages.
 * @bdata: bootmem descriptor of the node
 *
 * Scans the node's bitmap, passes every unreserved page in
 * [node_min_pfn, node_low_pfn) to __free_pages_bootmem(), then releases the
 * pages that hold the bitmap itself.
 *
 * Bit 0 of the bitmap describes node_min_pfn, which need not be a multiple
 * of BITS_PER_LONG. The scan therefore re-bases each bitmap word so that
 * bit 0 of the working value always describes the current pfn; without
 * that, an unaligned node start makes the loop read the same word twice
 * and free the wrong (or the same) pages.
 *
 * Returns the number of pages released, bitmap pages included, or 0 when
 * the node has no bitmap.
 */
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
    struct page *page;
    unsigned long start, end, pages, cur, count = 0;

    if (!bdata->node_bootmem_map)
        return 0;

    start = bdata->node_min_pfn;    /* first page frame of the node */
    end = bdata->node_low_pfn;      /* end page frame of the node */

    while (start < end) {
        unsigned long *map, idx, vec;
        unsigned int shift;

        map = bdata->node_bootmem_map;
        idx = start - bdata->node_min_pfn;
        shift = idx & (BITS_PER_LONG - 1);
        /*
         * Inverted copy of the bitmap word: 1 = page free, 0 = page in
         * use. vec holds at most BITS_PER_LONG map bits and, after the
         * adjustment below, bit 0 corresponds to start.
         */
        vec = ~map[idx / BITS_PER_LONG];

        if (shift) {
            vec >>= shift;
            /* Pull in the low bits of the next word when it exists. */
            if (end - start >= BITS_PER_LONG)
                vec |= ~map[idx / BITS_PER_LONG + 1] <<
                    (BITS_PER_LONG - shift);
        }
        /*
         * If we have a properly aligned and fully unreserved
         * BITS_PER_LONG block of pages in front of us, free
         * it in one go.
         */
        if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) {
            int order = ilog2(BITS_PER_LONG);

            __free_pages_bootmem(pfn_to_page(start), order);
            count += BITS_PER_LONG;
            start += BITS_PER_LONG;
        } else {
            /* Free page by page up to the next aligned pfn. */
            cur = start;

            start = ALIGN(start + 1, BITS_PER_LONG);
            while (vec && cur != start) {
                if (vec & 1) {    /* page frame is free */
                    page = pfn_to_page(cur);
                    __free_pages_bootmem(page, 0);
                    count++;
                }
                vec >>= 1;
                ++cur;
            }
        }
    }

    /* Finally release the page frames occupied by the bitmap itself. */
    page = virt_to_page(bdata->node_bootmem_map);
    pages = bdata->node_low_pfn - bdata->node_min_pfn;
    pages = bootmem_bootmap_pages(pages);
    count += pages;
    while (pages--)
        __free_pages_bootmem(page++, 0);

    bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);

    return count;
}

/*
 * __free_pages_bootmem - give a block of boot-time pages to the buddy system.
 * @page:  first page of the block
 * @order: the block covers 1 << order pages
 *
 * Every page in the block has its reserved flag cleared and its count set
 * to 0; the block is then released through __free_pages().
 */
void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
{
    unsigned int count = 1 << order;
    unsigned int i = 0;

    prefetchw(page);
    while (i < count) {
        struct page *cur = page + i;

        i++;
        /* Prefetch the following page while one remains in the block. */
        if (i < count)
            prefetchw(cur + 1);
        __ClearPageReserved(cur);    /* drop the reserved flag */
        set_page_count(cur, 0);      /* count = 0 */
    }

    /*
     * Give the first page a reference (count 1 per the original annotation)
     * for the put_page_testzero() in __free_pages() to drop.
     * Per the original annotation, all pages start out reserved when the
     * node/zone/free_area structures are initialised, so the buddy system
     * holds no usable page before this release.
     */
    set_page_refcounted(page);
    __free_pages(page, order);
}

/*
 * __free_pages - drop a reference on a page block and free it if allowed.
 * @page:  first page of the block
 * @order: the block covers 1 << order pages
 *
 * Nothing is freed unless put_page_testzero() returns true. Order-0 pages
 * go to free_hot_cold_page(); larger blocks go to __free_pages_ok().
 */
void __free_pages(struct page *page, unsigned int order)
{
    /* Per the original annotation this decrements page->_count. */
    if (!put_page_testzero(page))
        return;

    if (order != 0) {
        /* Multi-page block: release to the buddy system. */
        __free_pages_ok(page, order);
        return;
    }

    /* Single page: release to the per-cpu cache (per the original annotation). */
    free_hot_cold_page(page, 0);
}

/*
 * __free_pages_ok - free a block of 1 << order pages via free_one_page().
 * @page:  first page of the block
 * @order: order of the block
 *
 * Returns without freeing when free_pages_prepare() returns zero.
 * The actual release runs with local interrupts disabled.
 */
static void __free_pages_ok(struct page *page, unsigned int order)
{
    unsigned long flags;
    /* Test-and-clear the mlocked flag before the page is prepared for freeing. */
    int wasMlocked = __TestClearPageMlocked(page);

    if (!free_pages_prepare(page, order))
        return;

    local_irq_save(flags);
    if (unlikely(wasMlocked))
        free_page_mlock(page);
    /* Account 1 << order pages under the PGFREE event. */
    __count_vm_events(PGFREE, 1 << order);
     free_one_page(page_zone(page), page, order,
                    get_pageblock_migratetype(page));
    local_irq_restore(flags);
}

/*
 * free_one_page - insert a page block into a zone's free lists.
 * @zone:        zone the block is released into
 * @page:        first page of the block
 * @order:       order of the block
 * @migratetype: migrate type of the free list to use
 *
 * Runs under zone->lock: resets the zone's all_unreclaimable and
 * pages_scanned fields, calls __free_one_page() and adds 1 << order to
 * the zone's NR_FREE_PAGES counter.
 */
static void free_one_page(struct zone *zone, struct page *page, int order,
                int migratetype)
{
    spin_lock(&zone->lock);
    zone->all_unreclaimable = 0;
    zone->pages_scanned = 0;

    __free_one_page(page, zone, order, migratetype);
     __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);        // account the free pages held by the buddy system
    spin_unlock(&zone->lock);
}

static inline void __free_one_page(struct page *page,
        struct zone *zone, unsigned int order,
        int migratetype)
{
    unsigned long page_idx;
    unsigned long combined_idx;
    unsigned long uninitialized_var(buddy_idx);
    struct page *buddy;

    if (unlikely(PageCompound(page)))
        if (unlikely(destroy_compound_page(page, order)))
            return;

    VM_BUG_ON(migratetype == -1);

    page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);

    VM_BUG_ON(page_idx & ((1 << order) - 1));
    VM_BUG_ON(bad_range(zone, page));

    while (order < MAX_ORDER-1) {
        buddy_idx = __find_buddy_index(page_idx, order);
        buddy = page + (buddy_idx - page_idx);
        if (!page_is_buddy(page, buddy, order))
            break;
        /*
         * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
         * merge with it and move up one order.
         */
        if (page_is_guard(buddy)) {
            clear_page_guard_flag(buddy);
            set_page_private(page, 0);
            __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
        } else {
            list_del(&buddy->lru);
            zone->free_area[order].nr_free--;
            rmv_page_order(buddy);
        }
        combined_idx = buddy_idx & page_idx;
        page = page + (combined_idx - page_idx);
        page_idx = combined_idx;
        order++;
    }
    set_page_order(page, order);

    /*
     * If this is not the largest possible page, check if the buddy
     * of the next-highest order is free. If it is, it's possible
     * that pages are being freed that will coalesce soon. In case,
     * that is happening, add the free page to the tail of the list
     * so it's less likely to be used soon and more likely to be merged
     * as a higher order page
     */
    if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) {
        struct page *higher_page, *higher_buddy;
        combined_idx = buddy_idx & page_idx;
        higher_page = page + (combined_idx - page_idx);
        buddy_idx = __find_buddy_index(combined_idx, order + 1);
        higher_buddy = page + (buddy_idx - combined_idx);
        if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
            list_add_tail(&page->lru,
                &zone->free_area[order].free_list[migratetype]);
            goto out;
        }
    }

     list_add(&page->lru, &zone->free_area[order].free_list[migratetype]);      //空闲也通过page->lru链表链接起来,migratetype表示迁移类型
out:
    zone->free_area[order].nr_free++;
}

伙伴系统中的空闲可用内存包括:
1.bootmem中的低端内存:标记为空闲的页框和标记本身占用的页框;
2.高端内存页框;

伙伴系统初始化完成之后就可以通过下列函数进行内存分配了:
/*
 * Allocate a single page frame: an order-0 request to __get_free_pages().
 * The comment sits above the macro because nothing may follow the
 * line-continuation backslash.
 */
#define __get_free_page(gfp_mask) \
        __get_free_pages((gfp_mask), 0)
__get_free_pages()          //分配多个页框
                                          
最后,start_kernel()会调用init_post(), 在init_post()中调用free_initmem()释放掉init段占用的内存
/*
 * free_initmem - release the memory occupied by the kernel's init section.
 *
 * Calls prom_free_prom_memory(), then passes the physical addresses of
 * __init_begin and __init_end to free_init_pages().
 */
void  free_initmem(void)
{
    prom_free_prom_memory();
    free_init_pages("unused kernel memory",
            __pa_symbol(&__init_begin),
            __pa_symbol(&__init_end));
}


上面mem_init()中会打印以下启动log:
Memory: 59176k/65536k available (2905k kernel code, 6360k reserved, 980k data, 1684k init, 0k highmem)

总的物理内存为:65536k
可用内存为:59176k
保留内存为:6360k = 2905k kernel code + 980k data + 1684k init + 内核入口地址前面的内存+initrd占用的内存
65536k=59176k+6360k

但是当我们用cat /proc/meminfo查看,发现却是这样的:
# cat /proc/meminfo
MemTotal: 60860 kB
MemFree: 47448 kB
Buffers: 0 kB
Cached: 7180 kB
SwapCached: 0 kB
...

这里的MemTotal=60860kB明显比上面内核启动时打印的可用内存(59176k)多,这是因为free_initmem()要到内核基本启动完成之后才会被调用,它把init段占用的内存也释放了出来,所以:
MemTotal=60860KB = 可用内存 + init段=59176k+1684k


参考文档:


猜你喜欢

转载自blog.csdn.net/whuzm08/article/details/80194569