结构体bootmem_data
使用内存位图来表示内存页的使用情况:
用1bit表示1个页的使用情况,数值意义:
1-----------表示相应的物理页框已经使用,不能再分配
0-----------表示相应的物理页框没有被使用,可以分配
例子:
用于保存低端内存的使用情况,用1bit表示1个页的使用情况。比如第0、3、9个页对应的位为0(可以分配),其余页对应的位为1(已被使用),则位图(最低位对应第0页)为:
1101 1111 0110=0xdf6
25 /*
26 * node_bootmem_map is a map pointer - the bits represent all physical
27 * memory pages (including holes) on the node.
28 */
29 typedef struct bootmem_data {
30 unsigned long node_boot_start; //低端内存的起始地址0x0
31 unsigned long node_low_pfn; //低端内存的结束页框号0x38000
32 void *node_bootmem_map;
//用于保存低端内存的使用情况,用1bit表示1个页的使用情况,比如第0、3、9个页对应的位为0(可以分配),其余位为1(已被使用)
// 1101 1111 0110=0xdf6(最低位对应第0页)
33 unsigned long last_offset;
34 unsigned long last_pos;
35 unsigned long last_success; /* Previous allocation point. To speed
36 * up searching */
37 } bootmem_data_t;
初始化bootmem分配器
setup_memory—>init_bootmem—>init_bootmem_core
setup_memory
start_pfn = PFN_UP(init_pg_tables_end);
//代表内存中内核映像之后第一个可以动态分配的页面。
//也就是跳过内核的text、rodata以及临时页目录和页表,在前面的kernel/head.s中有详细的分析
find_max_pfn();
max_low_pfn = find_max_low_pfn(); //max_low_pfn=0x38000,也就是896M内存
bootmap_size = init_bootmem(start_pfn, max_low_pfn);
init_bootmem
352 unsigned long __init init_bootmem (unsigned long start, unsigned long pages)
353 {
354 max_low_pfn = pages;
355 min_low_pfn = start;
//start=0x401 pages=0x38000
357 return(init_bootmem_core(NODE_DATA(0), start, 0, pages));
358 }
init_bootmem_core
51 static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
52 unsigned long mapstart, unsigned long start, unsigned long end)
53 {
//mapstart=0x401 start=0 end=0x38000
55 bootmem_data_t *bdata = pgdat->bdata;
56 unsigned long mapsize = ((end - start)+7)/8;
57 // mapsize=(0x38000-0+7)/8 =0x7000
58 pgdat->pgdat_next = pgdat_list;
59 pgdat_list = pgdat;
60
61 mapsize = (mapsize + (sizeof(long) - 1UL)) & ~(sizeof(long) - 1UL);
62 bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
// bdata->node_bootmem_map=0xC040 1000,内存管理位图放在第一个没有使用的内存处。
63 bdata->node_boot_start = (start << PAGE_SHIFT);
// bdata->node_boot_start =0
64 bdata->node_low_pfn = end;
// bdata->node_low_pfn =0x38000
67 /*
68 * Initially all pages are reserved - setup_arch() has to
69 * register free RAM areas explicitly.
70 */
71 memset(bdata->node_bootmem_map, 0xff, mapsize);
//把内存位图(用1 bit表示一个物理页是否被使用)全部置1,即刚开始所有页都标记为已使用。
72
73
74 return mapsize;
75 }
初始化bootmem,并且把bootmem_data_t存放到内存节点NODE_DATA(0)中。
注册
register_bootmem_low_pages(max_low_pfn);
978 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
979 {
980 int i;
981
982 if (efi_enabled) {
983 efi_memmap_walk(free_available_memory, NULL);
984 return;
985 }
986 for (i = 0; i < e820.nr_map; i++) {
987 unsigned long curr_pfn, last_pfn, size;
988 /*
989 * Reserve usable low memory
990 */
// 如果type不等于E820_RAM(1),就跳过该条目,继续检查下一个e820条目
991 if (e820.map[i].type != E820_RAM)
992 continue;
993 /*
994 * We are rounding up the start address of usable memory:
995 */
996 curr_pfn = PFN_UP(e820.map[i].addr);
997 if (curr_pfn >= max_low_pfn)
998 continue;
999 /*
1000 * ... and at the end of the usable range downwards:
1001 */
1002 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1003
1004 if (last_pfn > max_low_pfn)
1005 last_pfn = max_low_pfn;
1006
1007 /*
1008 * .. finally, did all the rounding and playing
1009 * around just make the area go away?
1010 */
1011 if (last_pfn <= curr_pfn)
1012 continue;
1013
1014 size = last_pfn - curr_pfn;
1016 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1017 }
1018 }
e820地址分布:
BIOS-provided physical RAM map:
BIOS-e820: 0000000000000000 - 000000000009f400 (usable)
BIOS-e820: 000000000009f400 - 00000000000a0000 (reserved)
BIOS-e820: 00000000000f0000 - 0000000000100000 (reserved)
BIOS-e820: 0000000000100000 - 000000007cffd000 (usable)
BIOS-e820: 000000007cffd000 - 000000007d000000 (reserved)
BIOS-e820: 00000000fffc0000 - 0000000100000000 (reserved)
也就是第1条地址和第4条地址: BIOS-e820: 0000000000000000 - 000000000009f400 (usable)和 BIOS-e820: 0000000000100000 - 000000007cffd000 (usable)。第1条的结束地址0x9f400向下取整得到页框号0x9f;第4条的结束页框号0x7cffd大于max_low_pfn(0x38000),因此last_pfn被截断为0x38000。所以分别执行:
free_bootmem(0, 0x9f000);
free_bootmem(0x100000, 0x37f00000);
free_bootmem_core
free_bootmem_core最终调用的函数是test_and_clear_bit(i, bdata->node_bootmem_map)。
109 static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size)
110 {
111 unsigned long i;
112 unsigned long start;
113 /*
114 * round down end of usable mem, partially free pages are
115 * considered reserved.
116 */
117 unsigned long sidx;
118 unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE;
119 unsigned long end = (addr + size)/PAGE_SIZE;
120
121 BUG_ON(!size);
122 BUG_ON(end > bdata->node_low_pfn);
123
124 if (addr < bdata->last_success)
125 bdata->last_success = addr;
126
127 /*
128 * Round up the beginning of the address.
129 */
130 start = (addr + PAGE_SIZE-1) / PAGE_SIZE;
131 sidx = start - (bdata->node_boot_start/PAGE_SIZE);
132
134 for (i = sidx; i < eidx; i++) {
135 if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
136 BUG();
137 }
138 }
这个函数主要清除内存位图node_bootmem_map中对应的位。被清除的页帧号范围是:[0, 0x9f)和[0x100, 0x38000),即0~0x9e和0x100~0x37fff。
reserve_bootmem
在node_bootmem_map中把一些物理页标记为不可用,就是把node_bootmem_map的某些位置为1,最终调用函数
test_and_set_bit(i, bdata->node_bootmem_map)。
361 void __init reserve_bootmem (unsigned long addr, unsigned long size)
362 {
364 reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size);
365 }
/*
* Marks a particular physical memory range as unallocatable. Usable RAM
* might be used for boot-time allocations - or it might get added
* to the free page pool later on.
*/
82 static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size)
83 {
84 unsigned long i;
85 /*
86 * round up, partially reserved pages are considered
87 * fully reserved.
88 */
89 unsigned long sidx = (addr - bdata->node_boot_start)/PAGE_SIZE;
90 unsigned long eidx = (addr + size - bdata->node_boot_start +
91 PAGE_SIZE-1)/PAGE_SIZE;
92 unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE;
93
94 BUG_ON(!size);
95 BUG_ON(sidx >= eidx);
96 BUG_ON((addr >> PAGE_SHIFT) >= bdata->node_low_pfn);
97 BUG_ON(end > bdata->node_low_pfn);
98
99 for (i = sidx; i < eidx; i++)
100 if (test_and_set_bit(i, bdata->node_bootmem_map)) {
101 #ifdef CONFIG_DEBUG_BOOTMEM
102 printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
103 #endif
104 }
105 }
保留了哪些内存,在setup_memory中,代码如下:
1062 /*
1063 * Reserve the bootmem bitmap itself as well. We do this in two
1064 * steps (first step was init_bootmem()) because this catches
1065 * the (very unlikely) case of us accidentally initializing the
1066 * bootmem allocator with an invalid RAM area.
1067 */
1068 reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
1069 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
1070
1071 /*
1072 * reserve physical page 0 - it's a special BIOS page on many boxes,
1073 * enabling clean reboots, SMP operation, laptop functions.
1074 */
1075 reserve_bootmem(0, PAGE_SIZE);
1076
1077 /* reserve EBDA region, it's a 4K region */
1078 reserve_ebda_region();
保留了下面内存:
- reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
这是保留从内核加载处0x100000到bootmap表结束的内存,包括数据:kernel镜像中的text段、rwdata段,临时页目录、页表,以及bootmap本身。这也是我之前考虑的:内核代码段、临时页表、启动内存位图在这个时候应该是不能被分配的。
- reserve_bootmem(0, PAGE_SIZE);
保留物理内存的第一个页,这是给BIOS用的。
- reserve_ebda_region();
通过函数addr = get_bios_ebda()得到地址,然后保留从这个地址开始的4K。
EBDA
物理内存中BIOS使用的两个内存区域(BDA和EBDA)也必须小心地保留。
BIOS数据区(BIOS Data Area,简称BDA)
BIOS扩充数据区(Extended BIOS Data Area,简称EBDA)
相关API
2 #define PAGE_SHIFT 12
3 #define PAGE_SIZE (1UL << PAGE_SHIFT)
4 #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
5 #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
PFN_UP:向上取整。地址按页对齐时得到本页框号,否则得到下一个页框号
PFN_DOWN:向下取整,得到地址所在的页框号