From 53348f27168534561c0c814843bbf181314374f4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 09:58:06 +0200 Subject: bootmem: Fix __free_pages_bootmem() to use @order properly a226f6c899 (FRV: Clean up bootmem allocator's page freeing algorithm) separated out __free_pages_bootmem() from free_all_bootmem_core(). __free_pages_bootmem() takes @order argument but it assumes @order is either 0 or ilog2(BITS_PER_LONG). Note that all the current users match that assumption and this doesn't cause actual problems. Fix it by using 1 << order instead of BITS_PER_LONG. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310457490-3356-3-git-send-email-tj@kernel.org Cc: David Howells Signed-off-by: H. Peter Anvin --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9119faae6e6..b6da6ed818a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -705,10 +705,10 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order) int loop; prefetchw(page); - for (loop = 0; loop < BITS_PER_LONG; loop++) { + for (loop = 0; loop < (1 << order); loop++) { struct page *p = &page[loop]; - if (loop + 1 < BITS_PER_LONG) + if (loop + 1 < (1 << order)) prefetchw(p + 1); __ClearPageReserved(p); set_page_count(p, 0); -- cgit v1.2.3 From 1f5026a7e21e409c2b9dd54f6dfb9446511fb7c5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 09:58:09 +0200 Subject: memblock: Kill MEMBLOCK_ERROR 25818f0f28 (memblock: Make MEMBLOCK_ERROR be 0) thankfully made MEMBLOCK_ERROR 0 and there already are codes which expect error return to be 0. There's no point in keeping MEMBLOCK_ERROR around. End its misery. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310457490-3356-6-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- arch/x86/kernel/aperture_64.c | 2 +- arch/x86/kernel/check.c | 2 +- arch/x86/kernel/e820.c | 2 +- arch/x86/kernel/setup.c | 4 ++-- arch/x86/kernel/trampoline.c | 2 +- arch/x86/mm/init.c | 2 +- arch/x86/mm/memblock.c | 6 +++--- arch/x86/mm/numa.c | 6 +++--- arch/x86/mm/numa_32.c | 4 ++-- arch/x86/mm/numa_emulation.c | 2 +- include/linux/memblock.h | 4 +--- kernel/printk.c | 2 +- mm/memblock.c | 21 ++++++++++----------- mm/nobootmem.c | 2 +- mm/page_alloc.c | 4 ++-- 15 files changed, 31 insertions(+), 34 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 3d2661ca654..56363082bbd 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -88,7 +88,7 @@ static u32 __init allocate_aperture(void) */ addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, aper_size, aper_size); - if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) { + if (!addr || addr + aper_size > GART_MAX_ADDR) { printk(KERN_ERR "Cannot allocate aperture memory hole (%lx,%uK)\n", addr, aper_size>>10); diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index 452932d3473..95680fc4df5 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -86,7 +86,7 @@ void __init setup_bios_corruption_check(void) u64 size; addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE); - if (addr == MEMBLOCK_ERROR) + if (!addr) break; if (addr >= corruption_check_size) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 3e2ef842531..0f9ff58d06d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -745,7 +745,7 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) for (start = startt; ; start += size) { start = memblock_x86_find_in_range_size(start, &size, align); - if (start == MEMBLOCK_ERROR) + if (!start) return 0; if (size >= sizet) break; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index afaf38447ef..31ffe20d5d2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -331,7 +331,7 @@ static void __init relocate_initrd(void) ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, PAGE_SIZE); - if (ramdisk_here == MEMBLOCK_ERROR) + if (!ramdisk_here) panic("Cannot find place for new RAMDISK of size %lld\n", ramdisk_size); @@ -554,7 +554,7 @@ static void __init reserve_crashkernel(void) crash_base = memblock_find_in_range(alignment, CRASH_KERNEL_ADDR_MAX, crash_size, alignment); - if (crash_base == MEMBLOCK_ERROR) { + if (!crash_base) { pr_info("crashkernel reservation failed - No suitable area found.\n"); return; } diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index a91ae7709b4..a1f13ddb06e 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -14,7 +14,7 @@ void __init setup_trampolines(void) /* Has to be in very low memory so we can execute real-mode AP code. */ mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); - if (mem == MEMBLOCK_ERROR) + if (!mem) panic("Cannot allocate trampoline\n"); x86_trampoline_base = __va(mem); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 30326443ab8..13cf05a6160 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -68,7 +68,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse, #endif base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); - if (base == MEMBLOCK_ERROR) + if (!base) panic("Cannot find space for the kernel page tables"); pgt_buf_start = base >> PAGE_SHIFT; diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index 992da5ec5a6..e126117d1b0 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -66,7 +66,7 @@ u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align) return addr; } - return MEMBLOCK_ERROR; + return 0; } static __init struct range *find_range_array(int count) @@ -78,7 +78,7 @@ static __init struct range *find_range_array(int count) end = memblock.current_limit; mem = memblock_find_in_range(0, end, size, sizeof(struct range)); - if (mem == MEMBLOCK_ERROR) + if (!mem) panic("can not find more space for range array"); /* @@ -274,7 +274,7 @@ u64 __init memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size { u64 addr; addr = find_memory_core_early(nid, size, align, start, end); - if (addr != MEMBLOCK_ERROR) + if (addr) return addr; /* Fallback, should already have start end within node range */ diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index fbeaaf41661..fa1015de5cc 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -226,10 +226,10 @@ static void __init setup_node_data(int nid, u64 start, u64 end) } else { nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high, nd_size, SMP_CACHE_BYTES); - if (nd_pa == MEMBLOCK_ERROR) + if (!nd_pa) nd_pa = memblock_find_in_range(nd_low, nd_high, nd_size, SMP_CACHE_BYTES); - if (nd_pa == MEMBLOCK_ERROR) { + if (!nd_pa) { pr_err("Cannot find %zu bytes in node %d\n", nd_size, nid); return; @@ -395,7 +395,7 @@ static int __init numa_alloc_distance(void) phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), size, PAGE_SIZE); - if (phys == MEMBLOCK_ERROR) { + if (!phys) { pr_warning("NUMA: Warning: can't allocate distance table!\n"); /* don't retry until explicitly reset */ numa_distance = (void *)1LU; diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 3adebe7e536..58878b536ef 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -199,7 +199,7 @@ void __init init_alloc_remap(int nid, u64 start, u64 end) /* allocate node memory and the lowmem remap area */ node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); - if (node_pa == MEMBLOCK_ERROR) { + if (!node_pa) { pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", size, nid); return; @@ -209,7 +209,7 @@ void __init init_alloc_remap(int nid, u64 start, u64 end) remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT, size, LARGE_PAGE_BYTES); - if (remap_pa == MEMBLOCK_ERROR) { + if (!remap_pa) { pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", size, nid); memblock_x86_free_range(node_pa, node_pa + size); diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index d0ed086b624..e3d471c20cd 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -351,7 +351,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), phys_size, PAGE_SIZE); - if (phys == MEMBLOCK_ERROR) { + if (!phys) { pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); goto no_emu; } diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 7525e38c434..d235ec5fe67 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -2,8 +2,6 @@ #define _LINUX_MEMBLOCK_H #ifdef __KERNEL__ -#define MEMBLOCK_ERROR 0 - #ifdef CONFIG_HAVE_MEMBLOCK /* * Logical memory blocks. @@ -164,7 +162,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo #else static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) { - return MEMBLOCK_ERROR; + return 0; } #endif /* CONFIG_HAVE_MEMBLOCK */ diff --git a/kernel/printk.c b/kernel/printk.c index 35185392173..b1d5a6174d6 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -199,7 +199,7 @@ void __init setup_log_buf(int early) unsigned long mem; mem = memblock_alloc(new_log_buf_len, PAGE_SIZE); - if (mem == MEMBLOCK_ERROR) + if (!mem) return; new_log_buf = __va(mem); } else { diff --git a/mm/memblock.c b/mm/memblock.c index 9882a88d4a1..19699366134 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -74,7 +74,7 @@ static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_ /* In case, huge size is requested */ if (end < size) - return MEMBLOCK_ERROR; + return 0; base = round_down(end - size, align); @@ -94,7 +94,7 @@ static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_ base = round_down(res_base - size, align); } - return MEMBLOCK_ERROR; + return 0; } static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size, @@ -126,10 +126,10 @@ static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size, if (bottom >= top) continue; found = memblock_find_region(bottom, top, size, align); - if (found != MEMBLOCK_ERROR) + if (found) return found; } - return MEMBLOCK_ERROR; + return 0; } /* @@ -214,10 +214,10 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) */ if (use_slab) { new_array = kmalloc(new_size, GFP_KERNEL); - addr = new_array == NULL ? MEMBLOCK_ERROR : __pa(new_array); + addr = new_array ? __pa(new_array) : 0; } else addr = memblock_find_base(new_size, sizeof(phys_addr_t), 0, MEMBLOCK_ALLOC_ACCESSIBLE); - if (addr == MEMBLOCK_ERROR) { + if (!addr) { pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", memblock_type_name(type), type->max, type->max * 2); return -1; @@ -478,8 +478,7 @@ phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, ph size = round_up(size, align); found = memblock_find_base(size, align, 0, max_addr); - if (found != MEMBLOCK_ERROR && - !memblock_add_region(&memblock.reserved, found, size)) + if (found && !memblock_add_region(&memblock.reserved, found, size)) return found; return 0; @@ -559,14 +558,14 @@ static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp, this_end = memblock_nid_range(start, end, &this_nid); if (this_nid == nid) { phys_addr_t ret = memblock_find_region(start, this_end, size, align); - if (ret != MEMBLOCK_ERROR && + if (ret && !memblock_add_region(&memblock.reserved, ret, size)) return ret; } start = this_end; } - return MEMBLOCK_ERROR; + return 0; } phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) @@ -588,7 +587,7 @@ phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int n for (i = 0; i < mem->cnt; i++) { phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i], size, align, nid); - if (ret != MEMBLOCK_ERROR) + if (ret) return ret; } diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 6e93dc7f258..5b0eb06ecb4 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -43,7 +43,7 @@ static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align, addr = find_memory_core_early(nid, size, align, goal, limit); - if (addr == MEMBLOCK_ERROR) + if (!addr) return NULL; ptr = phys_to_virt(addr); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b6da6ed818a..c7f0e5be4a3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3878,13 +3878,13 @@ u64 __init find_memory_core_early(int nid, u64 size, u64 align, addr = memblock_find_in_range(final_start, final_end, size, align); - if (addr == MEMBLOCK_ERROR) + if (!addr) continue; return addr; } - return MEMBLOCK_ERROR; + return 0; } #endif -- cgit v1.2.3 From 5dfe8660a3d7f1ee1265c3536433ee53da3f98a3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 09:46:10 +0200 Subject: bootmem: Replace work_with_active_regions() with for_each_mem_pfn_range() Callback based iteration is cumbersome and much less useful than for_each_*() iterator. This patch implements for_each_mem_pfn_range() which replaces work_with_active_regions(). All the current users of work_with_active_regions() are converted. This simplifies walking over early_node_map and will allow converting internal logics in page_alloc to use iterator instead of walking early_node_map directly, which in turn will enable moving node information to memblock. powerpc change is only compile tested. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/20110714074610.GD3455@htj.dyndns.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- arch/powerpc/mm/numa.c | 50 ++++++++++++++--------------------------------- arch/x86/mm/memblock.c | 23 ++++------------------ drivers/pci/intel-iommu.c | 24 +++++++++-------------- include/linux/mm.h | 22 +++++++++++++++++++-- mm/page_alloc.c | 40 +++++++++++++++++++++++++------------ 5 files changed, 76 insertions(+), 83 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 2164006fe17..6f06ea53bca 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -127,45 +127,25 @@ static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn, } /* - * get_active_region_work_fn - A helper function for get_node_active_region - * Returns datax set to the start_pfn and end_pfn if they contain - * the initial value of datax->start_pfn between them - * @start_pfn: start page(inclusive) of region to check - * @end_pfn: end page(exclusive) of region to check - * @datax: comes in with ->start_pfn set to value to search for and - * goes out with active range if it contains it - * Returns 1 if search value is in range else 0 - */ -static int __init get_active_region_work_fn(unsigned long start_pfn, - unsigned long end_pfn, void *datax) -{ - struct node_active_region *data; - data = (struct node_active_region *)datax; - - if (start_pfn <= data->start_pfn && end_pfn > data->start_pfn) { - data->start_pfn = start_pfn; - data->end_pfn = end_pfn; - return 1; - } - return 0; - -} - -/* - * get_node_active_region - Return active region containing start_pfn + * get_node_active_region - Return active region containing pfn * Active range returned is empty if none found. - * @start_pfn: The page to return the region for. - * @node_ar: Returned set to the active region containing start_pfn + * @pfn: The page to return the region for + * @node_ar: Returned set to the active region containing @pfn */ -static void __init get_node_active_region(unsigned long start_pfn, - struct node_active_region *node_ar) +static void __init get_node_active_region(unsigned long pfn, + struct node_active_region *node_ar) { - int nid = early_pfn_to_nid(start_pfn); + unsigned long start_pfn, end_pfn; + int i, nid; - node_ar->nid = nid; - node_ar->start_pfn = start_pfn; - node_ar->end_pfn = start_pfn; - work_with_active_regions(nid, get_active_region_work_fn, node_ar); + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { + if (pfn >= start_pfn && pfn < end_pfn) { + node_ar->nid = nid; + node_ar->start_pfn = start_pfn; + node_ar->end_pfn = end_pfn; + break; + } + } } static void map_cpu_to_node(int cpu, int node) diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index e126117d1b0..da0d5c84586 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -115,28 +115,13 @@ static void __init memblock_x86_subtract_reserved(struct range *range, int az) memblock_reserve_reserved_regions(); } -struct count_data { - int nr; -}; - -static int __init count_work_fn(unsigned long start_pfn, - unsigned long end_pfn, void *datax) -{ - struct count_data *data = datax; - - data->nr++; - - return 0; -} - static int __init count_early_node_map(int nodeid) { - struct count_data data; - - data.nr = 0; - work_with_active_regions(nodeid, count_work_fn, &data); + int i, cnt = 0; - return data.nr; + for_each_mem_pfn_range(i, nodeid, NULL, NULL, NULL) + cnt++; + return cnt; } int __init __get_free_all_memory_range(struct range **rangep, int nodeid, diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f02c34d26d1..8ec352077e1 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2178,18 +2178,6 @@ static inline void iommu_prepare_isa(void) static int md_domain_init(struct dmar_domain *domain, int guest_width); -static int __init si_domain_work_fn(unsigned long start_pfn, - unsigned long end_pfn, void *datax) -{ - int *ret = datax; - - *ret = iommu_domain_identity_map(si_domain, - (uint64_t)start_pfn << PAGE_SHIFT, - (uint64_t)end_pfn << PAGE_SHIFT); - return *ret; - -} - static int __init si_domain_init(int hw) { struct dmar_drhd_unit *drhd; @@ -2221,9 +2209,15 @@ static int __init si_domain_init(int hw) return 0; for_each_online_node(nid) { - work_with_active_regions(nid, si_domain_work_fn, &ret); - if (ret) - return ret; + unsigned long start_pfn, end_pfn; + int i; + + for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { + ret = iommu_domain_identity_map(si_domain, + PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)); + if (ret) + return ret; + } } return 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index c70a326b8f2..57e4c9ffdff 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1327,9 +1327,27 @@ int add_from_early_node_map(struct range *range, int az, int nr_range, int nid); u64 __init find_memory_core_early(int nid, u64 size, u64 align, u64 goal, u64 limit); -typedef int (*work_fn_t)(unsigned long, unsigned long, void *); -extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); + +extern void __next_mem_pfn_range(int *idx, int nid, + unsigned long *out_start_pfn, + unsigned long *out_end_pfn, int *out_nid); + +/** + * for_each_mem_pfn_range - early memory pfn range iterator + * @i: an integer used as loop variable + * @nid: node selector, %MAX_NUMNODES for all nodes + * @p_start: ptr to ulong for start pfn of the range, can be %NULL + * @p_end: ptr to ulong for end pfn of the range, can be %NULL + * @p_nid: ptr to int for nid of the range, can be %NULL + * + * Walks over configured memory ranges. Available after early_node_map is + * populated. + */ +#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \ + for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ + i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) + #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ #if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c7f0e5be4a3..69fffabf61b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3903,18 +3903,6 @@ int __init add_from_early_node_map(struct range *range, int az, return nr_range; } -void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) -{ - int i; - int ret; - - for_each_active_range_index_in_nid(i, nid) { - ret = work_fn(early_node_map[i].start_pfn, - early_node_map[i].end_pfn, data); - if (ret) - break; - } -} /** * sparse_memory_present_with_active_regions - Call memory_present for each active range * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. @@ -4421,6 +4409,34 @@ static inline void setup_nr_node_ids(void) } #endif +/* + * Common iterator interface used to define for_each_mem_pfn_range(). + */ +void __meminit __next_mem_pfn_range(int *idx, int nid, + unsigned long *out_start_pfn, + unsigned long *out_end_pfn, int *out_nid) +{ + struct node_active_region *r = NULL; + + while (++*idx < nr_nodemap_entries) { + if (nid == MAX_NUMNODES || nid == early_node_map[*idx].nid) { + r = &early_node_map[*idx]; + break; + } + } + if (!r) { + *idx = -1; + return; + } + + if (out_start_pfn) + *out_start_pfn = r->start_pfn; + if (out_end_pfn) + *out_end_pfn = r->end_pfn; + if (out_nid) + *out_nid = r->nid; +} + /** * add_active_range - Register a range of PFNs backed by physical memory * @nid: The node ID the range resides on -- cgit v1.2.3 From 96e907d1360240d1958fe8ce3a3ac640733330d4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 10:46:29 +0200 Subject: bootmem: Reimplement __absent_pages_in_range() using for_each_mem_pfn_range() __absent_pages_in_range() was needlessly complex. Reimplement it using for_each_mem_pfn_range(). Also, update zone_absent_pages_in_node() such that it doesn't call __absent_pages_in_range() with @zone_start_pfn which is larger than @zone_end_pfn. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310460395-30913-3-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- mm/page_alloc.c | 54 ++++++++++++------------------------------------------ 1 file changed, 12 insertions(+), 42 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 69fffabf61b..3092a97268d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4044,46 +4044,16 @@ unsigned long __meminit __absent_pages_in_range(int nid, unsigned long range_start_pfn, unsigned long range_end_pfn) { - int i = 0; - unsigned long prev_end_pfn = 0, hole_pages = 0; - unsigned long start_pfn; - - /* Find the end_pfn of the first active range of pfns in the node */ - i = first_active_region_index_in_nid(nid); - if (i == -1) - return 0; - - prev_end_pfn = min(early_node_map[i].start_pfn, range_end_pfn); - - /* Account for ranges before physical memory on this node */ - if (early_node_map[i].start_pfn > range_start_pfn) - hole_pages = prev_end_pfn - range_start_pfn; - - /* Find all holes for the zone within the node */ - for (; i != -1; i = next_active_region_index_in_nid(i, nid)) { - - /* No need to continue if prev_end_pfn is outside the zone */ - if (prev_end_pfn >= range_end_pfn) - break; - - /* Make sure the end of the zone is not within the hole */ - start_pfn = min(early_node_map[i].start_pfn, range_end_pfn); - prev_end_pfn = max(prev_end_pfn, range_start_pfn); + unsigned long nr_absent = range_end_pfn - range_start_pfn; + unsigned long start_pfn, end_pfn; + int i; - /* Update the hole size cound and move on */ - if (start_pfn > range_start_pfn) { - BUG_ON(prev_end_pfn > start_pfn); - hole_pages += start_pfn - prev_end_pfn; - } - prev_end_pfn = early_node_map[i].end_pfn; + for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { + start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn); + end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn); + nr_absent -= end_pfn - start_pfn; } - - /* Account for ranges past physical memory on this node */ - if (range_end_pfn > prev_end_pfn) - hole_pages += range_end_pfn - - max(range_start_pfn, prev_end_pfn); - - return hole_pages; + return nr_absent; } /** @@ -4104,14 +4074,14 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid, unsigned long zone_type, unsigned long *ignored) { + unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type]; + unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type]; unsigned long node_start_pfn, node_end_pfn; unsigned long zone_start_pfn, zone_end_pfn; get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); - zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type], - node_start_pfn); - zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type], - node_end_pfn); + zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high); + zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high); adjust_zone_range_for_zone_movable(nid, zone_type, node_start_pfn, node_end_pfn, -- cgit v1.2.3 From c13291a536b835b2ab278ab201f2cb1ce22f2785 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 10:46:30 +0200 Subject: bootmem: Use for_each_mem_pfn_range() in page_alloc.c The previous patch added for_each_mem_pfn_range() which is more versatile than for_each_active_range_index_in_nid(). This patch replaces for_each_active_range_index_in_nid() and open coded early_node_map[] walks with for_each_mem_pfn_range(). All conversions in this patch are straight-forward and shouldn't cause any functional difference. After the conversions, for_each_active_range_index_in_nid() doesn't have any user left and is removed. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310460395-30913-4-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- mm/page_alloc.c | 165 +++++++++++++++++++------------------------------------- 1 file changed, 56 insertions(+), 109 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3092a97268d..902f03a4fd6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3711,34 +3711,6 @@ __meminit int init_currently_empty_zone(struct zone *zone, } #ifdef CONFIG_ARCH_POPULATES_NODE_MAP -/* - * Basic iterator support. Return the first range of PFNs for a node - * Note: nid == MAX_NUMNODES returns first region regardless of node - */ -static int __meminit first_active_region_index_in_nid(int nid) -{ - int i; - - for (i = 0; i < nr_nodemap_entries; i++) - if (nid == MAX_NUMNODES || early_node_map[i].nid == nid) - return i; - - return -1; -} - -/* - * Basic iterator support. Return the next active range of PFNs for a node - * Note: nid == MAX_NUMNODES returns next region regardless of node - */ -static int __meminit next_active_region_index_in_nid(int index, int nid) -{ - for (index = index + 1; index < nr_nodemap_entries; index++) - if (nid == MAX_NUMNODES || early_node_map[index].nid == nid) - return index; - - return -1; -} - #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID /* * Required by SPARSEMEM. Given a PFN, return what node the PFN is on. @@ -3748,15 +3720,12 @@ static int __meminit next_active_region_index_in_nid(int index, int nid) */ int __meminit __early_pfn_to_nid(unsigned long pfn) { - int i; - - for (i = 0; i < nr_nodemap_entries; i++) { - unsigned long start_pfn = early_node_map[i].start_pfn; - unsigned long end_pfn = early_node_map[i].end_pfn; + unsigned long start_pfn, end_pfn; + int i, nid; + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) if (start_pfn <= pfn && pfn < end_pfn) - return early_node_map[i].nid; - } + return nid; /* This is a memory hole */ return -1; } @@ -3785,11 +3754,6 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node) } #endif -/* Basic iterator support to walk early_node_map[] */ -#define for_each_active_range_index_in_nid(i, nid) \ - for (i = first_active_region_index_in_nid(nid); i != -1; \ - i = next_active_region_index_in_nid(i, nid)) - /** * free_bootmem_with_active_regions - Call free_bootmem_node for each active range * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed. @@ -3799,25 +3763,19 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node) * add_active_ranges() contain no holes and may be freed, this * this function may be used instead of calling free_bootmem() manually. */ -void __init free_bootmem_with_active_regions(int nid, - unsigned long max_low_pfn) +void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn) { - int i; - - for_each_active_range_index_in_nid(i, nid) { - unsigned long size_pages = 0; - unsigned long end_pfn = early_node_map[i].end_pfn; - - if (early_node_map[i].start_pfn >= max_low_pfn) - continue; + unsigned long start_pfn, end_pfn; + int i, this_nid; - if (end_pfn > max_low_pfn) - end_pfn = max_low_pfn; + for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) { + start_pfn = min(start_pfn, max_low_pfn); + end_pfn = min(end_pfn, max_low_pfn); - size_pages = end_pfn - early_node_map[i].start_pfn; - free_bootmem_node(NODE_DATA(early_node_map[i].nid), - PFN_PHYS(early_node_map[i].start_pfn), - size_pages << PAGE_SHIFT); + if (start_pfn < end_pfn) + free_bootmem_node(NODE_DATA(this_nid), + PFN_PHYS(start_pfn), + (end_pfn - start_pfn) << PAGE_SHIFT); } } @@ -3891,15 +3849,12 @@ u64 __init find_memory_core_early(int nid, u64 size, u64 align, int __init add_from_early_node_map(struct range *range, int az, int nr_range, int nid) { + unsigned long start_pfn, end_pfn; int i; - u64 start, end; /* need to go over early_node_map to find out good range for node */ - for_each_active_range_index_in_nid(i, nid) { - start = early_node_map[i].start_pfn; - end = early_node_map[i].end_pfn; - nr_range = add_range(range, az, nr_range, start, end); - } + for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) + nr_range = add_range(range, az, nr_range, start_pfn, end_pfn); return nr_range; } @@ -3913,12 +3868,11 @@ int __init add_from_early_node_map(struct range *range, int az, */ void __init sparse_memory_present_with_active_regions(int nid) { - int i; + unsigned long start_pfn, end_pfn; + int i, this_nid; - for_each_active_range_index_in_nid(i, nid) - memory_present(early_node_map[i].nid, - early_node_map[i].start_pfn, - early_node_map[i].end_pfn); + for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) + memory_present(this_nid, start_pfn, end_pfn); } /** @@ -3935,13 +3889,15 @@ void __init sparse_memory_present_with_active_regions(int nid) void __meminit get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn) { + unsigned long this_start_pfn, this_end_pfn; int i; + *start_pfn = -1UL; *end_pfn = 0; - for_each_active_range_index_in_nid(i, nid) { - *start_pfn = min(*start_pfn, early_node_map[i].start_pfn); - *end_pfn = max(*end_pfn, early_node_map[i].end_pfn); + for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) { + *start_pfn = min(*start_pfn, this_start_pfn); + *end_pfn = max(*end_pfn, this_end_pfn); } if (*start_pfn == -1UL) @@ -4484,6 +4440,7 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn, void __init remove_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn) { + unsigned long this_start_pfn, this_end_pfn; int i, j; int removed = 0; @@ -4491,26 +4448,22 @@ void __init remove_active_range(unsigned int nid, unsigned long start_pfn, nid, start_pfn, end_pfn); /* Find the old active region end and shrink */ - for_each_active_range_index_in_nid(i, nid) { - if (early_node_map[i].start_pfn >= start_pfn && - early_node_map[i].end_pfn <= end_pfn) { + for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) { + if (this_start_pfn >= start_pfn && this_end_pfn <= end_pfn) { /* clear it */ early_node_map[i].start_pfn = 0; early_node_map[i].end_pfn = 0; removed = 1; continue; } - if (early_node_map[i].start_pfn < start_pfn && - early_node_map[i].end_pfn > start_pfn) { - unsigned long temp_end_pfn = early_node_map[i].end_pfn; + if (this_start_pfn < start_pfn && this_end_pfn > start_pfn) { early_node_map[i].end_pfn = start_pfn; - if (temp_end_pfn > end_pfn) - add_active_range(nid, end_pfn, temp_end_pfn); + if (this_end_pfn > end_pfn) + add_active_range(nid, end_pfn, this_end_pfn); continue; } - if (early_node_map[i].start_pfn >= start_pfn && - early_node_map[i].end_pfn > end_pfn && - early_node_map[i].start_pfn < end_pfn) { + if (this_start_pfn >= start_pfn && this_end_pfn > end_pfn && + this_start_pfn < end_pfn) { early_node_map[i].start_pfn = end_pfn; continue; } @@ -4593,15 +4546,11 @@ void __init sort_node_map(void) unsigned long __init node_map_pfn_alignment(void) { unsigned long accl_mask = 0, last_end = 0; + unsigned long start, end, mask; int last_nid = -1; - int i; - - for_each_active_range_index_in_nid(i, MAX_NUMNODES) { - int nid = early_node_map[i].nid; - unsigned long start = early_node_map[i].start_pfn; - unsigned long end = early_node_map[i].end_pfn; - unsigned long mask; + int i, nid; + for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) { if (!start || last_nid < 0 || last_nid == nid) { last_nid = nid; last_end = end; @@ -4628,12 +4577,12 @@ unsigned long __init node_map_pfn_alignment(void) /* Find the lowest pfn for a node */ static unsigned long __init find_min_pfn_for_node(int nid) { - int i; unsigned long min_pfn = ULONG_MAX; + unsigned long start_pfn; + int i; - /* Assuming a sorted map, the first range found has the starting pfn */ - for_each_active_range_index_in_nid(i, nid) - min_pfn = min(min_pfn, early_node_map[i].start_pfn); + for_each_mem_pfn_range(i, nid, &start_pfn, NULL, NULL) + min_pfn = min(min_pfn, start_pfn); if (min_pfn == ULONG_MAX) { printk(KERN_WARNING @@ -4662,15 +4611,16 @@ unsigned long __init find_min_pfn_with_active_regions(void) */ static unsigned long __init early_calculate_totalpages(void) { - int i; unsigned long totalpages = 0; + unsigned long start_pfn, end_pfn; + int i, nid; + + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { + unsigned long pages = end_pfn - start_pfn; - for (i = 0; i < nr_nodemap_entries; i++) { - unsigned long pages = early_node_map[i].end_pfn - - early_node_map[i].start_pfn; totalpages += pages; if (pages) - node_set_state(early_node_map[i].nid, N_HIGH_MEMORY); + node_set_state(nid, N_HIGH_MEMORY); } return totalpages; } @@ -4725,6 +4675,8 @@ restart: /* Spread kernelcore memory as evenly as possible throughout nodes */ kernelcore_node = required_kernelcore / usable_nodes; for_each_node_state(nid, N_HIGH_MEMORY) { + unsigned long start_pfn, end_pfn; + /* * Recalculate kernelcore_node if the division per node * now exceeds what is necessary to satisfy the requested @@ -4741,13 +4693,10 @@ restart: kernelcore_remaining = kernelcore_node; /* Go through each range of PFNs within this node */ - for_each_active_range_index_in_nid(i, nid) { - unsigned long start_pfn, end_pfn; + for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { unsigned long size_pages; - start_pfn = max(early_node_map[i].start_pfn, - zone_movable_pfn[nid]); - end_pfn = early_node_map[i].end_pfn; + start_pfn = max(start_pfn, zone_movable_pfn[nid]); if (start_pfn >= end_pfn) continue; @@ -4849,8 +4798,8 @@ static void check_for_regular_memory(pg_data_t *pgdat) */ void __init free_area_init_nodes(unsigned long *max_zone_pfn) { - unsigned long nid; - int i; + unsigned long start_pfn, end_pfn; + int i, nid; /* Sort early_node_map as initialisation assumes it is sorted */ sort_node_map(); @@ -4900,11 +4849,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) } /* Print out the early_node_map[] */ - printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries); - for (i = 0; i < nr_nodemap_entries; i++) - printk(" %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid, - early_node_map[i].start_pfn, - early_node_map[i].end_pfn); + printk("Early memory PFN ranges\n"); + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) + printk(" %3d: %0#10lx -> %0#10lx\n", nid, start_pfn, end_pfn); /* Initialise every node */ mminit_verify_pageflags_layout(); -- cgit v1.2.3 From eb40c4c27f1722f058e4713ccfedebac577d5190 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 10:46:35 +0200 Subject: memblock, x86: Replace memblock_x86_find_in_range_node() with generic memblock calls With the previous changes, generic NUMA aware memblock API has feature parity with memblock_x86_find_in_range_node(). There currently are two users - x86 setup_node_data() and __alloc_memory_core_early() in nobootmem.c. This patch converts the former to use memblock_alloc_nid() and the latter memblock_find_range_in_node(), and kills memblock_x86_find_in_range_node() and related functions including find_memory_early_core_early() in page_alloc.c. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310460395-30913-9-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/memblock.h | 1 - arch/x86/mm/memblock.c | 15 --------- arch/x86/mm/numa.c | 9 +----- include/linux/mm.h | 2 -- mm/nobootmem.c | 3 +- mm/page_alloc.c | 67 ----------------------------------------- 6 files changed, 2 insertions(+), 95 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h index 0cd3800f33b..161792ec524 100644 --- a/arch/x86/include/asm/memblock.h +++ b/arch/x86/include/asm/memblock.h @@ -15,7 +15,6 @@ int get_free_all_memory_range(struct range **rangep, int nodeid); void memblock_x86_register_active_regions(int nid, unsigned long start_pfn, unsigned long last_pfn); u64 memblock_x86_hole_size(u64 start, u64 end); -u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align); u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit); u64 memblock_x86_memory_in_range(u64 addr, u64 limit); bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align); diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index da0d5c84586..e4569f85b39 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -251,21 +251,6 @@ void __init memblock_x86_free_range(u64 start, u64 end) memblock_free(start, end - start); } -/* - * Need to call this function after memblock_x86_register_active_regions, - * so early_node_map[] is filled already. - */ -u64 __init memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align) -{ - u64 addr; - addr = find_memory_core_early(nid, size, align, start, end); - if (addr) - return addr; - - /* Fallback, should already have start end within node range */ - return memblock_find_in_range(start, end, size, align); -} - /* * Finds an active region in the address range from start_pfn to last_pfn and * returns its range in ei_startpfn and ei_endpfn for the memblock entry. diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index fa1015de5cc..824efadc574 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -192,8 +192,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) /* Initialize NODE_DATA for a node on the local memory */ static void __init setup_node_data(int nid, u64 start, u64 end) { - const u64 nd_low = PFN_PHYS(MAX_DMA_PFN); - const u64 nd_high = PFN_PHYS(max_pfn_mapped); const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); bool remapped = false; u64 nd_pa; @@ -224,17 +222,12 @@ static void __init setup_node_data(int nid, u64 start, u64 end) nd_pa = __pa(nd); remapped = true; } else { - nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high, - nd_size, SMP_CACHE_BYTES); - if (!nd_pa) - nd_pa = memblock_find_in_range(nd_low, nd_high, - nd_size, SMP_CACHE_BYTES); + nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); if (!nd_pa) { pr_err("Cannot find %zu bytes in node %d\n", nd_size, nid); return; } - memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA"); nd = __va(nd_pa); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 57e4c9ffdff..9ebc65ae686 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1325,8 +1325,6 @@ extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); int add_from_early_node_map(struct range *range, int az, int nr_range, int nid); -u64 __init find_memory_core_early(int nid, u64 size, u64 align, - u64 goal, u64 limit); extern void sparse_memory_present_with_active_regions(int nid); extern void __next_mem_pfn_range(int *idx, int nid, diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 5b0eb06ecb4..c78162668bc 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -41,8 +41,7 @@ static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align, if (limit > memblock.current_limit) limit = memblock.current_limit; - addr = find_memory_core_early(nid, size, align, goal, limit); - + addr = memblock_find_in_range_node(goal, limit, size, align, nid); if (!addr) return NULL; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 902f03a4fd6..8ab5e5e7fda 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3779,73 +3779,6 @@ void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn) } } -#ifdef CONFIG_HAVE_MEMBLOCK -/* - * Basic iterator support. Return the last range of PFNs for a node - * Note: nid == MAX_NUMNODES returns last region regardless of node - */ -static int __meminit last_active_region_index_in_nid(int nid) -{ - int i; - - for (i = nr_nodemap_entries - 1; i >= 0; i--) - if (nid == MAX_NUMNODES || early_node_map[i].nid == nid) - return i; - - return -1; -} - -/* - * Basic iterator support. Return the previous active range of PFNs for a node - * Note: nid == MAX_NUMNODES returns next region regardless of node - */ -static int __meminit previous_active_region_index_in_nid(int index, int nid) -{ - for (index = index - 1; index >= 0; index--) - if (nid == MAX_NUMNODES || early_node_map[index].nid == nid) - return index; - - return -1; -} - -#define for_each_active_range_index_in_nid_reverse(i, nid) \ - for (i = last_active_region_index_in_nid(nid); i != -1; \ - i = previous_active_region_index_in_nid(i, nid)) - -u64 __init find_memory_core_early(int nid, u64 size, u64 align, - u64 goal, u64 limit) -{ - int i; - - /* Need to go over early_node_map to find out good range for node */ - for_each_active_range_index_in_nid_reverse(i, nid) { - u64 addr; - u64 ei_start, ei_last; - u64 final_start, final_end; - - ei_last = early_node_map[i].end_pfn; - ei_last <<= PAGE_SHIFT; - ei_start = early_node_map[i].start_pfn; - ei_start <<= PAGE_SHIFT; - - final_start = max(ei_start, goal); - final_end = min(ei_last, limit); - - if (final_start >= final_end) - continue; - - addr = memblock_find_in_range(final_start, final_end, size, align); - - if (!addr) - continue; - - return addr; - } - - return 0; -} -#endif - int __init add_from_early_node_map(struct range *range, int az, int nr_range, int nid) { -- cgit v1.2.3 From 7c0caeb866b0f648d91bb75b8bc6f86af95bb033 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 11:43:42 +0200 Subject: memblock: Add optional region->nid From 83103b92f3234ec830852bbc5c45911bd6cbdb20 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 11:22:16 +0200 Add optional region->nid which can be enabled by arch using CONFIG_HAVE_MEMBLOCK_NODE_MAP. When enabled, memblock also carries NUMA node information and replaces early_node_map[]. Newly added memblocks have MAX_NUMNODES as nid. Arch can then call memblock_set_node() to set node information. memblock takes care of merging and node affine allocations w.r.t. node information. When MEMBLOCK_NODE_MAP is enabled, early_node_map[], related data structures and functions to manipulate and iterate it are disabled. memblock version of __next_mem_pfn_range() is provided such that for_each_mem_pfn_range() behaves the same and its users don't have to be updated. -v2: Yinghai spotted section mismatch caused by missing __init_memblock in memblock_set_node(). Fixed. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/20110714094342.GF3455@htj.dyndns.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 26 +++++++++ include/linux/mm.h | 2 + mm/Kconfig | 3 + mm/memblock.c | 142 +++++++++++++++++++++++++++++++++++++++++------ mm/page_alloc.c | 47 +++++++++------- 5 files changed, 183 insertions(+), 37 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 434b958a4f5..c36a55d3c1c 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -24,6 +24,9 @@ struct memblock_region { phys_addr_t base; phys_addr_t size; +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP + int nid; +#endif }; struct memblock_type { @@ -58,6 +61,29 @@ extern long memblock_remove(phys_addr_t base, phys_addr_t size); extern long memblock_free(phys_addr_t base, phys_addr_t size); extern long memblock_reserve(phys_addr_t base, phys_addr_t size); +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +extern int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid); + +static inline void memblock_set_region_node(struct memblock_region *r, int nid) +{ + r->nid = nid; +} + +static inline int memblock_get_region_node(const struct memblock_region *r) +{ + return r->nid; +} +#else +static inline void memblock_set_region_node(struct memblock_region *r, int nid) +{ +} + +static inline int memblock_get_region_node(const struct memblock_region *r) +{ + return 0; +} +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ + /* The numa aware allocator is only available if * CONFIG_ARCH_POPULATES_NODE_MAP is set */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 9ebc65ae686..ceb1e4a1a73 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1307,12 +1307,14 @@ extern void free_area_init_node(int nid, unsigned long * zones_size, * CONFIG_ARCH_POPULATES_NODE_MAP */ extern void free_area_init_nodes(unsigned long *max_zone_pfn); +#ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); void sort_node_map(void); +#endif unsigned long node_map_pfn_alignment(void); unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, unsigned long end_pfn); diff --git a/mm/Kconfig b/mm/Kconfig index 8ca47a5ee9c..30a5d4792b8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -131,6 +131,9 @@ config SPARSEMEM_VMEMMAP config HAVE_MEMBLOCK boolean +config HAVE_MEMBLOCK_NODE_MAP + boolean + # eventually, we can have this option just 'select SPARSEMEM' config MEMORY_HOTPLUG bool "Allow for memory hot-add" diff --git a/mm/memblock.c b/mm/memblock.c index 992aa180747..e815f4b7580 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -161,12 +161,8 @@ int __init_memblock memblock_reserve_reserved_regions(void) static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) { - unsigned long i; - - for (i = r; i < type->cnt - 1; i++) { - type->regions[i].base = type->regions[i + 1].base; - type->regions[i].size = type->regions[i + 1].size; - } + memmove(&type->regions[r], &type->regions[r + 1], + (type->cnt - (r + 1)) * sizeof(type->regions[r])); type->cnt--; /* Special case for empty arrays */ @@ -174,6 +170,7 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u type->cnt = 1; type->regions[0].base = 0; type->regions[0].size = 0; + memblock_set_region_node(&type->regions[0], MAX_NUMNODES); } } @@ -266,7 +263,9 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type) struct memblock_region *this = &type->regions[i]; struct memblock_region *next = &type->regions[i + 1]; - if (this->base + this->size != next->base) { + if (this->base + this->size != next->base || + memblock_get_region_node(this) != + memblock_get_region_node(next)) { BUG_ON(this->base + this->size > next->base); i++; continue; @@ -290,7 +289,7 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type) */ static void __init_memblock memblock_insert_region(struct memblock_type *type, int idx, phys_addr_t base, - phys_addr_t size) + phys_addr_t size, int nid) { struct memblock_region *rgn = &type->regions[idx]; @@ -298,6 +297,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type, memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn)); rgn->base = base; rgn->size = size; + memblock_set_region_node(rgn, nid); type->cnt++; } @@ -327,6 +327,7 @@ static long __init_memblock memblock_add_region(struct memblock_type *type, WARN_ON(type->cnt != 1); type->regions[0].base = base; type->regions[0].size = size; + memblock_set_region_node(&type->regions[0], MAX_NUMNODES); return 0; } repeat: @@ -355,7 +356,7 @@ repeat: nr_new++; if (insert) memblock_insert_region(type, i++, base, - rbase - base); + rbase - base, MAX_NUMNODES); } /* area below @rend is dealt with, forget about it */ base = min(rend, end); @@ -365,7 +366,8 @@ repeat: if (base < end) { nr_new++; if (insert) - memblock_insert_region(type, i, base, end - base); + memblock_insert_region(type, i, base, end - base, + MAX_NUMNODES); } /* @@ -459,6 +461,101 @@ long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) return memblock_add_region(_rgn, base, size); } +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +/* + * Common iterator interface used to define for_each_mem_range(). + */ +void __init_memblock __next_mem_pfn_range(int *idx, int nid, + unsigned long *out_start_pfn, + unsigned long *out_end_pfn, int *out_nid) +{ + struct memblock_type *type = &memblock.memory; + struct memblock_region *r; + + while (++*idx < type->cnt) { + r = &type->regions[*idx]; + + if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) + continue; + if (nid == MAX_NUMNODES || nid == r->nid) + break; + } + if (*idx >= type->cnt) { + *idx = -1; + return; + } + + if (out_start_pfn) + *out_start_pfn = PFN_UP(r->base); + if (out_end_pfn) + *out_end_pfn = PFN_DOWN(r->base + r->size); + if (out_nid) + *out_nid = r->nid; +} + +/** + * memblock_set_node - set node ID on memblock regions + * @base: base of area to set node ID for + * @size: size of area to set node ID for + * @nid: node ID to set + * + * Set the nid of memblock memory regions in [@base,@base+@size) to @nid. + * Regions which cross the area boundaries are split as necessary. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, + int nid) +{ + struct memblock_type *type = &memblock.memory; + phys_addr_t end = base + size; + int i; + + /* we'll create at most two more regions */ + while (type->cnt + 2 > type->max) + if (memblock_double_array(type) < 0) + return -ENOMEM; + + for (i = 0; i < type->cnt; i++) { + struct memblock_region *rgn = &type->regions[i]; + phys_addr_t rbase = rgn->base; + phys_addr_t rend = rbase + rgn->size; + + if (rbase >= end) + break; + if (rend <= base) + continue; + + if (rbase < base) { + /* + * @rgn intersects from below. Split and continue + * to process the next region - the new top half. + */ + rgn->base = base; + rgn->size = rend - rgn->base; + memblock_insert_region(type, i, rbase, base - rbase, + rgn->nid); + } else if (rend > end) { + /* + * @rgn intersects from above. Split and redo the + * current region - the new bottom half. + */ + rgn->base = end; + rgn->size = rend - rgn->base; + memblock_insert_region(type, i--, rbase, end - rbase, + rgn->nid); + } else { + /* @rgn is fully contained, set ->nid */ + rgn->nid = nid; + } + } + + memblock_merge_regions(type); + return 0; +} +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ + phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) { phys_addr_t found; @@ -689,19 +786,26 @@ void __init_memblock memblock_set_current_limit(phys_addr_t limit) memblock.current_limit = limit; } -static void __init_memblock memblock_dump(struct memblock_type *region, char *name) +static void __init_memblock memblock_dump(struct memblock_type *type, char *name) { unsigned long long base, size; int i; - pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); - - for (i = 0; i < region->cnt; i++) { - base = region->regions[i].base; - size = region->regions[i].size; + pr_info(" %s.cnt = 0x%lx\n", name, type->cnt); - pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes\n", - name, i, base, base + size - 1, size); + for (i = 0; i < type->cnt; i++) { + struct memblock_region *rgn = &type->regions[i]; + char nid_buf[32] = ""; + + base = rgn->base; + size = rgn->size; +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP + if (memblock_get_region_node(rgn) != MAX_NUMNODES) + snprintf(nid_buf, sizeof(nid_buf), " on node %d", + memblock_get_region_node(rgn)); +#endif + pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s\n", + name, i, base, base + size - 1, size, nid_buf); } } @@ -759,11 +863,13 @@ void __init memblock_init(void) */ memblock.memory.regions[0].base = 0; memblock.memory.regions[0].size = 0; + memblock_set_region_node(&memblock.memory.regions[0], MAX_NUMNODES); memblock.memory.cnt = 1; /* Ditto. */ memblock.reserved.regions[0].base = 0; memblock.reserved.regions[0].size = 0; + memblock_set_region_node(&memblock.reserved.regions[0], MAX_NUMNODES); memblock.reserved.cnt = 1; memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8ab5e5e7fda..3c7ea45ffba 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -182,28 +182,31 @@ static unsigned long __meminitdata nr_all_pages; static unsigned long __meminitdata dma_reserve; #ifdef CONFIG_ARCH_POPULATES_NODE_MAP - /* - * MAX_ACTIVE_REGIONS determines the maximum number of distinct - * ranges of memory (RAM) that may be registered with add_active_range(). - * Ranges passed to add_active_range() will be merged if possible - * so the number of times add_active_range() can be called is - * related to the number of nodes and the number of holes - */ - #ifdef CONFIG_MAX_ACTIVE_REGIONS - /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */ - #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS - #else - #if MAX_NUMNODES >= 32 - /* If there can be many nodes, allow up to 50 holes per node */ - #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50) + #ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP + /* + * MAX_ACTIVE_REGIONS determines the maximum number of distinct ranges + * of memory (RAM) that may be registered with add_active_range(). + * Ranges passed to add_active_range() will be merged if possible so + * the number of times add_active_range() can be called is related to + * the number of nodes and the number of holes + */ + #ifdef CONFIG_MAX_ACTIVE_REGIONS + /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */ + #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS #else - /* By default, allow up to 256 distinct regions */ - #define MAX_ACTIVE_REGIONS 256 + #if MAX_NUMNODES >= 32 + /* If there can be many nodes, allow up to 50 holes per node */ + #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50) + #else + /* By default, allow up to 256 distinct regions */ + #define MAX_ACTIVE_REGIONS 256 + #endif #endif - #endif - static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS]; - static int __meminitdata nr_nodemap_entries; + static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS]; + static int __meminitdata nr_nodemap_entries; +#endif /* !CONFIG_HAVE_MEMBLOCK_NODE_MAP */ + static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; static unsigned long __initdata required_kernelcore; @@ -4268,6 +4271,7 @@ static inline void setup_nr_node_ids(void) } #endif +#ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP /* * Common iterator interface used to define for_each_mem_pfn_range(). */ @@ -4456,6 +4460,11 @@ void __init sort_node_map(void) sizeof(struct node_active_region), cmp_node_active_region, NULL); } +#else /* !CONFIG_HAVE_MEMBLOCK_NODE_MAP */ +static inline void sort_node_map(void) +{ +} +#endif /** * node_map_pfn_alignment - determine the maximum internode alignment -- cgit v1.2.3 From 0ee332c1451869963626bf9cac88f165a90990e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:09 -0800 Subject: memblock: Kill early_node_map[] Now all ARCH_POPULATES_NODE_MAP archs select HAVE_MEBLOCK_NODE_MAP - there's no user of early_node_map[] left. Kill early_node_map[] and replace ARCH_POPULATES_NODE_MAP with HAVE_MEMBLOCK_NODE_MAP. Also, relocate for_each_mem_pfn_range() and helper from mm.h to memblock.h as page_alloc.c would no longer host an alternative implementation. This change is ultimately one to one mapping and shouldn't cause any observable difference; however, after the recent changes, there are some functions which now would fit memblock.c better than page_alloc.c and dependency on HAVE_MEMBLOCK_NODE_MAP instead of HAVE_MEMBLOCK doesn't make much sense on some of them. Further cleanups for functions inside HAVE_MEMBLOCK_NODE_MAP in mm.h would be nice. -v2: Fix compile bug introduced by mis-spelling CONFIG_HAVE_MEMBLOCK_NODE_MAP to CONFIG_MEMBLOCK_HAVE_NODE_MAP in mmzone.h. Reported by Stephen Rothwell. Signed-off-by: Tejun Heo Cc: Stephen Rothwell Cc: Benjamin Herrenschmidt Cc: Yinghai Lu Cc: Tony Luck Cc: Ralf Baechle Cc: Martin Schwidefsky Cc: Chen Liqin Cc: Paul Mundt Cc: "David S. Miller" Cc: "H. Peter Anvin" --- arch/ia64/Kconfig | 3 - arch/mips/Kconfig | 3 - arch/powerpc/Kconfig | 3 - arch/s390/Kconfig | 3 - arch/score/Kconfig | 3 - arch/sh/mm/Kconfig | 3 - arch/sparc/Kconfig | 3 - arch/x86/Kconfig | 3 - drivers/iommu/intel-iommu.c | 1 + include/linux/memblock.h | 23 +++- include/linux/mm.h | 50 ++------- include/linux/mmzone.h | 8 +- mm/memblock.c | 2 +- mm/page_alloc.c | 259 +++----------------------------------------- 14 files changed, 55 insertions(+), 312 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index e2c7de0d823..3b7a7c48378 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -477,9 +477,6 @@ config NODES_SHIFT MAX_NUMNODES will be 2^(This value). If in doubt, use the default. -config ARCH_POPULATES_NODE_MAP - def_bool y - # VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent. # VIRTUAL_MEM_MAP has been retained for historical reasons. config VIRTUAL_MEM_MAP diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index b789847d93f..9c652eb68aa 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2067,9 +2067,6 @@ config ARCH_DISCONTIGMEM_ENABLE or have huge holes in the physical address space for other reasons. See for more. -config ARCH_POPULATES_NODE_MAP - def_bool y - config ARCH_SPARSEMEM_ENABLE bool select SPARSEMEM_STATIC diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8516477c4dc..ead0bc68439 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -422,9 +422,6 @@ config ARCH_SPARSEMEM_DEFAULT def_bool y depends on (SMP && PPC_PSERIES) || PPC_PS3 -config ARCH_POPULATES_NODE_MAP - def_bool y - config SYS_SUPPORTS_HUGETLBFS bool diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e383caf251a..d48ede33443 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -348,9 +348,6 @@ config WARN_DYNAMIC_STACK Say N if you are unsure. -config ARCH_POPULATES_NODE_MAP - def_bool y - comment "Kernel preemption" source "kernel/Kconfig.preempt" diff --git a/arch/score/Kconfig b/arch/score/Kconfig index e5ae12f4878..8b0c9464aa9 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -63,9 +63,6 @@ config 32BIT config ARCH_FLATMEM_ENABLE def_bool y -config ARCH_POPULATES_NODE_MAP - def_bool y - source "mm/Kconfig" config MEMORY_START diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig index c3e61b36649..cb8f9920f4d 100644 --- a/arch/sh/mm/Kconfig +++ b/arch/sh/mm/Kconfig @@ -143,9 +143,6 @@ config MAX_ACTIVE_REGIONS CPU_SUBTYPE_SH7785) default "1" -config ARCH_POPULATES_NODE_MAP - def_bool y - config ARCH_SELECT_MEMORY_MODEL def_bool y diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 91a6d1e2bf3..70ae9d81870 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -353,9 +353,6 @@ config NODES_SPAN_OTHER_NODES def_bool y depends on NEED_MULTIPLE_NODES -config ARCH_POPULATES_NODE_MAP - def_bool y if SPARC64 - config ARCH_SELECT_MEMORY_MODEL def_bool y if SPARC64 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5d1514c263f..9bab4a90d7a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -206,9 +206,6 @@ config ZONE_DMA32 bool default X86_64 -config ARCH_POPULATES_NODE_MAP - def_bool y - config AUDIT_ARCH bool default X86_64 diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index bcbd693b351..d1c17934d66 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c7b68f489d4..cd7606b71e5 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -58,6 +58,26 @@ int memblock_remove(phys_addr_t base, phys_addr_t size); int memblock_free(phys_addr_t base, phys_addr_t size); int memblock_reserve(phys_addr_t base, phys_addr_t size); +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, + unsigned long *out_end_pfn, int *out_nid); + +/** + * for_each_mem_pfn_range - early memory pfn range iterator + * @i: an integer used as loop variable + * @nid: node selector, %MAX_NUMNODES for all nodes + * @p_start: ptr to ulong for start pfn of the range, can be %NULL + * @p_end: ptr to ulong for end pfn of the range, can be %NULL + * @p_nid: ptr to int for nid of the range, can be %NULL + * + * Walks over configured memory ranges. Available after early_node_map is + * populated. + */ +#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \ + for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ + i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ + void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start, phys_addr_t *out_end, int *out_nid); @@ -101,9 +121,6 @@ static inline int memblock_get_region_node(const struct memblock_region *r) } #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -/* The numa aware allocator is only available if - * CONFIG_ARCH_POPULATES_NODE_MAP is set - */ phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); diff --git a/include/linux/mm.h b/include/linux/mm.h index 6b365aee839..c6f49bea52a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1252,43 +1252,34 @@ static inline void pgtable_page_dtor(struct page *page) extern void free_area_init(unsigned long * zones_size); extern void free_area_init_node(int nid, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); -#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP /* - * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its + * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its * zones, allocate the backing mem_map and account for memory holes in a more * architecture independent manner. This is a substitute for creating the * zone_sizes[] and zholes_size[] arrays and passing them to * free_area_init_node() * * An architecture is expected to register range of page frames backed by - * physical memory with add_active_range() before calling + * physical memory with memblock_add[_node]() before calling * free_area_init_nodes() passing in the PFN each zone ends at. At a basic * usage, an architecture is expected to do something like * * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn, * max_highmem_pfn}; * for_each_valid_physical_page_range() - * add_active_range(node_id, start_pfn, end_pfn) + * memblock_add_node(base, size, nid) * free_area_init_nodes(max_zone_pfns); * - * If the architecture guarantees that there are no holes in the ranges - * registered with add_active_range(), free_bootmem_active_regions() - * will call free_bootmem_node() for each registered physical page range. - * Similarly sparse_memory_present_with_active_regions() calls - * memory_present() for each range when SPARSEMEM is enabled. + * free_bootmem_with_active_regions() calls free_bootmem_node() for each + * registered physical page range. Similarly + * sparse_memory_present_with_active_regions() calls memory_present() for + * each range when SPARSEMEM is enabled. * * See mm/page_alloc.c for more information on each function exposed by - * CONFIG_ARCH_POPULATES_NODE_MAP + * CONFIG_HAVE_MEMBLOCK_NODE_MAP. */ extern void free_area_init_nodes(unsigned long *max_zone_pfn); -#ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP -extern void add_active_range(unsigned int nid, unsigned long start_pfn, - unsigned long end_pfn); -extern void remove_active_range(unsigned int nid, unsigned long start_pfn, - unsigned long end_pfn); -extern void remove_all_active_ranges(void); -void sort_node_map(void); -#endif unsigned long node_map_pfn_alignment(void); unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, unsigned long end_pfn); @@ -1303,28 +1294,9 @@ int add_from_early_node_map(struct range *range, int az, int nr_range, int nid); extern void sparse_memory_present_with_active_regions(int nid); -extern void __next_mem_pfn_range(int *idx, int nid, - unsigned long *out_start_pfn, - unsigned long *out_end_pfn, int *out_nid); - -/** - * for_each_mem_pfn_range - early memory pfn range iterator - * @i: an integer used as loop variable - * @nid: node selector, %MAX_NUMNODES for all nodes - * @p_start: ptr to ulong for start pfn of the range, can be %NULL - * @p_end: ptr to ulong for end pfn of the range, can be %NULL - * @p_nid: ptr to int for nid of the range, can be %NULL - * - * Walks over configured memory ranges. Available after early_node_map is - * populated. - */ -#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \ - for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ - i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) - -#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -#if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \ +#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \ !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) static inline int __early_pfn_to_nid(unsigned long pfn) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 188cb2ffe8d..3ac040f1936 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -598,13 +598,13 @@ struct zonelist { #endif }; -#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP struct node_active_region { unsigned long start_pfn; unsigned long end_pfn; int nid; }; -#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ #ifndef CONFIG_DISCONTIGMEM /* The array of struct pages - for discontigmem use pgdat->lmem_map */ @@ -720,7 +720,7 @@ extern int movable_zone; static inline int zone_movable_is_highmem(void) { -#if defined(CONFIG_HIGHMEM) && defined(CONFIG_ARCH_POPULATES_NODE_MAP) +#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE) return movable_zone == ZONE_HIGHMEM; #else return 0; @@ -938,7 +938,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, #endif #if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \ - !defined(CONFIG_ARCH_POPULATES_NODE_MAP) + !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) static inline unsigned long early_pfn_to_nid(unsigned long pfn) { return 0; diff --git a/mm/memblock.c b/mm/memblock.c index ef4987b03af..1adbef09b43 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -716,7 +716,7 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) static phys_addr_t __init memblock_nid_range_rev(phys_addr_t start, phys_addr_t end, int *nid) { -#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP unsigned long start_pfn, end_pfn; int i; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6ce27331834..63ff8dab433 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -181,42 +181,17 @@ static unsigned long __meminitdata nr_kernel_pages; static unsigned long __meminitdata nr_all_pages; static unsigned long __meminitdata dma_reserve; -#ifdef CONFIG_ARCH_POPULATES_NODE_MAP - #ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP - /* - * MAX_ACTIVE_REGIONS determines the maximum number of distinct ranges - * of memory (RAM) that may be registered with add_active_range(). - * Ranges passed to add_active_range() will be merged if possible so - * the number of times add_active_range() can be called is related to - * the number of nodes and the number of holes - */ - #ifdef CONFIG_MAX_ACTIVE_REGIONS - /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */ - #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS - #else - #if MAX_NUMNODES >= 32 - /* If there can be many nodes, allow up to 50 holes per node */ - #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50) - #else - /* By default, allow up to 256 distinct regions */ - #define MAX_ACTIVE_REGIONS 256 - #endif - #endif - - static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS]; - static int __meminitdata nr_nodemap_entries; -#endif /* !CONFIG_HAVE_MEMBLOCK_NODE_MAP */ - - static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; - static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; - static unsigned long __initdata required_kernelcore; - static unsigned long __initdata required_movablecore; - static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; - - /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ - int movable_zone; - EXPORT_SYMBOL(movable_zone); -#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; +static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; +static unsigned long __initdata required_kernelcore; +static unsigned long __initdata required_movablecore; +static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; + +/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ +int movable_zone; +EXPORT_SYMBOL(movable_zone); +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ #if MAX_NUMNODES > 1 int nr_node_ids __read_mostly = MAX_NUMNODES; @@ -3734,7 +3709,7 @@ __meminit int init_currently_empty_zone(struct zone *zone, return 0; } -#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID /* * Required by SPARSEMEM. Given a PFN, return what node the PFN is on. @@ -4002,7 +3977,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid, return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); } -#else +#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, unsigned long zone_type, unsigned long *zones_size) @@ -4020,7 +3995,7 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid, return zholes_size[zone_type]; } -#endif +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, unsigned long *zones_size, unsigned long *zholes_size) @@ -4243,10 +4218,10 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) */ if (pgdat == NODE_DATA(0)) { mem_map = NODE_DATA(0)->node_mem_map; -#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP if (page_to_pfn(mem_map) != pgdat->node_start_pfn) mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET); -#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ } #endif #endif /* CONFIG_FLAT_NODE_MEM_MAP */ @@ -4271,7 +4246,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, free_area_init_core(pgdat, zones_size, zholes_size); } -#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP #if MAX_NUMNODES > 1 /* @@ -4292,201 +4267,6 @@ static inline void setup_nr_node_ids(void) } #endif -#ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP -/* - * Common iterator interface used to define for_each_mem_pfn_range(). - */ -void __meminit __next_mem_pfn_range(int *idx, int nid, - unsigned long *out_start_pfn, - unsigned long *out_end_pfn, int *out_nid) -{ - struct node_active_region *r = NULL; - - while (++*idx < nr_nodemap_entries) { - if (nid == MAX_NUMNODES || nid == early_node_map[*idx].nid) { - r = &early_node_map[*idx]; - break; - } - } - if (!r) { - *idx = -1; - return; - } - - if (out_start_pfn) - *out_start_pfn = r->start_pfn; - if (out_end_pfn) - *out_end_pfn = r->end_pfn; - if (out_nid) - *out_nid = r->nid; -} - -/** - * add_active_range - Register a range of PFNs backed by physical memory - * @nid: The node ID the range resides on - * @start_pfn: The start PFN of the available physical memory - * @end_pfn: The end PFN of the available physical memory - * - * These ranges are stored in an early_node_map[] and later used by - * free_area_init_nodes() to calculate zone sizes and holes. If the - * range spans a memory hole, it is up to the architecture to ensure - * the memory is not freed by the bootmem allocator. If possible - * the range being registered will be merged with existing ranges. - */ -void __init add_active_range(unsigned int nid, unsigned long start_pfn, - unsigned long end_pfn) -{ - int i; - - mminit_dprintk(MMINIT_TRACE, "memory_register", - "Entering add_active_range(%d, %#lx, %#lx) " - "%d entries of %d used\n", - nid, start_pfn, end_pfn, - nr_nodemap_entries, MAX_ACTIVE_REGIONS); - - mminit_validate_memmodel_limits(&start_pfn, &end_pfn); - - /* Merge with existing active regions if possible */ - for (i = 0; i < nr_nodemap_entries; i++) { - if (early_node_map[i].nid != nid) - continue; - - /* Skip if an existing region covers this new one */ - if (start_pfn >= early_node_map[i].start_pfn && - end_pfn <= early_node_map[i].end_pfn) - return; - - /* Merge forward if suitable */ - if (start_pfn <= early_node_map[i].end_pfn && - end_pfn > early_node_map[i].end_pfn) { - early_node_map[i].end_pfn = end_pfn; - return; - } - - /* Merge backward if suitable */ - if (start_pfn < early_node_map[i].start_pfn && - end_pfn >= early_node_map[i].start_pfn) { - early_node_map[i].start_pfn = start_pfn; - return; - } - } - - /* Check that early_node_map is large enough */ - if (i >= MAX_ACTIVE_REGIONS) { - printk(KERN_CRIT "More than %d memory regions, truncating\n", - MAX_ACTIVE_REGIONS); - return; - } - - early_node_map[i].nid = nid; - early_node_map[i].start_pfn = start_pfn; - early_node_map[i].end_pfn = end_pfn; - nr_nodemap_entries = i + 1; -} - -/** - * remove_active_range - Shrink an existing registered range of PFNs - * @nid: The node id the range is on that should be shrunk - * @start_pfn: The new PFN of the range - * @end_pfn: The new PFN of the range - * - * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node. - * The map is kept near the end physical page range that has already been - * registered. This function allows an arch to shrink an existing registered - * range. - */ -void __init remove_active_range(unsigned int nid, unsigned long start_pfn, - unsigned long end_pfn) -{ - unsigned long this_start_pfn, this_end_pfn; - int i, j; - int removed = 0; - - printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n", - nid, start_pfn, end_pfn); - - /* Find the old active region end and shrink */ - for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) { - if (this_start_pfn >= start_pfn && this_end_pfn <= end_pfn) { - /* clear it */ - early_node_map[i].start_pfn = 0; - early_node_map[i].end_pfn = 0; - removed = 1; - continue; - } - if (this_start_pfn < start_pfn && this_end_pfn > start_pfn) { - early_node_map[i].end_pfn = start_pfn; - if (this_end_pfn > end_pfn) - add_active_range(nid, end_pfn, this_end_pfn); - continue; - } - if (this_start_pfn >= start_pfn && this_end_pfn > end_pfn && - this_start_pfn < end_pfn) { - early_node_map[i].start_pfn = end_pfn; - continue; - } - } - - if (!removed) - return; - - /* remove the blank ones */ - for (i = nr_nodemap_entries - 1; i > 0; i--) { - if (early_node_map[i].nid != nid) - continue; - if (early_node_map[i].end_pfn) - continue; - /* we found it, get rid of it */ - for (j = i; j < nr_nodemap_entries - 1; j++) - memcpy(&early_node_map[j], &early_node_map[j+1], - sizeof(early_node_map[j])); - j = nr_nodemap_entries - 1; - memset(&early_node_map[j], 0, sizeof(early_node_map[j])); - nr_nodemap_entries--; - } -} - -/** - * remove_all_active_ranges - Remove all currently registered regions - * - * During discovery, it may be found that a table like SRAT is invalid - * and an alternative discovery method must be used. This function removes - * all currently registered regions. - */ -void __init remove_all_active_ranges(void) -{ - memset(early_node_map, 0, sizeof(early_node_map)); - nr_nodemap_entries = 0; -} - -/* Compare two active node_active_regions */ -static int __init cmp_node_active_region(const void *a, const void *b) -{ - struct node_active_region *arange = (struct node_active_region *)a; - struct node_active_region *brange = (struct node_active_region *)b; - - /* Done this way to avoid overflows */ - if (arange->start_pfn > brange->start_pfn) - return 1; - if (arange->start_pfn < brange->start_pfn) - return -1; - - return 0; -} - -/* sort the node_map by start_pfn */ -void __init sort_node_map(void) -{ - sort(early_node_map, (size_t)nr_nodemap_entries, - sizeof(struct node_active_region), - cmp_node_active_region, NULL); -} -#else /* !CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -static inline void sort_node_map(void) -{ -} -#endif - /** * node_map_pfn_alignment - determine the maximum internode alignment * @@ -4764,9 +4544,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) unsigned long start_pfn, end_pfn; int i, nid; - /* Sort early_node_map as initialisation assumes it is sorted */ - sort_node_map(); - /* Record where the zone boundaries are */ memset(arch_zone_lowest_possible_pfn, 0, sizeof(arch_zone_lowest_possible_pfn)); @@ -4867,7 +4644,7 @@ static int __init cmdline_parse_movablecore(char *p) early_param("kernelcore", cmdline_parse_kernelcore); early_param("movablecore", cmdline_parse_movablecore); -#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ /** * set_dma_reserve - set the specified number of pages reserved in the first zone -- cgit v1.2.3