From 5853ff23c2f0f6c87a859e7f882eac3300b329a0 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Mon, 25 Mar 2013 15:47:38 -0700 Subject: mm: export split_page() This symbol will be used in the Hyper-V balloon driver to support 2M allocations. Signed-off-by: K. Y. Srinivasan Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8fcced7823fa..7ff1536f01b8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1397,6 +1397,7 @@ void split_page(struct page *page, unsigned int order) for (i = 1; i < (1 << order); i++) set_page_refcounted(page + i); } +EXPORT_SYMBOL_GPL(split_page); static int __isolate_free_page(struct page *page, unsigned int order) { -- cgit v1.2.3-59-g8ed1b From 4b59e6c4730978679b414a8da61514a2518da512 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 29 Apr 2013 15:06:11 -0700 Subject: mm, show_mem: suppress page counts in non-blockable contexts On large systems with a lot of memory, walking all RAM to determine page types may take a half second or even more. In non-blockable contexts, the page allocator will emit a page allocation failure warning unless __GFP_NOWARN is specified. In such contexts, irqs are typically disabled and such a lengthy delay may even result in NMI watchdog timeouts. To fix this, suppress the page walk in such contexts when printing the page allocation failure warning. 
Signed-off-by: David Rientjes Cc: Mel Gorman Acked-by: Michal Hocko Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/init.c | 3 +++ arch/ia64/mm/contig.c | 2 ++ arch/ia64/mm/discontig.c | 2 ++ arch/parisc/mm/init.c | 2 ++ arch/unicore32/mm/init.c | 3 +++ include/linux/mm.h | 3 ++- lib/show_mem.c | 3 +++ mm/page_alloc.c | 7 +++++++ 8 files changed, 24 insertions(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index ad722f1208a5..ad9a9f3f0322 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -99,6 +99,9 @@ void show_mem(unsigned int filter) printk("Mem-info:\n"); show_free_areas(filter); + if (filter & SHOW_MEM_FILTER_PAGE_COUNT) + return; + for_each_bank (i, mi) { struct membank *bank = &mi->bank[i]; unsigned int pfn1, pfn2; diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 80dab509dfb0..67c59ebec899 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -47,6 +47,8 @@ void show_mem(unsigned int filter) printk(KERN_INFO "Mem-info:\n"); show_free_areas(filter); printk(KERN_INFO "Node memory in pages:\n"); + if (filter & SHOW_MEM_FILTER_PAGE_COUNT) + return; for_each_online_pgdat(pgdat) { unsigned long present; unsigned long flags; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index c2e955ee79a8..a57436e5d405 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -623,6 +623,8 @@ void show_mem(unsigned int filter) printk(KERN_INFO "Mem-info:\n"); show_free_areas(filter); + if (filter & SHOW_MEM_FILTER_PAGE_COUNT) + return; printk(KERN_INFO "Node memory in pages:\n"); for_each_online_pgdat(pgdat) { unsigned long present; diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 3ac462de53a4..cf2da13c41e6 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -697,6 +697,8 @@ void show_mem(unsigned int filter) printk(KERN_INFO "Mem-info:\n"); show_free_areas(filter); + if (filter & 
SHOW_MEM_FILTER_PAGE_COUNT) + return; #ifndef CONFIG_DISCONTIGMEM i = max_mapnr; while (i-- > 0) { diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index de186bde8975..644482882bae 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -66,6 +66,9 @@ void show_mem(unsigned int filter) printk(KERN_DEFAULT "Mem-info:\n"); show_free_areas(filter); + if (filter & SHOW_MEM_FILTER_PAGE_COUNT) + return; + for_each_bank(i, mi) { struct membank *bank = &mi->bank[i]; unsigned int pfn1, pfn2; diff --git a/include/linux/mm.h b/include/linux/mm.h index e2091b88d24c..f3c7b1f9d1d8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -899,7 +899,8 @@ extern void pagefault_out_of_memory(void); * Flags passed to show_mem() and show_free_areas() to suppress output in * various contexts. */ -#define SHOW_MEM_FILTER_NODES (0x0001u) /* filter disallowed nodes */ +#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ +#define SHOW_MEM_FILTER_PAGE_COUNT (0x0002u) /* page type count */ extern void show_free_areas(unsigned int flags); extern bool skip_free_areas_node(unsigned int flags, int nid); diff --git a/lib/show_mem.c b/lib/show_mem.c index 4407f8c9b1f7..b7c72311ad0c 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -18,6 +18,9 @@ void show_mem(unsigned int filter) printk("Mem-Info:\n"); show_free_areas(filter); + if (filter & SHOW_MEM_FILTER_PAGE_COUNT) + return; + for_each_online_pgdat(pgdat) { unsigned long i, flags; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7ff1536f01b8..da7a2fe7332e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2002,6 +2002,13 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) debug_guardpage_minorder() > 0) return; + /* + * Walking all memory to count page types is very expensive and should + * be inhibited in non-blockable contexts. 
+ */ + if (!(gfp_mask & __GFP_WAIT)) + filter |= SHOW_MEM_FILTER_PAGE_COUNT; + /* * This documents exceptions given to allocations in certain * contexts that are allowed to allocate outside current's set -- cgit v1.2.3-59-g8ed1b From 69afade72a3e13e96a065f757891d384d466123f Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 29 Apr 2013 15:06:21 -0700 Subject: mm: introduce common help functions to deal with reserved/managed pages The original goal of this patchset is to fix the bug reported by https://bugzilla.kernel.org/show_bug.cgi?id=53501 Now it has also been expanded to reduce common code used by memory initialization. This is the first part, which applies to v3.9-rc1. It introduces the following common helper functions to simplify free_initmem() and free_initrd_mem() on different architectures: adjust_managed_page_count(): will be used to adjust totalram_pages, totalhigh_pages, zone->managed_pages when reserving/unreserving a page. __free_reserved_page(): free a reserved page into the buddy system without adjusting page statistics info free_reserved_page(): free a reserved page into the buddy system and adjust page statistics info mark_page_reserved(): mark a page as reserved and adjust page statistics info free_reserved_area(): free a contiguous range of pages by calling free_reserved_page() free_initmem_default(): default method to free __init pages. We have only tested this patchset on x86 platforms, and have done basic compilation tests using cross-compilers from ftp.kernel.org. That means some code may not pass compilation on some architectures. So any help testing this patchset is welcome! There are several other parts still under development: Part2: introduce free_highmem_page() to simplify freeing highmem pages Part3: refine code to manage totalram_pages, totalhigh_pages and zone->managed_pages Part4: introduce helper functions to simplify mem_init() and remove the global variable num_physpages. 
This patch: Code to deal with reserved/managed pages is duplicated by many architectures, so introduce common helper functions to reduce duplicated code. These common helper functions will also be used to concentrate code to modify totalram_pages and zone->managed_pages, which makes the code much clearer. Signed-off-by: Jiang Liu Acked-by: Geert Uytterhoeven Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: Anatolij Gustschin Cc: Aurelien Jacquiot Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chen Liqin Cc: Chris Zankel Cc: David Howells Cc: David S. Miller Cc: Eric Biederman Cc: Fenghua Yu Cc: Guan Xuetao Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Heiko Carstens Cc: Helge Deller Cc: Hirokazu Takata Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James Hogan Cc: Jeff Dike Cc: Jiang Liu Cc: Jiang Liu Cc: Jonas Bonn Cc: Koichi Yasutake Cc: Lennox Wu Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Max Filippov Cc: Michal Simek Cc: Mikael Starvik Cc: Mike Frysinger Cc: Paul Mackerras Cc: Paul Mundt Cc: Ralf Baechle Cc: Richard Henderson Cc: Russell King Cc: Sam Ravnborg Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Cc: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ mm/page_alloc.c | 20 ++++++++++++++++++++ 2 files changed, 68 insertions(+) (limited to 'mm/page_alloc.c') diff --git a/include/linux/mm.h b/include/linux/mm.h index f3c7b1f9d1d8..d064c73c925e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1295,6 +1295,54 @@ extern void free_area_init_node(int nid, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); extern void free_initmem(void); +/* + * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK) + * into the buddy system. The freed pages will be poisoned with pattern + * "poison" if it's non-zero. 
+ * Return pages freed into the buddy system. + */ +extern unsigned long free_reserved_area(unsigned long start, unsigned long end, + int poison, char *s); + +static inline void adjust_managed_page_count(struct page *page, long count) +{ + totalram_pages += count; +} + +/* Free the reserved page into the buddy system, so it gets managed. */ +static inline void __free_reserved_page(struct page *page) +{ + ClearPageReserved(page); + init_page_count(page); + __free_page(page); +} + +static inline void free_reserved_page(struct page *page) +{ + __free_reserved_page(page); + adjust_managed_page_count(page, 1); +} + +static inline void mark_page_reserved(struct page *page) +{ + SetPageReserved(page); + adjust_managed_page_count(page, -1); +} + +/* + * Default method to free all the __init memory into the buddy system. + * The freed pages will be poisoned with pattern "poison" if it is + * non-zero. Return pages freed into the buddy system. + */ +static inline unsigned long free_initmem_default(int poison) +{ + extern char __init_begin[], __init_end[]; + + return free_reserved_area(PAGE_ALIGN((unsigned long)&__init_begin) , + ((unsigned long)&__init_end) & PAGE_MASK, + poison, "unused kernel"); +} + #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP /* * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its diff --git a/mm/page_alloc.c b/mm/page_alloc.c index da7a2fe7332e..5c660f5ba3d3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5121,6 +5121,26 @@ early_param("movablecore", cmdline_parse_movablecore); #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ +unsigned long free_reserved_area(unsigned long start, unsigned long end, + int poison, char *s) +{ + unsigned long pages, pos; + + pos = start = PAGE_ALIGN(start); + end &= PAGE_MASK; + for (pages = 0; pos < end; pos += PAGE_SIZE, pages++) { + if (poison) + memset((void *)pos, poison, PAGE_SIZE); + free_reserved_page(virt_to_page(pos)); + } + + if (pages && s) + pr_info("Freeing %s memory: %ldK (%lx - %lx)\n", + 
s, pages << (PAGE_SHIFT - 10), start, end); + + return pages; +} + /** * set_dma_reserve - set the specified number of pages reserved in the first zone * @new_dma_reserve: The number of pages to mark reserved -- cgit v1.2.3-59-g8ed1b From cfa11e08ed39eb28a9eff9a907b20913020c69b5 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 29 Apr 2013 15:07:00 -0700 Subject: mm: introduce free_highmem_page() helper to free highmem pages into buddy system The original goal of this patchset is to fix the bug reported by https://bugzilla.kernel.org/show_bug.cgi?id=53501 Now it has also been expanded to reduce common code used by memory initialization. This is the second part, which applies on top of the previous part at: http://marc.info/?l=linux-mm&m=136289696323825&w=2 It introduces a helper function free_highmem_page() to free highmem pages into the buddy system when initializing the mm subsystem. Introduction of free_highmem_page() is one step forward to clean up accesses and modifications of totalhigh_pages, totalram_pages and zone->managed_pages etc. I hope we could remove all references to totalhigh_pages from the arch/ subdirectory. We have only tested this patchset on x86 platforms, and have done basic compilation tests using cross-compilers from ftp.kernel.org. That means some code may not pass compilation on some architectures. So any help testing this patchset is welcome! There are several other parts still under development: Part3: refine code to manage totalram_pages, totalhigh_pages and zone->managed_pages Part4: introduce helper functions to simplify mem_init() and remove the global variable num_physpages. This patch: Introduce helper function free_highmem_page(), which will be used by architectures with HIGHMEM enabled to free highmem pages into the buddy system. Signed-off-by: Jiang Liu Cc: "David S. Miller" Cc: "H. Peter Anvin" Cc: "Suzuki K. 
Poulose" Cc: Alexander Graf Cc: Arnd Bergmann Cc: Attilio Rao Cc: Benjamin Herrenschmidt Cc: Cong Wang Cc: David Daney Cc: David Howells Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: James Hogan Cc: Jeff Dike Cc: Jiang Liu Cc: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Konstantin Khlebnikov Cc: Linus Walleij Cc: Marek Szyprowski Cc: Mel Gorman Cc: Michal Nazarewicz Cc: Michal Simek Cc: Michel Lespinasse Cc: Minchan Kim Cc: Paul Mackerras Cc: Ralf Baechle Cc: Richard Weinberger Cc: Rik van Riel Cc: Russell King Cc: Sam Ravnborg Cc: Stephen Boyd Cc: Thomas Gleixner Cc: Yinghai Lu Reviewed-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 +++++++ mm/page_alloc.c | 9 +++++++++ 2 files changed, 16 insertions(+) (limited to 'mm/page_alloc.c') diff --git a/include/linux/mm.h b/include/linux/mm.h index d064c73c925e..43b70d5f8201 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1303,6 +1303,13 @@ extern void free_initmem(void); */ extern unsigned long free_reserved_area(unsigned long start, unsigned long end, int poison, char *s); +#ifdef CONFIG_HIGHMEM +/* + * Free a highmem page into the buddy system, adjusting totalhigh_pages + * and totalram_pages. 
+ */ +extern void free_highmem_page(struct page *page); +#endif static inline void adjust_managed_page_count(struct page *page, long count) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5c660f5ba3d3..72da11c6804d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5141,6 +5141,15 @@ unsigned long free_reserved_area(unsigned long start, unsigned long end, return pages; } +#ifdef CONFIG_HIGHMEM +void free_highmem_page(struct page *page) +{ + __free_reserved_page(page); + totalram_pages++; + totalhigh_pages++; +} +#endif + /** * set_dma_reserve - set the specified number of pages reserved in the first zone * @new_dma_reserve: The number of pages to mark reserved -- cgit v1.2.3-59-g8ed1b From 949f7ec5760b021da3cccc1eaeb0671270e4238f Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 29 Apr 2013 15:07:48 -0700 Subject: mm, hugetlb: include hugepages in meminfo Particularly in oom conditions, it's troublesome that hugetlb memory is not displayed. All other meminfo that is emitted will not add up to what is expected, and there is no artifact left in the kernel log to show that a potentially significant amount of memory is actually allocated as hugepages which are not available to be reclaimed. Booting with hugepages=8192 on the command line, this memory is now shown in oom conditions. 
For example, with echo m > /proc/sysrq-trigger: Node 0 hugepages_total=2048 hugepages_free=2048 hugepages_surp=0 hugepages_size=2048kB Node 1 hugepages_total=2048 hugepages_free=2048 hugepages_surp=0 hugepages_size=2048kB Node 2 hugepages_total=2048 hugepages_free=2048 hugepages_surp=0 hugepages_size=2048kB Node 3 hugepages_total=2048 hugepages_free=2048 hugepages_surp=0 hugepages_size=2048kB [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: David Rientjes Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 4 ++++ mm/hugetlb.c | 15 +++++++++++++++ mm/page_alloc.c | 3 +++ 3 files changed, 22 insertions(+) (limited to 'mm/page_alloc.c') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 16e4e9a643fb..3a62df310f2e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -58,6 +58,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, int hugetlb_prefault(struct address_space *, struct vm_area_struct *); void hugetlb_report_meminfo(struct seq_file *); int hugetlb_report_node_meminfo(int, char *); +void hugetlb_show_meminfo(void); unsigned long hugetlb_total_pages(void); int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); @@ -114,6 +115,9 @@ static inline void hugetlb_report_meminfo(struct seq_file *m) { } #define hugetlb_report_node_meminfo(n, buf) 0 +static inline void hugetlb_show_meminfo(void) +{ +} #define follow_huge_pmd(mm, addr, pmd, write) NULL #define follow_huge_pud(mm, addr, pud, write) NULL #define prepare_hugepage_range(file, addr, len) (-EINVAL) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 73b864a32017..9b9aeef8e590 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2121,6 +2121,21 @@ int hugetlb_report_node_meminfo(int nid, char *buf) nid, h->surplus_huge_pages_node[nid]); } +void hugetlb_show_meminfo(void) +{ + struct hstate *h; + int nid; + + 
for_each_node_state(nid, N_MEMORY) + for_each_hstate(h) + pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n", + nid, + h->nr_huge_pages_node[nid], + h->free_huge_pages_node[nid], + h->surplus_huge_pages_node[nid], + 1UL << (huge_page_order(h) + PAGE_SHIFT - 10)); +} + /* Return the number pages of memory we physically have, in PAGE_SIZE units. */ unsigned long hugetlb_total_pages(void) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 72da11c6804d..7350986bbf99 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -3113,6 +3114,8 @@ void show_free_areas(unsigned int filter) printk("= %lukB\n", K(total)); } + hugetlb_show_meminfo(); + printk("%ld total pagecache pages\n", global_page_state(NR_FILE_PAGES)); show_swap_cache_info(); -- cgit v1.2.3-59-g8ed1b From fed2719e7a8612471bd17113ed326d38df434f17 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 29 Apr 2013 15:07:57 -0700 Subject: mm: page_alloc: avoid marking zones full prematurely after zone_reclaim() The following problem was reported against a distribution kernel when zone_reclaim was enabled but the same problem applies to the mainline kernel. The reproduction case was as follows: 1. Run numactl -m +0 dd if=largefile of=/dev/null This allocates a large number of clean pages in node 0 2. numactl -N +0 memhog 0.5*Mg This starts a memory-using application in node 0. The expected behaviour is that the clean pages get reclaimed and the application uses node 0 for its memory. The observed behaviour was that the memory for the memhog application was allocated off-node since commits cd38b115d5ad ("mm: page allocator: initialise ZLC for first zone eligible for zone_reclaim") and commit 76d3fbf8fbf6 ("mm: page allocator: reconsider zones for allocation after direct reclaim"). 
The assumption of those patches was that it was always preferable to allocate quickly than stall for long periods of time and they were meant to take care that the zone was only marked full when necessary but an important case was missed. In the allocator fast path, only the low watermarks are checked. If the zones free pages are between the low and min watermark then allocations from the allocators slow path will succeed. However, zone_reclaim will only reclaim SWAP_CLUSTER_MAX or 1< Reported-by: Hedi Berriche Tested-by: Hedi Berriche Reviewed-by: Michal Hocko Reviewed-by: Wanpeng Li Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7350986bbf99..b54c5cbf0200 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1942,9 +1942,24 @@ zonelist_scan: continue; default: /* did we reclaim enough */ - if (!zone_watermark_ok(zone, order, mark, + if (zone_watermark_ok(zone, order, mark, classzone_idx, alloc_flags)) + goto try_this_zone; + + /* + * Failed to reclaim enough to meet watermark. + * Only mark the zone full if checking the min + * watermark or if we failed to reclaim just + * 1< Date: Mon, 29 Apr 2013 15:07:59 -0700 Subject: mm: speedup in __early_pfn_to_nid When booting on a large memory system, the kernel spends considerable time in memmap_init_zone() setting up memory zones. Analysis shows significant time spent in __early_pfn_to_nid(). The routine memmap_init_zone() checks each PFN to verify the nid is valid. __early_pfn_to_nid() sequentially scans the list of pfn ranges to find the right range and returns the nid. This does not scale well. On a 4 TB (single rack) system there are 308 memory ranges to scan. The higher the PFN the more time spent sequentially spinning through memory ranges. 
Since memmap_init_zone() increments pfn, it will almost always be looking for the same range as the previous pfn, so check that range first. If it is in the same range, return that nid. If not, scan the list as before. A 4 TB (single rack) UV1 system takes 512 seconds to get through the zone code. This performance optimization reduces the time by 189 seconds, a 36% improvement. A 2 TB (single rack) UV2 system goes from 212.7 seconds to 99.8 seconds, a 112.9 second (53%) reduction. [akpm@linux-foundation.org: make the statics __meminitdata] [akpm@linux-foundation.org: fix comment formatting] [akpm@linux-foundation.org: fix ia64, per yinghai] [akpm@linux-foundation.org: add missing semicolon, per Tony] Signed-off-by: Russ Anderson Cc: David Rientjes Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Tested-by: "Luck, Tony" Cc: Yinghai Lu Cc: Lin Feng Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/numa.c | 15 ++++++++++++++- mm/page_alloc.c | 15 ++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index def782e31aac..4248492b9321 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -61,13 +61,26 @@ paddr_to_nid(unsigned long paddr) int __meminit __early_pfn_to_nid(unsigned long pfn) { int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec; + /* + * NOTE: The following SMP-unsafe globals are only used early in boot + * when the kernel is running single-threaded. 
+ */ + static int __meminitdata last_ssec, last_esec; + static int __meminitdata last_nid; + + if (section >= last_ssec && section < last_esec) + return last_nid; for (i = 0; i < num_node_memblks; i++) { ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT; esec = (node_memblk[i].start_paddr + node_memblk[i].size + ((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT; - if (section >= ssec && section < esec) + if (section >= ssec && section < esec) { + last_ssec = ssec; + last_esec = esec; + last_nid = node_memblk[i].nid; return node_memblk[i].nid; + } } return -1; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b54c5cbf0200..5a234b64f3ac 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4187,10 +4187,23 @@ int __meminit __early_pfn_to_nid(unsigned long pfn) { unsigned long start_pfn, end_pfn; int i, nid; + /* + * NOTE: The following SMP-unsafe globals are only used early in boot + * when the kernel is running single-threaded. + */ + static unsigned long __meminitdata last_start_pfn, last_end_pfn; + static int __meminitdata last_nid; + + if (last_start_pfn <= pfn && pfn < last_end_pfn) + return last_nid; for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) - if (start_pfn <= pfn && pfn < end_pfn) + if (start_pfn <= pfn && pfn < end_pfn) { + last_start_pfn = start_pfn; + last_end_pfn = end_pfn; + last_nid = nid; return nid; + } /* This is a memory hole */ return -1; } -- cgit v1.2.3-59-g8ed1b From f9872caf07c1c774034b8bddde7d4a3a7f4a6484 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Mon, 29 Apr 2013 15:08:01 -0700 Subject: page_alloc: make setup_nr_node_ids() usable for arch init code powerpc and x86 were open-coding copies of setup_nr_node_ids(), which page_alloc provides but makes static. Make it available to the archs in linux/mm.h. 
Signed-off-by: Cody P Schafer Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 ++++++ mm/page_alloc.c | 6 +----- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6d7266842abd..7aa11a6736eb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1816,5 +1816,11 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; } static inline bool page_is_guard(struct page *page) { return false; } #endif /* CONFIG_DEBUG_PAGEALLOC */ +#if MAX_NUMNODES > 1 +void __init setup_nr_node_ids(void); +#else +static inline void setup_nr_node_ids(void) {} +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5a234b64f3ac..98cbdf6e5532 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4749,7 +4749,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, /* * Figure out the number of possible node ids. */ -static void __init setup_nr_node_ids(void) +void __init setup_nr_node_ids(void) { unsigned int node; unsigned int highest = 0; @@ -4758,10 +4758,6 @@ static void __init setup_nr_node_ids(void) highest = node; nr_node_ids = highest + 1; } -#else -static inline void setup_nr_node_ids(void) -{ -} #endif /** -- cgit v1.2.3-59-g8ed1b