From fc44f7f9231a73821fc858f5bc48883a9e78f6de Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 5 Apr 2018 16:22:56 -0700 Subject: mm/memory_hotplug: don't read nid from struct page during hotplug During memory hotplugging the probe routine will leave struct pages uninitialized, the same as it is currently done during boot. Therefore, we do not want to access the inside of struct pages before __init_single_page() is called during onlining. Because during hotplug we know that pages in one memory block belong to the same numa node, we can skip the checking. We should keep checking for the boot case. [pasha.tatashin@oracle.com: s/register_new_memory()/hotplug_memory_register()] Link: http://lkml.kernel.org/r/20180228030308.1116-6-pasha.tatashin@oracle.com Link: http://lkml.kernel.org/r/20180215165920.8570-6-pasha.tatashin@oracle.com Signed-off-by: Pavel Tatashin Acked-by: Michal Hocko Reviewed-by: Ingo Molnar Cc: Baoquan He Cc: Bharata B Rao Cc: Daniel Jordan Cc: Dan Williams Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Kirill A. 
Shutemov Cc: Mel Gorman Cc: Steven Sistare Cc: Thomas Gleixner Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/memory.h') diff --git a/include/linux/memory.h b/include/linux/memory.h index f71e732c77b2..9f8cd856ca1e 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -109,7 +109,7 @@ extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); -extern int register_new_memory(int, struct mem_section *); +int hotplug_memory_register(int nid, struct mem_section *section); #ifdef CONFIG_MEMORY_HOTREMOVE extern int unregister_memory_section(struct mem_section *); #endif -- cgit v1.3-6-gb490 From d0dc12e86b3197a14a908d4fe7cb35b73dda82b5 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 5 Apr 2018 16:23:00 -0700 Subject: mm/memory_hotplug: optimize memory hotplug During memory hotplugging we traverse struct pages three times: 1. memset(0) in sparse_add_one_section() 2. loop in __add_section() that does: set_page_node(page, nid); and SetPageReserved(page); 3. loop in memmap_init_zone() to call __init_single_pfn() This patch removes the first two loops, and leaves only loop 3. All struct pages are initialized in one place, the same as it is done during boot. The benefits: - We improve memory hotplug performance because we are not evicting the cache several times and also reduce loop branching overhead. - Remove condition from hotpath in __init_single_pfn(), that was added in order to fix the problem that was reported by Bharata in the above email thread, thus also improving performance during normal boot. 
- Make memory hotplug more similar to the boot memory initialization path because we zero and initialize struct pages only in one function. - Simplifies memory hotplug struct page initialization code, and thus enables future improvements, such as multi-threading the initialization of struct pages in order to improve hotplug performance even further on larger machines. [pasha.tatashin@oracle.com: v5] Link: http://lkml.kernel.org/r/20180228030308.1116-7-pasha.tatashin@oracle.com Link: http://lkml.kernel.org/r/20180215165920.8570-7-pasha.tatashin@oracle.com Signed-off-by: Pavel Tatashin Reviewed-by: Ingo Molnar Cc: Michal Hocko Cc: Baoquan He Cc: Bharata B Rao Cc: Daniel Jordan Cc: Dan Williams Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Kirill A. Shutemov Cc: Mel Gorman Cc: Steven Sistare Cc: Thomas Gleixner Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 2 ++ include/linux/memory.h | 1 + mm/memory_hotplug.c | 27 ++++++++------------------- mm/page_alloc.c | 28 ++++++++++------------------ mm/sparse.c | 8 +++++++- 5 files changed, 28 insertions(+), 38 deletions(-) (limited to 'include/linux/memory.h') diff --git a/drivers/base/node.c b/drivers/base/node.c index 92b00a7e6a02..7a3a580821e0 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -407,6 +407,8 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid, if (!mem_blk) return -EFAULT; + + mem_blk->nid = nid; if (!node_online(nid)) return 0; diff --git a/include/linux/memory.h b/include/linux/memory.h index 9f8cd856ca1e..31ca3e28b0eb 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -33,6 +33,7 @@ struct memory_block { void *hw; /* optional pointer to fw/hw data */ int (*phys_callback)(struct memory_block *); struct device dev; + int nid; /* NID for this memory block */ }; int arch_get_memory_phys_device(unsigned long start_pfn); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 
477e183a4ac7..6a9ba14e18ed 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -250,7 +250,6 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, struct vmem_altmap *altmap, bool want_memblock) { int ret; - int i; if (pfn_valid(phys_start_pfn)) return -EEXIST; @@ -259,23 +258,6 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, if (ret < 0) return ret; - /* - * Make all the pages reserved so that nobody will stumble over half - * initialized state. - * FIXME: We also have to associate it with a node because page_to_nid - * relies on having page with the proper node. - */ - for (i = 0; i < PAGES_PER_SECTION; i++) { - unsigned long pfn = phys_start_pfn + i; - struct page *page; - if (!pfn_valid(pfn)) - continue; - - page = pfn_to_page(pfn); - set_page_node(page, nid); - SetPageReserved(page); - } - if (!want_memblock) return 0; @@ -908,8 +890,15 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ int nid; int ret; struct memory_notify arg; + struct memory_block *mem; + + /* + * We can't use pfn_to_nid() because nid might be stored in struct page + * which is not yet initialized. Instead, we find nid from memory block. 
+ */ + mem = find_memory_block(__pfn_to_section(pfn)); + nid = mem->nid; - nid = pfn_to_nid(pfn); /* associate pfn range with the zone */ zone = move_pfn_range(online_type, nid, pfn, nr_pages); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3183eb2f579c..a3e2ba4f76bb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1143,10 +1143,9 @@ static void free_one_page(struct zone *zone, } static void __meminit __init_single_page(struct page *page, unsigned long pfn, - unsigned long zone, int nid, bool zero) + unsigned long zone, int nid) { - if (zero) - mm_zero_struct_page(page); + mm_zero_struct_page(page); set_page_links(page, zone, nid, pfn); init_page_count(page); page_mapcount_reset(page); @@ -1160,12 +1159,6 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn, #endif } -static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone, - int nid, bool zero) -{ - return __init_single_page(pfn_to_page(pfn), pfn, zone, nid, zero); -} - #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT static void __meminit init_reserved_page(unsigned long pfn) { @@ -1184,7 +1177,7 @@ static void __meminit init_reserved_page(unsigned long pfn) if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone)) break; } - __init_single_pfn(pfn, zid, nid, true); + __init_single_page(pfn_to_page(pfn), pfn, zid, nid); } #else static inline void init_reserved_page(unsigned long pfn) @@ -1501,7 +1494,7 @@ static unsigned long __init deferred_init_pages(int nid, int zid, } else { page++; } - __init_single_page(page, pfn, zid, nid, true); + __init_single_page(page, pfn, zid, nid); nr_pages++; } return (nr_pages); @@ -5434,6 +5427,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, pg_data_t *pgdat = NODE_DATA(nid); unsigned long pfn; unsigned long nr_initialised = 0; + struct page *page; #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP struct memblock_region *r = NULL, *tmp; #endif @@ -5486,6 +5480,11 @@ void __meminit 
memmap_init_zone(unsigned long size, int nid, unsigned long zone, #endif not_early: + page = pfn_to_page(pfn); + __init_single_page(page, pfn, zone, nid); + if (context == MEMMAP_HOTPLUG) + SetPageReserved(page); + /* * Mark the block movable so that blocks are reserved for * movable at startup. This will force kernel allocations @@ -5502,15 +5501,8 @@ not_early: * because this is done early in sparse_add_one_section */ if (!(pfn & (pageblock_nr_pages - 1))) { - struct page *page = pfn_to_page(pfn); - - __init_single_page(page, pfn, zone, nid, - context != MEMMAP_HOTPLUG); set_pageblock_migratetype(page, MIGRATE_MOVABLE); cond_resched(); - } else { - __init_single_pfn(pfn, zone, nid, - context != MEMMAP_HOTPLUG); } } } diff --git a/mm/sparse.c b/mm/sparse.c index 58cab483e81b..62eef264a7bd 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -779,7 +779,13 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, goto out; } - memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION); +#ifdef CONFIG_DEBUG_VM + /* + * Poison uninitialized struct pages in order to catch invalid flags + * combinations. + */ + memset(memmap, PAGE_POISON_PATTERN, sizeof(struct page) * PAGES_PER_SECTION); +#endif section_mark_present(ms); -- cgit v1.3-6-gb490