Diffstat (limited to 'mm/memory_hotplug.c')
-rw-r--r-- | mm/memory_hotplug.c | 234
1 files changed, 112 insertions, 122 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2b2b3ccbbfb5..124e794867c5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -34,6 +34,7 @@
 #include <linux/hugetlb.h>
 #include <linux/memblock.h>
 #include <linux/compaction.h>
+#include <linux/rmap.h>
 
 #include <asm/tlbflush.h>
 
@@ -253,7 +254,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
 	if (pfn_valid(phys_start_pfn))
 		return -EEXIST;
 
-	ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap);
+	ret = sparse_add_one_section(nid, phys_start_pfn, altmap);
 	if (ret < 0)
 		return ret;
 
@@ -743,14 +744,13 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 	int nid = pgdat->node_id;
 	unsigned long flags;
 
-	if (zone_is_empty(zone))
-		init_currently_empty_zone(zone, start_pfn, nr_pages);
-
 	clear_zone_contiguous(zone);
 
 	/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
 	pgdat_resize_lock(pgdat, &flags);
 	zone_span_writelock(zone);
+	if (zone_is_empty(zone))
+		init_currently_empty_zone(zone, start_pfn, nr_pages);
 	resize_zone_range(zone, start_pfn, nr_pages);
 	zone_span_writeunlock(zone);
 	resize_pgdat_range(pgdat, start_pfn, nr_pages);
@@ -1078,7 +1078,7 @@ static int online_memory_block(struct memory_block *mem, void *arg)
  *
  * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
  */
-int __ref add_memory_resource(int nid, struct resource *res, bool online)
+int __ref add_memory_resource(int nid, struct resource *res)
 {
 	u64 start, size;
 	bool new_node = false;
@@ -1133,7 +1133,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 	mem_hotplug_done();
 
 	/* online pages if requested */
-	if (online)
+	if (memhp_auto_online)
 		walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1),
 				  NULL, online_memory_block);
 
@@ -1157,7 +1157,7 @@ int __ref __add_memory(int nid, u64 start, u64 size)
 	if (IS_ERR(res))
 		return PTR_ERR(res);
 
-	ret = add_memory_resource(nid, res, memhp_auto_online);
+	ret = add_memory_resource(nid, res);
 	if (ret < 0)
 		release_memory_resource(res);
 	return ret;
@@ -1226,14 +1226,15 @@ static bool is_pageblock_removable_nolock(struct page *page)
 	if (!zone_spans_pfn(zone, pfn))
 		return false;
 
-	return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, true);
+	return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, SKIP_HWPOISON);
 }
 
 /* Checks if this range of memory is likely to be hot-removable. */
 bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
 {
 	struct page *page = pfn_to_page(start_pfn);
-	struct page *end_page = page + nr_pages;
+	unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page)));
+	struct page *end_page = pfn_to_page(end_pfn);
 
 	/* Check the starting page of each pageblock within the range */
 	for (; page < end_page; page = next_active_pageblock(page)) {
@@ -1273,6 +1274,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
 			i++;
 		if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
 			continue;
+		/* Check if we got outside of the zone */
+		if (zone && !zone_spans_pfn(zone, pfn + i))
+			return 0;
 		page = pfn_to_page(pfn + i);
 		if (zone && page_zone(page) != zone)
 			return 0;
@@ -1301,23 +1305,27 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
 static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 {
 	unsigned long pfn;
-	struct page *page;
+
 	for (pfn = start; pfn < end; pfn++) {
-		if (pfn_valid(pfn)) {
-			page = pfn_to_page(pfn);
-			if (PageLRU(page))
-				return pfn;
-			if (__PageMovable(page))
-				return pfn;
-			if (PageHuge(page)) {
-				if (hugepage_migration_supported(page_hstate(page)) &&
-				    page_huge_active(page))
-					return pfn;
-				else
-					pfn = round_up(pfn + 1,
-						1 << compound_order(page)) - 1;
-			}
-		}
+		struct page *page, *head;
+		unsigned long skip;
+
+		if (!pfn_valid(pfn))
+			continue;
+		page = pfn_to_page(pfn);
+		if (PageLRU(page))
+			return pfn;
+		if (__PageMovable(page))
+			return pfn;
+
+		if (!PageHuge(page))
+			continue;
+		head = compound_head(page);
+		if (hugepage_migration_supported(page_hstate(head)) &&
+		    page_huge_active(head))
+			return pfn;
+		skip = (1 << compound_order(head)) - (page - head);
+		pfn += skip - 1;
 	}
 	return 0;
 }
@@ -1339,18 +1347,15 @@ static struct page *new_node_page(struct page *page, unsigned long private)
 	return new_page_nodemask(page, nid, &nmask);
 }
 
-#define NR_OFFLINE_AT_ONCE_PAGES	(256)
 static int
 do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 {
 	unsigned long pfn;
 	struct page *page;
-	int move_pages = NR_OFFLINE_AT_ONCE_PAGES;
-	int not_managed = 0;
 	int ret = 0;
 	LIST_HEAD(source);
 
-	for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) {
+	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
 		if (!pfn_valid(pfn))
 			continue;
 		page = pfn_to_page(pfn);
@@ -1362,13 +1367,27 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 				ret = -EBUSY;
 				break;
 			}
-			if (isolate_huge_page(page, &source))
-				move_pages -= 1 << compound_order(head);
+			isolate_huge_page(page, &source);
 			continue;
 		} else if (PageTransHuge(page))
 			pfn = page_to_pfn(compound_head(page))
 				+ hpage_nr_pages(page) - 1;
 
+		/*
+		 * HWPoison pages have elevated reference counts so the migration would
+		 * fail on them. It also doesn't make any sense to migrate them in the
+		 * first place. Still try to unmap such a page in case it is still mapped
+		 * (e.g. current hwpoison implementation doesn't unmap KSM pages but keep
+		 * the unmap as the catch all safety net).
+		 */
+		if (PageHWPoison(page)) {
+			if (WARN_ON(PageLRU(page)))
+				isolate_lru_page(page);
+			if (page_mapped(page))
+				try_to_unmap(page, TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS);
+			continue;
+		}
+
 		if (!get_page_unless_zero(page))
 			continue;
 		/*
@@ -1380,41 +1399,31 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		else
 			ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
 		if (!ret) { /* Success */
-			put_page(page);
 			list_add_tail(&page->lru, &source);
-			move_pages--;
 			if (!__PageMovable(page))
 				inc_node_page_state(page, NR_ISOLATED_ANON +
 						    page_is_file_cache(page));
 
 		} else {
-#ifdef CONFIG_DEBUG_VM
-			pr_alert("failed to isolate pfn %lx\n", pfn);
+			pr_warn("failed to isolate pfn %lx\n", pfn);
 			dump_page(page, "isolation failed");
-#endif
-			put_page(page);
-			/* Because we don't have big zone->lock. we should
-			   check this again here. */
-			if (page_count(page)) {
-				not_managed++;
-				ret = -EBUSY;
-				break;
-			}
 		}
+		put_page(page);
 	}
 	if (!list_empty(&source)) {
-		if (not_managed) {
-			putback_movable_pages(&source);
-			goto out;
-		}
-
 		/* Allocate a new page from the nearest neighbor node */
 		ret = migrate_pages(&source, new_node_page, NULL, 0,
 					MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
-		if (ret)
+		if (ret) {
+			list_for_each_entry(page, &source, lru) {
+				pr_warn("migrating pfn %lx failed ret:%d ",
+				       page_to_pfn(page), ret);
+				dump_page(page, "migration failure");
+			}
 			putback_movable_pages(&source);
+		}
 	}
-out:
+
 	return ret;
 }
@@ -1553,12 +1562,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
 	unsigned long valid_start, valid_end;
 	struct zone *zone;
 	struct memory_notify arg;
-
-	/* at least, alignment against pageblock is necessary */
-	if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
-		return -EINVAL;
-	if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
-		return -EINVAL;
+	char *reason;
 
 	mem_hotplug_begin();
@@ -1566,8 +1570,9 @@ static int __ref __offline_pages(unsigned long start_pfn,
 	   we assume this for now. */
 	if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
 				  &valid_end)) {
-		mem_hotplug_done();
-		return -EINVAL;
+		ret = -EINVAL;
+		reason = "multizone range";
+		goto failed_removal;
 	}
 
 	zone = page_zone(pfn_to_page(valid_start));
@@ -1576,10 +1581,11 @@ static int __ref __offline_pages(unsigned long start_pfn,
 
 	/* set above range as isolated */
 	ret = start_isolate_page_range(start_pfn, end_pfn,
-				       MIGRATE_MOVABLE, true);
+				       MIGRATE_MOVABLE,
+				       SKIP_HWPOISON | REPORT_FAILURE);
 	if (ret) {
-		mem_hotplug_done();
-		return ret;
+		reason = "failure to isolate range";
+		goto failed_removal;
 	}
 
 	arg.start_pfn = start_pfn;
@@ -1588,37 +1594,47 @@ static int __ref __offline_pages(unsigned long start_pfn,
 
 	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
 	ret = notifier_to_errno(ret);
-	if (ret)
-		goto failed_removal;
+	if (ret) {
+		reason = "notifier failure";
+		goto failed_removal_isolated;
+	}
 
-	pfn = start_pfn;
-repeat:
-	/* start memory hot removal */
-	ret = -EINTR;
-	if (signal_pending(current))
-		goto failed_removal;
+	do {
+		for (pfn = start_pfn; pfn;) {
+			if (signal_pending(current)) {
+				ret = -EINTR;
+				reason = "signal backoff";
+				goto failed_removal_isolated;
+			}
 
-	cond_resched();
-	lru_add_drain_all();
-	drain_all_pages(zone);
+			cond_resched();
+			lru_add_drain_all();
+			drain_all_pages(zone);
+
+			pfn = scan_movable_pages(pfn, end_pfn);
+			if (pfn) {
+				/*
+				 * TODO: fatal migration failures should bail
+				 * out
+				 */
+				do_migrate_range(pfn, end_pfn);
+			}
+		}
 
-	pfn = scan_movable_pages(start_pfn, end_pfn);
-	if (pfn) { /* We have movable pages */
-		ret = do_migrate_range(pfn, end_pfn);
-		goto repeat;
-	}
+		/*
+		 * Dissolve free hugepages in the memory block before doing
+		 * offlining actually in order to make hugetlbfs's object
+		 * counting consistent.
+		 */
+		ret = dissolve_free_huge_pages(start_pfn, end_pfn);
+		if (ret) {
+			reason = "failure to dissolve huge pages";
+			goto failed_removal_isolated;
+		}
+		/* check again */
+		offlined_pages = check_pages_isolated(start_pfn, end_pfn);
+	} while (offlined_pages < 0);
 
-	/*
-	 * dissolve free hugepages in the memory block before doing offlining
-	 * actually in order to make hugetlbfs's object counting consistent.
-	 */
-	ret = dissolve_free_huge_pages(start_pfn, end_pfn);
-	if (ret)
-		goto failed_removal;
-	/* check again */
-	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
-	if (offlined_pages < 0)
-		goto repeat;
 	pr_info("Offlined Pages %ld\n", offlined_pages);
 	/* Ok, all of our target is isolated.
 	   We cannot do rollback at this point. */
@@ -1654,13 +1670,15 @@ repeat:
 	mem_hotplug_done();
 	return 0;
 
+failed_removal_isolated:
+	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
 failed_removal:
-	pr_debug("memory offlining [mem %#010llx-%#010llx] failed\n",
+	pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
 		 (unsigned long long) start_pfn << PAGE_SHIFT,
-		 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
+		 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
+		 reason);
 	memory_notify(MEM_CANCEL_OFFLINE, &arg);
 	/* pushback to free area */
-	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
 	mem_hotplug_done();
 	return ret;
 }
@@ -1753,34 +1771,6 @@ static int check_cpu_on_node(pg_data_t *pgdat)
 	return 0;
 }
 
-static void unmap_cpu_on_node(pg_data_t *pgdat)
-{
-#ifdef CONFIG_ACPI_NUMA
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		if (cpu_to_node(cpu) == pgdat->node_id)
-			numa_clear_node(cpu);
-#endif
-}
-
-static int check_and_unmap_cpu_on_node(pg_data_t *pgdat)
-{
-	int ret;
-
-	ret = check_cpu_on_node(pgdat);
-	if (ret)
-		return ret;
-
-	/*
-	 * the node will be offlined when we come here, so we can clear
-	 * the cpu_to_node() now.
-	 */
-
-	unmap_cpu_on_node(pgdat);
-	return 0;
-}
-
 /**
  * try_offline_node
  * @nid: the node ID
@@ -1813,7 +1803,7 @@ void try_offline_node(int nid)
 		return;
 	}
 
-	if (check_and_unmap_cpu_on_node(pgdat))
+	if (check_cpu_on_node(pgdat))
 		return;
 
 	/*
@@ -1858,7 +1848,7 @@ void __ref __remove_memory(int nid, u64 start, u64 size)
 	memblock_free(start, size);
 	memblock_remove(start, size);
 
-	arch_remove_memory(start, size, NULL);
+	arch_remove_memory(nid, start, size, NULL);
 
 	try_offline_node(nid);
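
A note on the reworked scan_movable_pages() above: when the scan enters a hugetlb page on an arbitrary tail page, the new skip computation jumps straight to the first pfn past the whole compound page instead of round_up()'ing from the current pfn. The arithmetic can be sanity-checked outside the kernel; the following is a minimal standalone sketch with hypothetical pfn and order values, not kernel code:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical compound page: order 9 (512 base pages), head at pfn 1024. */
	const unsigned long head_pfn = 1024;
	const unsigned long nr = 1UL << 9;	/* stands in for 1 << compound_order(head) */

	/* Enter the loop on the head page or any tail page of the compound page. */
	for (unsigned long pfn = head_pfn; pfn < head_pfn + nr; pfn++) {
		/* skip = (1 << compound_order(head)) - (page - head) */
		unsigned long skip = nr - (pfn - head_pfn);

		/* pfn += skip - 1 in the body, then the for loop's own pfn++ ... */
		assert(pfn + (skip - 1) + 1 == head_pfn + nr);
	}
	/* ... so the scan always resumes exactly one pfn past the compound page. */
	printf("scan resumes at pfn %lu as expected\n", head_pfn + nr);
	return 0;
}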
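Likewise, the __offline_pages() changes replace the repeat: label and the duplicated mem_hotplug_done()/return exits with the kernel's usual layered goto error handling: the deeper failed_removal_isolated label additionally rolls back the page isolation, and a reason string feeds a single diagnostic printout. Below is a self-contained sketch of that idiom only; all function names are hypothetical stand-ins, not the kernel API:

#include <stdio.h>

/* Hypothetical stand-ins for the setup/teardown stages. */
static int isolate_range(void)   { return 0; }
static void undo_isolation(void) { puts("isolation undone"); }
static int notify_offline(void)  { return -1; /* simulate notifier failure */ }

static int offline_range(void)
{
	int ret;
	const char *reason;

	ret = isolate_range();
	if (ret) {
		reason = "failure to isolate range";
		goto failed;		/* nothing to roll back yet */
	}

	ret = notify_offline();
	if (ret) {
		reason = "notifier failure";
		goto failed_isolated;	/* deeper label: also undo the isolation */
	}

	return 0;

failed_isolated:
	undo_isolation();
failed:
	fprintf(stderr, "offlining failed due to %s\n", reason);
	return ret;
}

int main(void)
{
	return offline_range() ? 1 : 0;
}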