From c8721bbbdd36382de51cd6b7a56322e0acca2414 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Sep 2013 14:22:09 -0700 Subject: mm: memory-hotplug: enable memory hotplug to handle hugepage Until now we could not offline memory blocks that contain hugepages, because a hugepage was considered an unmovable page. But now with this patch series, a hugepage has become movable, so by using hugepage migration we can offline such memory blocks. What's different from other users of hugepage migration is that we need to decompose all the hugepages inside the target memory block into free buddy pages after hugepage migration, because otherwise free hugepages remaining in the memory block interfere with the memory offlining. For this reason we introduce new functions dissolve_free_huge_page() and dissolve_free_huge_pages(). Other than that, what this patch does is straightforward: it adds hugepage migration code, that is, it adds hugepage handling to the functions which scan over pfns and collect hugepages to be migrated, and adds a hugepage allocation function to alloc_migrate_target(). As for larger hugepages (1GB for x86_64), it's not easy to hot-remove them because they are larger than a memory block. So for now we simply let offlining fail for them, as before. 
[yongjun_wei@trendmicro.com.cn: remove duplicated include] Signed-off-by: Naoya Horiguchi Acked-by: Andi Kleen Cc: Hillf Danton Cc: Wanpeng Li Cc: Mel Gorman Cc: Hugh Dickins Cc: KOSAKI Motohiro Cc: Michal Hocko Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Signed-off-by: Wei Yongjun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 2 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d37b3b95c439..fb4293b93fd0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -522,9 +523,15 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid) { struct page *page; - if (list_empty(&h->hugepage_freelists[nid])) + list_for_each_entry(page, &h->hugepage_freelists[nid], lru) + if (!is_migrate_isolate_page(page)) + break; + /* + * if 'non-isolated free hugepage' not found on the list, + * the allocation fails. + */ + if (&h->hugepage_freelists[nid] == &page->lru) return NULL; - page = list_entry(h->hugepage_freelists[nid].next, struct page, lru); list_move(&page->lru, &h->hugepage_activelist); set_page_refcounted(page); h->free_huge_pages--; @@ -878,6 +885,44 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, return ret; } +/* + * Dissolve a given free hugepage into free buddy pages. This function does + * nothing for in-use (including surplus) hugepages. + */ +static void dissolve_free_huge_page(struct page *page) +{ + spin_lock(&hugetlb_lock); + if (PageHuge(page) && !page_count(page)) { + struct hstate *h = page_hstate(page); + int nid = page_to_nid(page); + list_del(&page->lru); + h->free_huge_pages--; + h->free_huge_pages_node[nid]--; + update_and_free_page(h, page); + } + spin_unlock(&hugetlb_lock); +} + +/* + * Dissolve free hugepages in a given pfn range. 
Used by memory hotplug to + * make specified memory blocks removable from the system. + * Note that start_pfn should aligned with (minimum) hugepage size. + */ +void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) +{ + unsigned int order = 8 * sizeof(void *); + unsigned long pfn; + struct hstate *h; + + /* Set scan step to minimum hugepage size */ + for_each_hstate(h) + if (order > huge_page_order(h)) + order = huge_page_order(h); + VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << order)); + for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order) + dissolve_free_huge_page(pfn_to_page(pfn)); +} + static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) { struct page *page; @@ -3457,3 +3502,25 @@ void putback_active_hugepage(struct page *page) spin_unlock(&hugetlb_lock); put_page(page); } + +bool is_hugepage_active(struct page *page) +{ + VM_BUG_ON(!PageHuge(page)); + /* + * This function can be called for a tail page because the caller, + * scan_movable_pages, scans through a given pfn-range which typically + * covers one memory block. In systems using gigantic hugepage (1GB + * for x86_64,) a hugepage is larger than a memory block, and we don't + * support migrating such large hugepages for now, so return false + * when called for tail pages. + */ + if (PageTail(page)) + return false; + /* + * Refcount of a hwpoisoned hugepages is 1, but they are not active, + * so we should return false for them. + */ + if (unlikely(PageHWPoison(page))) + return false; + return page_count(page) > 0; +} -- cgit v1.2.3-59-g8ed1b