aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--mm/memory-failure.c202
1 files changed, 124 insertions, 78 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c6e4dd3e1c08..df0694c6adef 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -61,7 +61,7 @@ int sysctl_memory_failure_early_kill __read_mostly = 0;
int sysctl_memory_failure_recovery __read_mostly = 1;
-atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0);
+atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
#if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE)
@@ -784,12 +784,12 @@ static struct page_state {
{ sc|dirty, sc|dirty, "dirty swapcache", me_swapcache_dirty },
{ sc|dirty, sc, "clean swapcache", me_swapcache_clean },
- { unevict|dirty, unevict|dirty, "dirty unevictable LRU", me_pagecache_dirty },
- { unevict, unevict, "clean unevictable LRU", me_pagecache_clean },
-
{ mlock|dirty, mlock|dirty, "dirty mlocked LRU", me_pagecache_dirty },
{ mlock, mlock, "clean mlocked LRU", me_pagecache_clean },
+ { unevict|dirty, unevict|dirty, "dirty unevictable LRU", me_pagecache_dirty },
+ { unevict, unevict, "clean unevictable LRU", me_pagecache_clean },
+
{ lru|dirty, lru|dirty, "dirty LRU", me_pagecache_dirty },
{ lru|dirty, lru, "clean LRU", me_pagecache_clean },
@@ -1021,6 +1021,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
struct page *hpage;
int res;
unsigned int nr_pages;
+ unsigned long page_flags;
if (!sysctl_memory_failure_recovery)
panic("Memory failure from trap %d on page %lx", trapno, pfn);
@@ -1039,8 +1040,18 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
return 0;
}
- nr_pages = 1 << compound_trans_order(hpage);
- atomic_long_add(nr_pages, &mce_bad_pages);
+ /*
+ * Currently errors on hugetlbfs pages are measured in hugepage units,
+ * so nr_pages should be 1 << compound_order. OTOH when errors are on
+ * transparent hugepages, they are supposed to be split and error
+ * measurement is done in normal page units. So nr_pages should be one
+ * in this case.
+ */
+ if (PageHuge(p))
+ nr_pages = 1 << compound_order(hpage);
+ else /* normal page or thp */
+ nr_pages = 1;
+ atomic_long_add(nr_pages, &num_poisoned_pages);
/*
* We need/can do nothing about count=0 pages.
@@ -1070,7 +1081,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
if (!PageHWPoison(hpage)
|| (hwpoison_filter(p) && TestClearPageHWPoison(p))
|| (p != hpage && TestSetPageHWPoison(hpage))) {
- atomic_long_sub(nr_pages, &mce_bad_pages);
+ atomic_long_sub(nr_pages, &num_poisoned_pages);
return 0;
}
set_page_hwpoison_huge_page(hpage);
@@ -1119,6 +1130,15 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
lock_page(hpage);
/*
+ * We use page flags to determine what action should be taken, but
+ * the flags can be modified by the error containment action. One
+ * example is an mlocked page, where PG_mlocked is cleared by
+ * page_remove_rmap() in try_to_unmap_one(). So to determine page status
+ * correctly, we save a copy of the page flags at this time.
+ */
+ page_flags = p->flags;
+
+ /*
* unpoison always clear PG_hwpoison inside page lock
*/
if (!PageHWPoison(p)) {
@@ -1128,7 +1148,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
}
if (hwpoison_filter(p)) {
if (TestClearPageHWPoison(p))
- atomic_long_sub(nr_pages, &mce_bad_pages);
+ atomic_long_sub(nr_pages, &num_poisoned_pages);
unlock_page(hpage);
put_page(hpage);
return 0;
@@ -1176,12 +1196,19 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
}
res = -EBUSY;
- for (ps = error_states;; ps++) {
- if ((p->flags & ps->mask) == ps->res) {
- res = page_action(ps, p, pfn);
+ /*
+ * The first check uses the current page flags which may not have any
+ * relevant information. The second check with the saved page flagss is
+ * carried out only if the first check can't determine the page status.
+ */
+ for (ps = error_states;; ps++)
+ if ((p->flags & ps->mask) == ps->res)
break;
- }
- }
+ if (!ps->mask)
+ for (ps = error_states;; ps++)
+ if ((page_flags & ps->mask) == ps->res)
+ break;
+ res = page_action(ps, p, pfn);
out:
unlock_page(hpage);
return res;
@@ -1323,7 +1350,7 @@ int unpoison_memory(unsigned long pfn)
return 0;
}
if (TestClearPageHWPoison(p))
- atomic_long_sub(nr_pages, &mce_bad_pages);
+ atomic_long_sub(nr_pages, &num_poisoned_pages);
pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn);
return 0;
}
@@ -1337,7 +1364,7 @@ int unpoison_memory(unsigned long pfn)
*/
if (TestClearPageHWPoison(page)) {
pr_info("MCE: Software-unpoisoned page %#lx\n", pfn);
- atomic_long_sub(nr_pages, &mce_bad_pages);
+ atomic_long_sub(nr_pages, &num_poisoned_pages);
freeit = 1;
if (PageHuge(page))
clear_page_hwpoison_huge_page(page);
@@ -1368,7 +1395,7 @@ static struct page *new_page(struct page *p, unsigned long private, int **x)
* that is not free, and 1 for any other page type.
* For 1 the page is returned with increased page count, otherwise not.
*/
-static int get_any_page(struct page *p, unsigned long pfn, int flags)
+static int __get_any_page(struct page *p, unsigned long pfn, int flags)
{
int ret;
@@ -1393,11 +1420,9 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
if (!get_page_unless_zero(compound_head(p))) {
if (PageHuge(p)) {
pr_info("%s: %#lx free huge page\n", __func__, pfn);
- ret = dequeue_hwpoisoned_huge_page(compound_head(p));
+ ret = 0;
} else if (is_free_buddy_page(p)) {
pr_info("%s: %#lx free buddy page\n", __func__, pfn);
- /* Set hwpoison bit while page is still isolated */
- SetPageHWPoison(p);
ret = 0;
} else {
pr_info("%s: %#lx: unknown zero refcount page type %lx\n",
@@ -1413,43 +1438,68 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
return ret;
}
+static int get_any_page(struct page *page, unsigned long pfn, int flags)
+{
+ int ret = __get_any_page(page, pfn, flags);
+
+ if (ret == 1 && !PageHuge(page) && !PageLRU(page)) {
+ /*
+ * Try to free it.
+ */
+ put_page(page);
+ shake_page(page, 1);
+
+ /*
+ * Did it turn free?
+ */
+ ret = __get_any_page(page, pfn, 0);
+ if (!PageLRU(page)) {
+ pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
+ pfn, page->flags);
+ return -EIO;
+ }
+ }
+ return ret;
+}
+
static int soft_offline_huge_page(struct page *page, int flags)
{
int ret;
unsigned long pfn = page_to_pfn(page);
struct page *hpage = compound_head(page);
- ret = get_any_page(page, pfn, flags);
- if (ret < 0)
- return ret;
- if (ret == 0)
- goto done;
-
+ /*
+ * This double-check of PageHWPoison is to avoid the race with
+ * memory_failure(). See also comment in __soft_offline_page().
+ */
+ lock_page(hpage);
if (PageHWPoison(hpage)) {
+ unlock_page(hpage);
put_page(hpage);
pr_info("soft offline: %#lx hugepage already poisoned\n", pfn);
return -EBUSY;
}
+ unlock_page(hpage);
/* Keep page count to indicate a given hugepage is isolated. */
- ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, false,
+ ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL,
MIGRATE_SYNC);
put_page(hpage);
if (ret) {
pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
pfn, ret, page->flags);
- return ret;
- }
-done:
- if (!PageHWPoison(hpage))
+ } else {
+ set_page_hwpoison_huge_page(hpage);
+ dequeue_hwpoisoned_huge_page(hpage);
atomic_long_add(1 << compound_trans_order(hpage),
- &mce_bad_pages);
- set_page_hwpoison_huge_page(hpage);
- dequeue_hwpoisoned_huge_page(hpage);
+ &num_poisoned_pages);
+ }
/* keep elevated page count for bad page */
return ret;
}
+static int __soft_offline_page(struct page *page, int flags);
+
/**
* soft_offline_page - Soft offline a page.
* @page: page to offline
@@ -1478,9 +1528,11 @@ int soft_offline_page(struct page *page, int flags)
unsigned long pfn = page_to_pfn(page);
struct page *hpage = compound_trans_head(page);
- if (PageHuge(page))
- return soft_offline_huge_page(page, flags);
- if (PageTransHuge(hpage)) {
+ if (PageHWPoison(page)) {
+ pr_info("soft offline: %#lx page already poisoned\n", pfn);
+ return -EBUSY;
+ }
+ if (!PageHuge(page) && PageTransHuge(hpage)) {
if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) {
pr_info("soft offline: %#lx: failed to split THP\n",
pfn);
@@ -1491,47 +1543,45 @@ int soft_offline_page(struct page *page, int flags)
ret = get_any_page(page, pfn, flags);
if (ret < 0)
return ret;
- if (ret == 0)
- goto done;
-
- /*
- * Page cache page we can handle?
- */
- if (!PageLRU(page)) {
- /*
- * Try to free it.
- */
- put_page(page);
- shake_page(page, 1);
-
- /*
- * Did it turn free?
- */
- ret = get_any_page(page, pfn, 0);
- if (ret < 0)
- return ret;
- if (ret == 0)
- goto done;
- }
- if (!PageLRU(page)) {
- pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
- pfn, page->flags);
- return -EIO;
+ if (ret) { /* for in-use pages */
+ if (PageHuge(page))
+ ret = soft_offline_huge_page(page, flags);
+ else
+ ret = __soft_offline_page(page, flags);
+ } else { /* for free pages */
+ if (PageHuge(page)) {
+ set_page_hwpoison_huge_page(hpage);
+ dequeue_hwpoisoned_huge_page(hpage);
+ atomic_long_add(1 << compound_trans_order(hpage),
+ &num_poisoned_pages);
+ } else {
+ SetPageHWPoison(page);
+ atomic_long_inc(&num_poisoned_pages);
+ }
}
+ /* keep elevated page count for bad page */
+ return ret;
+}
- lock_page(page);
- wait_on_page_writeback(page);
+static int __soft_offline_page(struct page *page, int flags)
+{
+ int ret;
+ unsigned long pfn = page_to_pfn(page);
/*
- * Synchronized using the page lock with memory_failure()
+ * Check PageHWPoison again inside page lock because PageHWPoison
+ * is set by memory_failure() outside page lock. Note that
+ * memory_failure() also double-checks PageHWPoison inside page lock,
+ * so there's no race between soft_offline_page() and memory_failure().
*/
+ lock_page(page);
+ wait_on_page_writeback(page);
if (PageHWPoison(page)) {
unlock_page(page);
put_page(page);
pr_info("soft offline: %#lx page already poisoned\n", pfn);
return -EBUSY;
}
-
/*
* Try to invalidate first. This should work for
* non dirty unmapped page cache pages.
@@ -1544,9 +1594,10 @@ int soft_offline_page(struct page *page, int flags)
*/
if (ret == 1) {
put_page(page);
- ret = 0;
pr_info("soft_offline: %#lx: invalidated\n", pfn);
- goto done;
+ SetPageHWPoison(page);
+ atomic_long_inc(&num_poisoned_pages);
+ return 0;
}
/*
@@ -1563,28 +1614,23 @@ int soft_offline_page(struct page *page, int flags)
if (!ret) {
LIST_HEAD(pagelist);
inc_zone_page_state(page, NR_ISOLATED_ANON +
- page_is_file_cache(page));
+ page_is_file_cache(page));
list_add(&page->lru, &pagelist);
ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
- false, MIGRATE_SYNC,
- MR_MEMORY_FAILURE);
+ MIGRATE_SYNC, MR_MEMORY_FAILURE);
if (ret) {
putback_lru_pages(&pagelist);
pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
pfn, ret, page->flags);
if (ret > 0)
ret = -EIO;
+ } else {
+ SetPageHWPoison(page);
+ atomic_long_inc(&num_poisoned_pages);
}
} else {
pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
pfn, ret, page_count(page), page->flags);
}
- if (ret)
- return ret;
-
-done:
- atomic_long_add(1, &mce_bad_pages);
- SetPageHWPoison(page);
- /* keep elevated page count for bad page */
return ret;
}