authorNaoya Horiguchi <n-horiguchi@ah.jp.nec.com>2018-08-23 17:00:42 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-23 18:48:43 -0700
commitd4ae9916ea2947341180d2b538f48875ff393a86 (patch)
treee478e5e6e84de25fade8f2995c9ee49cacd5a488 /mm/memory-failure.c
parentmm: fix race on soft-offlining free huge pages (diff)
mm: soft-offline: close the race against page allocation
A process can be killed with SIGBUS(BUS_MCEERR_AR) when it tries to allocate a page that was just freed in the course of soft-offline. This is undesirable because soft-offline (which is about corrected errors) is less aggressive than hard-offline (which is about uncorrected errors), and we can make soft-offline fail and keep using the page for a good reason such as "system is busy." The two main changes of this patch are: - setting the migrate type of the target page to MIGRATE_ISOLATE. As done in free_unref_page_commit(), this makes the kernel bypass the pcplist when freeing the page, so we can assume that the page is in the freelist just after put_page() returns, - setting PG_hwpoison on the free page under zone->lock, which protects the freelists; this allows us to avoid setting PG_hwpoison on a page that is about to be allocated. [akpm@linux-foundation.org: tweak set_hwpoison_free_buddy_page() comment] Link: http://lkml.kernel.org/r/1531452366-11661-3-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Reported-by: Xishi Qiu <xishi.qiuxishi@alibaba-inc.com> Tested-by: Mike Kravetz <mike.kravetz@oracle.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: <zy.zhengyi@alibaba-inc.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 files changed, 21 insertions, 5 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 49dc32c61137..192d0bbfc9ea 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -57,6 +57,7 @@
#include <linux/mm_inline.h>
#include <linux/kfifo.h>
#include <linux/ratelimit.h>
+#include <linux/page-isolation.h>
#include "internal.h"
#include "ras/ras_event.h"
@@ -1697,6 +1698,7 @@ static int __soft_offline_page(struct page *page, int flags)
static int soft_offline_in_use_page(struct page *page, int flags)
int ret;
+ int mt;
struct page *hpage = compound_head(page);
if (!PageHuge(page) && PageTransHuge(hpage)) {
@@ -1715,23 +1717,37 @@ static int soft_offline_in_use_page(struct page *page, int flags)
+ /*
+ * Setting MIGRATE_ISOLATE here ensures that the page will be linked
+ * to free list immediately (not via pcplist) when released after
+ * successful page migration. Otherwise we can't guarantee that the
+ * page is really free after put_page() returns, so
+ * set_hwpoison_free_buddy_page() is highly likely to fail.
+ */
+ mt = get_pageblock_migratetype(page);
+ set_pageblock_migratetype(page, MIGRATE_ISOLATE);
if (PageHuge(page))
ret = soft_offline_huge_page(page, flags);
ret = __soft_offline_page(page, flags);
+ set_pageblock_migratetype(page, mt);
return ret;
-static void soft_offline_free_page(struct page *page)
+static int soft_offline_free_page(struct page *page)
int rc = 0;
struct page *head = compound_head(page);
if (PageHuge(head))
rc = dissolve_free_huge_page(page);
- if (!rc && !TestSetPageHWPoison(page))
- num_poisoned_pages_inc();
+ if (!rc) {
+ if (set_hwpoison_free_buddy_page(page))
+ num_poisoned_pages_inc();
+ else
+ rc = -EBUSY;
+ }
+ return rc;
@@ -1775,7 +1791,7 @@ int soft_offline_page(struct page *page, int flags)
if (ret > 0)
ret = soft_offline_in_use_page(page, flags);
else if (ret == 0)
- soft_offline_free_page(page);
+ ret = soft_offline_free_page(page);
return ret;