Diffstat (limited to 'mm/page_alloc.c')
 mm/page_alloc.c | 234 ++++++++++++++++++++----------
 1 file changed, 156 insertions(+), 78 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8e3bc949ebcc..dbd0d5cbbcbb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -50,7 +50,6 @@
#include <linux/backing-dev.h>
#include <linux/fault-inject.h>
#include <linux/page-isolation.h>
-#include <linux/page_ext.h>
#include <linux/debugobjects.h>
#include <linux/kmemleak.h>
#include <linux/compaction.h>
@@ -136,6 +135,55 @@ unsigned long totalcma_pages __read_mostly;
int percpu_pagelist_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
+#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON
+DEFINE_STATIC_KEY_TRUE(init_on_alloc);
+#else
+DEFINE_STATIC_KEY_FALSE(init_on_alloc);
+#endif
+EXPORT_SYMBOL(init_on_alloc);
+
+#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON
+DEFINE_STATIC_KEY_TRUE(init_on_free);
+#else
+DEFINE_STATIC_KEY_FALSE(init_on_free);
+#endif
+EXPORT_SYMBOL(init_on_free);
+
+static int __init early_init_on_alloc(char *buf)
+{
+ int ret;
+ bool bool_result;
+
+ if (!buf)
+ return -EINVAL;
+ ret = kstrtobool(buf, &bool_result);
+ if (bool_result && page_poisoning_enabled())
+ pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_alloc\n");
+ if (bool_result)
+ static_branch_enable(&init_on_alloc);
+ else
+ static_branch_disable(&init_on_alloc);
+ return ret;
+}
+early_param("init_on_alloc", early_init_on_alloc);
+
+static int __init early_init_on_free(char *buf)
+{
+ int ret;
+ bool bool_result;
+
+ if (!buf)
+ return -EINVAL;
+ ret = kstrtobool(buf, &bool_result);
+ if (bool_result && page_poisoning_enabled())
+ pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_free\n");
+ if (bool_result)
+ static_branch_enable(&init_on_free);
+ else
+ static_branch_disable(&init_on_free);
+ return ret;
+}
+early_param("init_on_free", early_init_on_free);
/*
* A cached value of the page's pageblock's migratetype, used when the page is
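[Annotation] The early_param() handlers above only flip the static keys. The helpers that consume them, want_init_on_alloc() and want_init_on_free(), live in include/linux/mm.h and are not part of this diff; a minimal sketch of what they are expected to look like, assuming the upstream helper names and the page-poisoning precedence stated in the pr_info messages above:

	/* Sketch (assumption): consumers of the static keys defined above;
	 * the real helpers are declared outside this file. */
	static inline bool want_init_on_alloc(gfp_t flags)
	{
		if (static_branch_unlikely(&init_on_alloc) &&
		    !page_poisoning_enabled())
			return true;
		/* __GFP_ZERO requests zeroed memory regardless of the key. */
		return flags & __GFP_ZERO;
	}

	static inline bool want_init_on_free(void)
	{
		return static_branch_unlikely(&init_on_free) &&
		       !page_poisoning_enabled();
	}

Because these are static branches, a disabled option costs a patched no-op in the hot paths rather than a memory load. Typical usage is a boot command line containing "init_on_alloc=1 init_on_free=1".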
@@ -224,8 +272,6 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES] = {
[ZONE_MOVABLE] = 0,
};
-EXPORT_SYMBOL(totalram_pages);
-
static char * const zone_names[MAX_NR_ZONES] = {
#ifdef CONFIG_ZONE_DMA
"DMA",
@@ -646,30 +692,29 @@ void prep_compound_page(struct page *page, unsigned int order)
#ifdef CONFIG_DEBUG_PAGEALLOC
unsigned int _debug_guardpage_minorder;
-bool _debug_pagealloc_enabled __read_mostly
- = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT
+DEFINE_STATIC_KEY_TRUE(_debug_pagealloc_enabled);
+#else
+DEFINE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
+#endif
EXPORT_SYMBOL(_debug_pagealloc_enabled);
-bool _debug_guardpage_enabled __read_mostly;
+
+DEFINE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
static int __init early_debug_pagealloc(char *buf)
{
- if (!buf)
+ bool enable = false;
+
+ if (kstrtobool(buf, &enable))
return -EINVAL;
- return kstrtobool(buf, &_debug_pagealloc_enabled);
-}
-early_param("debug_pagealloc", early_debug_pagealloc);
-static bool need_debug_guardpage(void)
-{
- /* If we don't use debug_pagealloc, we don't need guard page */
- if (!debug_pagealloc_enabled())
- return false;
+ if (enable)
+ static_branch_enable(&_debug_pagealloc_enabled);
- if (!debug_guardpage_minorder())
- return false;
-
- return true;
+ return 0;
}
+early_param("debug_pagealloc", early_debug_pagealloc);
static void init_debug_guardpage(void)
{
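[Annotation] The matching debug_pagealloc_enabled() helper is declared in include/linux/mm.h, outside this diff; under the static-key conversion above it is expected to reduce to a branch test, roughly:

	/* Sketch (assumption): helper in include/linux/mm.h, not in this hunk. */
	static inline bool debug_pagealloc_enabled(void)
	{
		return static_branch_unlikely(&_debug_pagealloc_enabled);
	}

This is what makes the extra debug_pagealloc_enabled() checks added to the free and allocation fast paths later in this patch essentially free when the feature is off.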
@@ -679,14 +724,9 @@ static void init_debug_guardpage(void)
if (!debug_guardpage_minorder())
return;
- _debug_guardpage_enabled = true;
+ static_branch_enable(&_debug_guardpage_enabled);
}
-struct page_ext_operations debug_guardpage_ops = {
- .need = need_debug_guardpage,
- .init = init_debug_guardpage,
-};
-
static int __init debug_guardpage_minorder_setup(char *buf)
{
unsigned long res;
@@ -704,20 +744,13 @@ early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
static inline bool set_page_guard(struct zone *zone, struct page *page,
unsigned int order, int migratetype)
{
- struct page_ext *page_ext;
-
if (!debug_guardpage_enabled())
return false;
if (order >= debug_guardpage_minorder())
return false;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
- return false;
-
- __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
-
+ __SetPageGuard(page);
INIT_LIST_HEAD(&page->lru);
set_page_private(page, order);
/* Guard pages are not available for any usage */
@@ -729,23 +762,16 @@ static inline bool set_page_guard(struct zone *zone, struct page *page,
static inline void clear_page_guard(struct zone *zone, struct page *page,
unsigned int order, int migratetype)
{
- struct page_ext *page_ext;
-
if (!debug_guardpage_enabled())
return;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
- return;
-
- __clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
+ __ClearPageGuard(page);
set_page_private(page, 0);
if (!is_migrate_isolate(migratetype))
__mod_zone_freepage_state(zone, (1 << order), migratetype);
}
#else
-struct page_ext_operations debug_guardpage_ops;
static inline bool set_page_guard(struct zone *zone, struct page *page,
unsigned int order, int migratetype) { return false; }
static inline void clear_page_guard(struct zone *zone, struct page *page,
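[Annotation] __SetPageGuard()/__ClearPageGuard() record the guard state in struct page itself via a page type, so the separately allocated page_ext array (and the debug_guardpage_ops registration removed above) is no longer needed. A sketch of the mechanism, assuming the PG_guard page type added to include/linux/page-flags.h alongside this change:

	/* Sketch (assumption): page->page_type starts at PAGE_TYPE_BASE
	 * (0xf0000000) and a type is recorded by clearing its bit, so the
	 * field can never be mistaken for a valid _mapcount. */
	#define PG_guard	0x00000800
	PAGE_TYPE_OPS(Guard, guard)	/* generates __SetPageGuard() et al. */

Since there are no page_ext ops left to hook, init_debug_guardpage() is instead called directly from page_alloc_init_late(), as a later hunk shows.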
@@ -1090,6 +1116,14 @@ out:
return ret;
}
+static void kernel_init_free_pages(struct page *page, int numpages)
+{
+ int i;
+
+ for (i = 0; i < numpages; i++)
+ clear_highpage(page + i);
+}
+
static __always_inline bool free_pages_prepare(struct page *page,
unsigned int order, bool check_free)
{
@@ -1141,6 +1175,9 @@ static __always_inline bool free_pages_prepare(struct page *page,
PAGE_SIZE << order);
}
arch_free_page(page, order);
+ if (want_init_on_free())
+ kernel_init_free_pages(page, 1 << order);
+
kernel_poison_pages(page, 1 << order, 0);
if (debug_pagealloc_enabled())
kernel_map_pages(page, 1 << order, 0);
@@ -1151,19 +1188,36 @@ static __always_inline bool free_pages_prepare(struct page *page,
}
#ifdef CONFIG_DEBUG_VM
-static inline bool free_pcp_prepare(struct page *page)
+/*
+ * With DEBUG_VM enabled, order-0 pages are checked immediately when being freed
+ * to pcp lists. With debug_pagealloc also enabled, they are also rechecked when
+ * moved from pcp lists to free lists.
+ */
+static bool free_pcp_prepare(struct page *page)
{
return free_pages_prepare(page, 0, true);
}
-static inline bool bulkfree_pcp_prepare(struct page *page)
+static bool bulkfree_pcp_prepare(struct page *page)
{
- return false;
+ if (debug_pagealloc_enabled())
+ return free_pages_check(page);
+ else
+ return false;
}
#else
+/*
+ * With DEBUG_VM disabled, order-0 pages being freed are checked only when
+ * moving from pcp lists to free list in order to reduce overhead. With
+ * debug_pagealloc enabled, they are checked also immediately when being freed
+ * to the pcp lists.
+ */
static bool free_pcp_prepare(struct page *page)
{
- return free_pages_prepare(page, 0, false);
+ if (debug_pagealloc_enabled())
+ return free_pages_prepare(page, 0, true);
+ else
+ return free_pages_prepare(page, 0, false);
}
static bool bulkfree_pcp_prepare(struct page *page)
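[Annotation] The body of this !DEBUG_VM bulkfree_pcp_prepare() variant falls outside the hunk's context; per the comment above it performs the check unconditionally. Summarizing where order-0 pages are now checked on the free path (a summary, not part of the patch):

	stage              DEBUG_VM                     !DEBUG_VM
	free -> pcplist    always (full prepare)        only with debug_pagealloc
	pcplist -> buddy   only with debug_pagealloc    always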
@@ -1904,6 +1958,10 @@ void __init page_alloc_init_late(void)
for_each_populated_zone(zone)
set_zone_contiguous(zone);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ init_debug_guardpage();
+#endif
}
#ifdef CONFIG_CMA
@@ -2021,28 +2079,44 @@ static inline int check_new_page(struct page *page)
static inline bool free_pages_prezeroed(void)
{
- return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) &&
- page_poisoning_enabled();
+ return (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) &&
+ page_poisoning_enabled()) || want_init_on_free();
}
#ifdef CONFIG_DEBUG_VM
-static bool check_pcp_refill(struct page *page)
+/*
+ * With DEBUG_VM enabled, order-0 pages are checked for expected state when
+ * being allocated from pcp lists. With debug_pagealloc also enabled, they are
+ * also checked when pcp lists are refilled from the free lists.
+ */
+static inline bool check_pcp_refill(struct page *page)
{
- return false;
+ if (debug_pagealloc_enabled())
+ return check_new_page(page);
+ else
+ return false;
}
-static bool check_new_pcp(struct page *page)
+static inline bool check_new_pcp(struct page *page)
{
return check_new_page(page);
}
#else
-static bool check_pcp_refill(struct page *page)
+/*
+ * With DEBUG_VM disabled, free order-0 pages are checked for expected state
+ * when pcp lists are being refilled from the free lists. With debug_pagealloc
+ * enabled, they are also checked when being allocated from the pcp lists.
+ */
+static inline bool check_pcp_refill(struct page *page)
{
return check_new_page(page);
}
-static bool check_new_pcp(struct page *page)
+static inline bool check_new_pcp(struct page *page)
{
- return false;
+ if (debug_pagealloc_enabled())
+ return check_new_page(page);
+ else
+ return false;
}
#endif /* CONFIG_DEBUG_VM */
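[Annotation] The allocation path mirrors the free path: one check in the default configuration, a second one when debug_pagealloc is enabled (a summary, not part of the patch):

	stage              DEBUG_VM                     !DEBUG_VM
	buddy -> pcplist   only with debug_pagealloc    always
	pcplist -> caller  always                       only with debug_pagealloc

Either way, every order-0 page is validated at least once between the buddy free lists and the caller, and twice with debug_pagealloc.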
@@ -2076,13 +2150,10 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
unsigned int alloc_flags)
{
- int i;
-
post_alloc_hook(page, order, gfp_flags);
- if (!free_pages_prezeroed() && (gfp_flags & __GFP_ZERO))
- for (i = 0; i < (1 << order); i++)
- clear_highpage(page + i);
+ if (!free_pages_prezeroed() && want_init_on_alloc(gfp_flags))
+ kernel_init_free_pages(page, 1 << order);
if (order && (gfp_flags & __GFP_COMP))
prep_compound_page(page, order);
@@ -7520,10 +7591,28 @@ static int page_alloc_cpu_dead(unsigned int cpu)
return 0;
}
+#ifdef CONFIG_NUMA
+int hashdist = HASHDIST_DEFAULT;
+
+static int __init set_hashdist(char *str)
+{
+ if (!str)
+ return 0;
+ hashdist = simple_strtoul(str, &str, 0);
+ return 1;
+}
+__setup("hashdist=", set_hashdist);
+#endif
+
void __init page_alloc_init(void)
{
int ret;
+#ifdef CONFIG_NUMA
+ if (num_node_state(N_MEMORY) == 1)
+ hashdist = 0;
+#endif
+
ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC_DEAD,
"mm/page_alloc:dead", NULL,
page_alloc_cpu_dead);
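[Annotation] Moving set_hashdist() up from its old location (removed in a later hunk) and resolving the single-memory-node default in page_alloc_init() presumably ensures hashdist is settled before any early alloc_large_system_hash() caller consults it in the new "vmalloc or linear" decision below.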
@@ -7908,19 +7997,6 @@ out:
return ret;
}
-#ifdef CONFIG_NUMA
-int hashdist = HASHDIST_DEFAULT;
-
-static int __init set_hashdist(char *str)
-{
- if (!str)
- return 0;
- hashdist = simple_strtoul(str, &str, 0);
- return 1;
-}
-__setup("hashdist=", set_hashdist);
-#endif
-
#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
/*
* Returns the number of pages that arch has reserved but
@@ -7967,6 +8043,7 @@ void *__init alloc_large_system_hash(const char *tablename,
unsigned long log2qty, size;
void *table = NULL;
gfp_t gfp_flags;
+ bool virt;
/* allow the kernel cmdline to have a say */
if (!numentries) {
@@ -8023,6 +8100,7 @@ void *__init alloc_large_system_hash(const char *tablename,
gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC;
do {
+ virt = false;
size = bucketsize << log2qty;
if (flags & HASH_EARLY) {
if (flags & HASH_ZERO)
@@ -8030,26 +8108,26 @@ void *__init alloc_large_system_hash(const char *tablename,
else
table = memblock_alloc_raw(size,
SMP_CACHE_BYTES);
- } else if (hashdist) {
+ } else if (get_order(size) >= MAX_ORDER || hashdist) {
table = __vmalloc(size, gfp_flags, PAGE_KERNEL);
+ virt = true;
} else {
/*
* If bucketsize is not a power-of-two, we may free
* some pages at the end of hash table which
* alloc_pages_exact() automatically does
*/
- if (get_order(size) < MAX_ORDER) {
- table = alloc_pages_exact(size, gfp_flags);
- kmemleak_alloc(table, size, 1, gfp_flags);
- }
+ table = alloc_pages_exact(size, gfp_flags);
+ kmemleak_alloc(table, size, 1, gfp_flags);
}
} while (!table && size > PAGE_SIZE && --log2qty);
if (!table)
panic("Failed to allocate %s hash table\n", tablename);
- pr_info("%s hash table entries: %ld (order: %d, %lu bytes)\n",
- tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size);
+ pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
+ tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
+ virt ? "vmalloc" : "linear");
if (_hash_shift)
*_hash_shift = log2qty;
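[Annotation] With the new virt flag, the allocation report distinguishes the two mapping strategies. An illustrative boot line as produced by the pr_info() above (hypothetical values, shown for a table that fell back to vmalloc):

	Dentry cache hash table entries: 8388608 (order: 14, 67108864 bytes, vmalloc)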