Diffstat:
-rw-r--r--  mm/page_alloc.c   298
1 file changed, 187 insertions(+), 111 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e5486d47406e..218b28ee49ed 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -27,6 +27,7 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/kasan.h>
+#include <linux/kmsan.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/pagevec.h>
@@ -482,6 +483,8 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
{
static unsigned long prev_end_pfn, nr_initialised;
+ if (early_page_ext_enabled())
+ return false;
/*
* prev_end_pfn static that contains the end of previous zone
* No need to protect because called very early in boot before smp_init.
@@ -542,7 +545,7 @@ static inline int pfn_to_bitidx(const struct page *page, unsigned long pfn)
#ifdef CONFIG_SPARSEMEM
pfn &= (PAGES_PER_SECTION-1);
#else
- pfn = pfn - round_down(page_zone(page)->zone_start_pfn, pageblock_nr_pages);
+ pfn = pfn - pageblock_start_pfn(page_zone(page)->zone_start_pfn);
#endif /* CONFIG_SPARSEMEM */
return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
}
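
[Note: this and several later hunks replace open-coded pageblock arithmetic with the pageblock_start_pfn()/pageblock_end_pfn()/pageblock_aligned() helpers. For reference only (not part of this patch), those helpers come from include/linux/pageblock-flags.h and are roughly equivalent to the sketch below.]

/* Roughly-equivalent definitions, shown for illustration only: */
#define pageblock_start_pfn(pfn)   ALIGN_DOWN((pfn), pageblock_nr_pages)  /* first pfn of pfn's pageblock */
#define pageblock_end_pfn(pfn)     ALIGN((pfn) + 1, pageblock_nr_pages)   /* first pfn past pfn's pageblock */
#define pageblock_aligned(pfn)     IS_ALIGNED((pfn), pageblock_nr_pages)  /* pfn sits on a pageblock boundary */

These compute exactly what the removed round_down() and "pfn & (pageblock_nr_pages - 1)" expressions did, just with the intent spelled out.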
@@ -804,6 +807,7 @@ static void prep_compound_tail(struct page *head, int tail_idx)
p->mapping = TAIL_MAPPING;
set_compound_head(p, head);
+ set_page_private(p, 0);
}
void prep_compound_page(struct page *page, unsigned int order)
@@ -870,7 +874,8 @@ static inline bool set_page_guard(struct zone *zone, struct page *page,
INIT_LIST_HEAD(&page->buddy_list);
set_page_private(page, order);
/* Guard pages are not available for any usage */
- __mod_zone_freepage_state(zone, -(1 << order), migratetype);
+ if (!is_migrate_isolate(migratetype))
+ __mod_zone_freepage_state(zone, -(1 << order), migratetype);
return true;
}
@@ -900,7 +905,7 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
* order of appearance. So we need to first gather the full picture of what was
* enabled, and then make decisions.
*/
-void init_mem_debugging_and_hardening(void)
+void __init init_mem_debugging_and_hardening(void)
{
bool page_poisoning_requested = false;
@@ -935,6 +940,10 @@ void init_mem_debugging_and_hardening(void)
else
static_branch_disable(&init_on_free);
+ if (IS_ENABLED(CONFIG_KMSAN) &&
+ (_init_on_alloc_enabled_early || _init_on_free_enabled_early))
+ pr_info("mem auto-init: please make sure init_on_alloc and init_on_free are disabled when running KMSAN\n");
+
#ifdef CONFIG_DEBUG_PAGEALLOC
if (!debug_pagealloc_enabled())
return;
@@ -1105,7 +1114,7 @@ static inline void __free_one_page(struct page *page,
int migratetype, fpi_t fpi_flags)
{
struct capture_control *capc = task_capc(zone);
- unsigned long buddy_pfn;
+ unsigned long buddy_pfn = 0;
unsigned long combined_pfn;
struct page *buddy;
bool to_tail;
@@ -1283,20 +1292,20 @@ static const char *page_bad_reason(struct page *page, unsigned long flags)
return bad_reason;
}
-static void check_free_page_bad(struct page *page)
+static void free_page_is_bad_report(struct page *page)
{
bad_page(page,
page_bad_reason(page, PAGE_FLAGS_CHECK_AT_FREE));
}
-static inline int check_free_page(struct page *page)
+static inline bool free_page_is_bad(struct page *page)
{
if (likely(page_expected_state(page, PAGE_FLAGS_CHECK_AT_FREE)))
- return 0;
+ return false;
/* Something has gone sideways, find it */
- check_free_page_bad(page);
- return 1;
+ free_page_is_bad_report(page);
+ return true;
}
static int free_tail_pages_check(struct page *head_page, struct page *page)
@@ -1398,6 +1407,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
VM_BUG_ON_PAGE(PageTail(page), page);
trace_mm_page_free(page, order);
+ kmsan_free_page(page, order);
if (unlikely(PageHWPoison(page)) && !order) {
/*
@@ -1428,7 +1438,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
for (i = 1; i < (1 << order); i++) {
if (compound)
bad += free_tail_pages_check(page, page + i);
- if (unlikely(check_free_page(page + i))) {
+ if (unlikely(free_page_is_bad(page + i))) {
bad++;
continue;
}
@@ -1439,8 +1449,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
page->mapping = NULL;
if (memcg_kmem_enabled() && PageMemcgKmem(page))
__memcg_kmem_uncharge_page(page, order);
- if (check_free)
- bad += check_free_page(page);
+ if (check_free && free_page_is_bad(page))
+ bad++;
if (bad)
return false;
@@ -1499,10 +1509,11 @@ static bool free_pcp_prepare(struct page *page, unsigned int order)
return free_pages_prepare(page, order, true, FPI_NONE);
}
+/* return true if this page has an inappropriate state */
static bool bulkfree_pcp_prepare(struct page *page)
{
if (debug_pagealloc_enabled_static())
- return check_free_page(page);
+ return free_page_is_bad(page);
else
return false;
}
@@ -1523,7 +1534,7 @@ static bool free_pcp_prepare(struct page *page, unsigned int order)
static bool bulkfree_pcp_prepare(struct page *page)
{
- return check_free_page(page);
+ return free_page_is_bad(page);
}
#endif /* CONFIG_DEBUG_VM */
@@ -1575,7 +1586,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
order = pindex_to_order(pindex);
nr_pages = 1 << order;
- BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH));
do {
int mt;
@@ -1804,6 +1814,10 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
{
if (early_page_uninitialised(pfn))
return;
+ if (!kmsan_memblock_free_pages(page, order)) {
+ /* KMSAN will take care of these pages. */
+ return;
+ }
__free_pages_core(page, order);
}
@@ -1855,7 +1869,7 @@ void set_zone_contiguous(struct zone *zone)
unsigned long block_start_pfn = zone->zone_start_pfn;
unsigned long block_end_pfn;
- block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages);
+ block_end_pfn = pageblock_end_pfn(block_start_pfn);
for (; block_start_pfn < zone_end_pfn(zone);
block_start_pfn = block_end_pfn,
block_end_pfn += pageblock_nr_pages) {
@@ -1890,15 +1904,14 @@ static void __init deferred_free_range(unsigned long pfn,
page = pfn_to_page(pfn);
/* Free a large naturally-aligned chunk if possible */
- if (nr_pages == pageblock_nr_pages &&
- (pfn & (pageblock_nr_pages - 1)) == 0) {
+ if (nr_pages == pageblock_nr_pages && pageblock_aligned(pfn)) {
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
__free_pages_core(page, pageblock_order);
return;
}
for (i = 0; i < nr_pages; i++, page++, pfn++) {
- if ((pfn & (pageblock_nr_pages - 1)) == 0)
+ if (pageblock_aligned(pfn))
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
__free_pages_core(page, 0);
}
@@ -1917,16 +1930,12 @@ static inline void __init pgdat_init_report_one_done(void)
/*
* Returns true if page needs to be initialized or freed to buddy allocator.
*
- * First we check if pfn is valid on architectures where it is possible to have
- * holes within pageblock_nr_pages. On systems where it is not possible, this
- * function is optimized out.
- *
- * Then, we check if a current large page is valid by only checking the validity
+ * We check if a current large page is valid by only checking the validity
* of the head pfn.
*/
static inline bool __init deferred_pfn_valid(unsigned long pfn)
{
- if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn))
+ if (pageblock_aligned(pfn) && !pfn_valid(pfn))
return false;
return true;
}
@@ -1938,14 +1947,13 @@ static inline bool __init deferred_pfn_valid(unsigned long pfn)
static void __init deferred_free_pages(unsigned long pfn,
unsigned long end_pfn)
{
- unsigned long nr_pgmask = pageblock_nr_pages - 1;
unsigned long nr_free = 0;
for (; pfn < end_pfn; pfn++) {
if (!deferred_pfn_valid(pfn)) {
deferred_free_range(pfn - nr_free, nr_free);
nr_free = 0;
- } else if (!(pfn & nr_pgmask)) {
+ } else if (pageblock_aligned(pfn)) {
deferred_free_range(pfn - nr_free, nr_free);
nr_free = 1;
} else {
@@ -1965,7 +1973,6 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
unsigned long pfn,
unsigned long end_pfn)
{
- unsigned long nr_pgmask = pageblock_nr_pages - 1;
int nid = zone_to_nid(zone);
unsigned long nr_pages = 0;
int zid = zone_idx(zone);
@@ -1975,7 +1982,7 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
if (!deferred_pfn_valid(pfn)) {
page = NULL;
continue;
- } else if (!page || !(pfn & nr_pgmask)) {
+ } else if (!page || pageblock_aligned(pfn)) {
page = pfn_to_page(pfn);
} else {
page++;
@@ -2651,8 +2658,8 @@ int move_freepages_block(struct zone *zone, struct page *page,
*num_movable = 0;
pfn = page_to_pfn(page);
- start_pfn = pfn & ~(pageblock_nr_pages - 1);
- end_pfn = start_pfn + pageblock_nr_pages - 1;
+ start_pfn = pageblock_start_pfn(pfn);
+ end_pfn = pageblock_end_pfn(pfn) - 1;
/* Do not cross zone boundaries */
if (!zone_spans_pfn(zone, start_pfn))
@@ -3010,7 +3017,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
* i.e. orders < pageblock_order. If there are no local zones free,
* the zonelists will be reiterated without ALLOC_NOFRAGMENT.
*/
- if (alloc_flags & ALLOC_NOFRAGMENT)
+ if (order < pageblock_order && alloc_flags & ALLOC_NOFRAGMENT)
min_order = pageblock_order;
/*
@@ -3440,7 +3447,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
int pindex;
bool free_high;
- __count_vm_event(PGFREE);
+ __count_vm_events(PGFREE, 1 << order);
pindex = order_to_pindex(migratetype, order);
list_add(&page->pcp_list, &pcp->lists[pindex]);
pcp->count += 1 << order;
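
[Note: the __count_vm_events(PGFREE, 1 << order) change above, together with the matching PGALLOC change in rmqueue_pcplist() below, makes the vmstat event counters order-aware. Illustrative before/after, not part of the patch:]

	/* freeing an order-3 page through the pcp path */
	__count_vm_events(PGFREE, 1 << 3);	/* new: accounts all 8 pages */
	/* old code: __count_vm_event(PGFREE);	   accounted only 1 page  */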
@@ -3598,16 +3605,11 @@ EXPORT_SYMBOL_GPL(split_page);
int __isolate_free_page(struct page *page, unsigned int order)
{
- unsigned long watermark;
- struct zone *zone;
- int mt;
-
- BUG_ON(!PageBuddy(page));
-
- zone = page_zone(page);
- mt = get_pageblock_migratetype(page);
+ struct zone *zone = page_zone(page);
+ int mt = get_pageblock_migratetype(page);
if (!is_migrate_isolate(mt)) {
+ unsigned long watermark;
/*
* Obey watermarks as if the page was being allocated. We can
* emulate a high-order watermark check with a raised order-0
@@ -3621,8 +3623,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
__mod_zone_freepage_state(zone, -(1UL << order), mt);
}
- /* Remove page from free list */
-
del_page_from_free_list(page, zone, order);
/*
@@ -3643,7 +3643,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
}
}
-
return 1UL << order;
}
@@ -3670,8 +3669,6 @@ void __putback_isolated_page(struct page *page, unsigned int order, int mt)
/*
* Update NUMA hit/miss statistics
- *
- * Must be called with interrupts disabled.
*/
static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
long nr_account)
@@ -3777,8 +3774,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
/* Lock and remove page from the per-cpu list */
static struct page *rmqueue_pcplist(struct zone *preferred_zone,
struct zone *zone, unsigned int order,
- gfp_t gfp_flags, int migratetype,
- unsigned int alloc_flags)
+ int migratetype, unsigned int alloc_flags)
{
struct per_cpu_pages *pcp;
struct list_head *list;
@@ -3808,15 +3804,24 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
pcp_spin_unlock_irqrestore(pcp, flags);
pcp_trylock_finish(UP_flags);
if (page) {
- __count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
+ __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone, 1);
}
return page;
}
/*
- * Allocate a page from the given zone. Use pcplists for order-0 allocations.
+ * Allocate a page from the given zone.
+ * Use pcplists for THP or "cheap" high-order allocations.
*/
+
+/*
+ * Do not instrument rmqueue() with KMSAN. This function may call
+ * __msan_poison_alloca() through a call to set_pfnblock_flags_mask().
+ * If __msan_poison_alloca() attempts to allocate pages for the stack depot, it
+ * may call rmqueue() again, which will result in a deadlock.
+ */
+__no_sanitize_memory
static inline
struct page *rmqueue(struct zone *preferred_zone,
struct zone *zone, unsigned int order,
@@ -3839,7 +3844,7 @@ struct page *rmqueue(struct zone *preferred_zone,
if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
migratetype != MIGRATE_MOVABLE) {
page = rmqueue_pcplist(preferred_zone, zone, order,
- gfp_flags, migratetype, alloc_flags);
+ migratetype, alloc_flags);
if (likely(page))
goto out;
}
@@ -4329,7 +4334,7 @@ static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
if (!in_task() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
filter &= ~SHOW_MEM_FILTER_NODES;
- show_mem(filter, nodemask);
+ __show_mem(filter, nodemask, gfp_zone(gfp_mask));
}
void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
@@ -4708,6 +4713,30 @@ void fs_reclaim_release(gfp_t gfp_mask)
EXPORT_SYMBOL_GPL(fs_reclaim_release);
#endif
+/*
+ * Zonelists may change due to hotplug during allocation. Detect when zonelists
+ * have been rebuilt so that allocations can be retried. The reader side does
+ * not take a lock and simply retries the allocation if the zonelist changes.
+ * The writer side is protected by the embedded spin_lock.
+ */
+static DEFINE_SEQLOCK(zonelist_update_seq);
+
+static unsigned int zonelist_iter_begin(void)
+{
+ if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
+ return read_seqbegin(&zonelist_update_seq);
+
+ return 0;
+}
+
+static unsigned int check_retry_zonelist(unsigned int seq)
+{
+ if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
+ return read_seqretry(&zonelist_update_seq, seq);
+
+ return seq;
+}
+
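
[Note: zonelist_iter_begin()/check_retry_zonelist() added above are the reader half of a seqlock; the writer half is further down, where __build_all_zonelists() switches from a private spinlock to write_seqlock(&zonelist_update_seq). A minimal sketch of the retry pattern used by the slow path, illustrative only; attempt_allocation() is a hypothetical stand-in:]

	unsigned int zonelist_iter_cookie;
	struct page *page;

restart:
	zonelist_iter_cookie = zonelist_iter_begin();	/* read_seqbegin() when MEMORY_HOTREMOVE is enabled */
	page = attempt_allocation();			/* hypothetical: one pass over the (possibly stale) zonelists */
	if (!page && check_retry_zonelist(zonelist_iter_cookie))
		goto restart;				/* zonelists were rebuilt concurrently, try again */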
/* Perform direct synchronous page reclaim */
static unsigned long
__perform_reclaim(gfp_t gfp_mask, unsigned int order,
@@ -5001,6 +5030,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
int compaction_retries;
int no_progress_loops;
unsigned int cpuset_mems_cookie;
+ unsigned int zonelist_iter_cookie;
int reserve_flags;
/*
@@ -5011,11 +5041,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
gfp_mask &= ~__GFP_ATOMIC;
-retry_cpuset:
+restart:
compaction_retries = 0;
no_progress_loops = 0;
compact_priority = DEF_COMPACT_PRIORITY;
cpuset_mems_cookie = read_mems_allowed_begin();
+ zonelist_iter_cookie = zonelist_iter_begin();
/*
* The fast path uses conservative alloc_flags to succeed only until
@@ -5121,7 +5152,8 @@ retry:
reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
if (reserve_flags)
- alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, reserve_flags);
+ alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, reserve_flags) |
+ (alloc_flags & ALLOC_KSWAPD);
/*
* Reset the nodemask and zonelist iterators if memory policies can be
@@ -5187,9 +5219,13 @@ retry:
goto retry;
- /* Deal with possible cpuset update races before we start OOM killing */
- if (check_retry_cpuset(cpuset_mems_cookie, ac))
- goto retry_cpuset;
+ /*
+ * Deal with possible cpuset update races or zonelist updates to avoid
+ * an unnecessary OOM kill.
+ */
+ if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
+ check_retry_zonelist(zonelist_iter_cookie))
+ goto restart;
/* Reclaim has failed us, start killing things */
page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
@@ -5209,9 +5245,13 @@ retry:
}
nopage:
- /* Deal with possible cpuset update races before we fail */
- if (check_retry_cpuset(cpuset_mems_cookie, ac))
- goto retry_cpuset;
+ /*
+ * Deal with possible cpuset update races or zonelist updates to avoid
+ * an unnecessary OOM kill.
+ */
+ if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
+ check_retry_zonelist(zonelist_iter_cookie))
+ goto restart;
/*
* Make sure that __GFP_NOFAIL request doesn't leak out and make sure
@@ -5238,7 +5278,7 @@ nopage:
* so that we can identify them and convert them to something
* else.
*/
- WARN_ON_ONCE_GFP(order > PAGE_ALLOC_COSTLY_ORDER, gfp_mask);
+ WARN_ON_ONCE_GFP(costly_order, gfp_mask);
/*
* Help non-failing allocations by giving them access to memory
@@ -5535,6 +5575,7 @@ out:
}
trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
+ kmsan_alloc_page(page, order, alloc_gfp);
return page;
}
@@ -5706,6 +5747,18 @@ refill:
/* reset page count bias and offset to start of new frag */
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
offset = size - fragsz;
+ if (unlikely(offset < 0)) {
+ /*
+ * The caller is trying to allocate a fragment
+ * with fragsz > PAGE_SIZE, but the cache isn't big
+ * enough to satisfy the request; this may
+ * happen in low memory conditions.
+ * We don't release the cache page because
+ * that could make memory pressure worse,
+ * so we simply return NULL here.
+ */
+ return NULL;
+ }
}
nc->pagecnt_bias--;
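
[Note: the new offset < 0 check covers the case where the refill fell back to an order-0 page (size == PAGE_SIZE) while the caller asked for fragsz > PAGE_SIZE, so size - fragsz goes negative. A hedged caller-side sketch, assuming the usual page_frag_alloc() API; example_frag_user() is hypothetical:]

	static int example_frag_user(struct page_frag_cache *nc)
	{
		void *frag;

		/* Under memory pressure the cache may hold only a single page, so a
		 * request larger than PAGE_SIZE can now fail cleanly ...
		 */
		frag = page_frag_alloc(nc, 2 * PAGE_SIZE, GFP_ATOMIC);
		if (!frag)
			return -ENOMEM;	/* ... instead of a fragment that overruns the cache page */

		/* use the fragment, then release it with page_frag_free(frag) */
		return 0;
	}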
@@ -5732,14 +5785,18 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
size_t size)
{
if (addr) {
- unsigned long alloc_end = addr + (PAGE_SIZE << order);
- unsigned long used = addr + PAGE_ALIGN(size);
+ unsigned long nr = DIV_ROUND_UP(size, PAGE_SIZE);
+ struct page *page = virt_to_page((void *)addr);
+ struct page *last = page + nr;
- split_page(virt_to_page((void *)addr), order);
- while (used < alloc_end) {
- free_page(used);
- used += PAGE_SIZE;
- }
+ split_page_owner(page, 1 << order);
+ split_page_memcg(page, 1 << order);
+ while (page < --last)
+ set_page_refcounted(last);
+
+ last = page + (1UL << order);
+ for (page += nr; page < last; page++)
+ __free_pages_ok(page, 0, FPI_TO_TAIL);
}
return (void *)addr;
}
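
[Note: a worked example of the reworked make_alloc_exact(), illustrative only. alloc_pages_exact(3 * PAGE_SIZE, GFP_KERNEL) still grabs an order-2 block (4 pages); the new code splits ownership/memcg state with split_page_owner()/split_page_memcg(), sets the refcount of pages 1-2 via set_page_refcounted(), and returns the unused 4th page straight to the buddy free list tail with __free_pages_ok(..., 0, FPI_TO_TAIL) instead of freeing it one page at a time through free_page().]

	void *buf = alloc_pages_exact(3 * PAGE_SIZE, GFP_KERNEL);	/* order-2 alloc, one page handed back */

	if (buf) {
		/* ... use the three pages ... */
		free_pages_exact(buf, 3 * PAGE_SIZE);
	}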
@@ -6011,6 +6068,15 @@ static void show_migration_types(unsigned char type)
printk(KERN_CONT "(%s) ", tmp);
}
+static bool node_has_managed_zones(pg_data_t *pgdat, int max_zone_idx)
+{
+ int zone_idx;
+ for (zone_idx = 0; zone_idx <= max_zone_idx; zone_idx++)
+ if (zone_managed_pages(pgdat->node_zones + zone_idx))
+ return true;
+ return false;
+}
+
/*
* Show free area list (used inside shift_scroll-lock stuff)
* We also calculate the percentage fragmentation. We do this by counting the
@@ -6020,7 +6086,7 @@ static void show_migration_types(unsigned char type)
* SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
* cpuset.
*/
-void show_free_areas(unsigned int filter, nodemask_t *nodemask)
+void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
{
unsigned long free_pcp = 0;
int cpu, nid;
@@ -6028,6 +6094,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
pg_data_t *pgdat;
for_each_populated_zone(zone) {
+ if (zone_idx(zone) > max_zone_idx)
+ continue;
if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;
@@ -6039,7 +6107,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
" unevictable:%lu dirty:%lu writeback:%lu\n"
" slab_reclaimable:%lu slab_unreclaimable:%lu\n"
- " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
+ " mapped:%lu shmem:%lu pagetables:%lu\n"
+ " sec_pagetables:%lu bounce:%lu\n"
" kernel_misc_reclaimable:%lu\n"
" free:%lu free_pcp:%lu free_cma:%lu\n",
global_node_page_state(NR_ACTIVE_ANON),
@@ -6056,6 +6125,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
global_node_page_state(NR_FILE_MAPPED),
global_node_page_state(NR_SHMEM),
global_node_page_state(NR_PAGETABLE),
+ global_node_page_state(NR_SECONDARY_PAGETABLE),
global_zone_page_state(NR_BOUNCE),
global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE),
global_zone_page_state(NR_FREE_PAGES),
@@ -6065,6 +6135,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
for_each_online_pgdat(pgdat) {
if (show_mem_node_skip(filter, pgdat->node_id, nodemask))
continue;
+ if (!node_has_managed_zones(pgdat, max_zone_idx))
+ continue;
printk("Node %d"
" active_anon:%lukB"
@@ -6089,6 +6161,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" shadow_call_stack:%lukB"
#endif
" pagetables:%lukB"
+ " sec_pagetables:%lukB"
" all_unreclaimable? %s"
"\n",
pgdat->node_id,
@@ -6114,6 +6187,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
node_page_state(pgdat, NR_KERNEL_SCS_KB),
#endif
K(node_page_state(pgdat, NR_PAGETABLE)),
+ K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)),
pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
"yes" : "no");
}
@@ -6121,6 +6195,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
for_each_populated_zone(zone) {
int i;
+ if (zone_idx(zone) > max_zone_idx)
+ continue;
if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;
@@ -6182,6 +6258,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
unsigned long nr[MAX_ORDER], flags, total = 0;
unsigned char types[MAX_ORDER];
+ if (zone_idx(zone) > max_zone_idx)
+ continue;
if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;
show_node(zone);
@@ -6507,16 +6585,15 @@ static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonesta
#define BOOT_PAGESET_BATCH 1
static DEFINE_PER_CPU(struct per_cpu_pages, boot_pageset);
static DEFINE_PER_CPU(struct per_cpu_zonestat, boot_zonestats);
-DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
+static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
static void __build_all_zonelists(void *data)
{
int nid;
int __maybe_unused cpu;
pg_data_t *self = data;
- static DEFINE_SPINLOCK(lock);
- spin_lock(&lock);
+ write_seqlock(&zonelist_update_seq);
#ifdef CONFIG_NUMA
memset(node_load, 0, sizeof(node_load));
@@ -6553,7 +6630,7 @@ static void __build_all_zonelists(void *data)
#endif
}
- spin_unlock(&lock);
+ write_sequnlock(&zonelist_update_seq);
}
static noinline void __init
@@ -6704,7 +6781,7 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
* such that unmovable allocations won't be scattered all
* over the place during system boot.
*/
- if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
+ if (pageblock_aligned(pfn)) {
set_pageblock_migratetype(page, migratetype);
cond_resched();
}
@@ -6747,10 +6824,18 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
* Please note that MEMINIT_HOTPLUG path doesn't clear memmap
* because this is done early in section_activate()
*/
- if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
+ if (pageblock_aligned(pfn)) {
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
cond_resched();
}
+
+ /*
+ * ZONE_DEVICE pages are released directly to the driver page allocator
+ * which will set the page count to 1 when allocating the page.
+ */
+ if (pgmap->type == MEMORY_DEVICE_PRIVATE ||
+ pgmap->type == MEMORY_DEVICE_COHERENT)
+ set_page_count(page, 0);
}
/*
@@ -6810,7 +6895,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
unsigned long start = jiffies;
int nid = pgdat->node_id;
- if (WARN_ON_ONCE(!pgmap || zone_idx(zone) != ZONE_DEVICE))
+ if (WARN_ON_ONCE(!pgmap || zone_idx != ZONE_DEVICE))
return;
/*
@@ -6879,9 +6964,8 @@ static void __init init_unavailable_range(unsigned long spfn,
u64 pgcnt = 0;
for (pfn = spfn; pfn < epfn; pfn++) {
- if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
- pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
- + pageblock_nr_pages - 1;
+ if (!pfn_valid(pageblock_start_pfn(pfn))) {
+ pfn = pageblock_end_pfn(pfn) - 1;
continue;
}
__init_single_page(pfn_to_page(pfn), pfn, zone, node);
@@ -6986,7 +7070,7 @@ static int zone_batchsize(struct zone *zone)
* size is striking a balance between allocation latency
* and zone lock contention.
*/
- batch = min(zone_managed_pages(zone) >> 10, (1024 * 1024) / PAGE_SIZE);
+ batch = min(zone_managed_pages(zone) >> 10, SZ_1M / PAGE_SIZE);
batch /= 4; /* We effectively *= 4 below */
if (batch < 1)
batch = 1;
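
[Note: the SZ_1M / PAGE_SIZE rewrite is cosmetic; a quick arithmetic check, illustrative only:]

	/* With 4 KiB pages: SZ_1M / PAGE_SIZE = 256 = (1024 * 1024) / PAGE_SIZE, so behaviour is unchanged. */
	/* Example zone with 2 GiB managed (524288 pages):                                                   */
	/*   batch = min(524288 >> 10, 256) / 4 = min(512, 256) / 4 = 64                                      */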
@@ -7171,6 +7255,17 @@ void __meminit setup_zone_pageset(struct zone *zone)
}
/*
+ * The zone indicated has a new number of managed_pages; batch sizes and percpu
+ * page high values need to be recalculated.
+ */
+static void zone_pcp_update(struct zone *zone, int cpu_online)
+{
+ mutex_lock(&pcp_batch_high_lock);
+ zone_set_pageset_high_and_batch(zone, cpu_online);
+ mutex_unlock(&pcp_batch_high_lock);
+}
+
+/*
* Allocate per cpu pagesets and initialize them.
* Before this call only boot pagesets were available.
*/
@@ -7614,6 +7709,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
int i;
pgdat_resize_init(pgdat);
+ pgdat_kswapd_lock_init(pgdat);
pgdat_init_split_queue(pgdat);
pgdat_init_kcompactd(pgdat);
@@ -7908,17 +8004,6 @@ unsigned long __init node_map_pfn_alignment(void)
return ~accl_mask + 1;
}
-/**
- * find_min_pfn_with_active_regions - Find the minimum PFN registered
- *
- * Return: the minimum PFN based on information provided via
- * memblock_set_node().
- */
-unsigned long __init find_min_pfn_with_active_regions(void)
-{
- return PHYS_PFN(memblock_start_of_DRAM());
-}
-
/*
* early_calculate_totalpages()
* Sum pages in active regions for movable zone.
@@ -8211,7 +8296,7 @@ void __init free_area_init(unsigned long *max_zone_pfn)
memset(arch_zone_highest_possible_pfn, 0,
sizeof(arch_zone_highest_possible_pfn));
- start_pfn = find_min_pfn_with_active_regions();
+ start_pfn = PHYS_PFN(memblock_start_of_DRAM());
descending = arch_has_descending_max_zone_pfns();
for (i = 0; i < MAX_NR_ZONES; i++) {
@@ -8460,8 +8545,8 @@ void __init mem_init_print_info(void)
#endif
")\n",
K(nr_free_pages()), K(physpages),
- codesize >> 10, datasize >> 10, rosize >> 10,
- (init_data_size + init_code_size) >> 10, bss_size >> 10,
+ codesize / SZ_1K, datasize / SZ_1K, rosize / SZ_1K,
+ (init_data_size + init_code_size) / SZ_1K, bss_size / SZ_1K,
K(physpages - totalram_pages() - totalcma_pages),
K(totalcma_pages)
#ifdef CONFIG_HIGHMEM
@@ -8974,7 +9059,7 @@ void *__init alloc_large_system_hash(const char *tablename,
{
unsigned long long max = high_limit;
unsigned long log2qty, size;
- void *table = NULL;
+ void *table;
gfp_t gfp_flags;
bool virt;
bool huge;
@@ -8986,8 +9071,8 @@ void *__init alloc_large_system_hash(const char *tablename,
numentries -= arch_reserved_kernel_pages();
/* It isn't necessary when PAGE_SIZE >= 1MB */
- if (PAGE_SHIFT < 20)
- numentries = round_up(numentries, (1<<20)/PAGE_SIZE);
+ if (PAGE_SIZE < SZ_1M)
+ numentries = round_up(numentries, SZ_1M / PAGE_SIZE);
#if __BITS_PER_LONG > 32
if (!high_limit) {
@@ -9412,17 +9497,6 @@ void free_contig_range(unsigned long pfn, unsigned long nr_pages)
EXPORT_SYMBOL(free_contig_range);
/*
- * The zone indicated has a new number of managed_pages; batch sizes and percpu
- * page high values need to be recalculated.
- */
-void zone_pcp_update(struct zone *zone, int cpu_online)
-{
- mutex_lock(&pcp_batch_high_lock);
- zone_set_pageset_high_and_batch(zone, cpu_online);
- mutex_unlock(&pcp_batch_high_lock);
-}
-
-/*
* Effectively disable pcplists for the zone by setting the high limit to 0
* and draining all cpus. A concurrent page freeing on another CPU that's about
* to put the page on pcplist will either finish before the drain and the page
@@ -9454,9 +9528,11 @@ void zone_pcp_reset(struct zone *zone)
drain_zonestat(zone, pzstats);
}
free_percpu(zone->per_cpu_pageset);
- free_percpu(zone->per_cpu_zonestats);
zone->per_cpu_pageset = &boot_pageset;
- zone->per_cpu_zonestats = &boot_zonestats;
+ if (zone->per_cpu_zonestats != &boot_zonestats) {
+ free_percpu(zone->per_cpu_zonestats);
+ zone->per_cpu_zonestats = &boot_zonestats;
+ }
}
}