path: root/mm/page_alloc.c
author:    Linus Torvalds <torvalds@linux-foundation.org>  2022-10-10 17:53:04 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org>  2022-10-10 17:53:04 -0700
commit:    27bc50fc90647bbf7b734c3fc306a5e61350da53
tree:      75fc525fbfec8c07a97a7875a89592317bcad4ca /mm/page_alloc.c
parent:    Merge tag 'x86_mm_for_v6.1_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
parent:    hugetlb: allocate vma lock for all sharable vmas
Merge tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:

 - Yu Zhao's Multi-Gen LRU patches are here. They've been under test in
   linux-next for a couple of months without, to my knowledge, any
   negative reports (or any positive ones, come to that).

 - Also the Maple Tree from Liam Howlett. An overlapping range-based
   tree for vmas. It is apparently slightly more efficient in its own
   right, but is mainly targeted at enabling work to reduce mmap_lock
   contention.

   Liam has identified a number of other tree users in the kernel which
   could be beneficially converted to maple trees.

   Yu Zhao has identified a hard-to-hit but "easy to fix" lockdep splat
   at [1]. This has yet to be addressed due to Liam's unfortunately
   timed vacation. He is now back and we'll get this fixed up.

 - Dmitry Vyukov introduces KMSAN: the Kernel Memory Sanitizer. It uses
   clang-generated instrumentation to detect use-of-uninitialized-memory
   bugs down to the single bit level.

   KMSAN keeps finding bugs. New ones, as well as the legacy ones.

 - Yang Shi adds a userspace mechanism (madvise) to induce a collapse of
   memory into THPs.

 - Zach O'Keefe has expanded Yang Shi's madvise(MADV_COLLAPSE) to
   support file/shmem-backed pages (a minimal usage sketch follows this
   list).

 - userfaultfd updates from Axel Rasmussen

 - zsmalloc cleanups from Alexey Romanov

 - cleanups from Miaohe Lin: vmscan, hugetlb_cgroup, hugetlb and
   memory-failure

 - Huang Ying adds enhancements to NUMA balancing memory tiering mode's
   page promotion, with a new way of detecting hot pages.

 - memcg updates from Shakeel Butt: charging optimizations and reduced
   memory consumption

 - memcg cleanups from Kairui Song

 - memcg fixes and cleanups from Johannes Weiner

 - Vishal Moola provides more folio conversions

 - Zhang Yi removed ll_rw_block() :(

 - migration enhancements from Peter Xu

 - migration error-path bugfixes from Huang Ying

 - Aneesh Kumar added the ability for a device driver to alter the
   memory tiering promotion paths. For optimizations by PMEM drivers,
   DRM drivers, etc.

 - vma merging improvements from Jakub Matěna

 - NUMA hinting cleanups from David Hildenbrand

 - xu xin added additional userspace visibility into KSM merging
   activity

 - THP & KSM code consolidation from Qi Zheng

 - more folio work from Matthew Wilcox

 - KASAN updates from Andrey Konovalov

 - DAMON cleanups from Kaixu Xia

 - DAMON work from SeongJae Park: fixes and cleanups

 - hugetlb sysfs cleanups from Muchun Song

 - Mike Kravetz fixes locking issues in hugetlbfs and in hugetlb core
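As a rough illustration of the madvise(MADV_COLLAPSE) interface mentioned
above (not part of this merge's diff): the sketch below maps an anonymous
region, faults it in, and asks the kernel to collapse it into transparent
huge pages. The fallback value 25 for MADV_COLLAPSE mirrors the uapi
definition added by the series; the region size, 4 KiB page-size constant
and error handling are arbitrary choices for the example.

/*
 * Minimal sketch: request collapse of an existing mapping into THPs.
 * Assumes a 6.1+ kernel built with CONFIG_TRANSPARENT_HUGEPAGE.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

#ifndef MADV_COLLAPSE
#define MADV_COLLAPSE 25		/* uapi value added by this series */
#endif

int main(void)
{
	size_t len = 4UL << 20;		/* 4 MiB: a couple of PMD-sized units */
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	/* Touch every base page so there is populated memory to collapse. */
	for (size_t i = 0; i < len; i += 4096)
		buf[i] = 1;

	/* Synchronously collapse the range into transparent huge pages. */
	if (madvise(buf, len, MADV_COLLAPSE))
		perror("madvise(MADV_COLLAPSE)");

	munmap(buf, len);
	return 0;
}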
Link: https://lkml.kernel.org/r/CAOUHufZabH85CeUN-MEMgL8gJGzJEWUrkiM58JkTbBhh-jew0Q@mail.gmail.com [1]

* tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (555 commits)
  hugetlb: allocate vma lock for all sharable vmas
  hugetlb: take hugetlb vma_lock when clearing vma_lock->vma pointer
  hugetlb: fix vma lock handling during split vma and range unmapping
  mglru: mm/vmscan.c: fix imprecise comments
  mm/mglru: don't sync disk for each aging cycle
  mm: memcontrol: drop dead CONFIG_MEMCG_SWAP config symbol
  mm: memcontrol: use do_memsw_account() in a few more places
  mm: memcontrol: deprecate swapaccounting=0 mode
  mm: memcontrol: don't allocate cgroup swap arrays when memcg is disabled
  mm/secretmem: remove reduntant return value
  mm/hugetlb: add available_huge_pages() func
  mm: remove unused inline functions from include/linux/mm_inline.h
  selftests/vm: add selftest for MADV_COLLAPSE of uffd-minor memory
  selftests/vm: add file/shmem MADV_COLLAPSE selftest for cleared pmd
  selftests/vm: add thp collapse shmem testing
  selftests/vm: add thp collapse file and tmpfs testing
  selftests/vm: modularize thp collapse memory operations
  selftests/vm: dedup THP helpers
  mm/khugepaged: add tracepoint to hpage_collapse_scan_file()
  mm/madvise: add file and shmem support to MADV_COLLAPSE
  ...
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  196
1 file changed, 105 insertions(+), 91 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 08522a831c7a..ac2c9f12a7b2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -27,6 +27,7 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/kasan.h>
+#include <linux/kmsan.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/pagevec.h>
@@ -482,6 +483,8 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
{
static unsigned long prev_end_pfn, nr_initialised;
+ if (early_page_ext_enabled())
+ return false;
/*
* prev_end_pfn static that contains the end of previous zone
* No need to protect because called very early in boot before smp_init.
@@ -542,7 +545,7 @@ static inline int pfn_to_bitidx(const struct page *page, unsigned long pfn)
#ifdef CONFIG_SPARSEMEM
pfn &= (PAGES_PER_SECTION-1);
#else
- pfn = pfn - round_down(page_zone(page)->zone_start_pfn, pageblock_nr_pages);
+ pfn = pfn - pageblock_start_pfn(page_zone(page)->zone_start_pfn);
#endif /* CONFIG_SPARSEMEM */
return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
}
@@ -870,7 +873,8 @@ static inline bool set_page_guard(struct zone *zone, struct page *page,
INIT_LIST_HEAD(&page->buddy_list);
set_page_private(page, order);
/* Guard pages are not available for any usage */
- __mod_zone_freepage_state(zone, -(1 << order), migratetype);
+ if (!is_migrate_isolate(migratetype))
+ __mod_zone_freepage_state(zone, -(1 << order), migratetype);
return true;
}
@@ -900,7 +904,7 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
* order of appearance. So we need to first gather the full picture of what was
* enabled, and then make decisions.
*/
-void init_mem_debugging_and_hardening(void)
+void __init init_mem_debugging_and_hardening(void)
{
bool page_poisoning_requested = false;
@@ -935,6 +939,10 @@ void init_mem_debugging_and_hardening(void)
else
static_branch_disable(&init_on_free);
+ if (IS_ENABLED(CONFIG_KMSAN) &&
+ (_init_on_alloc_enabled_early || _init_on_free_enabled_early))
+ pr_info("mem auto-init: please make sure init_on_alloc and init_on_free are disabled when running KMSAN\n");
+
#ifdef CONFIG_DEBUG_PAGEALLOC
if (!debug_pagealloc_enabled())
return;
@@ -1105,7 +1113,7 @@ static inline void __free_one_page(struct page *page,
int migratetype, fpi_t fpi_flags)
{
struct capture_control *capc = task_capc(zone);
- unsigned long buddy_pfn;
+ unsigned long buddy_pfn = 0;
unsigned long combined_pfn;
struct page *buddy;
bool to_tail;
@@ -1283,20 +1291,20 @@ static const char *page_bad_reason(struct page *page, unsigned long flags)
return bad_reason;
}
-static void check_free_page_bad(struct page *page)
+static void free_page_is_bad_report(struct page *page)
{
bad_page(page,
page_bad_reason(page, PAGE_FLAGS_CHECK_AT_FREE));
}
-static inline int check_free_page(struct page *page)
+static inline bool free_page_is_bad(struct page *page)
{
if (likely(page_expected_state(page, PAGE_FLAGS_CHECK_AT_FREE)))
- return 0;
+ return false;
/* Something has gone sideways, find it */
- check_free_page_bad(page);
- return 1;
+ free_page_is_bad_report(page);
+ return true;
}
static int free_tail_pages_check(struct page *head_page, struct page *page)
@@ -1398,6 +1406,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
VM_BUG_ON_PAGE(PageTail(page), page);
trace_mm_page_free(page, order);
+ kmsan_free_page(page, order);
if (unlikely(PageHWPoison(page)) && !order) {
/*
@@ -1428,7 +1437,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
for (i = 1; i < (1 << order); i++) {
if (compound)
bad += free_tail_pages_check(page, page + i);
- if (unlikely(check_free_page(page + i))) {
+ if (unlikely(free_page_is_bad(page + i))) {
bad++;
continue;
}
@@ -1439,8 +1448,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
page->mapping = NULL;
if (memcg_kmem_enabled() && PageMemcgKmem(page))
__memcg_kmem_uncharge_page(page, order);
- if (check_free)
- bad += check_free_page(page);
+ if (check_free && free_page_is_bad(page))
+ bad++;
if (bad)
return false;
@@ -1499,10 +1508,11 @@ static bool free_pcp_prepare(struct page *page, unsigned int order)
return free_pages_prepare(page, order, true, FPI_NONE);
}
+/* return true if this page has an inappropriate state */
static bool bulkfree_pcp_prepare(struct page *page)
{
if (debug_pagealloc_enabled_static())
- return check_free_page(page);
+ return free_page_is_bad(page);
else
return false;
}
@@ -1523,7 +1533,7 @@ static bool free_pcp_prepare(struct page *page, unsigned int order)
static bool bulkfree_pcp_prepare(struct page *page)
{
- return check_free_page(page);
+ return free_page_is_bad(page);
}
#endif /* CONFIG_DEBUG_VM */
@@ -1575,7 +1585,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
order = pindex_to_order(pindex);
nr_pages = 1 << order;
- BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH));
do {
int mt;
@@ -1804,6 +1813,10 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
{
if (early_page_uninitialised(pfn))
return;
+ if (!kmsan_memblock_free_pages(page, order)) {
+ /* KMSAN will take care of these pages. */
+ return;
+ }
__free_pages_core(page, order);
}
@@ -1855,7 +1868,7 @@ void set_zone_contiguous(struct zone *zone)
unsigned long block_start_pfn = zone->zone_start_pfn;
unsigned long block_end_pfn;
- block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages);
+ block_end_pfn = pageblock_end_pfn(block_start_pfn);
for (; block_start_pfn < zone_end_pfn(zone);
block_start_pfn = block_end_pfn,
block_end_pfn += pageblock_nr_pages) {
@@ -1890,15 +1903,14 @@ static void __init deferred_free_range(unsigned long pfn,
page = pfn_to_page(pfn);
/* Free a large naturally-aligned chunk if possible */
- if (nr_pages == pageblock_nr_pages &&
- (pfn & (pageblock_nr_pages - 1)) == 0) {
+ if (nr_pages == pageblock_nr_pages && pageblock_aligned(pfn)) {
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
__free_pages_core(page, pageblock_order);
return;
}
for (i = 0; i < nr_pages; i++, page++, pfn++) {
- if ((pfn & (pageblock_nr_pages - 1)) == 0)
+ if (pageblock_aligned(pfn))
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
__free_pages_core(page, 0);
}
@@ -1917,16 +1929,12 @@ static inline void __init pgdat_init_report_one_done(void)
/*
* Returns true if page needs to be initialized or freed to buddy allocator.
*
- * First we check if pfn is valid on architectures where it is possible to have
- * holes within pageblock_nr_pages. On systems where it is not possible, this
- * function is optimized out.
- *
- * Then, we check if a current large page is valid by only checking the validity
+ * We check if a current large page is valid by only checking the validity
* of the head pfn.
*/
static inline bool __init deferred_pfn_valid(unsigned long pfn)
{
- if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn))
+ if (pageblock_aligned(pfn) && !pfn_valid(pfn))
return false;
return true;
}
@@ -1938,14 +1946,13 @@ static inline bool __init deferred_pfn_valid(unsigned long pfn)
static void __init deferred_free_pages(unsigned long pfn,
unsigned long end_pfn)
{
- unsigned long nr_pgmask = pageblock_nr_pages - 1;
unsigned long nr_free = 0;
for (; pfn < end_pfn; pfn++) {
if (!deferred_pfn_valid(pfn)) {
deferred_free_range(pfn - nr_free, nr_free);
nr_free = 0;
- } else if (!(pfn & nr_pgmask)) {
+ } else if (pageblock_aligned(pfn)) {
deferred_free_range(pfn - nr_free, nr_free);
nr_free = 1;
} else {
@@ -1965,7 +1972,6 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
unsigned long pfn,
unsigned long end_pfn)
{
- unsigned long nr_pgmask = pageblock_nr_pages - 1;
int nid = zone_to_nid(zone);
unsigned long nr_pages = 0;
int zid = zone_idx(zone);
@@ -1975,7 +1981,7 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
if (!deferred_pfn_valid(pfn)) {
page = NULL;
continue;
- } else if (!page || !(pfn & nr_pgmask)) {
+ } else if (!page || pageblock_aligned(pfn)) {
page = pfn_to_page(pfn);
} else {
page++;
@@ -2651,8 +2657,8 @@ int move_freepages_block(struct zone *zone, struct page *page,
*num_movable = 0;
pfn = page_to_pfn(page);
- start_pfn = pfn & ~(pageblock_nr_pages - 1);
- end_pfn = start_pfn + pageblock_nr_pages - 1;
+ start_pfn = pageblock_start_pfn(pfn);
+ end_pfn = pageblock_end_pfn(pfn) - 1;
/* Do not cross zone boundaries */
if (!zone_spans_pfn(zone, start_pfn))
@@ -3010,7 +3016,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
* i.e. orders < pageblock_order. If there are no local zones free,
* the zonelists will be reiterated without ALLOC_NOFRAGMENT.
*/
- if (alloc_flags & ALLOC_NOFRAGMENT)
+ if (order < pageblock_order && alloc_flags & ALLOC_NOFRAGMENT)
min_order = pageblock_order;
/*
@@ -3598,16 +3604,11 @@ EXPORT_SYMBOL_GPL(split_page);
int __isolate_free_page(struct page *page, unsigned int order)
{
- unsigned long watermark;
- struct zone *zone;
- int mt;
-
- BUG_ON(!PageBuddy(page));
-
- zone = page_zone(page);
- mt = get_pageblock_migratetype(page);
+ struct zone *zone = page_zone(page);
+ int mt = get_pageblock_migratetype(page);
if (!is_migrate_isolate(mt)) {
+ unsigned long watermark;
/*
* Obey watermarks as if the page was being allocated. We can
* emulate a high-order watermark check with a raised order-0
@@ -3621,8 +3622,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
__mod_zone_freepage_state(zone, -(1UL << order), mt);
}
- /* Remove page from free list */
-
del_page_from_free_list(page, zone, order);
/*
@@ -3643,7 +3642,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
}
}
-
return 1UL << order;
}
@@ -3670,8 +3668,6 @@ void __putback_isolated_page(struct page *page, unsigned int order, int mt)
/*
* Update NUMA hit/miss statistics
- *
- * Must be called with interrupts disabled.
*/
static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
long nr_account)
@@ -3777,8 +3773,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
/* Lock and remove page from the per-cpu list */
static struct page *rmqueue_pcplist(struct zone *preferred_zone,
struct zone *zone, unsigned int order,
- gfp_t gfp_flags, int migratetype,
- unsigned int alloc_flags)
+ int migratetype, unsigned int alloc_flags)
{
struct per_cpu_pages *pcp;
struct list_head *list;
@@ -3815,8 +3810,17 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
}
/*
- * Allocate a page from the given zone. Use pcplists for order-0 allocations.
+ * Allocate a page from the given zone.
+ * Use pcplists for THP or "cheap" high-order allocations.
+ */
+
+/*
+ * Do not instrument rmqueue() with KMSAN. This function may call
+ * __msan_poison_alloca() through a call to set_pfnblock_flags_mask().
+ * If __msan_poison_alloca() attempts to allocate pages for the stack depot, it
+ * may call rmqueue() again, which will result in a deadlock.
*/
+__no_sanitize_memory
static inline
struct page *rmqueue(struct zone *preferred_zone,
struct zone *zone, unsigned int order,
@@ -3839,7 +3843,7 @@ struct page *rmqueue(struct zone *preferred_zone,
if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
migratetype != MIGRATE_MOVABLE) {
page = rmqueue_pcplist(preferred_zone, zone, order,
- gfp_flags, migratetype, alloc_flags);
+ migratetype, alloc_flags);
if (likely(page))
goto out;
}
@@ -4329,7 +4333,7 @@ static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
if (!in_task() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
filter &= ~SHOW_MEM_FILTER_NODES;
- show_mem(filter, nodemask);
+ __show_mem(filter, nodemask, gfp_zone(gfp_mask));
}
void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
@@ -5147,7 +5151,8 @@ retry:
reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
if (reserve_flags)
- alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, reserve_flags);
+ alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, reserve_flags) |
+ (alloc_flags & ALLOC_KSWAPD);
/*
* Reset the nodemask and zonelist iterators if memory policies can be
@@ -5272,7 +5277,7 @@ nopage:
* so that we can identify them and convert them to something
* else.
*/
- WARN_ON_ONCE_GFP(order > PAGE_ALLOC_COSTLY_ORDER, gfp_mask);
+ WARN_ON_ONCE_GFP(costly_order, gfp_mask);
/*
* Help non-failing allocations by giving them access to memory
@@ -5569,6 +5574,7 @@ out:
}
trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
+ kmsan_alloc_page(page, order, alloc_gfp);
return page;
}
@@ -6057,6 +6063,15 @@ static void show_migration_types(unsigned char type)
printk(KERN_CONT "(%s) ", tmp);
}
+static bool node_has_managed_zones(pg_data_t *pgdat, int max_zone_idx)
+{
+ int zone_idx;
+ for (zone_idx = 0; zone_idx <= max_zone_idx; zone_idx++)
+ if (zone_managed_pages(pgdat->node_zones + zone_idx))
+ return true;
+ return false;
+}
+
/*
* Show free area list (used inside shift_scroll-lock stuff)
* We also calculate the percentage fragmentation. We do this by counting the
@@ -6066,7 +6081,7 @@ static void show_migration_types(unsigned char type)
* SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
* cpuset.
*/
-void show_free_areas(unsigned int filter, nodemask_t *nodemask)
+void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
{
unsigned long free_pcp = 0;
int cpu, nid;
@@ -6074,6 +6089,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
pg_data_t *pgdat;
for_each_populated_zone(zone) {
+ if (zone_idx(zone) > max_zone_idx)
+ continue;
if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;
@@ -6113,6 +6130,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
for_each_online_pgdat(pgdat) {
if (show_mem_node_skip(filter, pgdat->node_id, nodemask))
continue;
+ if (!node_has_managed_zones(pgdat, max_zone_idx))
+ continue;
printk("Node %d"
" active_anon:%lukB"
@@ -6171,6 +6190,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
for_each_populated_zone(zone) {
int i;
+ if (zone_idx(zone) > max_zone_idx)
+ continue;
if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;
@@ -6232,6 +6253,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
unsigned long nr[MAX_ORDER], flags, total = 0;
unsigned char types[MAX_ORDER];
+ if (zone_idx(zone) > max_zone_idx)
+ continue;
if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;
show_node(zone);
@@ -6557,7 +6580,7 @@ static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonesta
#define BOOT_PAGESET_BATCH 1
static DEFINE_PER_CPU(struct per_cpu_pages, boot_pageset);
static DEFINE_PER_CPU(struct per_cpu_zonestat, boot_zonestats);
-DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
+static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
static void __build_all_zonelists(void *data)
{
@@ -6753,7 +6776,7 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
* such that unmovable allocations won't be scattered all
* over the place during system boot.
*/
- if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
+ if (pageblock_aligned(pfn)) {
set_pageblock_migratetype(page, migratetype);
cond_resched();
}
@@ -6796,7 +6819,7 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
* Please note that MEMINIT_HOTPLUG path doesn't clear memmap
* because this is done early in section_activate()
*/
- if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
+ if (pageblock_aligned(pfn)) {
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
cond_resched();
}
@@ -6859,7 +6882,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
unsigned long start = jiffies;
int nid = pgdat->node_id;
- if (WARN_ON_ONCE(!pgmap || zone_idx(zone) != ZONE_DEVICE))
+ if (WARN_ON_ONCE(!pgmap || zone_idx != ZONE_DEVICE))
return;
/*
@@ -6928,9 +6951,8 @@ static void __init init_unavailable_range(unsigned long spfn,
u64 pgcnt = 0;
for (pfn = spfn; pfn < epfn; pfn++) {
- if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
- pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
- + pageblock_nr_pages - 1;
+ if (!pfn_valid(pageblock_start_pfn(pfn))) {
+ pfn = pageblock_end_pfn(pfn) - 1;
continue;
}
__init_single_page(pfn_to_page(pfn), pfn, zone, node);
@@ -7035,7 +7057,7 @@ static int zone_batchsize(struct zone *zone)
* size is striking a balance between allocation latency
* and zone lock contention.
*/
- batch = min(zone_managed_pages(zone) >> 10, (1024 * 1024) / PAGE_SIZE);
+ batch = min(zone_managed_pages(zone) >> 10, SZ_1M / PAGE_SIZE);
batch /= 4; /* We effectively *= 4 below */
if (batch < 1)
batch = 1;
@@ -7220,6 +7242,17 @@ void __meminit setup_zone_pageset(struct zone *zone)
}
/*
+ * The zone indicated has a new number of managed_pages; batch sizes and percpu
+ * page high values need to be recalculated.
+ */
+static void zone_pcp_update(struct zone *zone, int cpu_online)
+{
+ mutex_lock(&pcp_batch_high_lock);
+ zone_set_pageset_high_and_batch(zone, cpu_online);
+ mutex_unlock(&pcp_batch_high_lock);
+}
+
+/*
* Allocate per cpu pagesets and initialize them.
* Before this call only boot pagesets were available.
*/
@@ -7663,6 +7696,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
int i;
pgdat_resize_init(pgdat);
+ pgdat_kswapd_lock_init(pgdat);
pgdat_init_split_queue(pgdat);
pgdat_init_kcompactd(pgdat);
@@ -7957,17 +7991,6 @@ unsigned long __init node_map_pfn_alignment(void)
return ~accl_mask + 1;
}
-/**
- * find_min_pfn_with_active_regions - Find the minimum PFN registered
- *
- * Return: the minimum PFN based on information provided via
- * memblock_set_node().
- */
-unsigned long __init find_min_pfn_with_active_regions(void)
-{
- return PHYS_PFN(memblock_start_of_DRAM());
-}
-
/*
* early_calculate_totalpages()
* Sum pages in active regions for movable zone.
@@ -8260,7 +8283,7 @@ void __init free_area_init(unsigned long *max_zone_pfn)
memset(arch_zone_highest_possible_pfn, 0,
sizeof(arch_zone_highest_possible_pfn));
- start_pfn = find_min_pfn_with_active_regions();
+ start_pfn = PHYS_PFN(memblock_start_of_DRAM());
descending = arch_has_descending_max_zone_pfns();
for (i = 0; i < MAX_NR_ZONES; i++) {
@@ -8509,8 +8532,8 @@ void __init mem_init_print_info(void)
#endif
")\n",
K(nr_free_pages()), K(physpages),
- codesize >> 10, datasize >> 10, rosize >> 10,
- (init_data_size + init_code_size) >> 10, bss_size >> 10,
+ codesize / SZ_1K, datasize / SZ_1K, rosize / SZ_1K,
+ (init_data_size + init_code_size) / SZ_1K, bss_size / SZ_1K,
K(physpages - totalram_pages() - totalcma_pages),
K(totalcma_pages)
#ifdef CONFIG_HIGHMEM
@@ -9023,7 +9046,7 @@ void *__init alloc_large_system_hash(const char *tablename,
{
unsigned long long max = high_limit;
unsigned long log2qty, size;
- void *table = NULL;
+ void *table;
gfp_t gfp_flags;
bool virt;
bool huge;
@@ -9035,8 +9058,8 @@ void *__init alloc_large_system_hash(const char *tablename,
numentries -= arch_reserved_kernel_pages();
/* It isn't necessary when PAGE_SIZE >= 1MB */
- if (PAGE_SHIFT < 20)
- numentries = round_up(numentries, (1<<20)/PAGE_SIZE);
+ if (PAGE_SIZE < SZ_1M)
+ numentries = round_up(numentries, SZ_1M / PAGE_SIZE);
#if __BITS_PER_LONG > 32
if (!high_limit) {
@@ -9461,17 +9484,6 @@ void free_contig_range(unsigned long pfn, unsigned long nr_pages)
EXPORT_SYMBOL(free_contig_range);
/*
- * The zone indicated has a new number of managed_pages; batch sizes and percpu
- * page high values need to be recalculated.
- */
-void zone_pcp_update(struct zone *zone, int cpu_online)
-{
- mutex_lock(&pcp_batch_high_lock);
- zone_set_pageset_high_and_batch(zone, cpu_online);
- mutex_unlock(&pcp_batch_high_lock);
-}
-
-/*
* Effectively disable pcplists for the zone by setting the high limit to 0
* and draining all cpus. A concurrent page freeing on another CPU that's about
* to put the page on pcplist will either finish before the drain and the page
@@ -9503,9 +9515,11 @@ void zone_pcp_reset(struct zone *zone)
drain_zonestat(zone, pzstats);
}
free_percpu(zone->per_cpu_pageset);
- free_percpu(zone->per_cpu_zonestats);
zone->per_cpu_pageset = &boot_pageset;
- zone->per_cpu_zonestats = &boot_zonestats;
+ if (zone->per_cpu_zonestats != &boot_zonestats) {
+ free_percpu(zone->per_cpu_zonestats);
+ zone->per_cpu_zonestats = &boot_zonestats;
+ }
}
}
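A side note on the helpers used throughout the hunks above: the conversions
replace open-coded masking such as pfn & ~(pageblock_nr_pages - 1) and
IS_ALIGNED(pfn, pageblock_nr_pages) with pageblock_start_pfn(),
pageblock_end_pfn() and pageblock_aligned(). The standalone sketch below
models the arithmetic those helpers perform, assuming a power-of-two block
size; PAGEBLOCK_NR_PAGES and the wrapper functions are illustrative
stand-ins, not the kernel definitions.

#include <stdbool.h>
#include <stdio.h>

#define PAGEBLOCK_NR_PAGES 512UL	/* e.g. 2 MiB blocks of 4 KiB pages */

/* round_down(pfn, pageblock_nr_pages): first pfn of the block */
static unsigned long pageblock_start_pfn(unsigned long pfn)
{
	return pfn & ~(PAGEBLOCK_NR_PAGES - 1);
}

/* ALIGN(pfn + 1, pageblock_nr_pages): first pfn past the block */
static unsigned long pageblock_end_pfn(unsigned long pfn)
{
	return (pfn + PAGEBLOCK_NR_PAGES) & ~(PAGEBLOCK_NR_PAGES - 1);
}

/* IS_ALIGNED(pfn, pageblock_nr_pages) */
static bool pageblock_aligned(unsigned long pfn)
{
	return (pfn & (PAGEBLOCK_NR_PAGES - 1)) == 0;
}

int main(void)
{
	unsigned long pfn = 1000;

	/* Prints: pfn 1000 -> block [512, 1536), aligned: 0 */
	printf("pfn %lu -> block [%lu, %lu), aligned: %d\n", pfn,
	       pageblock_start_pfn(pfn), pageblock_end_pfn(pfn),
	       pageblock_aligned(pfn));
	return 0;
}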