aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c121
1 files changed, 62 insertions, 59 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0e2bab486fea..05fe1ddb033c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -69,6 +69,7 @@
#include <linux/nmi.h>
#include <linux/psi.h>
#include <linux/padata.h>
+#include <linux/khugepaged.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -155,16 +156,16 @@ static int __init early_init_on_alloc(char *buf)
int ret;
bool bool_result;
- if (!buf)
- return -EINVAL;
ret = kstrtobool(buf, &bool_result);
+ if (ret)
+ return ret;
if (bool_result && page_poisoning_enabled())
pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_alloc\n");
if (bool_result)
static_branch_enable(&init_on_alloc);
else
static_branch_disable(&init_on_alloc);
- return ret;
+ return 0;
}
early_param("init_on_alloc", early_init_on_alloc);
@@ -173,16 +174,16 @@ static int __init early_init_on_free(char *buf)
int ret;
bool bool_result;
- if (!buf)
- return -EINVAL;
ret = kstrtobool(buf, &bool_result);
+ if (ret)
+ return ret;
if (bool_result && page_poisoning_enabled())
pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_free\n");
if (bool_result)
static_branch_enable(&init_on_free);
else
static_branch_disable(&init_on_free);
- return ret;
+ return 0;
}
early_param("init_on_free", early_init_on_free);
@@ -1302,6 +1303,11 @@ static void free_pcppages_bulk(struct zone *zone, int count,
struct page *page, *tmp;
LIST_HEAD(head);
+ /*
+ * Ensure proper count is passed which otherwise would stuck in the
+ * below while (list_empty(list)) loop.
+ */
+ count = min(pcp->count, count);
while (count) {
struct list_head *list;
@@ -3362,9 +3368,16 @@ struct page *rmqueue(struct zone *preferred_zone,
struct page *page;
if (likely(order == 0)) {
- page = rmqueue_pcplist(preferred_zone, zone, gfp_flags,
+ /*
+ * MIGRATE_MOVABLE pcplist could have the pages on CMA area and
+ * we need to skip it when CMA area isn't allowed.
+ */
+ if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
+ migratetype != MIGRATE_MOVABLE) {
+ page = rmqueue_pcplist(preferred_zone, zone, gfp_flags,
migratetype, alloc_flags);
- goto out;
+ goto out;
+ }
}
/*
@@ -3376,7 +3389,13 @@ struct page *rmqueue(struct zone *preferred_zone,
do {
page = NULL;
- if (alloc_flags & ALLOC_HARDER) {
+ /*
+ * order-0 request can reach here when the pcplist is skipped
+ * due to non-CMA allocation context. HIGHATOMIC area is
+ * reserved for high-order atomic allocation, so order-0
+ * request should skip it.
+ */
+ if (order > 0 && alloc_flags & ALLOC_HARDER) {
page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
if (page)
trace_mm_page_alloc_zone_locked(page, order, migratetype);
@@ -3722,8 +3741,8 @@ retry:
*/
no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
z = ac->preferred_zoneref;
- for_next_zone_zonelist_nodemask(zone, z, ac->zonelist,
- ac->highest_zoneidx, ac->nodemask) {
+ for_next_zone_zonelist_nodemask(zone, z, ac->highest_zoneidx,
+ ac->nodemask) {
struct page *page;
unsigned long mark;
@@ -3967,8 +3986,10 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
* success so it is time to admit defeat. We will skip the OOM killer
* because it is very likely that the caller has a more reasonable
* fallback than shooting a random task.
+ *
+ * The OOM killer may not free memory on a specific node.
*/
- if (gfp_mask & __GFP_RETRY_MAYFAIL)
+ if (gfp_mask & (__GFP_RETRY_MAYFAIL | __GFP_THISNODE))
goto out;
/* The OOM killer does not needlessly kill tasks for lowmem */
if (ac->highest_zoneidx < ZONE_NORMAL)
@@ -3985,10 +4006,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
* failures more gracefully we should just bail out here.
*/
- /* The OOM killer may not free memory on a specific node */
- if (gfp_mask & __GFP_THISNODE)
- goto out;
-
/* Exhausted what can be done so it's blame time */
if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
*did_some_progress = 1;
@@ -4236,13 +4253,12 @@ EXPORT_SYMBOL_GPL(fs_reclaim_release);
#endif
/* Perform direct synchronous page reclaim */
-static int
+static unsigned long
__perform_reclaim(gfp_t gfp_mask, unsigned int order,
const struct alloc_context *ac)
{
- int progress;
unsigned int noreclaim_flag;
- unsigned long pflags;
+ unsigned long pflags, progress;
cond_resched();
@@ -4821,12 +4837,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
*alloc_flags = current_alloc_flags(gfp_mask, *alloc_flags);
- return true;
-}
-
-/* Determine whether to spread dirty pages and what the first usable zone */
-static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
-{
/* Dirty zone balancing only done in the fast path */
ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE);
@@ -4837,6 +4847,8 @@ static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
*/
ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
ac->highest_zoneidx, ac->nodemask);
+
+ return true;
}
/*
@@ -4865,8 +4877,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
return NULL;
- finalise_ac(gfp_mask, &ac);
-
/*
* Forbid the first pass from falling back to types that fragment
* memory until all local zones are considered.
@@ -4942,6 +4952,9 @@ void __free_pages(struct page *page, unsigned int order)
{
if (put_page_testzero(page))
free_the_page(page, order);
+ else if (!PageHead(page))
+ while (order-- > 0)
+ free_the_page(page + (1 << order), order);
}
EXPORT_SYMBOL(__free_pages);
@@ -5632,7 +5645,6 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
int n, val;
int min_val = INT_MAX;
int best_node = NUMA_NO_NODE;
- const struct cpumask *tmp = cpumask_of_node(0);
/* Use the local node if we haven't already */
if (!node_isset(node, *used_node_mask)) {
@@ -5653,8 +5665,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
val += (n < node);
/* Give preference to headless and unused nodes */
- tmp = cpumask_of_node(n);
- if (!cpumask_empty(tmp))
+ if (!cpumask_empty(cpumask_of_node(n)))
val += PENALTY_FOR_NODE_WITH_CPUS;
/* Slight preference for less loaded node */
@@ -5950,7 +5961,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
if (!r || *pfn >= memblock_region_memory_end_pfn(r)) {
- for_each_memblock(memory, r) {
+ for_each_mem_region(r) {
if (*pfn < memblock_region_memory_end_pfn(r))
break;
}
@@ -5970,7 +5981,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
* done. Non-atomic initialization, single-pass.
*/
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
- unsigned long start_pfn, enum memmap_context context,
+ unsigned long start_pfn, enum meminit_context context,
struct vmem_altmap *altmap)
{
unsigned long pfn, end_pfn = start_pfn + size;
@@ -6002,7 +6013,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
* There can be holes in boot-time mem_map[]s handed to this
* function. They do not exist on hotplugged memory.
*/
- if (context == MEMMAP_EARLY) {
+ if (context == MEMINIT_EARLY) {
if (overlap_memmap_init(zone, &pfn))
continue;
if (defer_init(nid, pfn, end_pfn))
@@ -6011,7 +6022,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
page = pfn_to_page(pfn);
__init_single_page(page, pfn, zone, nid);
- if (context == MEMMAP_HOTPLUG)
+ if (context == MEMINIT_HOTPLUG)
__SetPageReserved(page);
/*
@@ -6094,7 +6105,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
* check here not to call set_pageblock_migratetype() against
* pfn out of zone.
*
- * Please note that MEMMAP_HOTPLUG path doesn't clear memmap
+ * Please note that MEMINIT_HOTPLUG path doesn't clear memmap
* because this is done early in section_activate()
*/
if (!(pfn & (pageblock_nr_pages - 1))) {
@@ -6132,7 +6143,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid,
if (end_pfn > start_pfn) {
size = end_pfn - start_pfn;
memmap_init_zone(size, nid, zone, start_pfn,
- MEMMAP_EARLY, NULL);
+ MEMINIT_EARLY, NULL);
}
}
}
@@ -6535,7 +6546,7 @@ static unsigned long __init zone_absent_pages_in_node(int nid,
unsigned long start_pfn, end_pfn;
struct memblock_region *r;
- for_each_memblock(memory, r) {
+ for_each_mem_region(r) {
start_pfn = clamp(memblock_region_memory_base_pfn(r),
zone_start_pfn, zone_end_pfn);
end_pfn = clamp(memblock_region_memory_end_pfn(r),
@@ -6979,8 +6990,7 @@ static void __init init_unavailable_mem(void)
* Loop through unavailable ranges not covered by memblock.memory.
*/
pgcnt = 0;
- for_each_mem_range(i, &memblock.memory, NULL,
- NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, NULL) {
+ for_each_mem_range(i, &start, &end) {
if (next < start)
pgcnt += init_unavailable_range(PFN_DOWN(next),
PFN_UP(start));
@@ -7130,7 +7140,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
* options.
*/
if (movable_node_is_enabled()) {
- for_each_memblock(memory, r) {
+ for_each_mem_region(r) {
if (!memblock_is_hotpluggable(r))
continue;
@@ -7151,7 +7161,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
if (mirrored_kernelcore) {
bool mem_below_4gb_not_mirrored = false;
- for_each_memblock(memory, r) {
+ for_each_mem_region(r) {
if (memblock_is_mirror(r))
continue;
@@ -7886,9 +7896,11 @@ int __meminit init_per_zone_wmark_min(void)
setup_min_slab_ratio();
#endif
+ khugepaged_min_free_kbytes_update();
+
return 0;
}
-core_initcall(init_per_zone_wmark_min)
+postcore_initcall(init_per_zone_wmark_min)
/*
* min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
@@ -8213,14 +8225,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
{
unsigned long iter = 0;
unsigned long pfn = page_to_pfn(page);
-
- /*
- * TODO we could make this much more efficient by not checking every
- * page in the range if we know all of them are in MOVABLE_ZONE and
- * that the movable zone guarantees that pages are migratable but
- * the later is not the case right now unfortunatelly. E.g. movablecore
- * can still lead to having bootmem allocations in zone_movable.
- */
+ unsigned long offset = pfn % pageblock_nr_pages;
if (is_migrate_cma_page(page)) {
/*
@@ -8234,12 +8239,18 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
return page;
}
- for (; iter < pageblock_nr_pages; iter++) {
+ for (; iter < pageblock_nr_pages - offset; iter++) {
if (!pfn_valid_within(pfn + iter))
continue;
page = pfn_to_page(pfn + iter);
+ /*
+ * Both, bootmem allocations and memory holes are marked
+ * PG_reserved and are unmovable. We can even have unmovable
+ * allocations inside ZONE_MOVABLE, for example when
+ * specifying "movablecore".
+ */
if (PageReserved(page))
return page;
@@ -8313,14 +8324,6 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
* it. But now, memory offline itself doesn't call
* shrink_node_slabs() and it still to be fixed.
*/
- /*
- * If the page is not RAM, page_count()should be 0.
- * we don't need more check. This is an _used_ not-movable page.
- *
- * The problematic thing here is PG_reserved pages. PG_reserved
- * is set to both of a memory hole page and a _used_ kernel
- * page at boot.
- */
return page;
}
return NULL;