Diffstat (limited to 'mm/page_alloc.c')
 mm/page_alloc.c | 141
 1 file changed, 79 insertions(+), 62 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b37435c274cf..c5952749ad40 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -677,10 +677,8 @@ static inline int pindex_to_order(unsigned int pindex)
int order = pindex / MIGRATE_PCPTYPES;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (order > PAGE_ALLOC_COSTLY_ORDER) {
+ if (order > PAGE_ALLOC_COSTLY_ORDER)
order = pageblock_order;
- VM_BUG_ON(order != pageblock_order);
- }
#else
VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
#endif
@@ -724,7 +722,7 @@ static inline void free_the_page(struct page *page, unsigned int order)
void free_compound_page(struct page *page)
{
- mem_cgroup_uncharge(page);
+ mem_cgroup_uncharge(page_folio(page));
free_the_page(page, compound_order(page));
}
@@ -1312,8 +1310,10 @@ static __always_inline bool free_pages_prepare(struct page *page,
VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
- if (compound)
+ if (compound) {
ClearPageDoubleMap(page);
+ ClearPageHasHWPoisoned(page);
+ }
for (i = 1; i < (1 << order); i++) {
if (compound)
bad += free_tail_pages_check(page, page + i);
@@ -1428,14 +1428,8 @@ static inline void prefetch_buddy(struct page *page)
/*
* Frees a number of pages from the PCP lists
- * Assumes all pages on list are in same zone, and of same order.
+ * Assumes all pages on list are in same zone.
* count is the number of pages to free.
- *
- * If the zone was previously in an "all pages pinned" state then look to
- * see if this freeing clears that state.
- *
- * And clear the zone's pages_scanned counter, to hold off the "all pages are
- * pinned" detection logic.
*/
static void free_pcppages_bulk(struct zone *zone, int count,
struct per_cpu_pages *pcp)
@@ -1589,7 +1583,7 @@ static void __meminit init_reserved_page(unsigned long pfn)
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
struct zone *zone = &pgdat->node_zones[zid];
- if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
+ if (zone_spans_pfn(zone, pfn))
break;
}
__init_single_page(pfn_to_page(pfn), pfn, zid, nid);
@@ -3147,9 +3141,9 @@ static void drain_local_pages_wq(struct work_struct *work)
* cpu which is alright but we also have to make sure to not move to
* a different one.
*/
- preempt_disable();
+ migrate_disable();
drain_local_pages(drain->zone);
- preempt_enable();
+ migrate_enable();
}
/*
@@ -3966,6 +3960,8 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
}
#ifdef CONFIG_NUMA
+int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
+
static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
{
return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <=
@@ -4795,30 +4791,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
trace_reclaim_retry_zone(z, order, reclaimable,
available, min_wmark, *no_progress_loops, wmark);
if (wmark) {
- /*
- * If we didn't make any progress and have a lot of
- * dirty + writeback pages then we should wait for
- * an IO to complete to slow down the reclaim and
- * prevent from pre mature OOM
- */
- if (!did_some_progress) {
- unsigned long write_pending;
-
- write_pending = zone_page_state_snapshot(zone,
- NR_ZONE_WRITE_PENDING);
-
- if (2 * write_pending > reclaimable) {
- congestion_wait(BLK_RW_ASYNC, HZ/10);
- return true;
- }
- }
-
ret = true;
- goto out;
+ break;
}
}
-out:
/*
* Memory allocation/reclaim might be called from a WQ context and the
* current implementation of the WQ concurrency control doesn't
@@ -4914,6 +4891,19 @@ retry_cpuset:
if (!ac->preferred_zoneref->zone)
goto nopage;
+ /*
+ * Check for insane configurations where the cpuset doesn't contain
+ * any suitable zone to satisfy the request - e.g. non-movable
+ * GFP_HIGHUSER allocations from MOVABLE nodes only.
+ */
+ if (cpusets_insane_config() && (gfp_mask & __GFP_HARDWALL)) {
+ struct zoneref *z = first_zones_zonelist(ac->zonelist,
+ ac->highest_zoneidx,
+ &cpuset_current_mems_allowed);
+ if (!z->zone)
+ goto nopage;
+ }
+
if (alloc_flags & ALLOC_KSWAPD)
wake_all_kswapds(order, gfp_mask, ac);
@@ -5223,6 +5213,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
if (unlikely(page_array && nr_pages - nr_populated == 0))
goto out;
+ /* Bulk allocator does not support memcg accounting. */
+ if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT))
+ goto failed;
+
/* Use the single page allocator for one page. */
if (nr_pages - nr_populated == 1)
goto failed;
@@ -5400,6 +5394,18 @@ out:
}
EXPORT_SYMBOL(__alloc_pages);
+struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid,
+ nodemask_t *nodemask)
+{
+ struct page *page = __alloc_pages(gfp | __GFP_COMP, order,
+ preferred_nid, nodemask);
+
+ if (page && order > 1)
+ prep_transhuge_page(page);
+ return (struct folio *)page;
+}
+EXPORT_SYMBOL(__folio_alloc);
+
/*
* Common helper functions. Never use with __GFP_HIGHMEM because the returned
* address cannot represent highmem pages. Use alloc_pages and then kmap if
@@ -5612,8 +5618,8 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
unsigned int order = get_order(size);
unsigned long addr;
- if (WARN_ON_ONCE(gfp_mask & __GFP_COMP))
- gfp_mask &= ~__GFP_COMP;
+ if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM)))
+ gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM);
addr = __get_free_pages(gfp_mask, order);
return make_alloc_exact(addr, order, size);
@@ -5637,8 +5643,8 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
unsigned int order = get_order(size);
struct page *p;
- if (WARN_ON_ONCE(gfp_mask & __GFP_COMP))
- gfp_mask &= ~__GFP_COMP;
+ if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM)))
+ gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM);
p = alloc_pages_node(nid, gfp_mask, order);
if (!p)
@@ -5980,6 +5986,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
printk(KERN_CONT
"%s"
" free:%lukB"
+ " boost:%lukB"
" min:%lukB"
" low:%lukB"
" high:%lukB"
@@ -6000,6 +6007,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
"\n",
zone->name,
K(zone_page_state(zone, NR_FREE_PAGES)),
+ K(zone->watermark_boost),
K(min_wmark_pages(zone)),
K(low_wmark_pages(zone)),
K(high_wmark_pages(zone)),
@@ -6255,7 +6263,7 @@ static void build_zonelists(pg_data_t *pgdat)
*/
if (node_distance(local_node, node) !=
node_distance(local_node, prev_node))
- node_load[node] = load;
+ node_load[node] += load;
node_order[nr_nodes++] = node;
prev_node = node;
@@ -6264,6 +6272,10 @@ static void build_zonelists(pg_data_t *pgdat)
build_zonelists_in_node_order(pgdat, node_order, nr_nodes);
build_thisnode_zonelists(pgdat);
+ pr_info("Fallback order for Node %d: ", local_node);
+ for (node = 0; node < nr_nodes; node++)
+ pr_cont("%d ", node_order[node]);
+ pr_cont("\n");
}
#ifdef CONFIG_HAVE_MEMORYLESS_NODES
@@ -7389,6 +7401,8 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
{
+ int i;
+
pgdat_resize_init(pgdat);
pgdat_init_split_queue(pgdat);
@@ -7397,6 +7411,9 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
init_waitqueue_head(&pgdat->kswapd_wait);
init_waitqueue_head(&pgdat->pfmemalloc_wait);
+ for (i = 0; i < NR_VMSCAN_THROTTLE; i++)
+ init_waitqueue_head(&pgdat->reclaim_wait[i]);
+
pgdat_page_ext_init(pgdat);
lruvec_init(&pgdat->__lruvec);
}
@@ -8126,8 +8143,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
}
if (pages && s)
- pr_info("Freeing %s memory: %ldK\n",
- s, pages << (PAGE_SHIFT - 10));
+ pr_info("Freeing %s memory: %ldK\n", s, K(pages));
return pages;
}
@@ -8172,14 +8188,13 @@ void __init mem_init_print_info(void)
", %luK highmem"
#endif
")\n",
- nr_free_pages() << (PAGE_SHIFT - 10),
- physpages << (PAGE_SHIFT - 10),
+ K(nr_free_pages()), K(physpages),
codesize >> 10, datasize >> 10, rosize >> 10,
(init_data_size + init_code_size) >> 10, bss_size >> 10,
- (physpages - totalram_pages() - totalcma_pages) << (PAGE_SHIFT - 10),
- totalcma_pages << (PAGE_SHIFT - 10)
+ K(physpages - totalram_pages() - totalcma_pages),
+ K(totalcma_pages)
#ifdef CONFIG_HIGHMEM
- , totalhigh_pages() << (PAGE_SHIFT - 10)
+ , K(totalhigh_pages())
#endif
);
}
@@ -8452,7 +8467,7 @@ void setup_per_zone_wmarks(void)
* 8192MB: 11584k
* 16384MB: 16384k
*/
-int __meminit init_per_zone_wmark_min(void)
+void calculate_min_free_kbytes(void)
{
unsigned long lowmem_kbytes;
int new_min_free_kbytes;
@@ -8460,16 +8475,17 @@ int __meminit init_per_zone_wmark_min(void)
lowmem_kbytes = nr_free_buffer_pages() * (PAGE_SIZE >> 10);
new_min_free_kbytes = int_sqrt(lowmem_kbytes * 16);
- if (new_min_free_kbytes > user_min_free_kbytes) {
- min_free_kbytes = new_min_free_kbytes;
- if (min_free_kbytes < 128)
- min_free_kbytes = 128;
- if (min_free_kbytes > 262144)
- min_free_kbytes = 262144;
- } else {
+ if (new_min_free_kbytes > user_min_free_kbytes)
+ min_free_kbytes = clamp(new_min_free_kbytes, 128, 262144);
+ else
pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n",
new_min_free_kbytes, user_min_free_kbytes);
- }
+
+}
+
+int __meminit init_per_zone_wmark_min(void)
+{
+ calculate_min_free_kbytes();
setup_per_zone_wmarks();
refresh_zone_stat_thresholds();
setup_per_zone_lowmem_reserve();
@@ -8756,7 +8772,8 @@ void *__init alloc_large_system_hash(const char *tablename,
} else if (get_order(size) >= MAX_ORDER || hashdist) {
table = __vmalloc(size, gfp_flags);
virt = true;
- huge = is_vm_area_hugepages(table);
+ if (table)
+ huge = is_vm_area_hugepages(table);
} else {
/*
* If bucketsize is not a power-of-two, we may free
@@ -9353,21 +9370,21 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
}
#endif
+/*
+ * This function returns a stable result only if called under zone lock.
+ */
bool is_free_buddy_page(struct page *page)
{
- struct zone *zone = page_zone(page);
unsigned long pfn = page_to_pfn(page);
- unsigned long flags;
unsigned int order;
- spin_lock_irqsave(&zone->lock, flags);
for (order = 0; order < MAX_ORDER; order++) {
struct page *page_head = page - (pfn & ((1 << order) - 1));
- if (PageBuddy(page_head) && buddy_order(page_head) >= order)
+ if (PageBuddy(page_head) &&
+ buddy_order_unsafe(page_head) >= order)
break;
}
- spin_unlock_irqrestore(&zone->lock, flags);
return order < MAX_ORDER;
}