aboutsummaryrefslogtreecommitdiffstats
path: root/mm/compaction.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/compaction.c')
-rw-r--r--mm/compaction.c270
1 files changed, 138 insertions, 132 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 79bfe0e06907..9affb2908304 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -15,11 +15,11 @@
#include <linux/backing-dev.h>
#include <linux/sysctl.h>
#include <linux/sysfs.h>
-#include <linux/balloon_compaction.h>
#include <linux/page-isolation.h>
#include <linux/kasan.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/page_owner.h>
#include "internal.h"
#ifdef CONFIG_COMPACTION
@@ -65,13 +65,27 @@ static unsigned long release_freepages(struct list_head *freelist)
static void map_pages(struct list_head *list)
{
- struct page *page;
+ unsigned int i, order, nr_pages;
+ struct page *page, *next;
+ LIST_HEAD(tmp_list);
+
+ list_for_each_entry_safe(page, next, list, lru) {
+ list_del(&page->lru);
+
+ order = page_private(page);
+ nr_pages = 1 << order;
- list_for_each_entry(page, list, lru) {
- arch_alloc_page(page, 0);
- kernel_map_pages(page, 1, 1);
- kasan_alloc_pages(page, 0);
+ post_alloc_hook(page, order, __GFP_MOVABLE);
+ if (order)
+ split_page(page, order);
+
+ for (i = 0; i < nr_pages; i++) {
+ list_add(&page->lru, &tmp_list);
+ page++;
+ }
}
+
+ list_splice(&tmp_list, list);
}
static inline bool migrate_async_suitable(int migratetype)
@@ -81,6 +95,44 @@ static inline bool migrate_async_suitable(int migratetype)
#ifdef CONFIG_COMPACTION
+int PageMovable(struct page *page)
+{
+ struct address_space *mapping;
+
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ if (!__PageMovable(page))
+ return 0;
+
+ mapping = page_mapping(page);
+ if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(PageMovable);
+
+void __SetPageMovable(struct page *page, struct address_space *mapping)
+{
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
+ page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
+}
+EXPORT_SYMBOL(__SetPageMovable);
+
+void __ClearPageMovable(struct page *page)
+{
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_PAGE(!PageMovable(page), page);
+ /*
+ * Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE
+ * flag so that VM can catch up released page by driver after isolation.
+ * With it, VM migration doesn't try to put it back.
+ */
+ page->mapping = (void *)((unsigned long)page->mapping &
+ PAGE_MAPPING_MOVABLE);
+}
+EXPORT_SYMBOL(__ClearPageMovable);
+
/* Do not skip compaction more than 64 times */
#define COMPACT_MAX_DEFER_SHIFT 6
@@ -279,7 +331,7 @@ static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
{
if (cc->mode == MIGRATE_ASYNC) {
if (!spin_trylock_irqsave(lock, *flags)) {
- cc->contended = COMPACT_CONTENDED_LOCK;
+ cc->contended = true;
return false;
}
} else {
@@ -313,13 +365,13 @@ static bool compact_unlock_should_abort(spinlock_t *lock,
}
if (fatal_signal_pending(current)) {
- cc->contended = COMPACT_CONTENDED_SCHED;
+ cc->contended = true;
return true;
}
if (need_resched()) {
if (cc->mode == MIGRATE_ASYNC) {
- cc->contended = COMPACT_CONTENDED_SCHED;
+ cc->contended = true;
return true;
}
cond_resched();
@@ -342,7 +394,7 @@ static inline bool compact_should_abort(struct compact_control *cc)
/* async compaction aborts if contended */
if (need_resched()) {
if (cc->mode == MIGRATE_ASYNC) {
- cc->contended = COMPACT_CONTENDED_SCHED;
+ cc->contended = true;
return true;
}
@@ -368,12 +420,13 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
unsigned long flags = 0;
bool locked = false;
unsigned long blockpfn = *start_pfn;
+ unsigned int order;
cursor = pfn_to_page(blockpfn);
/* Isolate free pages. */
for (; blockpfn < end_pfn; blockpfn++, cursor++) {
- int isolated, i;
+ int isolated;
struct page *page = cursor;
/*
@@ -439,17 +492,17 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
goto isolate_fail;
}
- /* Found a free page, break it into order-0 pages */
- isolated = split_free_page(page);
+ /* Found a free page, will break it into order-0 pages */
+ order = page_order(page);
+ isolated = __isolate_free_page(page, order);
if (!isolated)
break;
+ set_page_private(page, order);
total_isolated += isolated;
cc->nr_freepages += isolated;
- for (i = 0; i < isolated; i++) {
- list_add(&page->lru, freelist);
- page++;
- }
+ list_add_tail(&page->lru, freelist);
+
if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
blockpfn += isolated;
break;
@@ -568,7 +621,7 @@ isolate_freepages_range(struct compact_control *cc,
*/
}
- /* split_free_page does not map the pages */
+ /* __isolate_free_page() does not map the pages */
map_pages(&freelist);
if (pfn < end_pfn) {
@@ -593,8 +646,8 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc)
list_for_each_entry(page, &cc->migratepages, lru)
count[!!page_is_file_cache(page)]++;
- mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
- mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+ mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, count[0]);
+ mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, count[1]);
}
/* Similar to reclaim, but different enough that they don't share logic */
@@ -602,12 +655,12 @@ static bool too_many_isolated(struct zone *zone)
{
unsigned long active, inactive, isolated;
- inactive = zone_page_state(zone, NR_INACTIVE_FILE) +
- zone_page_state(zone, NR_INACTIVE_ANON);
- active = zone_page_state(zone, NR_ACTIVE_FILE) +
- zone_page_state(zone, NR_ACTIVE_ANON);
- isolated = zone_page_state(zone, NR_ISOLATED_FILE) +
- zone_page_state(zone, NR_ISOLATED_ANON);
+ inactive = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE) +
+ node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON);
+ active = node_page_state(zone->zone_pgdat, NR_ACTIVE_FILE) +
+ node_page_state(zone->zone_pgdat, NR_ACTIVE_ANON);
+ isolated = node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE) +
+ node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON);
return isolated > (inactive + active) / 2;
}
@@ -670,7 +723,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
/* Time to isolate some pages for migration */
for (; low_pfn < end_pfn; low_pfn++) {
- bool is_lru;
if (skip_on_failure && low_pfn >= next_skip_pfn) {
/*
@@ -700,7 +752,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* if contended.
*/
if (!(low_pfn % SWAP_CLUSTER_MAX)
- && compact_unlock_should_abort(&zone->lru_lock, flags,
+ && compact_unlock_should_abort(zone_lru_lock(zone), flags,
&locked, cc))
break;
@@ -733,21 +785,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
}
/*
- * Check may be lockless but that's ok as we recheck later.
- * It's possible to migrate LRU pages and balloon pages
- * Skip any other type of page
- */
- is_lru = PageLRU(page);
- if (!is_lru) {
- if (unlikely(balloon_page_movable(page))) {
- if (balloon_page_isolate(page)) {
- /* Successfully isolated */
- goto isolate_success;
- }
- }
- }
-
- /*
* Regardless of being on LRU, compound pages such as THP and
* hugetlbfs are not to be compacted. We can potentially save
* a lot of iterations if we skip them at once. The check is
@@ -763,8 +800,30 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
goto isolate_fail;
}
- if (!is_lru)
+ /*
+ * Check may be lockless but that's ok as we recheck later.
+ * It's possible to migrate LRU and non-lru movable pages.
+ * Skip any other type of page
+ */
+ if (!PageLRU(page)) {
+ /*
+ * __PageMovable can return false positive so we need
+ * to verify it under page_lock.
+ */
+ if (unlikely(__PageMovable(page)) &&
+ !PageIsolated(page)) {
+ if (locked) {
+ spin_unlock_irqrestore(zone_lru_lock(zone),
+ flags);
+ locked = false;
+ }
+
+ if (isolate_movable_page(page, isolate_mode))
+ goto isolate_success;
+ }
+
goto isolate_fail;
+ }
/*
* Migration will fail if an anonymous page is pinned in memory,
@@ -777,7 +836,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
/* If we already hold the lock, we can skip some rechecking */
if (!locked) {
- locked = compact_trylock_irqsave(&zone->lru_lock,
+ locked = compact_trylock_irqsave(zone_lru_lock(zone),
&flags, cc);
if (!locked)
break;
@@ -797,7 +856,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
}
}
- lruvec = mem_cgroup_page_lruvec(page, zone);
+ lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
/* Try isolate the page */
if (__isolate_lru_page(page, isolate_mode) != 0)
@@ -840,7 +899,7 @@ isolate_fail:
*/
if (nr_isolated) {
if (locked) {
- spin_unlock_irqrestore(&zone->lru_lock, flags);
+ spin_unlock_irqrestore(zone_lru_lock(zone), flags);
locked = false;
}
acct_isolated(zone, cc);
@@ -868,7 +927,7 @@ isolate_fail:
low_pfn = end_pfn;
if (locked)
- spin_unlock_irqrestore(&zone->lru_lock, flags);
+ spin_unlock_irqrestore(zone_lru_lock(zone), flags);
/*
* Update the pageblock-skip information and cached scanner pfn,
@@ -1009,8 +1068,6 @@ static void isolate_freepages(struct compact_control *cc)
block_end_pfn = block_start_pfn,
block_start_pfn -= pageblock_nr_pages,
isolate_start_pfn = block_start_pfn) {
- unsigned long isolated;
-
/*
* This can iterate a massively long zone without finding any
* suitable migration targets, so periodically check if we need
@@ -1034,40 +1091,34 @@ static void isolate_freepages(struct compact_control *cc)
continue;
/* Found a block suitable for isolating free pages from. */
- isolated = isolate_freepages_block(cc, &isolate_start_pfn,
- block_end_pfn, freelist, false);
- /* If isolation failed early, do not continue needlessly */
- if (!isolated && isolate_start_pfn < block_end_pfn &&
- cc->nr_migratepages > cc->nr_freepages)
- break;
+ isolate_freepages_block(cc, &isolate_start_pfn, block_end_pfn,
+ freelist, false);
/*
- * If we isolated enough freepages, or aborted due to async
- * compaction being contended, terminate the loop.
- * Remember where the free scanner should restart next time,
- * which is where isolate_freepages_block() left off.
- * But if it scanned the whole pageblock, isolate_start_pfn
- * now points at block_end_pfn, which is the start of the next
- * pageblock.
- * In that case we will however want to restart at the start
- * of the previous pageblock.
+ * If we isolated enough freepages, or aborted due to lock
+ * contention, terminate.
*/
if ((cc->nr_freepages >= cc->nr_migratepages)
|| cc->contended) {
- if (isolate_start_pfn >= block_end_pfn)
+ if (isolate_start_pfn >= block_end_pfn) {
+ /*
+ * Restart at previous pageblock if more
+ * freepages can be isolated next time.
+ */
isolate_start_pfn =
block_start_pfn - pageblock_nr_pages;
+ }
break;
- } else {
+ } else if (isolate_start_pfn < block_end_pfn) {
/*
- * isolate_freepages_block() should not terminate
- * prematurely unless contended, or isolated enough
+ * If isolation failed early, do not continue
+ * needlessly.
*/
- VM_BUG_ON(isolate_start_pfn < block_end_pfn);
+ break;
}
}
- /* split_free_page does not map the pages */
+ /* __isolate_free_page() does not map the pages */
map_pages(freelist);
/*
@@ -1149,7 +1200,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
struct page *page;
const isolate_mode_t isolate_mode =
(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
- (cc->mode == MIGRATE_ASYNC ? ISOLATE_ASYNC_MIGRATE : 0);
+ (cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
/*
* Start at where we last stopped, or beginning of the zone as
@@ -1568,14 +1619,11 @@ out:
trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
cc->free_pfn, end_pfn, sync, ret);
- if (ret == COMPACT_CONTENDED)
- ret = COMPACT_PARTIAL;
-
return ret;
}
static enum compact_result compact_zone_order(struct zone *zone, int order,
- gfp_t gfp_mask, enum migrate_mode mode, int *contended,
+ gfp_t gfp_mask, enum compact_priority prio,
unsigned int alloc_flags, int classzone_idx)
{
enum compact_result ret;
@@ -1585,7 +1633,8 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
.order = order,
.gfp_mask = gfp_mask,
.zone = zone,
- .mode = mode,
+ .mode = (prio == COMPACT_PRIO_ASYNC) ?
+ MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT,
.alloc_flags = alloc_flags,
.classzone_idx = classzone_idx,
.direct_compaction = true,
@@ -1598,7 +1647,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
VM_BUG_ON(!list_empty(&cc.freepages));
VM_BUG_ON(!list_empty(&cc.migratepages));
- *contended = cc.contended;
return ret;
}
@@ -1611,50 +1659,38 @@ int sysctl_extfrag_threshold = 500;
* @alloc_flags: The allocation flags of the current allocation
* @ac: The context of current allocation
* @mode: The migration mode for async, sync light, or sync migration
- * @contended: Return value that determines if compaction was aborted due to
- * need_resched() or lock contention
*
* This is the main entry point for direct page compaction.
*/
enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
unsigned int alloc_flags, const struct alloc_context *ac,
- enum migrate_mode mode, int *contended)
+ enum compact_priority prio)
{
int may_enter_fs = gfp_mask & __GFP_FS;
int may_perform_io = gfp_mask & __GFP_IO;
struct zoneref *z;
struct zone *zone;
enum compact_result rc = COMPACT_SKIPPED;
- int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */
-
- *contended = COMPACT_CONTENDED_NONE;
/* Check if the GFP flags allow compaction */
- if (!order || !may_enter_fs || !may_perform_io)
+ if (!may_enter_fs || !may_perform_io)
return COMPACT_SKIPPED;
- trace_mm_compaction_try_to_compact_pages(order, gfp_mask, mode);
+ trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
/* Compact each zone in the list */
for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
ac->nodemask) {
enum compact_result status;
- int zone_contended;
if (compaction_deferred(zone, order)) {
rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
continue;
}
- status = compact_zone_order(zone, order, gfp_mask, mode,
- &zone_contended, alloc_flags,
- ac_classzone_idx(ac));
+ status = compact_zone_order(zone, order, gfp_mask, prio,
+ alloc_flags, ac_classzone_idx(ac));
rc = max(status, rc);
- /*
- * It takes at least one zone that wasn't lock contended
- * to clear all_zones_contended.
- */
- all_zones_contended &= zone_contended;
/* If a normal allocation would succeed, stop compacting */
if (zone_watermark_ok(zone, order, low_wmark_pages(zone),
@@ -1666,59 +1702,29 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
* succeeds in this zone.
*/
compaction_defer_reset(zone, order, false);
- /*
- * It is possible that async compaction aborted due to
- * need_resched() and the watermarks were ok thanks to
- * somebody else freeing memory. The allocation can
- * however still fail so we better signal the
- * need_resched() contention anyway (this will not
- * prevent the allocation attempt).
- */
- if (zone_contended == COMPACT_CONTENDED_SCHED)
- *contended = COMPACT_CONTENDED_SCHED;
- goto break_loop;
+ break;
}
- if (mode != MIGRATE_ASYNC && (status == COMPACT_COMPLETE ||
- status == COMPACT_PARTIAL_SKIPPED)) {
+ if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
+ status == COMPACT_PARTIAL_SKIPPED))
/*
* We think that allocation won't succeed in this zone
* so we defer compaction there. If it ends up
* succeeding after all, it will be reset.
*/
defer_compaction(zone, order);
- }
/*
* We might have stopped compacting due to need_resched() in
* async compaction, or due to a fatal signal detected. In that
- * case do not try further zones and signal need_resched()
- * contention.
- */
- if ((zone_contended == COMPACT_CONTENDED_SCHED)
- || fatal_signal_pending(current)) {
- *contended = COMPACT_CONTENDED_SCHED;
- goto break_loop;
- }
-
- continue;
-break_loop:
- /*
- * We might not have tried all the zones, so be conservative
- * and assume they are not all lock contended.
+ * case do not try further zones
*/
- all_zones_contended = 0;
- break;
+ if ((prio == COMPACT_PRIO_ASYNC && need_resched())
+ || fatal_signal_pending(current))
+ break;
}
- /*
- * If at least one zone wasn't deferred or skipped, we report if all
- * zones that were tried were lock contended.
- */
- if (rc > COMPACT_INACTIVE && all_zones_contended)
- *contended = COMPACT_CONTENDED_LOCK;
-
return rc;
}