aboutsummaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c756
1 files changed, 218 insertions, 538 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1a518684a32f..661576324c7f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -53,24 +53,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/vmscan.h>
-/*
- * reclaim_mode determines how the inactive list is shrunk
- * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages
- * RECLAIM_MODE_ASYNC: Do not block
- * RECLAIM_MODE_SYNC: Allow blocking e.g. call wait_on_page_writeback
- * RECLAIM_MODE_LUMPYRECLAIM: For high-order allocations, take a reference
- * page from the LRU and reclaim all pages within a
- * naturally aligned range
- * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of
- * order-0 pages and then compact the zone
- */
-typedef unsigned __bitwise__ reclaim_mode_t;
-#define RECLAIM_MODE_SINGLE ((__force reclaim_mode_t)0x01u)
-#define RECLAIM_MODE_ASYNC ((__force reclaim_mode_t)0x02u)
-#define RECLAIM_MODE_SYNC ((__force reclaim_mode_t)0x04u)
-#define RECLAIM_MODE_LUMPYRECLAIM ((__force reclaim_mode_t)0x08u)
-#define RECLAIM_MODE_COMPACTION ((__force reclaim_mode_t)0x10u)
-
struct scan_control {
/* Incremented by the number of inactive pages that were scanned */
unsigned long nr_scanned;
@@ -96,11 +78,8 @@ struct scan_control {
int order;
- /*
- * Intend to reclaim enough continuous memory rather than reclaim
- * enough amount of memory. i.e, mode for high order allocation.
- */
- reclaim_mode_t reclaim_mode;
+ /* Scan (total_size >> priority) pages at once */
+ int priority;
/*
* The memory cgroup that hit its limit and as a result is the
@@ -115,11 +94,6 @@ struct scan_control {
nodemask_t *nodemask;
};
-struct mem_cgroup_zone {
- struct mem_cgroup *mem_cgroup;
- struct zone *zone;
-};
-
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
#ifdef ARCH_HAS_PREFETCH
@@ -164,44 +138,21 @@ static bool global_reclaim(struct scan_control *sc)
{
return !sc->target_mem_cgroup;
}
-
-static bool scanning_global_lru(struct mem_cgroup_zone *mz)
-{
- return !mz->mem_cgroup;
-}
#else
static bool global_reclaim(struct scan_control *sc)
{
return true;
}
-
-static bool scanning_global_lru(struct mem_cgroup_zone *mz)
-{
- return true;
-}
#endif
-static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
+static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
{
- if (!scanning_global_lru(mz))
- return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone);
+ if (!mem_cgroup_disabled())
+ return mem_cgroup_get_lru_size(lruvec, lru);
- return &mz->zone->reclaim_stat;
+ return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru);
}
-static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
- enum lru_list lru)
-{
- if (!scanning_global_lru(mz))
- return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
- zone_to_nid(mz->zone),
- zone_idx(mz->zone),
- BIT(lru));
-
- return zone_page_state(mz->zone, NR_LRU_BASE + lru);
-}
-
-
/*
* Add a shrinker callback to be called from the vm
*/
@@ -364,39 +315,6 @@ out:
return ret;
}
-static void set_reclaim_mode(int priority, struct scan_control *sc,
- bool sync)
-{
- reclaim_mode_t syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC;
-
- /*
- * Initially assume we are entering either lumpy reclaim or
- * reclaim/compaction.Depending on the order, we will either set the
- * sync mode or just reclaim order-0 pages later.
- */
- if (COMPACTION_BUILD)
- sc->reclaim_mode = RECLAIM_MODE_COMPACTION;
- else
- sc->reclaim_mode = RECLAIM_MODE_LUMPYRECLAIM;
-
- /*
- * Avoid using lumpy reclaim or reclaim/compaction if possible by
- * restricting when its set to either costly allocations or when
- * under memory pressure
- */
- if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
- sc->reclaim_mode |= syncmode;
- else if (sc->order && priority < DEF_PRIORITY - 2)
- sc->reclaim_mode |= syncmode;
- else
- sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
-}
-
-static void reset_reclaim_mode(struct scan_control *sc)
-{
- sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
-}
-
static inline int is_page_cache_freeable(struct page *page)
{
/*
@@ -416,10 +334,6 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
return 1;
if (bdi == current->backing_dev_info)
return 1;
-
- /* lumpy reclaim for hugepage often need a lot of write */
- if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
- return 1;
return 0;
}
@@ -523,8 +437,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
/* synchronous write or broken a_ops? */
ClearPageReclaim(page);
}
- trace_mm_vmscan_writepage(page,
- trace_reclaim_flags(page, sc->reclaim_mode));
+ trace_mm_vmscan_writepage(page, trace_reclaim_flags(page));
inc_zone_page_state(page, NR_VMSCAN_WRITE);
return PAGE_SUCCESS;
}
@@ -701,19 +614,15 @@ enum page_references {
};
static enum page_references page_check_references(struct page *page,
- struct mem_cgroup_zone *mz,
struct scan_control *sc)
{
int referenced_ptes, referenced_page;
unsigned long vm_flags;
- referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags);
+ referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
+ &vm_flags);
referenced_page = TestClearPageReferenced(page);
- /* Lumpy reclaim - ignore references */
- if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
- return PAGEREF_RECLAIM;
-
/*
* Mlock lost the isolation race with us. Let try_to_unmap()
* move the page to the unevictable list.
@@ -722,7 +631,7 @@ static enum page_references page_check_references(struct page *page,
return PAGEREF_RECLAIM;
if (referenced_ptes) {
- if (PageAnon(page))
+ if (PageSwapBacked(page))
return PAGEREF_ACTIVATE;
/*
* All mapped pages start out with page table
@@ -763,9 +672,8 @@ static enum page_references page_check_references(struct page *page,
* shrink_page_list() returns the number of reclaimed pages
*/
static unsigned long shrink_page_list(struct list_head *page_list,
- struct mem_cgroup_zone *mz,
+ struct zone *zone,
struct scan_control *sc,
- int priority,
unsigned long *ret_nr_dirty,
unsigned long *ret_nr_writeback)
{
@@ -794,7 +702,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
goto keep;
VM_BUG_ON(PageActive(page));
- VM_BUG_ON(page_zone(page) != mz->zone);
+ VM_BUG_ON(page_zone(page) != zone);
sc->nr_scanned++;
@@ -813,22 +721,11 @@ static unsigned long shrink_page_list(struct list_head *page_list,
if (PageWriteback(page)) {
nr_writeback++;
- /*
- * Synchronous reclaim cannot queue pages for
- * writeback due to the possibility of stack overflow
- * but if it encounters a page under writeback, wait
- * for the IO to complete.
- */
- if ((sc->reclaim_mode & RECLAIM_MODE_SYNC) &&
- may_enter_fs)
- wait_on_page_writeback(page);
- else {
- unlock_page(page);
- goto keep_lumpy;
- }
+ unlock_page(page);
+ goto keep;
}
- references = page_check_references(page, mz, sc);
+ references = page_check_references(page, sc);
switch (references) {
case PAGEREF_ACTIVATE:
goto activate_locked;
@@ -879,7 +776,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* unless under significant pressure.
*/
if (page_is_file_cache(page) &&
- (!current_is_kswapd() || priority >= DEF_PRIORITY - 2)) {
+ (!current_is_kswapd() ||
+ sc->priority >= DEF_PRIORITY - 2)) {
/*
* Immediately reclaim when written back.
* Similar in principal to deactivate_page()
@@ -908,7 +806,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
goto activate_locked;
case PAGE_SUCCESS:
if (PageWriteback(page))
- goto keep_lumpy;
+ goto keep;
if (PageDirty(page))
goto keep;
@@ -994,7 +892,6 @@ cull_mlocked:
try_to_free_swap(page);
unlock_page(page);
putback_lru_page(page);
- reset_reclaim_mode(sc);
continue;
activate_locked:
@@ -1007,8 +904,6 @@ activate_locked:
keep_locked:
unlock_page(page);
keep:
- reset_reclaim_mode(sc);
-keep_lumpy:
list_add(&page->lru, &ret_pages);
VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
}
@@ -1020,7 +915,7 @@ keep_lumpy:
* will encounter the same problem
*/
if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc))
- zone_set_flag(mz->zone, ZONE_CONGESTED);
+ zone_set_flag(zone, ZONE_CONGESTED);
free_hot_cold_page_list(&free_pages, 1);
@@ -1041,34 +936,15 @@ keep_lumpy:
*
* returns 0 on success, -ve errno on failure.
*/
-int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
+int __isolate_lru_page(struct page *page, isolate_mode_t mode)
{
- bool all_lru_mode;
int ret = -EINVAL;
/* Only take pages on the LRU. */
if (!PageLRU(page))
return ret;
- all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) ==
- (ISOLATE_ACTIVE|ISOLATE_INACTIVE);
-
- /*
- * When checking the active state, we need to be sure we are
- * dealing with comparible boolean values. Take the logical not
- * of each.
- */
- if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE))
- return ret;
-
- if (!all_lru_mode && !!page_is_file_cache(page) != file)
- return ret;
-
- /*
- * When this function is being called for lumpy reclaim, we
- * initially look into all LRU pages, active, inactive and
- * unevictable; only give shrink_page_list evictable pages.
- */
+ /* Do not give back unevictable pages for compaction */
if (PageUnevictable(page))
return ret;
@@ -1135,54 +1011,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
* Appropriate locks must be held before calling this function.
*
* @nr_to_scan: The number of pages to look through on the list.
- * @mz: The mem_cgroup_zone to pull pages from.
+ * @lruvec: The LRU vector to pull pages from.
* @dst: The temp list to put pages on to.
* @nr_scanned: The number of pages that were scanned.
* @sc: The scan_control struct for this reclaim session
* @mode: One of the LRU isolation modes
- * @active: True [1] if isolating active pages
- * @file: True [1] if isolating file [!anon] pages
+ * @lru: LRU list id for isolating
*
* returns how many pages were moved onto *@dst.
*/
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
- struct mem_cgroup_zone *mz, struct list_head *dst,
+ struct lruvec *lruvec, struct list_head *dst,
unsigned long *nr_scanned, struct scan_control *sc,
- isolate_mode_t mode, int active, int file)
+ isolate_mode_t mode, enum lru_list lru)
{
- struct lruvec *lruvec;
- struct list_head *src;
+ struct list_head *src = &lruvec->lists[lru];
unsigned long nr_taken = 0;
- unsigned long nr_lumpy_taken = 0;
- unsigned long nr_lumpy_dirty = 0;
- unsigned long nr_lumpy_failed = 0;
unsigned long scan;
- int lru = LRU_BASE;
-
- lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
- if (active)
- lru += LRU_ACTIVE;
- if (file)
- lru += LRU_FILE;
- src = &lruvec->lists[lru];
for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
struct page *page;
- unsigned long pfn;
- unsigned long end_pfn;
- unsigned long page_pfn;
- int zone_id;
+ int nr_pages;
page = lru_to_page(src);
prefetchw_prev_lru_page(page, src, flags);
VM_BUG_ON(!PageLRU(page));
- switch (__isolate_lru_page(page, mode, file)) {
+ switch (__isolate_lru_page(page, mode)) {
case 0:
- mem_cgroup_lru_del(page);
+ nr_pages = hpage_nr_pages(page);
+ mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
list_move(&page->lru, dst);
- nr_taken += hpage_nr_pages(page);
+ nr_taken += nr_pages;
break;
case -EBUSY:
@@ -1193,93 +1054,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
default:
BUG();
}
-
- if (!sc->order || !(sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM))
- continue;
-
- /*
- * Attempt to take all pages in the order aligned region
- * surrounding the tag page. Only take those pages of
- * the same active state as that tag page. We may safely
- * round the target page pfn down to the requested order
- * as the mem_map is guaranteed valid out to MAX_ORDER,
- * where that page is in a different zone we will detect
- * it from its zone id and abort this block scan.
- */
- zone_id = page_zone_id(page);
- page_pfn = page_to_pfn(page);
- pfn = page_pfn & ~((1 << sc->order) - 1);
- end_pfn = pfn + (1 << sc->order);
- for (; pfn < end_pfn; pfn++) {
- struct page *cursor_page;
-
- /* The target page is in the block, ignore it. */
- if (unlikely(pfn == page_pfn))
- continue;
-
- /* Avoid holes within the zone. */
- if (unlikely(!pfn_valid_within(pfn)))
- break;
-
- cursor_page = pfn_to_page(pfn);
-
- /* Check that we have not crossed a zone boundary. */
- if (unlikely(page_zone_id(cursor_page) != zone_id))
- break;
-
- /*
- * If we don't have enough swap space, reclaiming of
- * anon page which don't already have a swap slot is
- * pointless.
- */
- if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) &&
- !PageSwapCache(cursor_page))
- break;
-
- if (__isolate_lru_page(cursor_page, mode, file) == 0) {
- unsigned int isolated_pages;
-
- mem_cgroup_lru_del(cursor_page);
- list_move(&cursor_page->lru, dst);
- isolated_pages = hpage_nr_pages(cursor_page);
- nr_taken += isolated_pages;
- nr_lumpy_taken += isolated_pages;
- if (PageDirty(cursor_page))
- nr_lumpy_dirty += isolated_pages;
- scan++;
- pfn += isolated_pages - 1;
- } else {
- /*
- * Check if the page is freed already.
- *
- * We can't use page_count() as that
- * requires compound_head and we don't
- * have a pin on the page here. If a
- * page is tail, we may or may not
- * have isolated the head, so assume
- * it's not free, it'd be tricky to
- * track the head status without a
- * page pin.
- */
- if (!PageTail(cursor_page) &&
- !atomic_read(&cursor_page->_count))
- continue;
- break;
- }
- }
-
- /* If we break out of the loop above, lumpy reclaim failed */
- if (pfn < end_pfn)
- nr_lumpy_failed++;
}
*nr_scanned = scan;
-
- trace_mm_vmscan_lru_isolate(sc->order,
- nr_to_scan, scan,
- nr_taken,
- nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
- mode, file);
+ trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan,
+ nr_taken, mode, is_file_lru(lru));
return nr_taken;
}
@@ -1316,15 +1095,16 @@ int isolate_lru_page(struct page *page)
if (PageLRU(page)) {
struct zone *zone = page_zone(page);
+ struct lruvec *lruvec;
spin_lock_irq(&zone->lru_lock);
+ lruvec = mem_cgroup_page_lruvec(page, zone);
if (PageLRU(page)) {
int lru = page_lru(page);
- ret = 0;
get_page(page);
ClearPageLRU(page);
-
- del_page_from_lru_list(zone, page, lru);
+ del_page_from_lru_list(page, lruvec, lru);
+ ret = 0;
}
spin_unlock_irq(&zone->lru_lock);
}
@@ -1357,11 +1137,10 @@ static int too_many_isolated(struct zone *zone, int file,
}
static noinline_for_stack void
-putback_inactive_pages(struct mem_cgroup_zone *mz,
- struct list_head *page_list)
+putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
{
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
- struct zone *zone = mz->zone;
+ struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
+ struct zone *zone = lruvec_zone(lruvec);
LIST_HEAD(pages_to_free);
/*
@@ -1379,9 +1158,13 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
spin_lock_irq(&zone->lru_lock);
continue;
}
+
+ lruvec = mem_cgroup_page_lruvec(page, zone);
+
SetPageLRU(page);
lru = page_lru(page);
- add_page_to_lru_list(zone, page, lru);
+ add_page_to_lru_list(page, lruvec, lru);
+
if (is_active_lru(lru)) {
int file = is_file_lru(lru);
int numpages = hpage_nr_pages(page);
@@ -1390,7 +1173,7 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
if (put_page_testzero(page)) {
__ClearPageLRU(page);
__ClearPageActive(page);
- del_page_from_lru_list(zone, page, lru);
+ del_page_from_lru_list(page, lruvec, lru);
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
@@ -1407,112 +1190,24 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
list_splice(&pages_to_free, page_list);
}
-static noinline_for_stack void
-update_isolated_counts(struct mem_cgroup_zone *mz,
- struct list_head *page_list,
- unsigned long *nr_anon,
- unsigned long *nr_file)
-{
- struct zone *zone = mz->zone;
- unsigned int count[NR_LRU_LISTS] = { 0, };
- unsigned long nr_active = 0;
- struct page *page;
- int lru;
-
- /*
- * Count pages and clear active flags
- */
- list_for_each_entry(page, page_list, lru) {
- int numpages = hpage_nr_pages(page);
- lru = page_lru_base_type(page);
- if (PageActive(page)) {
- lru += LRU_ACTIVE;
- ClearPageActive(page);
- nr_active += numpages;
- }
- count[lru] += numpages;
- }
-
- preempt_disable();
- __count_vm_events(PGDEACTIVATE, nr_active);
-
- __mod_zone_page_state(zone, NR_ACTIVE_FILE,
- -count[LRU_ACTIVE_FILE]);
- __mod_zone_page_state(zone, NR_INACTIVE_FILE,
- -count[LRU_INACTIVE_FILE]);
- __mod_zone_page_state(zone, NR_ACTIVE_ANON,
- -count[LRU_ACTIVE_ANON]);
- __mod_zone_page_state(zone, NR_INACTIVE_ANON,
- -count[LRU_INACTIVE_ANON]);
-
- *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
- *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
-
- __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
- __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);
- preempt_enable();
-}
-
-/*
- * Returns true if a direct reclaim should wait on pages under writeback.
- *
- * If we are direct reclaiming for contiguous pages and we do not reclaim
- * everything in the list, try again and wait for writeback IO to complete.
- * This will stall high-order allocations noticeably. Only do that when really
- * need to free the pages under high memory pressure.
- */
-static inline bool should_reclaim_stall(unsigned long nr_taken,
- unsigned long nr_freed,
- int priority,
- struct scan_control *sc)
-{
- int lumpy_stall_priority;
-
- /* kswapd should not stall on sync IO */
- if (current_is_kswapd())
- return false;
-
- /* Only stall on lumpy reclaim */
- if (sc->reclaim_mode & RECLAIM_MODE_SINGLE)
- return false;
-
- /* If we have reclaimed everything on the isolated list, no stall */
- if (nr_freed == nr_taken)
- return false;
-
- /*
- * For high-order allocations, there are two stall thresholds.
- * High-cost allocations stall immediately where as lower
- * order allocations such as stacks require the scanning
- * priority to be much higher before stalling.
- */
- if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
- lumpy_stall_priority = DEF_PRIORITY;
- else
- lumpy_stall_priority = DEF_PRIORITY / 3;
-
- return priority <= lumpy_stall_priority;
-}
-
/*
* shrink_inactive_list() is a helper for shrink_zone(). It returns the number
* of reclaimed pages
*/
static noinline_for_stack unsigned long
-shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
- struct scan_control *sc, int priority, int file)
+shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
+ struct scan_control *sc, enum lru_list lru)
{
LIST_HEAD(page_list);
unsigned long nr_scanned;
unsigned long nr_reclaimed = 0;
unsigned long nr_taken;
- unsigned long nr_anon;
- unsigned long nr_file;
unsigned long nr_dirty = 0;
unsigned long nr_writeback = 0;
- isolate_mode_t isolate_mode = ISOLATE_INACTIVE;
- struct zone *zone = mz->zone;
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+ isolate_mode_t isolate_mode = 0;
+ int file = is_file_lru(lru);
+ struct zone *zone = lruvec_zone(lruvec);
+ struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
while (unlikely(too_many_isolated(zone, file, sc))) {
congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1522,10 +1217,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
return SWAP_CLUSTER_MAX;
}
- set_reclaim_mode(priority, sc, false);
- if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
- isolate_mode |= ISOLATE_ACTIVE;
-
lru_add_drain();
if (!sc->may_unmap)
@@ -1535,47 +1226,43 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
spin_lock_irq(&zone->lru_lock);
- nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list, &nr_scanned,
- sc, isolate_mode, 0, file);
+ nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
+ &nr_scanned, sc, isolate_mode, lru);
+
+ __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
+
if (global_reclaim(sc)) {
zone->pages_scanned += nr_scanned;
if (current_is_kswapd())
- __count_zone_vm_events(PGSCAN_KSWAPD, zone,
- nr_scanned);
+ __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned);
else
- __count_zone_vm_events(PGSCAN_DIRECT, zone,
- nr_scanned);
+ __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned);
}
spin_unlock_irq(&zone->lru_lock);
if (nr_taken == 0)
return 0;
- update_isolated_counts(mz, &page_list, &nr_anon, &nr_file);
-
- nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
+ nr_reclaimed = shrink_page_list(&page_list, zone, sc,
&nr_dirty, &nr_writeback);
- /* Check if we should syncronously wait for writeback */
- if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
- set_reclaim_mode(priority, sc, true);
- nr_reclaimed += shrink_page_list(&page_list, mz, sc,
- priority, &nr_dirty, &nr_writeback);
- }
-
spin_lock_irq(&zone->lru_lock);
- reclaim_stat->recent_scanned[0] += nr_anon;
- reclaim_stat->recent_scanned[1] += nr_file;
+ reclaim_stat->recent_scanned[file] += nr_taken;
- if (current_is_kswapd())
- __count_vm_events(KSWAPD_STEAL, nr_reclaimed);
- __count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
+ if (global_reclaim(sc)) {
+ if (current_is_kswapd())
+ __count_zone_vm_events(PGSTEAL_KSWAPD, zone,
+ nr_reclaimed);
+ else
+ __count_zone_vm_events(PGSTEAL_DIRECT, zone,
+ nr_reclaimed);
+ }
- putback_inactive_pages(mz, &page_list);
+ putback_inactive_pages(lruvec, &page_list);
- __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
- __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
@@ -1604,14 +1291,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
* DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any
* isolated page is PageWriteback
*/
- if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY-priority)))
+ if (nr_writeback && nr_writeback >=
+ (nr_taken >> (DEF_PRIORITY - sc->priority)))
wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
zone_idx(zone),
nr_scanned, nr_reclaimed,
- priority,
- trace_shrink_flags(file, sc->reclaim_mode));
+ sc->priority,
+ trace_shrink_flags(file));
return nr_reclaimed;
}
@@ -1633,30 +1321,32 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
* But we had to alter page->flags anyway.
*/
-static void move_active_pages_to_lru(struct zone *zone,
+static void move_active_pages_to_lru(struct lruvec *lruvec,
struct list_head *list,
struct list_head *pages_to_free,
enum lru_list lru)
{
+ struct zone *zone = lruvec_zone(lruvec);
unsigned long pgmoved = 0;
struct page *page;
+ int nr_pages;
while (!list_empty(list)) {
- struct lruvec *lruvec;
-
page = lru_to_page(list);
+ lruvec = mem_cgroup_page_lruvec(page, zone);
VM_BUG_ON(PageLRU(page));
SetPageLRU(page);
- lruvec = mem_cgroup_lru_add_list(zone, page, lru);
+ nr_pages = hpage_nr_pages(page);
+ mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
list_move(&page->lru, &lruvec->lists[lru]);
- pgmoved += hpage_nr_pages(page);
+ pgmoved += nr_pages;
if (put_page_testzero(page)) {
__ClearPageLRU(page);
__ClearPageActive(page);
- del_page_from_lru_list(zone, page, lru);
+ del_page_from_lru_list(page, lruvec, lru);
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
@@ -1672,9 +1362,9 @@ static void move_active_pages_to_lru(struct zone *zone,
}
static void shrink_active_list(unsigned long nr_to_scan,
- struct mem_cgroup_zone *mz,
+ struct lruvec *lruvec,
struct scan_control *sc,
- int priority, int file)
+ enum lru_list lru)
{
unsigned long nr_taken;
unsigned long nr_scanned;
@@ -1683,15 +1373,14 @@ static void shrink_active_list(unsigned long nr_to_scan,
LIST_HEAD(l_active);
LIST_HEAD(l_inactive);
struct page *page;
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+ struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
unsigned long nr_rotated = 0;
- isolate_mode_t isolate_mode = ISOLATE_ACTIVE;
- struct zone *zone = mz->zone;
+ isolate_mode_t isolate_mode = 0;
+ int file = is_file_lru(lru);
+ struct zone *zone = lruvec_zone(lruvec);
lru_add_drain();
- reset_reclaim_mode(sc);
-
if (!sc->may_unmap)
isolate_mode |= ISOLATE_UNMAPPED;
if (!sc->may_writepage)
@@ -1699,18 +1388,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
spin_lock_irq(&zone->lru_lock);
- nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold, &nr_scanned, sc,
- isolate_mode, 1, file);
+ nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
+ &nr_scanned, sc, isolate_mode, lru);
if (global_reclaim(sc))
zone->pages_scanned += nr_scanned;
reclaim_stat->recent_scanned[file] += nr_taken;
__count_zone_vm_events(PGREFILL, zone, nr_scanned);
- if (file)
- __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
- else
- __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
+ __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
spin_unlock_irq(&zone->lru_lock);
@@ -1732,7 +1418,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
}
}
- if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) {
+ if (page_referenced(page, 0, sc->target_mem_cgroup,
+ &vm_flags)) {
nr_rotated += hpage_nr_pages(page);
/*
* Identify referenced, file-backed active pages and
@@ -1765,10 +1452,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
*/
reclaim_stat->recent_rotated[file] += nr_rotated;
- move_active_pages_to_lru(zone, &l_active, &l_hold,
- LRU_ACTIVE + file * LRU_FILE);
- move_active_pages_to_lru(zone, &l_inactive, &l_hold,
- LRU_BASE + file * LRU_FILE);
+ move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
+ move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
@@ -1791,13 +1476,12 @@ static int inactive_anon_is_low_global(struct zone *zone)
/**
* inactive_anon_is_low - check if anonymous pages need to be deactivated
- * @zone: zone to check
- * @sc: scan control of this context
+ * @lruvec: LRU vector to check
*
* Returns true if the zone does not have enough inactive anon pages,
* meaning some active anon pages need to be deactivated.
*/
-static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
+static int inactive_anon_is_low(struct lruvec *lruvec)
{
/*
* If we don't have swap space, anonymous page deactivation
@@ -1806,14 +1490,13 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
if (!total_swap_pages)
return 0;
- if (!scanning_global_lru(mz))
- return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
- mz->zone);
+ if (!mem_cgroup_disabled())
+ return mem_cgroup_inactive_anon_is_low(lruvec);
- return inactive_anon_is_low_global(mz->zone);
+ return inactive_anon_is_low_global(lruvec_zone(lruvec));
}
#else
-static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
+static inline int inactive_anon_is_low(struct lruvec *lruvec)
{
return 0;
}
@@ -1831,7 +1514,7 @@ static int inactive_file_is_low_global(struct zone *zone)
/**
* inactive_file_is_low - check if file pages need to be deactivated
- * @mz: memory cgroup and zone to check
+ * @lruvec: LRU vector to check
*
* When the system is doing streaming IO, memory pressure here
* ensures that active file pages get deactivated, until more
@@ -1843,44 +1526,39 @@ static int inactive_file_is_low_global(struct zone *zone)
* This uses a different ratio than the anonymous pages, because
* the page cache uses a use-once replacement algorithm.
*/
-static int inactive_file_is_low(struct mem_cgroup_zone *mz)
+static int inactive_file_is_low(struct lruvec *lruvec)
{
- if (!scanning_global_lru(mz))
- return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
- mz->zone);
+ if (!mem_cgroup_disabled())
+ return mem_cgroup_inactive_file_is_low(lruvec);
- return inactive_file_is_low_global(mz->zone);
+ return inactive_file_is_low_global(lruvec_zone(lruvec));
}
-static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
+static int inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru)
{
- if (file)
- return inactive_file_is_low(mz);
+ if (is_file_lru(lru))
+ return inactive_file_is_low(lruvec);
else
- return inactive_anon_is_low(mz);
+ return inactive_anon_is_low(lruvec);
}
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
- struct mem_cgroup_zone *mz,
- struct scan_control *sc, int priority)
+ struct lruvec *lruvec, struct scan_control *sc)
{
- int file = is_file_lru(lru);
-
if (is_active_lru(lru)) {
- if (inactive_list_is_low(mz, file))
- shrink_active_list(nr_to_scan, mz, sc, priority, file);
+ if (inactive_list_is_low(lruvec, lru))
+ shrink_active_list(nr_to_scan, lruvec, sc, lru);
return 0;
}
- return shrink_inactive_list(nr_to_scan, mz, sc, priority, file);
+ return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
}
-static int vmscan_swappiness(struct mem_cgroup_zone *mz,
- struct scan_control *sc)
+static int vmscan_swappiness(struct scan_control *sc)
{
if (global_reclaim(sc))
return vm_swappiness;
- return mem_cgroup_swappiness(mz->mem_cgroup);
+ return mem_cgroup_swappiness(sc->target_mem_cgroup);
}
/*
@@ -1891,17 +1569,18 @@ static int vmscan_swappiness(struct mem_cgroup_zone *mz,
*
* nr[0] = anon pages to scan; nr[1] = file pages to scan
*/
-static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
- unsigned long *nr, int priority)
+static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
+ unsigned long *nr)
{
unsigned long anon, file, free;
unsigned long anon_prio, file_prio;
unsigned long ap, fp;
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+ struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
u64 fraction[2], denominator;
enum lru_list lru;
int noswap = 0;
bool force_scan = false;
+ struct zone *zone = lruvec_zone(lruvec);
/*
* If the zone or memcg is small, nr[l] can be 0. This
@@ -1913,7 +1592,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
* latencies, so it's better to scan a minimum amount there as
* well.
*/
- if (current_is_kswapd() && mz->zone->all_unreclaimable)
+ if (current_is_kswapd() && zone->all_unreclaimable)
force_scan = true;
if (!global_reclaim(sc))
force_scan = true;
@@ -1927,16 +1606,16 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
goto out;
}
- anon = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) +
- zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
- file = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) +
- zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
+ anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
+ get_lru_size(lruvec, LRU_INACTIVE_ANON);
+ file = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
+ get_lru_size(lruvec, LRU_INACTIVE_FILE);
if (global_reclaim(sc)) {
- free = zone_page_state(mz->zone, NR_FREE_PAGES);
+ free = zone_page_state(zone, NR_FREE_PAGES);
/* If we have very few page cache pages,
force-scan anon pages. */
- if (unlikely(file + free <= high_wmark_pages(mz->zone))) {
+ if (unlikely(file + free <= high_wmark_pages(zone))) {
fraction[0] = 1;
fraction[1] = 0;
denominator = 1;
@@ -1948,8 +1627,8 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
* With swappiness at 100, anonymous and file have the same priority.
* This scanning priority is essentially the inverse of IO cost.
*/
- anon_prio = vmscan_swappiness(mz, sc);
- file_prio = 200 - vmscan_swappiness(mz, sc);
+ anon_prio = vmscan_swappiness(sc);
+ file_prio = 200 - anon_prio;
/*
* OK, so we have swap space and a fair amount of page cache
@@ -1962,7 +1641,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
*
* anon in [0], file in [1]
*/
- spin_lock_irq(&mz->zone->lru_lock);
+ spin_lock_irq(&zone->lru_lock);
if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
reclaim_stat->recent_scanned[0] /= 2;
reclaim_stat->recent_rotated[0] /= 2;
@@ -1978,12 +1657,12 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
* proportional to the fraction of recently scanned pages on
* each list that were recently referenced and in active use.
*/
- ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1);
+ ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1);
ap /= reclaim_stat->recent_rotated[0] + 1;
- fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
+ fp = file_prio * (reclaim_stat->recent_scanned[1] + 1);
fp /= reclaim_stat->recent_rotated[1] + 1;
- spin_unlock_irq(&mz->zone->lru_lock);
+ spin_unlock_irq(&zone->lru_lock);
fraction[0] = ap;
fraction[1] = fp;
@@ -1993,9 +1672,9 @@ out:
int file = is_file_lru(lru);
unsigned long scan;
- scan = zone_nr_lru_pages(mz, lru);
- if (priority || noswap) {
- scan >>= priority;
+ scan = get_lru_size(lruvec, lru);
+ if (sc->priority || noswap || !vmscan_swappiness(sc)) {
+ scan >>= sc->priority;
if (!scan && force_scan)
scan = SWAP_CLUSTER_MAX;
scan = div64_u64(scan * fraction[file], denominator);
@@ -2004,14 +1683,25 @@ out:
}
}
+/* Use reclaim/compaction for costly allocs or under memory pressure */
+static bool in_reclaim_compaction(struct scan_control *sc)
+{
+ if (COMPACTION_BUILD && sc->order &&
+ (sc->order > PAGE_ALLOC_COSTLY_ORDER ||
+ sc->priority < DEF_PRIORITY - 2))
+ return true;
+
+ return false;
+}
+
/*
- * Reclaim/compaction depends on a number of pages being freed. To avoid
- * disruption to the system, a small number of order-0 pages continue to be
- * rotated and reclaimed in the normal fashion. However, by the time we get
- * back to the allocator and call try_to_compact_zone(), we ensure that
- * there are enough free pages for it to be likely successful
+ * Reclaim/compaction is used for high-order allocation requests. It reclaims
+ * order-0 pages before compacting the zone. should_continue_reclaim() returns
+ * true if more pages should be reclaimed such that when the page allocator
+ * calls try_to_compact_zone() that it will have enough free pages to succeed.
+ * It will give up earlier than that if there is difficulty reclaiming pages.
*/
-static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
+static inline bool should_continue_reclaim(struct lruvec *lruvec,
unsigned long nr_reclaimed,
unsigned long nr_scanned,
struct scan_control *sc)
@@ -2020,7 +1710,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
unsigned long inactive_lru_pages;
/* If not in reclaim/compaction mode, stop */
- if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION))
+ if (!in_reclaim_compaction(sc))
return false;
/* Consider stopping depending on scan and reclaim activity */
@@ -2051,15 +1741,15 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
* inactive lists are large enough, continue reclaiming
*/
pages_for_compaction = (2UL << sc->order);
- inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
+ inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE);
if (nr_swap_pages > 0)
- inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
+ inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON);
if (sc->nr_reclaimed < pages_for_compaction &&
inactive_lru_pages > pages_for_compaction)
return true;
/* If compaction would go ahead or the allocation would succeed, stop */
- switch (compaction_suitable(mz->zone, sc->order)) {
+ switch (compaction_suitable(lruvec_zone(lruvec), sc->order)) {
case COMPACT_PARTIAL:
case COMPACT_CONTINUE:
return false;
@@ -2071,8 +1761,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
-static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
- struct scan_control *sc)
+static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
{
unsigned long nr[NR_LRU_LISTS];
unsigned long nr_to_scan;
@@ -2084,7 +1773,7 @@ static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
restart:
nr_reclaimed = 0;
nr_scanned = sc->nr_scanned;
- get_scan_count(mz, sc, nr, priority);
+ get_scan_count(lruvec, sc, nr);
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2096,7 +1785,7 @@ restart:
nr[lru] -= nr_to_scan;
nr_reclaimed += shrink_list(lru, nr_to_scan,
- mz, sc, priority);
+ lruvec, sc);
}
}
/*
@@ -2107,7 +1796,8 @@ restart:
* with multiple processes reclaiming pages, the total
* freeing target can get unreasonably large.
*/
- if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY)
+ if (nr_reclaimed >= nr_to_reclaim &&
+ sc->priority < DEF_PRIORITY)
break;
}
blk_finish_plug(&plug);
@@ -2117,35 +1807,33 @@ restart:
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_anon_is_low(mz))
- shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0);
+ if (inactive_anon_is_low(lruvec))
+ shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+ sc, LRU_ACTIVE_ANON);
/* reclaim/compaction might need reclaim to continue */
- if (should_continue_reclaim(mz, nr_reclaimed,
- sc->nr_scanned - nr_scanned, sc))
+ if (should_continue_reclaim(lruvec, nr_reclaimed,
+ sc->nr_scanned - nr_scanned, sc))
goto restart;
throttle_vm_writeout(sc->gfp_mask);
}
-static void shrink_zone(int priority, struct zone *zone,
- struct scan_control *sc)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
{
struct mem_cgroup *root = sc->target_mem_cgroup;
struct mem_cgroup_reclaim_cookie reclaim = {
.zone = zone,
- .priority = priority,
+ .priority = sc->priority,
};
struct mem_cgroup *memcg;
memcg = mem_cgroup_iter(root, NULL, &reclaim);
do {
- struct mem_cgroup_zone mz = {
- .mem_cgroup = memcg,
- .zone = zone,
- };
+ struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+
+ shrink_lruvec(lruvec, sc);
- shrink_mem_cgroup_zone(priority, &mz, sc);
/*
* Limit reclaim has historically picked one memcg and
* scanned it with decreasing priority levels until
@@ -2221,8 +1909,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
* the caller that it should consider retrying the allocation instead of
* further reclaim.
*/
-static bool shrink_zones(int priority, struct zonelist *zonelist,
- struct scan_control *sc)
+static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
{
struct zoneref *z;
struct zone *zone;
@@ -2249,7 +1936,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
if (global_reclaim(sc)) {
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
continue;
- if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ if (zone->all_unreclaimable &&
+ sc->priority != DEF_PRIORITY)
continue; /* Let kswapd poll it */
if (COMPACTION_BUILD) {
/*
@@ -2281,7 +1969,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
/* need some check for avoid more shrink_zone() */
}
- shrink_zone(priority, zone, sc);
+ shrink_zone(zone, sc);
}
return aborted_reclaim;
@@ -2332,7 +2020,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
struct scan_control *sc,
struct shrink_control *shrink)
{
- int priority;
unsigned long total_scanned = 0;
struct reclaim_state *reclaim_state = current->reclaim_state;
struct zoneref *z;
@@ -2345,11 +2032,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
if (global_reclaim(sc))
count_vm_event(ALLOCSTALL);
- for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+ do {
sc->nr_scanned = 0;
- if (!priority)
- disable_swap_token(sc->target_mem_cgroup);
- aborted_reclaim = shrink_zones(priority, zonelist, sc);
+ aborted_reclaim = shrink_zones(zonelist, sc);
/*
* Don't shrink slabs when reclaiming memory from
@@ -2391,7 +2076,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
/* Take a nap, wait for some writeback to complete */
if (!sc->hibernation_mode && sc->nr_scanned &&
- priority < DEF_PRIORITY - 2) {
+ sc->priority < DEF_PRIORITY - 2) {
struct zone *preferred_zone;
first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
@@ -2399,7 +2084,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
&preferred_zone);
wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
}
- }
+ } while (--sc->priority >= 0);
out:
delayacct_freepages_end();
@@ -2437,6 +2122,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
.may_unmap = 1,
.may_swap = 1,
.order = order,
+ .priority = DEF_PRIORITY,
.target_mem_cgroup = NULL,
.nodemask = nodemask,
};
@@ -2469,17 +2155,15 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
.may_unmap = 1,
.may_swap = !noswap,
.order = 0,
+ .priority = 0,
.target_mem_cgroup = memcg,
};
- struct mem_cgroup_zone mz = {
- .mem_cgroup = memcg,
- .zone = zone,
- };
+ struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
- trace_mm_vmscan_memcg_softlimit_reclaim_begin(0,
+ trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
sc.may_writepage,
sc.gfp_mask);
@@ -2490,7 +2174,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
* will pick up pages from other mem cgroup's as well. We hack
* the priority and make it zero.
*/
- shrink_mem_cgroup_zone(0, &mz, &sc);
+ shrink_lruvec(lruvec, &sc);
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
@@ -2511,6 +2195,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
.may_swap = !noswap,
.nr_to_reclaim = SWAP_CLUSTER_MAX,
.order = 0,
+ .priority = DEF_PRIORITY,
.target_mem_cgroup = memcg,
.nodemask = NULL, /* we don't care the placement */
.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
@@ -2541,8 +2226,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
}
#endif
-static void age_active_anon(struct zone *zone, struct scan_control *sc,
- int priority)
+static void age_active_anon(struct zone *zone, struct scan_control *sc)
{
struct mem_cgroup *memcg;
@@ -2551,14 +2235,11 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc,
memcg = mem_cgroup_iter(NULL, NULL, NULL);
do {
- struct mem_cgroup_zone mz = {
- .mem_cgroup = memcg,
- .zone = zone,
- };
+ struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
- if (inactive_anon_is_low(&mz))
- shrink_active_list(SWAP_CLUSTER_MAX, &mz,
- sc, priority, 0);
+ if (inactive_anon_is_low(lruvec))
+ shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+ sc, LRU_ACTIVE_ANON);
memcg = mem_cgroup_iter(NULL, memcg, NULL);
} while (memcg);
@@ -2667,7 +2348,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
{
int all_zones_ok;
unsigned long balanced;
- int priority;
int i;
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
unsigned long total_scanned;
@@ -2691,18 +2371,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
};
loop_again:
total_scanned = 0;
+ sc.priority = DEF_PRIORITY;
sc.nr_reclaimed = 0;
sc.may_writepage = !laptop_mode;
count_vm_event(PAGEOUTRUN);
- for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+ do {
unsigned long lru_pages = 0;
int has_under_min_watermark_zone = 0;
- /* The swap token gets in the way of swapout... */
- if (!priority)
- disable_swap_token(NULL);
-
all_zones_ok = 1;
balanced = 0;
@@ -2716,14 +2393,15 @@ loop_again:
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ if (zone->all_unreclaimable &&
+ sc.priority != DEF_PRIORITY)
continue;
/*
* Do some background aging of the anon list, to give
* pages a chance to be referenced before reclaiming.
*/
- age_active_anon(zone, &sc, priority);
+ age_active_anon(zone, &sc);
/*
* If the number of buffer_heads in the machine
@@ -2771,7 +2449,8 @@ loop_again:
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ if (zone->all_unreclaimable &&
+ sc.priority != DEF_PRIORITY)
continue;
sc.nr_scanned = 0;
@@ -2815,7 +2494,7 @@ loop_again:
!zone_watermark_ok_safe(zone, testorder,
high_wmark_pages(zone) + balance_gap,
end_zone, 0)) {
- shrink_zone(priority, zone, &sc);
+ shrink_zone(zone, &sc);
reclaim_state->reclaimed_slab = 0;
nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages);
@@ -2872,7 +2551,7 @@ loop_again:
* OK, kswapd is getting into trouble. Take a nap, then take
* another pass across the zones.
*/
- if (total_scanned && (priority < DEF_PRIORITY - 2)) {
+ if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) {
if (has_under_min_watermark_zone)
count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
else
@@ -2887,7 +2566,7 @@ loop_again:
*/
if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
break;
- }
+ } while (--sc.priority >= 0);
out:
/*
@@ -2937,7 +2616,8 @@ out:
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ if (zone->all_unreclaimable &&
+ sc.priority != DEF_PRIORITY)
continue;
/* Would compaction fail due to lack of free memory? */
@@ -3204,6 +2884,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
.nr_to_reclaim = nr_to_reclaim,
.hibernation_mode = 1,
.order = 0,
+ .priority = DEF_PRIORITY,
};
struct shrink_control shrink = {
.gfp_mask = sc.gfp_mask,
@@ -3274,14 +2955,17 @@ int kswapd_run(int nid)
}
/*
- * Called by memory hotplug when all memory in a node is offlined.
+ * Called by memory hotplug when all memory in a node is offlined. Caller must
+ * hold lock_memory_hotplug().
*/
void kswapd_stop(int nid)
{
struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
- if (kswapd)
+ if (kswapd) {
kthread_stop(kswapd);
+ NODE_DATA(nid)->kswapd = NULL;
+ }
}
static int __init kswapd_init(void)
@@ -3381,7 +3065,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
const unsigned long nr_pages = 1 << order;
struct task_struct *p = current;
struct reclaim_state reclaim_state;
- int priority;
struct scan_control sc = {
.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
@@ -3390,6 +3073,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
SWAP_CLUSTER_MAX),
.gfp_mask = gfp_mask,
.order = order,
+ .priority = ZONE_RECLAIM_PRIORITY,
};
struct shrink_control shrink = {
.gfp_mask = sc.gfp_mask,
@@ -3412,11 +3096,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
* Free memory by calling shrink zone with increasing
* priorities until we have enough memory freed.
*/
- priority = ZONE_RECLAIM_PRIORITY;
do {
- shrink_zone(priority, zone, &sc);
- priority--;
- } while (priority >= 0 && sc.nr_reclaimed < nr_pages);
+ shrink_zone(zone, &sc);
+ } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
}
nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
@@ -3531,7 +3213,7 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
if (mapping_unevictable(page_mapping(page)))
return 0;
- if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page)))
+ if (PageMlocked(page) || (vma && mlocked_vma_newpage(vma, page)))
return 0;
return 1;
@@ -3567,6 +3249,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
zone = pagezone;
spin_lock_irq(&zone->lru_lock);
}
+ lruvec = mem_cgroup_page_lruvec(page, zone);
if (!PageLRU(page) || !PageUnevictable(page))
continue;
@@ -3576,11 +3259,8 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
VM_BUG_ON(PageActive(page));
ClearPageUnevictable(page);
- __dec_zone_state(zone, NR_UNEVICTABLE);
- lruvec = mem_cgroup_lru_move_lists(zone, page,
- LRU_UNEVICTABLE, lru);
- list_move(&page->lru, &lruvec->lists[lru]);
- __inc_zone_state(zone, NR_INACTIVE_ANON + lru);
+ del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE);
+ add_page_to_lru_list(page, lruvec, lru);
pgrescued++;
}
}