aboutsummaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c462
1 files changed, 184 insertions, 278 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8deb5f4da4d9..347b3ff2a478 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -78,6 +78,9 @@ struct scan_control {
int order;
+ /* Scan (total_size >> priority) pages at once */
+ int priority;
+
/*
* The memory cgroup that hit its limit and as a result is the
* primary target of this reclaim invocation.
@@ -91,11 +94,6 @@ struct scan_control {
nodemask_t *nodemask;
};
-struct mem_cgroup_zone {
- struct mem_cgroup *mem_cgroup;
- struct zone *zone;
-};
-
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
#ifdef ARCH_HAS_PREFETCH
@@ -147,24 +145,14 @@ static bool global_reclaim(struct scan_control *sc)
}
#endif
-static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
-{
- return &mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup)->reclaim_stat;
-}
-
-static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
- enum lru_list lru)
+static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
{
if (!mem_cgroup_disabled())
- return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
- zone_to_nid(mz->zone),
- zone_idx(mz->zone),
- BIT(lru));
+ return mem_cgroup_get_lru_size(lruvec, lru);
- return zone_page_state(mz->zone, NR_LRU_BASE + lru);
+ return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru);
}
-
/*
* Add a shrinker callback to be called from the vm
*/
@@ -626,7 +614,6 @@ enum page_references {
};
static enum page_references page_check_references(struct page *page,
- struct mem_cgroup_zone *mz,
struct scan_control *sc)
{
int referenced_ptes, referenced_page;
@@ -685,9 +672,8 @@ static enum page_references page_check_references(struct page *page,
* shrink_page_list() returns the number of reclaimed pages
*/
static unsigned long shrink_page_list(struct list_head *page_list,
- struct mem_cgroup_zone *mz,
+ struct zone *zone,
struct scan_control *sc,
- int priority,
unsigned long *ret_nr_dirty,
unsigned long *ret_nr_writeback)
{
@@ -716,7 +702,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
goto keep;
VM_BUG_ON(PageActive(page));
- VM_BUG_ON(page_zone(page) != mz->zone);
+ VM_BUG_ON(page_zone(page) != zone);
sc->nr_scanned++;
@@ -739,7 +725,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
goto keep;
}
- references = page_check_references(page, mz, sc);
+ references = page_check_references(page, sc);
switch (references) {
case PAGEREF_ACTIVATE:
goto activate_locked;
@@ -790,7 +776,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* unless under significant pressure.
*/
if (page_is_file_cache(page) &&
- (!current_is_kswapd() || priority >= DEF_PRIORITY - 2)) {
+ (!current_is_kswapd() ||
+ sc->priority >= DEF_PRIORITY - 2)) {
/*
* Immediately reclaim when written back.
* Similar in principal to deactivate_page()
@@ -928,7 +915,7 @@ keep:
* will encounter the same problem
*/
if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc))
- zone_set_flag(mz->zone, ZONE_CONGESTED);
+ zone_set_flag(zone, ZONE_CONGESTED);
free_hot_cold_page_list(&free_pages, 1);
@@ -949,29 +936,14 @@ keep:
*
* returns 0 on success, -ve errno on failure.
*/
-int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
+int __isolate_lru_page(struct page *page, isolate_mode_t mode)
{
- bool all_lru_mode;
int ret = -EINVAL;
/* Only take pages on the LRU. */
if (!PageLRU(page))
return ret;
- all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) ==
- (ISOLATE_ACTIVE|ISOLATE_INACTIVE);
-
- /*
- * When checking the active state, we need to be sure we are
- * dealing with comparible boolean values. Take the logical not
- * of each.
- */
- if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE))
- return ret;
-
- if (!all_lru_mode && !!page_is_file_cache(page) != file)
- return ret;
-
/* Do not give back unevictable pages for compaction */
if (PageUnevictable(page))
return ret;
@@ -1039,47 +1011,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
* Appropriate locks must be held before calling this function.
*
* @nr_to_scan: The number of pages to look through on the list.
- * @mz: The mem_cgroup_zone to pull pages from.
+ * @lruvec: The LRU vector to pull pages from.
* @dst: The temp list to put pages on to.
* @nr_scanned: The number of pages that were scanned.
* @sc: The scan_control struct for this reclaim session
* @mode: One of the LRU isolation modes
- * @active: True [1] if isolating active pages
- * @file: True [1] if isolating file [!anon] pages
+ * @lru: LRU list id for isolating
*
* returns how many pages were moved onto *@dst.
*/
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
- struct mem_cgroup_zone *mz, struct list_head *dst,
+ struct lruvec *lruvec, struct list_head *dst,
unsigned long *nr_scanned, struct scan_control *sc,
- isolate_mode_t mode, int active, int file)
+ isolate_mode_t mode, enum lru_list lru)
{
- struct lruvec *lruvec;
- struct list_head *src;
+ struct list_head *src = &lruvec->lists[lru];
unsigned long nr_taken = 0;
unsigned long scan;
- int lru = LRU_BASE;
-
- lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
- if (active)
- lru += LRU_ACTIVE;
- if (file)
- lru += LRU_FILE;
- src = &lruvec->lists[lru];
for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
struct page *page;
+ int nr_pages;
page = lru_to_page(src);
prefetchw_prev_lru_page(page, src, flags);
VM_BUG_ON(!PageLRU(page));
- switch (__isolate_lru_page(page, mode, file)) {
+ switch (__isolate_lru_page(page, mode)) {
case 0:
- mem_cgroup_lru_del(page);
+ nr_pages = hpage_nr_pages(page);
+ mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
list_move(&page->lru, dst);
- nr_taken += hpage_nr_pages(page);
+ nr_taken += nr_pages;
break;
case -EBUSY:
@@ -1093,11 +1057,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
}
*nr_scanned = scan;
-
- trace_mm_vmscan_lru_isolate(sc->order,
- nr_to_scan, scan,
- nr_taken,
- mode, file);
+ trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan,
+ nr_taken, mode, is_file_lru(lru));
return nr_taken;
}
@@ -1134,15 +1095,16 @@ int isolate_lru_page(struct page *page)
if (PageLRU(page)) {
struct zone *zone = page_zone(page);
+ struct lruvec *lruvec;
spin_lock_irq(&zone->lru_lock);
+ lruvec = mem_cgroup_page_lruvec(page, zone);
if (PageLRU(page)) {
int lru = page_lru(page);
- ret = 0;
get_page(page);
ClearPageLRU(page);
-
- del_page_from_lru_list(zone, page, lru);
+ del_page_from_lru_list(page, lruvec, lru);
+ ret = 0;
}
spin_unlock_irq(&zone->lru_lock);
}
@@ -1175,11 +1137,10 @@ static int too_many_isolated(struct zone *zone, int file,
}
static noinline_for_stack void
-putback_inactive_pages(struct mem_cgroup_zone *mz,
- struct list_head *page_list)
+putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
{
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
- struct zone *zone = mz->zone;
+ struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
+ struct zone *zone = lruvec_zone(lruvec);
LIST_HEAD(pages_to_free);
/*
@@ -1197,9 +1158,13 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
spin_lock_irq(&zone->lru_lock);
continue;
}
+
+ lruvec = mem_cgroup_page_lruvec(page, zone);
+
SetPageLRU(page);
lru = page_lru(page);
- add_page_to_lru_list(zone, page, lru);
+ add_page_to_lru_list(page, lruvec, lru);
+
if (is_active_lru(lru)) {
int file = is_file_lru(lru);
int numpages = hpage_nr_pages(page);
@@ -1208,7 +1173,7 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
if (put_page_testzero(page)) {
__ClearPageLRU(page);
__ClearPageActive(page);
- del_page_from_lru_list(zone, page, lru);
+ del_page_from_lru_list(page, lruvec, lru);
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
@@ -1225,71 +1190,24 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
list_splice(&pages_to_free, page_list);
}
-static noinline_for_stack void
-update_isolated_counts(struct mem_cgroup_zone *mz,
- struct list_head *page_list,
- unsigned long *nr_anon,
- unsigned long *nr_file)
-{
- struct zone *zone = mz->zone;
- unsigned int count[NR_LRU_LISTS] = { 0, };
- unsigned long nr_active = 0;
- struct page *page;
- int lru;
-
- /*
- * Count pages and clear active flags
- */
- list_for_each_entry(page, page_list, lru) {
- int numpages = hpage_nr_pages(page);
- lru = page_lru_base_type(page);
- if (PageActive(page)) {
- lru += LRU_ACTIVE;
- ClearPageActive(page);
- nr_active += numpages;
- }
- count[lru] += numpages;
- }
-
- preempt_disable();
- __count_vm_events(PGDEACTIVATE, nr_active);
-
- __mod_zone_page_state(zone, NR_ACTIVE_FILE,
- -count[LRU_ACTIVE_FILE]);
- __mod_zone_page_state(zone, NR_INACTIVE_FILE,
- -count[LRU_INACTIVE_FILE]);
- __mod_zone_page_state(zone, NR_ACTIVE_ANON,
- -count[LRU_ACTIVE_ANON]);
- __mod_zone_page_state(zone, NR_INACTIVE_ANON,
- -count[LRU_INACTIVE_ANON]);
-
- *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
- *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
-
- __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
- __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);
- preempt_enable();
-}
-
/*
* shrink_inactive_list() is a helper for shrink_zone(). It returns the number
* of reclaimed pages
*/
static noinline_for_stack unsigned long
-shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
- struct scan_control *sc, int priority, int file)
+shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
+ struct scan_control *sc, enum lru_list lru)
{
LIST_HEAD(page_list);
unsigned long nr_scanned;
unsigned long nr_reclaimed = 0;
unsigned long nr_taken;
- unsigned long nr_anon;
- unsigned long nr_file;
unsigned long nr_dirty = 0;
unsigned long nr_writeback = 0;
- isolate_mode_t isolate_mode = ISOLATE_INACTIVE;
- struct zone *zone = mz->zone;
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+ isolate_mode_t isolate_mode = 0;
+ int file = is_file_lru(lru);
+ struct zone *zone = lruvec_zone(lruvec);
+ struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
while (unlikely(too_many_isolated(zone, file, sc))) {
congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1308,31 +1226,30 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
spin_lock_irq(&zone->lru_lock);
- nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list, &nr_scanned,
- sc, isolate_mode, 0, file);
+ nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
+ &nr_scanned, sc, isolate_mode, lru);
+
+ __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
+
if (global_reclaim(sc)) {
zone->pages_scanned += nr_scanned;
if (current_is_kswapd())
- __count_zone_vm_events(PGSCAN_KSWAPD, zone,
- nr_scanned);
+ __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned);
else
- __count_zone_vm_events(PGSCAN_DIRECT, zone,
- nr_scanned);
+ __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned);
}
spin_unlock_irq(&zone->lru_lock);
if (nr_taken == 0)
return 0;
- update_isolated_counts(mz, &page_list, &nr_anon, &nr_file);
-
- nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
+ nr_reclaimed = shrink_page_list(&page_list, zone, sc,
&nr_dirty, &nr_writeback);
spin_lock_irq(&zone->lru_lock);
- reclaim_stat->recent_scanned[0] += nr_anon;
- reclaim_stat->recent_scanned[1] += nr_file;
+ reclaim_stat->recent_scanned[file] += nr_taken;
if (global_reclaim(sc)) {
if (current_is_kswapd())
@@ -1343,10 +1260,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
nr_reclaimed);
}
- putback_inactive_pages(mz, &page_list);
+ putback_inactive_pages(lruvec, &page_list);
- __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
- __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
@@ -1375,13 +1291,14 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
* DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any
* isolated page is PageWriteback
*/
- if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY-priority)))
+ if (nr_writeback && nr_writeback >=
+ (nr_taken >> (DEF_PRIORITY - sc->priority)))
wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
zone_idx(zone),
nr_scanned, nr_reclaimed,
- priority,
+ sc->priority,
trace_shrink_flags(file));
return nr_reclaimed;
}
@@ -1404,30 +1321,32 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
* But we had to alter page->flags anyway.
*/
-static void move_active_pages_to_lru(struct zone *zone,
+static void move_active_pages_to_lru(struct lruvec *lruvec,
struct list_head *list,
struct list_head *pages_to_free,
enum lru_list lru)
{
+ struct zone *zone = lruvec_zone(lruvec);
unsigned long pgmoved = 0;
struct page *page;
+ int nr_pages;
while (!list_empty(list)) {
- struct lruvec *lruvec;
-
page = lru_to_page(list);
+ lruvec = mem_cgroup_page_lruvec(page, zone);
VM_BUG_ON(PageLRU(page));
SetPageLRU(page);
- lruvec = mem_cgroup_lru_add_list(zone, page, lru);
+ nr_pages = hpage_nr_pages(page);
+ mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
list_move(&page->lru, &lruvec->lists[lru]);
- pgmoved += hpage_nr_pages(page);
+ pgmoved += nr_pages;
if (put_page_testzero(page)) {
__ClearPageLRU(page);
__ClearPageActive(page);
- del_page_from_lru_list(zone, page, lru);
+ del_page_from_lru_list(page, lruvec, lru);
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
@@ -1443,9 +1362,9 @@ static void move_active_pages_to_lru(struct zone *zone,
}
static void shrink_active_list(unsigned long nr_to_scan,
- struct mem_cgroup_zone *mz,
+ struct lruvec *lruvec,
struct scan_control *sc,
- int priority, int file)
+ enum lru_list lru)
{
unsigned long nr_taken;
unsigned long nr_scanned;
@@ -1454,10 +1373,11 @@ static void shrink_active_list(unsigned long nr_to_scan,
LIST_HEAD(l_active);
LIST_HEAD(l_inactive);
struct page *page;
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+ struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
unsigned long nr_rotated = 0;
- isolate_mode_t isolate_mode = ISOLATE_ACTIVE;
- struct zone *zone = mz->zone;
+ isolate_mode_t isolate_mode = 0;
+ int file = is_file_lru(lru);
+ struct zone *zone = lruvec_zone(lruvec);
lru_add_drain();
@@ -1468,18 +1388,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
spin_lock_irq(&zone->lru_lock);
- nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold, &nr_scanned, sc,
- isolate_mode, 1, file);
+ nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
+ &nr_scanned, sc, isolate_mode, lru);
if (global_reclaim(sc))
zone->pages_scanned += nr_scanned;
reclaim_stat->recent_scanned[file] += nr_taken;
__count_zone_vm_events(PGREFILL, zone, nr_scanned);
- if (file)
- __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
- else
- __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
+ __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
spin_unlock_irq(&zone->lru_lock);
@@ -1535,10 +1452,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
*/
reclaim_stat->recent_rotated[file] += nr_rotated;
- move_active_pages_to_lru(zone, &l_active, &l_hold,
- LRU_ACTIVE + file * LRU_FILE);
- move_active_pages_to_lru(zone, &l_inactive, &l_hold,
- LRU_BASE + file * LRU_FILE);
+ move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
+ move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
@@ -1561,13 +1476,12 @@ static int inactive_anon_is_low_global(struct zone *zone)
/**
* inactive_anon_is_low - check if anonymous pages need to be deactivated
- * @zone: zone to check
- * @sc: scan control of this context
+ * @lruvec: LRU vector to check
*
* Returns true if the zone does not have enough inactive anon pages,
* meaning some active anon pages need to be deactivated.
*/
-static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
+static int inactive_anon_is_low(struct lruvec *lruvec)
{
/*
* If we don't have swap space, anonymous page deactivation
@@ -1577,13 +1491,12 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
return 0;
if (!mem_cgroup_disabled())
- return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
- mz->zone);
+ return mem_cgroup_inactive_anon_is_low(lruvec);
- return inactive_anon_is_low_global(mz->zone);
+ return inactive_anon_is_low_global(lruvec_zone(lruvec));
}
#else
-static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
+static inline int inactive_anon_is_low(struct lruvec *lruvec)
{
return 0;
}
@@ -1601,7 +1514,7 @@ static int inactive_file_is_low_global(struct zone *zone)
/**
* inactive_file_is_low - check if file pages need to be deactivated
- * @mz: memory cgroup and zone to check
+ * @lruvec: LRU vector to check
*
* When the system is doing streaming IO, memory pressure here
* ensures that active file pages get deactivated, until more
@@ -1613,44 +1526,39 @@ static int inactive_file_is_low_global(struct zone *zone)
* This uses a different ratio than the anonymous pages, because
* the page cache uses a use-once replacement algorithm.
*/
-static int inactive_file_is_low(struct mem_cgroup_zone *mz)
+static int inactive_file_is_low(struct lruvec *lruvec)
{
if (!mem_cgroup_disabled())
- return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
- mz->zone);
+ return mem_cgroup_inactive_file_is_low(lruvec);
- return inactive_file_is_low_global(mz->zone);
+ return inactive_file_is_low_global(lruvec_zone(lruvec));
}
-static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
+static int inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru)
{
- if (file)
- return inactive_file_is_low(mz);
+ if (is_file_lru(lru))
+ return inactive_file_is_low(lruvec);
else
- return inactive_anon_is_low(mz);
+ return inactive_anon_is_low(lruvec);
}
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
- struct mem_cgroup_zone *mz,
- struct scan_control *sc, int priority)
+ struct lruvec *lruvec, struct scan_control *sc)
{
- int file = is_file_lru(lru);
-
if (is_active_lru(lru)) {
- if (inactive_list_is_low(mz, file))
- shrink_active_list(nr_to_scan, mz, sc, priority, file);
+ if (inactive_list_is_low(lruvec, lru))
+ shrink_active_list(nr_to_scan, lruvec, sc, lru);
return 0;
}
- return shrink_inactive_list(nr_to_scan, mz, sc, priority, file);
+ return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
}
-static int vmscan_swappiness(struct mem_cgroup_zone *mz,
- struct scan_control *sc)
+static int vmscan_swappiness(struct scan_control *sc)
{
if (global_reclaim(sc))
return vm_swappiness;
- return mem_cgroup_swappiness(mz->mem_cgroup);
+ return mem_cgroup_swappiness(sc->target_mem_cgroup);
}
/*
@@ -1662,17 +1570,18 @@ static int vmscan_swappiness(struct mem_cgroup_zone *mz,
* nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
* nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
*/
-static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
- unsigned long *nr, int priority)
+static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
+ unsigned long *nr)
{
unsigned long anon, file, free;
unsigned long anon_prio, file_prio;
unsigned long ap, fp;
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+ struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
u64 fraction[2], denominator;
enum lru_list lru;
int noswap = 0;
bool force_scan = false;
+ struct zone *zone = lruvec_zone(lruvec);
/*
* If the zone or memcg is small, nr[l] can be 0. This
@@ -1684,7 +1593,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
* latencies, so it's better to scan a minimum amount there as
* well.
*/
- if (current_is_kswapd() && mz->zone->all_unreclaimable)
+ if (current_is_kswapd() && zone->all_unreclaimable)
force_scan = true;
if (!global_reclaim(sc))
force_scan = true;
@@ -1698,16 +1607,16 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
goto out;
}
- anon = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) +
- zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
- file = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) +
- zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
+ anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
+ get_lru_size(lruvec, LRU_INACTIVE_ANON);
+ file = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
+ get_lru_size(lruvec, LRU_INACTIVE_FILE);
if (global_reclaim(sc)) {
- free = zone_page_state(mz->zone, NR_FREE_PAGES);
+ free = zone_page_state(zone, NR_FREE_PAGES);
/* If we have very few page cache pages,
force-scan anon pages. */
- if (unlikely(file + free <= high_wmark_pages(mz->zone))) {
+ if (unlikely(file + free <= high_wmark_pages(zone))) {
fraction[0] = 1;
fraction[1] = 0;
denominator = 1;
@@ -1719,8 +1628,8 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
* With swappiness at 100, anonymous and file have the same priority.
* This scanning priority is essentially the inverse of IO cost.
*/
- anon_prio = vmscan_swappiness(mz, sc);
- file_prio = 200 - vmscan_swappiness(mz, sc);
+ anon_prio = vmscan_swappiness(sc);
+ file_prio = 200 - anon_prio;
/*
* OK, so we have swap space and a fair amount of page cache
@@ -1733,7 +1642,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
*
* anon in [0], file in [1]
*/
- spin_lock_irq(&mz->zone->lru_lock);
+ spin_lock_irq(&zone->lru_lock);
if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
reclaim_stat->recent_scanned[0] /= 2;
reclaim_stat->recent_rotated[0] /= 2;
@@ -1754,7 +1663,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
fp = file_prio * (reclaim_stat->recent_scanned[1] + 1);
fp /= reclaim_stat->recent_rotated[1] + 1;
- spin_unlock_irq(&mz->zone->lru_lock);
+ spin_unlock_irq(&zone->lru_lock);
fraction[0] = ap;
fraction[1] = fp;
@@ -1764,9 +1673,9 @@ out:
int file = is_file_lru(lru);
unsigned long scan;
- scan = zone_nr_lru_pages(mz, lru);
- if (priority || noswap || !vmscan_swappiness(mz, sc)) {
- scan >>= priority;
+ scan = get_lru_size(lruvec, lru);
+ if (sc->priority || noswap || !vmscan_swappiness(sc)) {
+ scan >>= sc->priority;
if (!scan && force_scan)
scan = SWAP_CLUSTER_MAX;
scan = div64_u64(scan * fraction[file], denominator);
@@ -1776,11 +1685,11 @@ out:
}
/* Use reclaim/compaction for costly allocs or under memory pressure */
-static bool in_reclaim_compaction(int priority, struct scan_control *sc)
+static bool in_reclaim_compaction(struct scan_control *sc)
{
if (COMPACTION_BUILD && sc->order &&
(sc->order > PAGE_ALLOC_COSTLY_ORDER ||
- priority < DEF_PRIORITY - 2))
+ sc->priority < DEF_PRIORITY - 2))
return true;
return false;
@@ -1793,17 +1702,16 @@ static bool in_reclaim_compaction(int priority, struct scan_control *sc)
* calls try_to_compact_zone() that it will have enough free pages to succeed.
* It will give up earlier than that if there is difficulty reclaiming pages.
*/
-static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
+static inline bool should_continue_reclaim(struct lruvec *lruvec,
unsigned long nr_reclaimed,
unsigned long nr_scanned,
- int priority,
struct scan_control *sc)
{
unsigned long pages_for_compaction;
unsigned long inactive_lru_pages;
/* If not in reclaim/compaction mode, stop */
- if (!in_reclaim_compaction(priority, sc))
+ if (!in_reclaim_compaction(sc))
return false;
/* Consider stopping depending on scan and reclaim activity */
@@ -1834,15 +1742,15 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
* inactive lists are large enough, continue reclaiming
*/
pages_for_compaction = (2UL << sc->order);
- inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
+ inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE);
if (nr_swap_pages > 0)
- inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
+ inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON);
if (sc->nr_reclaimed < pages_for_compaction &&
inactive_lru_pages > pages_for_compaction)
return true;
/* If compaction would go ahead or the allocation would succeed, stop */
- switch (compaction_suitable(mz->zone, sc->order)) {
+ switch (compaction_suitable(lruvec_zone(lruvec), sc->order)) {
case COMPACT_PARTIAL:
case COMPACT_CONTINUE:
return false;
@@ -1854,8 +1762,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
-static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
- struct scan_control *sc)
+static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
{
unsigned long nr[NR_LRU_LISTS];
unsigned long nr_to_scan;
@@ -1867,7 +1774,7 @@ static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
restart:
nr_reclaimed = 0;
nr_scanned = sc->nr_scanned;
- get_scan_count(mz, sc, nr, priority);
+ get_scan_count(lruvec, sc, nr);
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -1879,7 +1786,7 @@ restart:
nr[lru] -= nr_to_scan;
nr_reclaimed += shrink_list(lru, nr_to_scan,
- mz, sc, priority);
+ lruvec, sc);
}
}
/*
@@ -1890,7 +1797,8 @@ restart:
* with multiple processes reclaiming pages, the total
* freeing target can get unreasonably large.
*/
- if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY)
+ if (nr_reclaimed >= nr_to_reclaim &&
+ sc->priority < DEF_PRIORITY)
break;
}
blk_finish_plug(&plug);
@@ -1900,36 +1808,33 @@ restart:
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_anon_is_low(mz))
- shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0);
+ if (inactive_anon_is_low(lruvec))
+ shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+ sc, LRU_ACTIVE_ANON);
/* reclaim/compaction might need reclaim to continue */
- if (should_continue_reclaim(mz, nr_reclaimed,
- sc->nr_scanned - nr_scanned,
- priority, sc))
+ if (should_continue_reclaim(lruvec, nr_reclaimed,
+ sc->nr_scanned - nr_scanned, sc))
goto restart;
throttle_vm_writeout(sc->gfp_mask);
}
-static void shrink_zone(int priority, struct zone *zone,
- struct scan_control *sc)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
{
struct mem_cgroup *root = sc->target_mem_cgroup;
struct mem_cgroup_reclaim_cookie reclaim = {
.zone = zone,
- .priority = priority,
+ .priority = sc->priority,
};
struct mem_cgroup *memcg;
memcg = mem_cgroup_iter(root, NULL, &reclaim);
do {
- struct mem_cgroup_zone mz = {
- .mem_cgroup = memcg,
- .zone = zone,
- };
+ struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+
+ shrink_lruvec(lruvec, sc);
- shrink_mem_cgroup_zone(priority, &mz, sc);
/*
* Limit reclaim has historically picked one memcg and
* scanned it with decreasing priority levels until
@@ -2005,8 +1910,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
* the caller that it should consider retrying the allocation instead of
* further reclaim.
*/
-static bool shrink_zones(int priority, struct zonelist *zonelist,
- struct scan_control *sc)
+static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
{
struct zoneref *z;
struct zone *zone;
@@ -2033,7 +1937,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
if (global_reclaim(sc)) {
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
continue;
- if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ if (zone->all_unreclaimable &&
+ sc->priority != DEF_PRIORITY)
continue; /* Let kswapd poll it */
if (COMPACTION_BUILD) {
/*
@@ -2065,7 +1970,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
/* need some check for avoid more shrink_zone() */
}
- shrink_zone(priority, zone, sc);
+ shrink_zone(zone, sc);
}
return aborted_reclaim;
@@ -2116,7 +2021,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
struct scan_control *sc,
struct shrink_control *shrink)
{
- int priority;
unsigned long total_scanned = 0;
struct reclaim_state *reclaim_state = current->reclaim_state;
struct zoneref *z;
@@ -2129,9 +2033,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
if (global_reclaim(sc))
count_vm_event(ALLOCSTALL);
- for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+ do {
sc->nr_scanned = 0;
- aborted_reclaim = shrink_zones(priority, zonelist, sc);
+ aborted_reclaim = shrink_zones(zonelist, sc);
/*
* Don't shrink slabs when reclaiming memory from
@@ -2173,7 +2077,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
/* Take a nap, wait for some writeback to complete */
if (!sc->hibernation_mode && sc->nr_scanned &&
- priority < DEF_PRIORITY - 2) {
+ sc->priority < DEF_PRIORITY - 2) {
struct zone *preferred_zone;
first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
@@ -2181,7 +2085,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
&preferred_zone);
wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
}
- }
+ } while (--sc->priority >= 0);
out:
delayacct_freepages_end();
@@ -2219,6 +2123,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
.may_unmap = 1,
.may_swap = 1,
.order = order,
+ .priority = DEF_PRIORITY,
.target_mem_cgroup = NULL,
.nodemask = nodemask,
};
@@ -2251,17 +2156,15 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
.may_unmap = 1,
.may_swap = !noswap,
.order = 0,
+ .priority = 0,
.target_mem_cgroup = memcg,
};
- struct mem_cgroup_zone mz = {
- .mem_cgroup = memcg,
- .zone = zone,
- };
+ struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
- trace_mm_vmscan_memcg_softlimit_reclaim_begin(0,
+ trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
sc.may_writepage,
sc.gfp_mask);
@@ -2272,7 +2175,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
* will pick up pages from other mem cgroup's as well. We hack
* the priority and make it zero.
*/
- shrink_mem_cgroup_zone(0, &mz, &sc);
+ shrink_lruvec(lruvec, &sc);
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
@@ -2293,6 +2196,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
.may_swap = !noswap,
.nr_to_reclaim = SWAP_CLUSTER_MAX,
.order = 0,
+ .priority = DEF_PRIORITY,
.target_mem_cgroup = memcg,
.nodemask = NULL, /* we don't care the placement */
.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
@@ -2323,8 +2227,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
}
#endif
-static void age_active_anon(struct zone *zone, struct scan_control *sc,
- int priority)
+static void age_active_anon(struct zone *zone, struct scan_control *sc)
{
struct mem_cgroup *memcg;
@@ -2333,14 +2236,11 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc,
memcg = mem_cgroup_iter(NULL, NULL, NULL);
do {
- struct mem_cgroup_zone mz = {
- .mem_cgroup = memcg,
- .zone = zone,
- };
+ struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
- if (inactive_anon_is_low(&mz))
- shrink_active_list(SWAP_CLUSTER_MAX, &mz,
- sc, priority, 0);
+ if (inactive_anon_is_low(lruvec))
+ shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+ sc, LRU_ACTIVE_ANON);
memcg = mem_cgroup_iter(NULL, memcg, NULL);
} while (memcg);
@@ -2449,7 +2349,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
{
int all_zones_ok;
unsigned long balanced;
- int priority;
int i;
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
unsigned long total_scanned;
@@ -2473,11 +2372,12 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
};
loop_again:
total_scanned = 0;
+ sc.priority = DEF_PRIORITY;
sc.nr_reclaimed = 0;
sc.may_writepage = !laptop_mode;
count_vm_event(PAGEOUTRUN);
- for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+ do {
unsigned long lru_pages = 0;
int has_under_min_watermark_zone = 0;
@@ -2494,14 +2394,15 @@ loop_again:
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ if (zone->all_unreclaimable &&
+ sc.priority != DEF_PRIORITY)
continue;
/*
* Do some background aging of the anon list, to give
* pages a chance to be referenced before reclaiming.
*/
- age_active_anon(zone, &sc, priority);
+ age_active_anon(zone, &sc);
/*
* If the number of buffer_heads in the machine
@@ -2549,7 +2450,8 @@ loop_again:
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ if (zone->all_unreclaimable &&
+ sc.priority != DEF_PRIORITY)
continue;
sc.nr_scanned = 0;
@@ -2593,7 +2495,7 @@ loop_again:
!zone_watermark_ok_safe(zone, testorder,
high_wmark_pages(zone) + balance_gap,
end_zone, 0)) {
- shrink_zone(priority, zone, &sc);
+ shrink_zone(zone, &sc);
reclaim_state->reclaimed_slab = 0;
nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages);
@@ -2650,7 +2552,7 @@ loop_again:
* OK, kswapd is getting into trouble. Take a nap, then take
* another pass across the zones.
*/
- if (total_scanned && (priority < DEF_PRIORITY - 2)) {
+ if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) {
if (has_under_min_watermark_zone)
count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
else
@@ -2665,7 +2567,7 @@ loop_again:
*/
if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
break;
- }
+ } while (--sc.priority >= 0);
out:
/*
@@ -2715,7 +2617,8 @@ out:
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ if (zone->all_unreclaimable &&
+ sc.priority != DEF_PRIORITY)
continue;
/* Would compaction fail due to lack of free memory? */
@@ -2786,7 +2689,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
* them before going back to sleep.
*/
set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
- schedule();
+
+ if (!kthread_should_stop())
+ schedule();
+
set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold);
} else {
if (remaining)
@@ -2982,6 +2888,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
.nr_to_reclaim = nr_to_reclaim,
.hibernation_mode = 1,
.order = 0,
+ .priority = DEF_PRIORITY,
};
struct shrink_control shrink = {
.gfp_mask = sc.gfp_mask,
@@ -3052,14 +2959,17 @@ int kswapd_run(int nid)
}
/*
- * Called by memory hotplug when all memory in a node is offlined.
+ * Called by memory hotplug when all memory in a node is offlined. Caller must
+ * hold lock_memory_hotplug().
*/
void kswapd_stop(int nid)
{
struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
- if (kswapd)
+ if (kswapd) {
kthread_stop(kswapd);
+ NODE_DATA(nid)->kswapd = NULL;
+ }
}
static int __init kswapd_init(void)
@@ -3159,7 +3069,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
const unsigned long nr_pages = 1 << order;
struct task_struct *p = current;
struct reclaim_state reclaim_state;
- int priority;
struct scan_control sc = {
.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
@@ -3168,6 +3077,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
SWAP_CLUSTER_MAX),
.gfp_mask = gfp_mask,
.order = order,
+ .priority = ZONE_RECLAIM_PRIORITY,
};
struct shrink_control shrink = {
.gfp_mask = sc.gfp_mask,
@@ -3190,11 +3100,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
* Free memory by calling shrink zone with increasing
* priorities until we have enough memory freed.
*/
- priority = ZONE_RECLAIM_PRIORITY;
do {
- shrink_zone(priority, zone, &sc);
- priority--;
- } while (priority >= 0 && sc.nr_reclaimed < nr_pages);
+ shrink_zone(zone, &sc);
+ } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
}
nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
@@ -3345,6 +3253,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
zone = pagezone;
spin_lock_irq(&zone->lru_lock);
}
+ lruvec = mem_cgroup_page_lruvec(page, zone);
if (!PageLRU(page) || !PageUnevictable(page))
continue;
@@ -3354,11 +3263,8 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
VM_BUG_ON(PageActive(page));
ClearPageUnevictable(page);
- __dec_zone_state(zone, NR_UNEVICTABLE);
- lruvec = mem_cgroup_lru_move_lists(zone, page,
- LRU_UNEVICTABLE, lru);
- list_move(&page->lru, &lruvec->lists[lru]);
- __inc_zone_state(zone, NR_INACTIVE_ANON + lru);
+ del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE);
+ add_page_to_lru_list(page, lruvec, lru);
pgrescued++;
}
}