aboutsummaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c93
1 files changed, 59 insertions, 34 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2aec4241b42a..eb3dd37ccd7c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -46,6 +46,7 @@
#include <linux/oom.h>
#include <linux/prefetch.h>
#include <linux/printk.h>
+#include <linux/dax.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -106,8 +107,6 @@ struct scan_control {
unsigned long nr_reclaimed;
};
-#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
-
#ifdef ARCH_HAS_PREFETCH
#define prefetch_prev_lru_page(_page, _base, _field) \
do { \
@@ -197,11 +196,13 @@ static unsigned long zone_reclaimable_pages(struct zone *zone)
unsigned long nr;
nr = zone_page_state(zone, NR_ACTIVE_FILE) +
- zone_page_state(zone, NR_INACTIVE_FILE);
+ zone_page_state(zone, NR_INACTIVE_FILE) +
+ zone_page_state(zone, NR_ISOLATED_FILE);
if (get_nr_swap_pages() > 0)
nr += zone_page_state(zone, NR_ACTIVE_ANON) +
- zone_page_state(zone, NR_INACTIVE_ANON);
+ zone_page_state(zone, NR_INACTIVE_ANON) +
+ zone_page_state(zone, NR_ISOLATED_ANON);
return nr;
}
@@ -411,7 +412,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
struct shrinker *shrinker;
unsigned long freed = 0;
- if (memcg && !memcg_kmem_is_active(memcg))
+ if (memcg && !memcg_kmem_online(memcg))
return 0;
if (nr_scanned == 0)
@@ -594,7 +595,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
/* synchronous write or broken a_ops? */
ClearPageReclaim(page);
}
- trace_mm_vmscan_writepage(page, trace_reclaim_flags(page));
+ trace_mm_vmscan_writepage(page);
inc_zone_page_state(page, NR_VMSCAN_WRITE);
return PAGE_SUCCESS;
}
@@ -671,9 +672,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
* inode reclaim needs to empty out the radix tree or
* the nodes are lost. Don't plant shadows behind its
* back.
+ *
+ * We also don't store shadows for DAX mappings because the
+ * only page cache pages found in these are zero pages
+ * covering holes, and because we don't want to mix DAX
+ * exceptional entries and shadow exceptional entries in the
+ * same page_tree.
*/
if (reclaimed && page_is_file_cache(page) &&
- !mapping_exiting(mapping))
+ !mapping_exiting(mapping) && !dax_mapping(mapping))
shadow = workingset_eviction(mapping, page);
__delete_from_page_cache(page, shadow, memcg);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -906,6 +913,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
int may_enter_fs;
enum page_references references = PAGEREF_RECLAIM_CLEAN;
bool dirty, writeback;
+ bool lazyfree = false;
+ int ret = SWAP_SUCCESS;
cond_resched();
@@ -1049,6 +1058,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
goto keep_locked;
if (!add_to_swap(page, page_list))
goto activate_locked;
+ lazyfree = true;
may_enter_fs = 1;
/* Adding to swap updated mapping */
@@ -1060,14 +1070,17 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* processes. Try to unmap it here.
*/
if (page_mapped(page) && mapping) {
- switch (try_to_unmap(page,
- ttu_flags|TTU_BATCH_FLUSH)) {
+ switch (ret = try_to_unmap(page, lazyfree ?
+ (ttu_flags | TTU_BATCH_FLUSH | TTU_LZFREE) :
+ (ttu_flags | TTU_BATCH_FLUSH))) {
case SWAP_FAIL:
goto activate_locked;
case SWAP_AGAIN:
goto keep_locked;
case SWAP_MLOCK:
goto cull_mlocked;
+ case SWAP_LZFREE:
+ goto lazyfree;
case SWAP_SUCCESS:
; /* try to free the page below */
}
@@ -1174,6 +1187,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
}
}
+lazyfree:
if (!mapping || !__remove_mapping(mapping, page, true))
goto keep_locked;
@@ -1184,8 +1198,11 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* we obviously don't have to worry about waking up a process
* waiting on the page lock, because there are no references.
*/
- __clear_page_locked(page);
+ __ClearPageLocked(page);
free_it:
+ if (ret == SWAP_LZFREE)
+ count_vm_event(PGLAZYFREED);
+
nr_reclaimed++;
/*
@@ -1204,7 +1221,7 @@ cull_mlocked:
activate_locked:
/* Not a candidate for swapping, so reclaim swap space. */
- if (PageSwapCache(page) && vm_swap_full())
+ if (PageSwapCache(page) && mem_cgroup_swap_full(page))
try_to_free_swap(page);
VM_BUG_ON_PAGE(PageActive(page), page);
SetPageActive(page);
@@ -1426,6 +1443,7 @@ int isolate_lru_page(struct page *page)
int ret = -EBUSY;
VM_BUG_ON_PAGE(!page_count(page), page);
+ VM_BUG_ON_PAGE(PageTail(page), page);
if (PageLRU(page)) {
struct zone *zone = page_zone(page);
@@ -1691,11 +1709,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
current_may_throttle())
wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
- trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
- zone_idx(zone),
- nr_scanned, nr_reclaimed,
- sc->priority,
- trace_shrink_flags(file));
+ trace_mm_vmscan_lru_shrink_inactive(zone, nr_scanned, nr_reclaimed,
+ sc->priority, file);
return nr_reclaimed;
}
@@ -1958,10 +1973,11 @@ enum scan_balance {
* nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
* nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
*/
-static void get_scan_count(struct lruvec *lruvec, int swappiness,
+static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
struct scan_control *sc, unsigned long *nr,
unsigned long *lru_pages)
{
+ int swappiness = mem_cgroup_swappiness(memcg);
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
u64 fraction[2];
u64 denominator = 0; /* gcc */
@@ -1988,14 +2004,14 @@ static void get_scan_count(struct lruvec *lruvec, int swappiness,
if (current_is_kswapd()) {
if (!zone_reclaimable(zone))
force_scan = true;
- if (!mem_cgroup_lruvec_online(lruvec))
+ if (!mem_cgroup_online(memcg))
force_scan = true;
}
if (!global_reclaim(sc))
force_scan = true;
/* If we have no swap space, do not bother scanning anon pages. */
- if (!sc->may_swap || (get_nr_swap_pages() <= 0)) {
+ if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) {
scan_balance = SCAN_FILE;
goto out;
}
@@ -2046,10 +2062,16 @@ static void get_scan_count(struct lruvec *lruvec, int swappiness,
}
/*
- * There is enough inactive page cache, do not reclaim
- * anything from the anonymous working set right now.
+ * If there is enough inactive page cache, i.e. if the size of the
+ * inactive list is greater than that of the active list *and* the
+ * inactive list actually has some pages to scan on this priority, we
+ * do not reclaim anything from the anonymous working set right now.
+ * Without the second condition we could end up never scanning an
+ * lruvec even if it has plenty of old anonymous pages unless the
+ * system is under heavy pressure.
*/
- if (!inactive_file_is_low(lruvec)) {
+ if (!inactive_file_is_low(lruvec) &&
+ get_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
}
@@ -2179,9 +2201,10 @@ static inline void init_tlb_ubc(void)
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
-static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
- struct scan_control *sc, unsigned long *lru_pages)
+static void shrink_zone_memcg(struct zone *zone, struct mem_cgroup *memcg,
+ struct scan_control *sc, unsigned long *lru_pages)
{
+ struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
unsigned long nr[NR_LRU_LISTS];
unsigned long targets[NR_LRU_LISTS];
unsigned long nr_to_scan;
@@ -2191,7 +2214,7 @@ static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
struct blk_plug plug;
bool scan_adjusted;
- get_scan_count(lruvec, swappiness, sc, nr, lru_pages);
+ get_scan_count(lruvec, memcg, sc, nr, lru_pages);
/* Record the original scan target for proportional adjustments later */
memcpy(targets, nr, sizeof(nr));
@@ -2393,9 +2416,8 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
memcg = mem_cgroup_iter(root, NULL, &reclaim);
do {
unsigned long lru_pages;
+ unsigned long reclaimed;
unsigned long scanned;
- struct lruvec *lruvec;
- int swappiness;
if (mem_cgroup_low(root, memcg)) {
if (!sc->may_thrash)
@@ -2403,11 +2425,10 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
mem_cgroup_events(memcg, MEMCG_LOW, 1);
}
- lruvec = mem_cgroup_zone_lruvec(zone, memcg);
- swappiness = mem_cgroup_swappiness(memcg);
+ reclaimed = sc->nr_reclaimed;
scanned = sc->nr_scanned;
- shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
+ shrink_zone_memcg(zone, memcg, sc, &lru_pages);
zone_lru_pages += lru_pages;
if (memcg && is_classzone)
@@ -2415,6 +2436,11 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
memcg, sc->nr_scanned - scanned,
lru_pages);
+ /* Record the group's reclaim efficiency */
+ vmpressure(sc->gfp_mask, memcg, false,
+ sc->nr_scanned - scanned,
+ sc->nr_reclaimed - reclaimed);
+
/*
* Direct reclaim and kswapd have to scan all memory
* cgroups to fulfill the overall scan target for the
@@ -2446,7 +2472,8 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
reclaim_state->reclaimed_slab = 0;
}
- vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
+ /* Record the subtree's reclaim efficiency */
+ vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
sc->nr_scanned - nr_scanned,
sc->nr_reclaimed - nr_reclaimed);
@@ -2871,8 +2898,6 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
.may_unmap = 1,
.may_swap = !noswap,
};
- struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
- int swappiness = mem_cgroup_swappiness(memcg);
unsigned long lru_pages;
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
@@ -2889,7 +2914,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
* will pick up pages from other mem cgroup's as well. We hack
* the priority and make it zero.
*/
- shrink_lruvec(lruvec, swappiness, &sc, &lru_pages);
+ shrink_zone_memcg(zone, memcg, &sc, &lru_pages);
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);