aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/memcontrol.h1
-rw-r--r--include/linux/swap.h6
-rw-r--r--mm/memcontrol.c118
-rw-r--r--mm/shmem.c4
-rw-r--r--mm/swap_state.c5
-rw-r--r--mm/swapfile.c4
6 files changed, 128 insertions, 10 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 27123e597eca..6e0126230878 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -163,6 +163,7 @@ struct mem_cgroup {
/* Accounted resources */
struct page_counter memory;
+ struct page_counter swap;
/* Legacy consumer-oriented counters */
struct page_counter memsw;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 414e101cd061..83b95f343ab1 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -368,11 +368,17 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
#endif
#ifdef CONFIG_MEMCG_SWAP
extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
+extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
extern void mem_cgroup_uncharge_swap(swp_entry_t entry);
#else
static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
}
+static inline int mem_cgroup_try_charge_swap(struct page *page,
+ swp_entry_t entry)
+{
+ return 0;
+}
static inline void mem_cgroup_uncharge_swap(swp_entry_t entry)
{
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f6bc78f4ed13..1ff552e3722b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1220,7 +1220,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
pr_cont(":");
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
- if (i == MEM_CGROUP_STAT_SWAP && !do_memsw_account())
+ if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
pr_cont(" %s:%luKB", mem_cgroup_stat_names[i],
K(mem_cgroup_read_stat(iter, i)));
@@ -1259,9 +1259,12 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
limit = memcg->memory.limit;
if (mem_cgroup_swappiness(memcg)) {
unsigned long memsw_limit;
+ unsigned long swap_limit;
memsw_limit = memcg->memsw.limit;
- limit = min(limit + total_swap_pages, memsw_limit);
+ swap_limit = memcg->swap.limit;
+ swap_limit = min(swap_limit, (unsigned long)total_swap_pages);
+ limit = min(limit + swap_limit, memsw_limit);
}
return limit;
}
@@ -4201,11 +4204,13 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
if (parent && parent->use_hierarchy) {
memcg->use_hierarchy = true;
page_counter_init(&memcg->memory, &parent->memory);
+ page_counter_init(&memcg->swap, &parent->swap);
page_counter_init(&memcg->memsw, &parent->memsw);
page_counter_init(&memcg->kmem, &parent->kmem);
page_counter_init(&memcg->tcpmem, &parent->tcpmem);
} else {
page_counter_init(&memcg->memory, NULL);
+ page_counter_init(&memcg->swap, NULL);
page_counter_init(&memcg->memsw, NULL);
page_counter_init(&memcg->kmem, NULL);
page_counter_init(&memcg->tcpmem, NULL);
@@ -5224,7 +5229,7 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
if (page->mem_cgroup)
goto out;
- if (do_memsw_account()) {
+ if (do_swap_account) {
swp_entry_t ent = { .val = page_private(page), };
unsigned short id = lookup_swap_cgroup_id(ent);
@@ -5677,26 +5682,66 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
memcg_check_events(memcg, page);
}
+/*
+ * mem_cgroup_try_charge_swap - try charging a swap entry
+ * @page: page being added to swap
+ * @entry: swap entry to charge
+ *
+ * Try to charge @entry to the memcg that @page belongs to.
+ *
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
+{
+ struct mem_cgroup *memcg;
+ struct page_counter *counter;
+ unsigned short oldid;
+
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) || !do_swap_account)
+ return 0;
+
+ memcg = page->mem_cgroup;
+
+ /* Readahead page, never charged */
+ if (!memcg)
+ return 0;
+
+ if (!mem_cgroup_is_root(memcg) &&
+ !page_counter_try_charge(&memcg->swap, 1, &counter))
+ return -ENOMEM;
+
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+ VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);
+
+ css_get(&memcg->css);
+ return 0;
+}
+
/**
* mem_cgroup_uncharge_swap - uncharge a swap entry
* @entry: swap entry to uncharge
*
- * Drop the memsw charge associated with @entry.
+ * Drop the swap charge associated with @entry.
*/
void mem_cgroup_uncharge_swap(swp_entry_t entry)
{
struct mem_cgroup *memcg;
unsigned short id;
- if (!do_memsw_account())
+ if (!do_swap_account)
return;
id = swap_cgroup_record(entry, 0);
rcu_read_lock();
memcg = mem_cgroup_from_id(id);
if (memcg) {
- if (!mem_cgroup_is_root(memcg))
- page_counter_uncharge(&memcg->memsw, 1);
+ if (!mem_cgroup_is_root(memcg)) {
+ if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ page_counter_uncharge(&memcg->swap, 1);
+ else
+ page_counter_uncharge(&memcg->memsw, 1);
+ }
mem_cgroup_swap_statistics(memcg, false);
css_put(&memcg->css);
}
@@ -5720,6 +5765,63 @@ static int __init enable_swap_account(char *s)
}
__setup("swapaccount=", enable_swap_account);
+static u64 swap_current_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+ return (u64)page_counter_read(&memcg->swap) * PAGE_SIZE;
+}
+
+static int swap_max_show(struct seq_file *m, void *v)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+ unsigned long max = READ_ONCE(memcg->swap.limit);
+
+ if (max == PAGE_COUNTER_MAX)
+ seq_puts(m, "max\n");
+ else
+ seq_printf(m, "%llu\n", (u64)max * PAGE_SIZE);
+
+ return 0;
+}
+
+static ssize_t swap_max_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ unsigned long max;
+ int err;
+
+ buf = strstrip(buf);
+ err = page_counter_memparse(buf, "max", &max);
+ if (err)
+ return err;
+
+ mutex_lock(&memcg_limit_mutex);
+ err = page_counter_limit(&memcg->swap, max);
+ mutex_unlock(&memcg_limit_mutex);
+ if (err)
+ return err;
+
+ return nbytes;
+}
+
+static struct cftype swap_files[] = {
+ {
+ .name = "swap.current",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_u64 = swap_current_read,
+ },
+ {
+ .name = "swap.max",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = swap_max_show,
+ .write = swap_max_write,
+ },
+ { } /* terminate */
+};
+
static struct cftype memsw_cgroup_files[] = {
{
.name = "memsw.usage_in_bytes",
@@ -5751,6 +5853,8 @@ static int __init mem_cgroup_swap_init(void)
{
if (!mem_cgroup_disabled() && really_do_swap_account) {
do_swap_account = 1;
+ WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys,
+ swap_files));
WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys,
memsw_cgroup_files));
}
diff --git a/mm/shmem.c b/mm/shmem.c
index b98e1011858c..fa2ceb2d2655 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -912,6 +912,9 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
if (!swap.val)
goto redirty;
+ if (mem_cgroup_try_charge_swap(page, swap))
+ goto free_swap;
+
/*
* Add inode to shmem_unuse()'s list of swapped-out inodes,
* if it's not already there. Do it now before the page is
@@ -940,6 +943,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
}
mutex_unlock(&shmem_swaplist_mutex);
+free_swap:
swapcache_free(swap);
redirty:
set_page_dirty(page);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 676ff2991380..69cb2464e7dc 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -170,6 +170,11 @@ int add_to_swap(struct page *page, struct list_head *list)
if (!entry.val)
return 0;
+ if (mem_cgroup_try_charge_swap(page, entry)) {
+ swapcache_free(entry);
+ return 0;
+ }
+
if (unlikely(PageTransHuge(page)))
if (unlikely(split_huge_page_to_list(page, list))) {
swapcache_free(entry);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2bb30aa3a412..22a7a1fc1e47 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -785,14 +785,12 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
count--;
}
- if (!count)
- mem_cgroup_uncharge_swap(entry);
-
usage = count | has_cache;
p->swap_map[offset] = usage;
/* free if no reference */
if (!usage) {
+ mem_cgroup_uncharge_swap(entry);
dec_cluster_info_page(p, p->cluster_info, offset);
if (offset < p->lowest_bit)
p->lowest_bit = offset;