aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst18
-rw-r--r--include/linux/swap.h3
-rw-r--r--mm/memcontrol-v1.c4
-rw-r--r--mm/memcontrol.c53
-rw-r--r--mm/vmscan.c25
5 files changed, 81 insertions, 22 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 8fbb0519d556..3ef815845003 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1299,17 +1299,10 @@ PAGE_SIZE multiple when read back.
This is a simple interface to trigger memory reclaim in the
target cgroup.
- This file accepts a single key, the number of bytes to reclaim.
- No nested keys are currently supported.
-
Example::
echo "1G" > memory.reclaim
- The interface can be later extended with nested keys to
- configure the reclaim behavior. For example, specify the
- type of memory to reclaim from (anon, file, ..).
-
Please note that the kernel can over or under reclaim from
the target cgroup. If less bytes are reclaimed than the
specified amount, -EAGAIN is returned.
@@ -1321,6 +1314,17 @@ PAGE_SIZE multiple when read back.
This means that the networking layer will not adapt based on
reclaim induced by memory.reclaim.
+The following nested keys are defined.
+
+ ========== ================================
+ swappiness Swappiness value to reclaim with
+ ========== ================================
+
+ Specifying a swappiness value instructs the kernel to perform
+ the reclaim with that swappiness value. Note that this has the
+ same semantics as vm.swappiness applied to memcg reclaim with
+ all the existing limitations and potential future extensions.
+
memory.peak
A read-only single value file which exists on non-root
cgroups.
diff --git a/include/linux/swap.h b/include/linux/swap.h
index e97db0fa7659..3df75d62a835 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -409,7 +409,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- unsigned int reclaim_options);
+ unsigned int reclaim_options,
+ int *swappiness);
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
pg_data_t *pgdat,
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 98da8d0af4d0..a3ea4a5e8c96 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -2241,7 +2241,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
}
if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
- memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP)) {
+ memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP, NULL)) {
ret = -EBUSY;
break;
}
@@ -2273,7 +2273,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
return -EINTR;
if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
- MEMCG_RECLAIM_MAY_SWAP))
+ MEMCG_RECLAIM_MAY_SWAP, NULL))
nr_retries--;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 53a8491634b9..6fb1c933e03a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -48,6 +48,7 @@
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
+#include <linux/parser.h>
#include <linux/vmpressure.h>
#include <linux/memremap.h>
#include <linux/mm_inline.h>
@@ -1921,7 +1922,8 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
psi_memstall_enter(&pflags);
nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
gfp_mask,
- MEMCG_RECLAIM_MAY_SWAP);
+ MEMCG_RECLAIM_MAY_SWAP,
+ NULL);
psi_memstall_leave(&pflags);
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
@@ -2227,7 +2229,7 @@ retry:
psi_memstall_enter(&pflags);
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
- gfp_mask, reclaim_options);
+ gfp_mask, reclaim_options, NULL);
psi_memstall_leave(&pflags);
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
@@ -4097,7 +4099,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
}
reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
- GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP);
+ GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP, NULL);
if (!reclaimed && !nr_retries--)
break;
@@ -4146,7 +4148,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
if (nr_reclaims) {
if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
- GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP))
+ GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP, NULL))
nr_reclaims--;
continue;
}
@@ -4276,19 +4278,50 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
return nbytes;
}
+enum {
+ MEMORY_RECLAIM_SWAPPINESS = 0,
+ MEMORY_RECLAIM_NULL,
+};
+
+static const match_table_t tokens = {
+ { MEMORY_RECLAIM_SWAPPINESS, "swappiness=%d"},
+ { MEMORY_RECLAIM_NULL, NULL },
+};
+
static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
unsigned long nr_to_reclaim, nr_reclaimed = 0;
+ int swappiness = -1;
unsigned int reclaim_options;
- int err;
+ char *old_buf, *start;
+ substring_t args[MAX_OPT_ARGS];
buf = strstrip(buf);
- err = page_counter_memparse(buf, "", &nr_to_reclaim);
- if (err)
- return err;
+
+ old_buf = buf;
+ nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
+ if (buf == old_buf)
+ return -EINVAL;
+
+ buf = strstrip(buf);
+
+ while ((start = strsep(&buf, " ")) != NULL) {
+ if (!strlen(start))
+ continue;
+ switch (match_token(start, tokens, args)) {
+ case MEMORY_RECLAIM_SWAPPINESS:
+ if (match_int(&args[0], &swappiness))
+ return -EINVAL;
+ if (swappiness < MIN_SWAPPINESS || swappiness > MAX_SWAPPINESS)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
while (nr_reclaimed < nr_to_reclaim) {
@@ -4308,7 +4341,9 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
lru_add_drain_all();
reclaimed = try_to_free_mem_cgroup_pages(memcg,
- batch_size, GFP_KERNEL, reclaim_options);
+ batch_size, GFP_KERNEL,
+ reclaim_options,
+ swappiness == -1 ? NULL : &swappiness);
if (!reclaimed && !nr_retries--)
return -EAGAIN;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 91f99558650d..3d4c681c6d40 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -92,6 +92,11 @@ struct scan_control {
unsigned long anon_cost;
unsigned long file_cost;
+#ifdef CONFIG_MEMCG
+ /* Swappiness value for proactive reclaim. Always use sc_swappiness()! */
+ int *proactive_swappiness;
+#endif
+
/* Can active folios be deactivated as part of reclaim? */
#define DEACTIVATE_ANON 1
#define DEACTIVATE_FILE 2
@@ -236,6 +241,13 @@ static bool writeback_throttling_sane(struct scan_control *sc)
#endif
return false;
}
+
+static int sc_swappiness(struct scan_control *sc, struct mem_cgroup *memcg)
+{
+ if (sc->proactive && sc->proactive_swappiness)
+ return *sc->proactive_swappiness;
+ return mem_cgroup_swappiness(memcg);
+}
#else
static bool cgroup_reclaim(struct scan_control *sc)
{
@@ -251,6 +263,11 @@ static bool writeback_throttling_sane(struct scan_control *sc)
{
return true;
}
+
+static int sc_swappiness(struct scan_control *sc, struct mem_cgroup *memcg)
+{
+ return READ_ONCE(vm_swappiness);
+}
#endif
static void set_task_reclaim_state(struct task_struct *task,
@@ -2351,7 +2368,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
unsigned long anon_cost, file_cost, total_cost;
- int swappiness = mem_cgroup_swappiness(memcg);
+ int swappiness = sc_swappiness(sc, memcg);
u64 fraction[ANON_AND_FILE];
u64 denominator = 0; /* gcc */
enum scan_balance scan_balance;
@@ -2632,7 +2649,7 @@ static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc)
mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
return 0;
- return mem_cgroup_swappiness(memcg);
+ return sc_swappiness(sc, memcg);
}
static int get_nr_gens(struct lruvec *lruvec, int type)
@@ -6549,12 +6566,14 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- unsigned int reclaim_options)
+ unsigned int reclaim_options,
+ int *swappiness)
{
unsigned long nr_reclaimed;
unsigned int noreclaim_flag;
struct scan_control sc = {
.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
+ .proactive_swappiness = swappiness,
.gfp_mask = (current_gfp_context(gfp_mask) & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
.reclaim_idx = MAX_NR_ZONES - 1,