aboutsummaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests/cgroup
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 12:32:41 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 12:32:41 -0700
commit98931dd95fd489fcbfa97da563505a6f071d7c77 (patch)
tree44683fc4a92efa614acdca2742a7ff19d26da1e3 /tools/testing/selftests/cgroup
parentMerge tag 'kbuild-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild (diff)
parentmm: kfence: use PAGE_ALIGNED helper (diff)
downloadlinux-dev-98931dd95fd489fcbfa97da563505a6f071d7c77.tar.xz
linux-dev-98931dd95fd489fcbfa97da563505a6f071d7c77.zip
Merge tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: "Almost all of MM here. A few things are still getting finished off, reviewed, etc. - Yang Shi has improved the behaviour of khugepaged collapsing of readonly file-backed transparent hugepages. - Johannes Weiner has arranged for zswap memory use to be tracked and managed on a per-cgroup basis. - Munchun Song adds a /proc knob ("hugetlb_optimize_vmemmap") for runtime enablement of the recent huge page vmemmap optimization feature. - Baolin Wang contributes a series to fix some issues around hugetlb pagetable invalidation. - Zhenwei Pi has fixed some interactions between hwpoisoned pages and virtualization. - Tong Tiangen has enabled the use of the presently x86-only page_table_check debugging feature on arm64 and riscv. - David Vernet has done some fixup work on the memcg selftests. - Peter Xu has taught userfaultfd to handle write protection faults against shmem- and hugetlbfs-backed files. - More DAMON development from SeongJae Park - adding online tuning of the feature and support for monitoring of fixed virtual address ranges. Also easier discovery of which monitoring operations are available. - Nadav Amit has done some optimization of TLB flushing during mprotect(). - Neil Brown continues to labor away at improving our swap-over-NFS support. - David Hildenbrand has some fixes to anon page COWing versus get_user_pages(). - Peng Liu fixed some errors in the core hugetlb code. - Joao Martins has reduced the amount of memory consumed by device-dax's compound devmaps. - Some cleanups of the arch-specific pagemap code from Anshuman Khandual. - Muchun Song has found and fixed some errors in the TLB flushing of transparent hugepages. - Roman Gushchin has done more work on the memcg selftests. ... and, of course, many smaller fixes and cleanups. Notably, the customary million cleanup serieses from Miaohe Lin" * tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (381 commits) mm: kfence: use PAGE_ALIGNED helper selftests: vm: add the "settings" file with timeout variable selftests: vm: add "test_hmm.sh" to TEST_FILES selftests: vm: check numa_available() before operating "merge_across_nodes" in ksm_tests selftests: vm: add migration to the .gitignore selftests/vm/pkeys: fix typo in comment ksm: fix typo in comment selftests: vm: add process_mrelease tests Revert "mm/vmscan: never demote for memcg reclaim" mm/kfence: print disabling or re-enabling message include/trace/events/percpu.h: cleanup for "percpu: improve percpu_alloc_percpu event trace" include/trace/events/mmflags.h: cleanup for "tracing: incorrect gfp_t conversion" mm: fix a potential infinite loop in start_isolate_page_range() MAINTAINERS: add Muchun as co-maintainer for HugeTLB zram: fix Kconfig dependency warning mm/shmem: fix shmem folio swapoff hang cgroup: fix an error handling path in alloc_pagecache_max_30M() mm: damon: use HPAGE_PMD_SIZE tracing: incorrect isolate_mote_t cast in mm_vmscan_lru_isolate nodemask.h: fix compilation error with GCC12 ...
Diffstat (limited to 'tools/testing/selftests/cgroup')
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c56
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h1
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c199
3 files changed, 199 insertions, 57 deletions
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 4297d580e3f8..4c52cc6f2f9c 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -19,6 +19,7 @@
#include "cgroup_util.h"
#include "../clone3/clone3_selftests.h"
+/* Returns read len on success, or -errno on failure. */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
ssize_t len;
@@ -26,35 +27,29 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len)
fd = open(path, O_RDONLY);
if (fd < 0)
- return fd;
+ return -errno;
len = read(fd, buf, max_len - 1);
- if (len < 0)
- goto out;
- buf[len] = 0;
-out:
+ if (len >= 0)
+ buf[len] = 0;
+
close(fd);
- return len;
+ return len < 0 ? -errno : len;
}
+/* Returns written len on success, or -errno on failure. */
static ssize_t write_text(const char *path, char *buf, ssize_t len)
{
int fd;
fd = open(path, O_WRONLY | O_APPEND);
if (fd < 0)
- return fd;
+ return -errno;
len = write(fd, buf, len);
- if (len < 0) {
- close(fd);
- return len;
- }
-
close(fd);
-
- return len;
+ return len < 0 ? -errno : len;
}
char *cg_name(const char *root, const char *name)
@@ -87,16 +82,16 @@ char *cg_control(const char *cgroup, const char *control)
return ret;
}
+/* Returns 0 on success, or -errno on failure. */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
char path[PATH_MAX];
+ ssize_t ret;
snprintf(path, sizeof(path), "%s/%s", cgroup, control);
- if (read_text(path, buf, len) >= 0)
- return 0;
-
- return -1;
+ ret = read_text(path, buf, len);
+ return ret >= 0 ? 0 : ret;
}
int cg_read_strcmp(const char *cgroup, const char *control,
@@ -177,17 +172,15 @@ long cg_read_lc(const char *cgroup, const char *control)
return cnt;
}
+/* Returns 0 on success, or -errno on failure. */
int cg_write(const char *cgroup, const char *control, char *buf)
{
char path[PATH_MAX];
- ssize_t len = strlen(buf);
+ ssize_t len = strlen(buf), ret;
snprintf(path, sizeof(path), "%s/%s", cgroup, control);
-
- if (write_text(path, buf, len) == len)
- return 0;
-
- return -1;
+ ret = write_text(path, buf, len);
+ return ret == len ? 0 : ret;
}
int cg_write_numeric(const char *cgroup, const char *control, long value)
@@ -547,6 +540,18 @@ int set_oom_adj_score(int pid, int score)
return 0;
}
+int proc_mount_contains(const char *option)
+{
+ char buf[4 * PAGE_SIZE];
+ ssize_t read;
+
+ read = read_text("/proc/mounts", buf, sizeof(buf));
+ if (read < 0)
+ return read;
+
+ return strstr(buf, option) != NULL;
+}
+
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
char path[PATH_MAX];
@@ -557,7 +562,8 @@ ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t
else
snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
- return read_text(path, buf, size);
+ size = read_text(path, buf, size);
+ return size < 0 ? -1 : size;
}
int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 2ee2119281d7..c92df4e5d395 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -52,6 +52,7 @@ extern int is_swap_enabled(void);
extern int set_oom_adj_score(int pid, int score);
extern int cg_wait_for_proc_count(const char *cgroup, int count);
extern int cg_killall(const char *cgroup);
+int proc_mount_contains(const char *option);
extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size);
extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle);
extern pid_t clone_into_cgroup(int cgroup_fd);
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 36ccf2322e21..44a974ec472c 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -21,6 +21,9 @@
#include "../kselftest.h"
#include "cgroup_util.h"
+static bool has_localevents;
+static bool has_recursiveprot;
+
/*
* This test creates two nested cgroups with and without enabling
* the memory controller.
@@ -211,13 +214,17 @@ static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
static int alloc_anon_noexit(const char *cgroup, void *arg)
{
int ppid = getppid();
+ size_t size = (unsigned long)arg;
+ char *buf, *ptr;
- if (alloc_anon(cgroup, arg))
- return -1;
+ buf = malloc(size);
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
while (getppid() == ppid)
sleep(1);
+ free(buf);
return 0;
}
@@ -244,8 +251,8 @@ static int cg_test_proc_killed(const char *cgroup)
* A/B memory.min = 50M, memory.current = 50M
* A/B/C memory.min = 75M, memory.current = 50M
* A/B/D memory.min = 25M, memory.current = 50M
- * A/B/E memory.min = 500M, memory.current = 0
- * A/B/F memory.min = 0, memory.current = 50M
+ * A/B/E memory.min = 0, memory.current = 50M
+ * A/B/F memory.min = 500M, memory.current = 0
*
* Usages are pagecache, but the test keeps a running
* process in every leaf cgroup.
@@ -255,7 +262,7 @@ static int cg_test_proc_killed(const char *cgroup)
* A/B memory.current ~= 50M
* A/B/C memory.current ~= 33M
* A/B/D memory.current ~= 17M
- * A/B/E memory.current ~= 0
+ * A/B/F memory.current ~= 0
*
* After that it tries to allocate more than there is
* unprotected memory in A available, and checks
@@ -321,7 +328,7 @@ static int test_memcg_min(const char *root)
if (cg_create(children[i]))
goto cleanup;
- if (i == 2)
+ if (i > 2)
continue;
cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
@@ -336,9 +343,9 @@ static int test_memcg_min(const char *root)
goto cleanup;
if (cg_write(children[1], "memory.min", "25M"))
goto cleanup;
- if (cg_write(children[2], "memory.min", "500M"))
+ if (cg_write(children[2], "memory.min", "0"))
goto cleanup;
- if (cg_write(children[3], "memory.min", "0"))
+ if (cg_write(children[3], "memory.min", "500M"))
goto cleanup;
attempts = 0;
@@ -364,7 +371,7 @@ static int test_memcg_min(const char *root)
if (!values_close(c[1], MB(17), 10))
goto cleanup;
- if (!values_close(c[2], 0, 1))
+ if (c[3] != 0)
goto cleanup;
if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
@@ -401,8 +408,8 @@ cleanup:
* A/B memory.low = 50M, memory.current = 50M
* A/B/C memory.low = 75M, memory.current = 50M
* A/B/D memory.low = 25M, memory.current = 50M
- * A/B/E memory.low = 500M, memory.current = 0
- * A/B/F memory.low = 0, memory.current = 50M
+ * A/B/E memory.low = 0, memory.current = 50M
+ * A/B/F memory.low = 500M, memory.current = 0
*
* Usages are pagecache.
* Then it creates A/G an creates a significant
@@ -412,7 +419,7 @@ cleanup:
* A/B memory.current ~= 50M
* A/B/ memory.current ~= 33M
* A/B/D memory.current ~= 17M
- * A/B/E memory.current ~= 0
+ * A/B/F memory.current ~= 0
*
* After that it tries to allocate more than there is
* unprotected memory in A available,
@@ -476,7 +483,7 @@ static int test_memcg_low(const char *root)
if (cg_create(children[i]))
goto cleanup;
- if (i == 2)
+ if (i > 2)
continue;
if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
@@ -491,9 +498,9 @@ static int test_memcg_low(const char *root)
goto cleanup;
if (cg_write(children[1], "memory.low", "25M"))
goto cleanup;
- if (cg_write(children[2], "memory.low", "500M"))
+ if (cg_write(children[2], "memory.low", "0"))
goto cleanup;
- if (cg_write(children[3], "memory.low", "0"))
+ if (cg_write(children[3], "memory.low", "500M"))
goto cleanup;
if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
@@ -511,7 +518,7 @@ static int test_memcg_low(const char *root)
if (!values_close(c[1], MB(17), 10))
goto cleanup;
- if (!values_close(c[2], 0, 1))
+ if (c[3] != 0)
goto cleanup;
if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
@@ -521,15 +528,18 @@ static int test_memcg_low(const char *root)
}
for (i = 0; i < ARRAY_SIZE(children); i++) {
+ int no_low_events_index = has_recursiveprot ? 2 : 1;
+
oom = cg_read_key_long(children[i], "memory.events", "oom ");
low = cg_read_key_long(children[i], "memory.events", "low ");
if (oom)
goto cleanup;
- if (i < 2 && low <= 0)
+ if (i <= no_low_events_index && low <= 0)
goto cleanup;
- if (i >= 2 && low)
+ if (i > no_low_events_index && low)
goto cleanup;
+
}
ret = KSFT_PASS;
@@ -558,9 +568,14 @@ static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
size_t size = MB(50);
int ret = -1;
- long current;
+ long current, high, max;
int fd;
+ high = cg_read_long(cgroup, "memory.high");
+ max = cg_read_long(cgroup, "memory.max");
+ if (high != MB(30) && max != MB(30))
+ return -1;
+
fd = get_temp_fd();
if (fd < 0)
return -1;
@@ -569,7 +584,7 @@ static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
goto cleanup;
current = cg_read_long(cgroup, "memory.current");
- if (current <= MB(29) || current > MB(30))
+ if (!values_close(current, MB(30), 5))
goto cleanup;
ret = 0;
@@ -607,7 +622,7 @@ static int test_memcg_high(const char *root)
if (cg_write(memcg, "memory.high", "30M"))
goto cleanup;
- if (cg_run(memcg, alloc_anon, (void *)MB(100)))
+ if (cg_run(memcg, alloc_anon, (void *)MB(31)))
goto cleanup;
if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
@@ -756,6 +771,111 @@ cleanup:
return ret;
}
+/*
+ * This test checks that memory.reclaim reclaims the given
+ * amount of memory (from both anon and file, if possible).
+ */
+static int test_memcg_reclaim(const char *root)
+{
+ int ret = KSFT_FAIL, fd, retries;
+ char *memcg;
+ long current, expected_usage, to_reclaim;
+ char buf[64];
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ current = cg_read_long(memcg, "memory.current");
+ if (current != 0)
+ goto cleanup;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ goto cleanup;
+
+ cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
+
+ /*
+ * If swap is enabled, try to reclaim from both anon and file, else try
+ * to reclaim from file only.
+ */
+ if (is_swap_enabled()) {
+ cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
+ expected_usage = MB(100);
+ } else
+ expected_usage = MB(50);
+
+ /*
+ * Wait until current usage reaches the expected usage (or we run out of
+ * retries).
+ */
+ retries = 5;
+ while (!values_close(cg_read_long(memcg, "memory.current"),
+ expected_usage, 10)) {
+ if (retries--) {
+ sleep(1);
+ continue;
+ } else {
+ fprintf(stderr,
+ "failed to allocate %ld for memcg reclaim test\n",
+ expected_usage);
+ goto cleanup;
+ }
+ }
+
+ /*
+ * Reclaim until current reaches 30M, this makes sure we hit both anon
+ * and file if swap is enabled.
+ */
+ retries = 5;
+ while (true) {
+ int err;
+
+ current = cg_read_long(memcg, "memory.current");
+ to_reclaim = current - MB(30);
+
+ /*
+ * We only keep looping if we get EAGAIN, which means we could
+ * not reclaim the full amount.
+ */
+ if (to_reclaim <= 0)
+ goto cleanup;
+
+
+ snprintf(buf, sizeof(buf), "%ld", to_reclaim);
+ err = cg_write(memcg, "memory.reclaim", buf);
+ if (!err) {
+ /*
+ * If writing succeeds, then the written amount should have been
+ * fully reclaimed (and maybe more).
+ */
+ current = cg_read_long(memcg, "memory.current");
+ if (!values_close(current, MB(30), 3) && current > MB(30))
+ goto cleanup;
+ break;
+ }
+
+ /* The kernel could not reclaim the full amount, try again. */
+ if (err == -EAGAIN && retries--)
+ continue;
+
+ /* We got an unexpected error or ran out of retries. */
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+ close(fd);
+
+ return ret;
+}
+
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
long mem_max = (long)arg;
@@ -987,9 +1107,6 @@ static int tcp_client(const char *cgroup, unsigned short port)
if (current < 0 || sock < 0)
goto close_sk;
- if (current < sock)
- goto close_sk;
-
if (values_close(current, sock, 10)) {
ret = KSFT_PASS;
break;
@@ -1079,12 +1196,14 @@ cleanup:
/*
* This test disables swapping and tries to allocate anonymous memory
* up to OOM with memory.group.oom set. Then it checks that all
- * processes in the leaf (but not the parent) were killed.
+ * processes in the leaf were killed. It also checks that oom_events
+ * were propagated to the parent level.
*/
static int test_memcg_oom_group_leaf_events(const char *root)
{
int ret = KSFT_FAIL;
char *parent, *child;
+ long parent_oom_events;
parent = cg_name(root, "memcg_test_0");
child = cg_name(root, "memcg_test_0/memcg_test_1");
@@ -1122,7 +1241,7 @@ static int test_memcg_oom_group_leaf_events(const char *root)
if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
goto cleanup;
- if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
+ if (cg_read_key_long(parent, "memory.events", "oom_kill ") <= 0)
goto cleanup;
ret = KSFT_PASS;
@@ -1230,14 +1349,20 @@ static int test_memcg_oom_group_score_events(const char *root)
if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
goto cleanup;
- if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
- goto cleanup;
+ parent_oom_events = cg_read_key_long(
+ parent, "memory.events", "oom_kill ");
+ /*
+ * If memory_localevents is not enabled (the default), the parent should
+ * count OOM events in its children groups. Otherwise, it should not
+ * have observed any events.
+ */
+ if ((has_localevents && parent_oom_events == 0) ||
+ parent_oom_events > 0)
+ ret = KSFT_PASS;
if (kill(safe_pid, SIGKILL))
goto cleanup;
- ret = KSFT_PASS;
-
cleanup:
if (memcg)
cg_destroy(memcg);
@@ -1246,7 +1371,6 @@ cleanup:
return ret;
}
-
#define T(x) { x, #x }
struct memcg_test {
int (*fn)(const char *root);
@@ -1259,6 +1383,7 @@ struct memcg_test {
T(test_memcg_high),
T(test_memcg_high_sync),
T(test_memcg_max),
+ T(test_memcg_reclaim),
T(test_memcg_oom_events),
T(test_memcg_swap_max),
T(test_memcg_sock),
@@ -1271,7 +1396,7 @@ struct memcg_test {
int main(int argc, char **argv)
{
char root[PATH_MAX];
- int i, ret = EXIT_SUCCESS;
+ int i, proc_status, ret = EXIT_SUCCESS;
if (cg_find_unified_root(root, sizeof(root)))
ksft_exit_skip("cgroup v2 isn't mounted\n");
@@ -1287,6 +1412,16 @@ int main(int argc, char **argv)
if (cg_write(root, "cgroup.subtree_control", "+memory"))
ksft_exit_skip("Failed to set memory controller\n");
+ proc_status = proc_mount_contains("memory_recursiveprot");
+ if (proc_status < 0)
+ ksft_exit_skip("Failed to query cgroup mount option\n");
+ has_recursiveprot = proc_status;
+
+ proc_status = proc_mount_contains("memory_localevents");
+ if (proc_status < 0)
+ ksft_exit_skip("Failed to query cgroup mount option\n");
+ has_localevents = proc_status;
+
for (i = 0; i < ARRAY_SIZE(tests); i++) {
switch (tests[i].fn(root)) {
case KSFT_PASS: