aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 12:32:41 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 12:32:41 -0700
commit98931dd95fd489fcbfa97da563505a6f071d7c77 (patch)
tree44683fc4a92efa614acdca2742a7ff19d26da1e3 /tools
parentMerge tag 'kbuild-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild (diff)
parentmm: kfence: use PAGE_ALIGNED helper (diff)
downloadlinux-dev-98931dd95fd489fcbfa97da563505a6f071d7c77.tar.xz
linux-dev-98931dd95fd489fcbfa97da563505a6f071d7c77.zip
Merge tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: "Almost all of MM here. A few things are still getting finished off, reviewed, etc. - Yang Shi has improved the behaviour of khugepaged collapsing of readonly file-backed transparent hugepages. - Johannes Weiner has arranged for zswap memory use to be tracked and managed on a per-cgroup basis. - Muchun Song adds a /proc knob ("hugetlb_optimize_vmemmap") for runtime enablement of the recent huge page vmemmap optimization feature. - Baolin Wang contributes a series to fix some issues around hugetlb pagetable invalidation. - Zhenwei Pi has fixed some interactions between hwpoisoned pages and virtualization. - Tong Tiangen has enabled the use of the presently x86-only page_table_check debugging feature on arm64 and riscv. - David Vernet has done some fixup work on the memcg selftests. - Peter Xu has taught userfaultfd to handle write protection faults against shmem- and hugetlbfs-backed files. - More DAMON development from SeongJae Park - adding online tuning of the feature and support for monitoring of fixed virtual address ranges. Also easier discovery of which monitoring operations are available. - Nadav Amit has done some optimization of TLB flushing during mprotect(). - Neil Brown continues to labor away at improving our swap-over-NFS support. - David Hildenbrand has some fixes to anon page COWing versus get_user_pages(). - Peng Liu fixed some errors in the core hugetlb code. - Joao Martins has reduced the amount of memory consumed by device-dax's compound devmaps. - Some cleanups of the arch-specific pagemap code from Anshuman Khandual. - Muchun Song has found and fixed some errors in the TLB flushing of transparent hugepages. - Roman Gushchin has done more work on the memcg selftests. ... and, of course, many smaller fixes and cleanups. 
Notably, the customary million cleanup serieses from Miaohe Lin" * tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (381 commits) mm: kfence: use PAGE_ALIGNED helper selftests: vm: add the "settings" file with timeout variable selftests: vm: add "test_hmm.sh" to TEST_FILES selftests: vm: check numa_available() before operating "merge_across_nodes" in ksm_tests selftests: vm: add migration to the .gitignore selftests/vm/pkeys: fix typo in comment ksm: fix typo in comment selftests: vm: add process_mrelease tests Revert "mm/vmscan: never demote for memcg reclaim" mm/kfence: print disabling or re-enabling message include/trace/events/percpu.h: cleanup for "percpu: improve percpu_alloc_percpu event trace" include/trace/events/mmflags.h: cleanup for "tracing: incorrect gfp_t conversion" mm: fix a potential infinite loop in start_isolate_page_range() MAINTAINERS: add Muchun as co-maintainer for HugeTLB zram: fix Kconfig dependency warning mm/shmem: fix shmem folio swapoff hang cgroup: fix an error handling path in alloc_pagecache_max_30M() mm: damon: use HPAGE_PMD_SIZE tracing: incorrect isolate_mote_t cast in mm_vmscan_lru_isolate nodemask.h: fix compilation error with GCC12 ...
Diffstat (limited to 'tools')
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c56
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h1
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c199
-rw-r--r--tools/testing/selftests/damon/sysfs.sh1
-rw-r--r--tools/testing/selftests/vm/.gitignore3
-rw-r--r--tools/testing/selftests/vm/Makefile14
-rw-r--r--tools/testing/selftests/vm/config2
-rw-r--r--tools/testing/selftests/vm/gup_test.c24
-rw-r--r--tools/testing/selftests/vm/hugepage-mremap.c6
-rw-r--r--tools/testing/selftests/vm/ksm_tests.c9
-rw-r--r--tools/testing/selftests/vm/madv_populate.c34
-rw-r--r--tools/testing/selftests/vm/migration.c193
-rw-r--r--tools/testing/selftests/vm/mrelease_test.c200
-rw-r--r--tools/testing/selftests/vm/protection_keys.c2
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests.sh525
-rw-r--r--tools/testing/selftests/vm/settings1
-rw-r--r--tools/testing/selftests/vm/soft-dirty.c145
-rw-r--r--tools/testing/selftests/vm/split_huge_page_test.c79
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c13
-rw-r--r--tools/testing/selftests/vm/vm_util.c108
-rw-r--r--tools/testing/selftests/vm/vm_util.h9
-rw-r--r--tools/vm/page-types.c8
-rw-r--r--tools/vm/page_owner_sort.c386
23 files changed, 1333 insertions, 685 deletions
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 4297d580e3f8..4c52cc6f2f9c 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -19,6 +19,7 @@
#include "cgroup_util.h"
#include "../clone3/clone3_selftests.h"
+/* Returns read len on success, or -errno on failure. */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
ssize_t len;
@@ -26,35 +27,29 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len)
fd = open(path, O_RDONLY);
if (fd < 0)
- return fd;
+ return -errno;
len = read(fd, buf, max_len - 1);
- if (len < 0)
- goto out;
- buf[len] = 0;
-out:
+ if (len >= 0)
+ buf[len] = 0;
+
close(fd);
- return len;
+ return len < 0 ? -errno : len;
}
+/* Returns written len on success, or -errno on failure. */
static ssize_t write_text(const char *path, char *buf, ssize_t len)
{
int fd;
fd = open(path, O_WRONLY | O_APPEND);
if (fd < 0)
- return fd;
+ return -errno;
len = write(fd, buf, len);
- if (len < 0) {
- close(fd);
- return len;
- }
-
close(fd);
-
- return len;
+ return len < 0 ? -errno : len;
}
char *cg_name(const char *root, const char *name)
@@ -87,16 +82,16 @@ char *cg_control(const char *cgroup, const char *control)
return ret;
}
+/* Returns 0 on success, or -errno on failure. */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
char path[PATH_MAX];
+ ssize_t ret;
snprintf(path, sizeof(path), "%s/%s", cgroup, control);
- if (read_text(path, buf, len) >= 0)
- return 0;
-
- return -1;
+ ret = read_text(path, buf, len);
+ return ret >= 0 ? 0 : ret;
}
int cg_read_strcmp(const char *cgroup, const char *control,
@@ -177,17 +172,15 @@ long cg_read_lc(const char *cgroup, const char *control)
return cnt;
}
+/* Returns 0 on success, or -errno on failure. */
int cg_write(const char *cgroup, const char *control, char *buf)
{
char path[PATH_MAX];
- ssize_t len = strlen(buf);
+ ssize_t len = strlen(buf), ret;
snprintf(path, sizeof(path), "%s/%s", cgroup, control);
-
- if (write_text(path, buf, len) == len)
- return 0;
-
- return -1;
+ ret = write_text(path, buf, len);
+ return ret == len ? 0 : ret;
}
int cg_write_numeric(const char *cgroup, const char *control, long value)
@@ -547,6 +540,18 @@ int set_oom_adj_score(int pid, int score)
return 0;
}
+/*
+ * Reports whether the text read from /proc/mounts contains @option.
+ *
+ * Returns 1 when @option occurs in the text that was read, 0 when it does
+ * not, or the negative value handed back by read_text() when the file could
+ * not be read. At most sizeof(mounts) - 1 bytes of the file are inspected.
+ */
+int proc_mount_contains(const char *option)
+{
+	char mounts[4 * PAGE_SIZE];
+	ssize_t nread = read_text("/proc/mounts", mounts, sizeof(mounts));
+
+	if (nread >= 0)
+		return strstr(mounts, option) ? 1 : 0;
+
+	return nread;
+}
+
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
char path[PATH_MAX];
@@ -557,7 +562,8 @@ ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t
else
snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
- return read_text(path, buf, size);
+	/*
+	 * read_text() returns -errno on failure. Keep its result in a signed
+	 * local: "size" is a size_t, so a "size < 0" test can never be true
+	 * and the negative errno would be returned instead of -1.
+	 */
+	ssize_t ret = read_text(path, buf, size);
+
+	return ret < 0 ? -1 : ret;
}
int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 2ee2119281d7..c92df4e5d395 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -52,6 +52,7 @@ extern int is_swap_enabled(void);
extern int set_oom_adj_score(int pid, int score);
extern int cg_wait_for_proc_count(const char *cgroup, int count);
extern int cg_killall(const char *cgroup);
+int proc_mount_contains(const char *option);
extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size);
extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle);
extern pid_t clone_into_cgroup(int cgroup_fd);
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 36ccf2322e21..44a974ec472c 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -21,6 +21,9 @@
#include "../kselftest.h"
#include "cgroup_util.h"
+static bool has_localevents;
+static bool has_recursiveprot;
+
/*
* This test creates two nested cgroups with and without enabling
* the memory controller.
@@ -211,13 +214,17 @@ static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
static int alloc_anon_noexit(const char *cgroup, void *arg)
{
int ppid = getppid();
+ size_t size = (unsigned long)arg;
+ char *buf, *ptr;
- if (alloc_anon(cgroup, arg))
- return -1;
+	buf = malloc(size);
+	/* Bail out instead of writing through a NULL pointer below. */
+	if (buf == NULL)
+		return -1;
+
+	/* Write one byte every PAGE_SIZE bytes across the whole buffer. */
+	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+		*ptr = 0;
while (getppid() == ppid)
sleep(1);
+ free(buf);
return 0;
}
@@ -244,8 +251,8 @@ static int cg_test_proc_killed(const char *cgroup)
* A/B memory.min = 50M, memory.current = 50M
* A/B/C memory.min = 75M, memory.current = 50M
* A/B/D memory.min = 25M, memory.current = 50M
- * A/B/E memory.min = 500M, memory.current = 0
- * A/B/F memory.min = 0, memory.current = 50M
+ * A/B/E memory.min = 0, memory.current = 50M
+ * A/B/F memory.min = 500M, memory.current = 0
*
* Usages are pagecache, but the test keeps a running
* process in every leaf cgroup.
@@ -255,7 +262,7 @@ static int cg_test_proc_killed(const char *cgroup)
* A/B memory.current ~= 50M
* A/B/C memory.current ~= 33M
* A/B/D memory.current ~= 17M
- * A/B/E memory.current ~= 0
+ * A/B/F memory.current ~= 0
*
* After that it tries to allocate more than there is
* unprotected memory in A available, and checks
@@ -321,7 +328,7 @@ static int test_memcg_min(const char *root)
if (cg_create(children[i]))
goto cleanup;
- if (i == 2)
+ if (i > 2)
continue;
cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
@@ -336,9 +343,9 @@ static int test_memcg_min(const char *root)
goto cleanup;
if (cg_write(children[1], "memory.min", "25M"))
goto cleanup;
- if (cg_write(children[2], "memory.min", "500M"))
+ if (cg_write(children[2], "memory.min", "0"))
goto cleanup;
- if (cg_write(children[3], "memory.min", "0"))
+ if (cg_write(children[3], "memory.min", "500M"))
goto cleanup;
attempts = 0;
@@ -364,7 +371,7 @@ static int test_memcg_min(const char *root)
if (!values_close(c[1], MB(17), 10))
goto cleanup;
- if (!values_close(c[2], 0, 1))
+ if (c[3] != 0)
goto cleanup;
if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
@@ -401,8 +408,8 @@ cleanup:
* A/B memory.low = 50M, memory.current = 50M
* A/B/C memory.low = 75M, memory.current = 50M
* A/B/D memory.low = 25M, memory.current = 50M
- * A/B/E memory.low = 500M, memory.current = 0
- * A/B/F memory.low = 0, memory.current = 50M
+ * A/B/E memory.low = 0, memory.current = 50M
+ * A/B/F memory.low = 500M, memory.current = 0
*
* Usages are pagecache.
* Then it creates A/G an creates a significant
@@ -412,7 +419,7 @@ cleanup:
* A/B memory.current ~= 50M
* A/B/ memory.current ~= 33M
* A/B/D memory.current ~= 17M
- * A/B/E memory.current ~= 0
+ * A/B/F memory.current ~= 0
*
* After that it tries to allocate more than there is
* unprotected memory in A available,
@@ -476,7 +483,7 @@ static int test_memcg_low(const char *root)
if (cg_create(children[i]))
goto cleanup;
- if (i == 2)
+ if (i > 2)
continue;
if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
@@ -491,9 +498,9 @@ static int test_memcg_low(const char *root)
goto cleanup;
if (cg_write(children[1], "memory.low", "25M"))
goto cleanup;
- if (cg_write(children[2], "memory.low", "500M"))
+ if (cg_write(children[2], "memory.low", "0"))
goto cleanup;
- if (cg_write(children[3], "memory.low", "0"))
+ if (cg_write(children[3], "memory.low", "500M"))
goto cleanup;
if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
@@ -511,7 +518,7 @@ static int test_memcg_low(const char *root)
if (!values_close(c[1], MB(17), 10))
goto cleanup;
- if (!values_close(c[2], 0, 1))
+ if (c[3] != 0)
goto cleanup;
if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
@@ -521,15 +528,18 @@ static int test_memcg_low(const char *root)
}
for (i = 0; i < ARRAY_SIZE(children); i++) {
+ int no_low_events_index = has_recursiveprot ? 2 : 1;
+
oom = cg_read_key_long(children[i], "memory.events", "oom ");
low = cg_read_key_long(children[i], "memory.events", "low ");
if (oom)
goto cleanup;
- if (i < 2 && low <= 0)
+ if (i <= no_low_events_index && low <= 0)
goto cleanup;
- if (i >= 2 && low)
+ if (i > no_low_events_index && low)
goto cleanup;
+
}
ret = KSFT_PASS;
@@ -558,9 +568,14 @@ static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
size_t size = MB(50);
int ret = -1;
- long current;
+ long current, high, max;
int fd;
+ high = cg_read_long(cgroup, "memory.high");
+ max = cg_read_long(cgroup, "memory.max");
+ if (high != MB(30) && max != MB(30))
+ return -1;
+
fd = get_temp_fd();
if (fd < 0)
return -1;
@@ -569,7 +584,7 @@ static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
goto cleanup;
current = cg_read_long(cgroup, "memory.current");
- if (current <= MB(29) || current > MB(30))
+ if (!values_close(current, MB(30), 5))
goto cleanup;
ret = 0;
@@ -607,7 +622,7 @@ static int test_memcg_high(const char *root)
if (cg_write(memcg, "memory.high", "30M"))
goto cleanup;
- if (cg_run(memcg, alloc_anon, (void *)MB(100)))
+ if (cg_run(memcg, alloc_anon, (void *)MB(31)))
goto cleanup;
if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
@@ -756,6 +771,111 @@ cleanup:
return ret;
}
+/*
+ * This test checks that memory.reclaim reclaims the given
+ * amount of memory (from both anon and file, if possible).
+ *
+ * Returns KSFT_PASS when usage could be brought down to about 30M through
+ * memory.reclaim, KSFT_FAIL otherwise.
+ */
+static int test_memcg_reclaim(const char *root)
+{
+	/* fd starts at -1 so that cleanup never closes an unopened descriptor. */
+	int ret = KSFT_FAIL, fd = -1, retries;
+	char *memcg;
+	long current, expected_usage, to_reclaim;
+	char buf[64];
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	current = cg_read_long(memcg, "memory.current");
+	if (current != 0)
+		goto cleanup;
+
+	fd = get_temp_fd();
+	if (fd < 0)
+		goto cleanup;
+
+	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
+
+	/*
+	 * If swap is enabled, try to reclaim from both anon and file, else try
+	 * to reclaim from file only.
+	 */
+	if (is_swap_enabled()) {
+		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
+		expected_usage = MB(100);
+	} else
+		expected_usage = MB(50);
+
+	/*
+	 * Wait until current usage reaches the expected usage (or we run out of
+	 * retries).
+	 */
+	retries = 5;
+	while (!values_close(cg_read_long(memcg, "memory.current"),
+			    expected_usage, 10)) {
+		if (retries--) {
+			sleep(1);
+			continue;
+		} else {
+			fprintf(stderr,
+				"failed to allocate %ld for memcg reclaim test\n",
+				expected_usage);
+			goto cleanup;
+		}
+	}
+
+	/*
+	 * Reclaim until current reaches 30M, this makes sure we hit both anon
+	 * and file if swap is enabled.
+	 */
+	retries = 5;
+	while (true) {
+		int err;
+
+		current = cg_read_long(memcg, "memory.current");
+		to_reclaim = current - MB(30);
+
+		/*
+		 * Usage is already at or below the 30M target although no
+		 * memory.reclaim write has succeeded yet, so there is nothing
+		 * to request: leave through cleanup with ret still KSFT_FAIL.
+		 */
+		if (to_reclaim <= 0)
+			goto cleanup;
+
+		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
+		err = cg_write(memcg, "memory.reclaim", buf);
+		if (!err) {
+			/*
+			 * If writing succeeds, then the written amount should have been
+			 * fully reclaimed (and maybe more).
+			 */
+			current = cg_read_long(memcg, "memory.current");
+			if (!values_close(current, MB(30), 3) && current > MB(30))
+				goto cleanup;
+			break;
+		}
+
+		/*
+		 * We only keep looping if we get EAGAIN, which means the kernel
+		 * could not reclaim the full amount.
+		 */
+		if (err == -EAGAIN && retries--)
+			continue;
+
+		/* We got an unexpected error or ran out of retries. */
+		goto cleanup;
+	}
+
+	ret = KSFT_PASS;
+cleanup:
+	/* memcg is NULL when cg_name() failed; there is nothing to destroy then. */
+	if (memcg) {
+		cg_destroy(memcg);
+		free(memcg);
+	}
+	if (fd >= 0)
+		close(fd);
+
+	return ret;
+}
+
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
long mem_max = (long)arg;
@@ -987,9 +1107,6 @@ static int tcp_client(const char *cgroup, unsigned short port)
if (current < 0 || sock < 0)
goto close_sk;
- if (current < sock)
- goto close_sk;
-
if (values_close(current, sock, 10)) {
ret = KSFT_PASS;
break;
@@ -1079,12 +1196,14 @@ cleanup:
/*
* This test disables swapping and tries to allocate anonymous memory
* up to OOM with memory.group.oom set. Then it checks that all
- * processes in the leaf (but not the parent) were killed.
+ * processes in the leaf were killed. It also checks that oom_events
+ * were propagated to the parent level.
*/
static int test_memcg_oom_group_leaf_events(const char *root)
{
int ret = KSFT_FAIL;
char *parent, *child;
+ long parent_oom_events;
parent = cg_name(root, "memcg_test_0");
child = cg_name(root, "memcg_test_0/memcg_test_1");
@@ -1122,7 +1241,7 @@ static int test_memcg_oom_group_leaf_events(const char *root)
if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
goto cleanup;
- if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
+ if (cg_read_key_long(parent, "memory.events", "oom_kill ") <= 0)
goto cleanup;
ret = KSFT_PASS;
@@ -1230,14 +1349,20 @@ static int test_memcg_oom_group_score_events(const char *root)
if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
goto cleanup;
- if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
- goto cleanup;
+ parent_oom_events = cg_read_key_long(
+ parent, "memory.events", "oom_kill ");
+ /*
+ * If memory_localevents is not enabled (the default), the parent should
+ * count OOM events in its children groups. Otherwise, it should not
+ * have observed any events.
+ */
+ if ((has_localevents && parent_oom_events == 0) ||
+ parent_oom_events > 0)
+ ret = KSFT_PASS;
if (kill(safe_pid, SIGKILL))
goto cleanup;
- ret = KSFT_PASS;
-
cleanup:
if (memcg)
cg_destroy(memcg);
@@ -1246,7 +1371,6 @@ cleanup:
return ret;
}
-
#define T(x) { x, #x }
struct memcg_test {
int (*fn)(const char *root);
@@ -1259,6 +1383,7 @@ struct memcg_test {
T(test_memcg_high),
T(test_memcg_high_sync),
T(test_memcg_max),
+ T(test_memcg_reclaim),
T(test_memcg_oom_events),
T(test_memcg_swap_max),
T(test_memcg_sock),
@@ -1271,7 +1396,7 @@ struct memcg_test {
int main(int argc, char **argv)
{
char root[PATH_MAX];
- int i, ret = EXIT_SUCCESS;
+ int i, proc_status, ret = EXIT_SUCCESS;
if (cg_find_unified_root(root, sizeof(root)))
ksft_exit_skip("cgroup v2 isn't mounted\n");
@@ -1287,6 +1412,16 @@ int main(int argc, char **argv)
if (cg_write(root, "cgroup.subtree_control", "+memory"))
ksft_exit_skip("Failed to set memory controller\n");
+ proc_status = proc_mount_contains("memory_recursiveprot");
+ if (proc_status < 0)
+ ksft_exit_skip("Failed to query cgroup mount option\n");
+ has_recursiveprot = proc_status;
+
+ proc_status = proc_mount_contains("memory_localevents");
+ if (proc_status < 0)
+ ksft_exit_skip("Failed to query cgroup mount option\n");
+ has_localevents = proc_status;
+
for (i = 0; i < ARRAY_SIZE(tests); i++) {
switch (tests[i].fn(root)) {
case KSFT_PASS:
diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
index 2e3ae77cb6db..89592c64462f 100644
--- a/tools/testing/selftests/damon/sysfs.sh
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -231,6 +231,7 @@ test_context()
{
context_dir=$1
ensure_dir "$context_dir" "exist"
+	ensure_file "$context_dir/avail_operations" "exist" 400
ensure_file "$context_dir/operations" "exist" 600
test_monitoring_attrs "$context_dir/monitoring_attrs"
test_targets "$context_dir/targets"
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index d7507f3c7c76..31e5eea2a9b9 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -9,7 +9,9 @@ map_hugetlb
map_populate
thuge-gen
compaction_test
+migration
mlock2-tests
+mrelease_test
mremap_dontunmap
mremap_test
on-fault-limit
@@ -29,5 +31,6 @@ write_to_hugetlbfs
hmm-tests
memfd_secret
local_config.*
+soft-dirty
split_huge_page_test
ksm_tests
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 5b1ecd00695b..44f25acfbeca 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -36,20 +36,23 @@ TEST_GEN_FILES += hugepage-mremap
TEST_GEN_FILES += hugepage-shm
TEST_GEN_FILES += hugepage-vmemmap
TEST_GEN_FILES += khugepaged
-TEST_GEN_FILES += madv_populate
+TEST_GEN_PROGS = madv_populate
TEST_GEN_FILES += map_fixed_noreplace
TEST_GEN_FILES += map_hugetlb
TEST_GEN_FILES += map_populate
TEST_GEN_FILES += memfd_secret
+TEST_GEN_FILES += migration
TEST_GEN_FILES += mlock-random-test
TEST_GEN_FILES += mlock2-tests
+TEST_GEN_FILES += mrelease_test
TEST_GEN_FILES += mremap_dontunmap
TEST_GEN_FILES += mremap_test
TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += userfaultfd
-TEST_GEN_FILES += split_huge_page_test
+TEST_GEN_PROGS += soft-dirty
+TEST_GEN_PROGS += split_huge_page_test
TEST_GEN_FILES += ksm_tests
ifeq ($(MACHINE),x86_64)
@@ -89,10 +92,15 @@ endif
TEST_PROGS := run_vmtests.sh
TEST_FILES := test_vmalloc.sh
+TEST_FILES += test_hmm.sh
KSFT_KHDR_INSTALL := 1
include ../lib.mk
+$(OUTPUT)/madv_populate: vm_util.c
+$(OUTPUT)/soft-dirty: vm_util.c
+$(OUTPUT)/split_huge_page_test: vm_util.c
+
ifeq ($(MACHINE),x86_64)
BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
@@ -149,6 +157,8 @@ $(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
$(OUTPUT)/ksm_tests: LDLIBS += -lnuma
+$(OUTPUT)/migration: LDLIBS += -lnuma
+
local_config.mk local_config.h: check_config.sh
/bin/sh ./check_config.sh $(CC)
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
index 60e82da0de85..be087c4bc396 100644
--- a/tools/testing/selftests/vm/config
+++ b/tools/testing/selftests/vm/config
@@ -4,3 +4,5 @@ CONFIG_TEST_VMALLOC=m
CONFIG_DEVICE_PRIVATE=y
CONFIG_TEST_HMM=m
CONFIG_GUP_TEST=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_MEM_SOFT_DIRTY=y
diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index cda837a14736..6bb36ca71cb5 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -1,7 +1,9 @@
#include <fcntl.h>
+#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
+#include <dirent.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
@@ -9,6 +11,7 @@
#include <pthread.h>
#include <assert.h>
#include "../../../../mm/gup_test.h"
+#include "../kselftest.h"
#include "util.h"
@@ -18,6 +21,8 @@
#define FOLL_WRITE 0x01 /* check pte is writable */
#define FOLL_TOUCH 0x02 /* mark page accessed */
+#define GUP_TEST_FILE "/sys/kernel/debug/gup_test"
+
static unsigned long cmd = GUP_FAST_BENCHMARK;
static int gup_fd, repeats = 1;
static unsigned long size = 128 * MB;
@@ -206,8 +211,23 @@ int main(int argc, char **argv)
gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
if (gup_fd == -1) {
- perror("open");
- exit(1);
+ switch (errno) {
+ case EACCES:
+ if (getuid())
+ printf("Please run this test as root\n");
+ break;
+ case ENOENT:
+ if (opendir("/sys/kernel/debug") == NULL) {
+ printf("mount debugfs at /sys/kernel/debug\n");
+ break;
+ }
+ printf("check if CONFIG_GUP_TEST is enabled in kernel config\n");
+ break;
+ default:
+ perror("failed to open /sys/kernel/debug/gup_test");
+ break;
+ }
+ exit(KSFT_SKIP);
}
p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c
index 1d689084a54b..585978f181ed 100644
--- a/tools/testing/selftests/vm/hugepage-mremap.c
+++ b/tools/testing/selftests/vm/hugepage-mremap.c
@@ -178,6 +178,12 @@ int main(int argc, char *argv[])
munmap(addr, length);
+ addr = mremap(addr, length, length, 0);
+ if (addr != MAP_FAILED) {
+ printf("mremap: Expected failure, but call succeeded\n");
+ exit(1);
+ }
+
close(fd);
unlink(argv[argc-1]);
diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c
index fd85f15869d1..2fcf24312da8 100644
--- a/tools/testing/selftests/vm/ksm_tests.c
+++ b/tools/testing/selftests/vm/ksm_tests.c
@@ -221,7 +221,8 @@ static bool assert_ksm_pages_count(long dupl_page_count)
static int ksm_save_def(struct ksm_sysfs *ksm_sysfs)
{
if (ksm_read_sysfs(KSM_FP("max_page_sharing"), &ksm_sysfs->max_page_sharing) ||
- ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) ||
+	    /*
+	     * Parenthesized on purpose: "?:" binds looser than "||", so without
+	     * the parentheses the conditional would take the preceding call as
+	     * part of its condition and the rest of the chain as its else arm.
+	     */
+	    (numa_available() ? 0 :
+		ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes)) ||
ksm_read_sysfs(KSM_FP("sleep_millisecs"), &ksm_sysfs->sleep_millisecs) ||
ksm_read_sysfs(KSM_FP("pages_to_scan"), &ksm_sysfs->pages_to_scan) ||
ksm_read_sysfs(KSM_FP("run"), &ksm_sysfs->run) ||
@@ -236,7 +237,8 @@ static int ksm_save_def(struct ksm_sysfs *ksm_sysfs)
static int ksm_restore(struct ksm_sysfs *ksm_sysfs)
{
if (ksm_write_sysfs(KSM_FP("max_page_sharing"), ksm_sysfs->max_page_sharing) ||
- ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) ||
+	    /*
+	     * Parenthesized on purpose: "?:" binds looser than "||", so without
+	     * the parentheses the conditional would take the preceding call as
+	     * part of its condition and the rest of the chain as its else arm.
+	     */
+	    (numa_available() ? 0 :
+		ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes)) ||
ksm_write_sysfs(KSM_FP("pages_to_scan"), ksm_sysfs->pages_to_scan) ||
ksm_write_sysfs(KSM_FP("run"), ksm_sysfs->run) ||
ksm_write_sysfs(KSM_FP("sleep_millisecs"), ksm_sysfs->sleep_millisecs) ||
@@ -720,7 +722,8 @@ int main(int argc, char *argv[])
if (ksm_write_sysfs(KSM_FP("run"), 2) ||
ksm_write_sysfs(KSM_FP("sleep_millisecs"), 0) ||
- ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) ||
+	    /*
+	     * Parenthesized on purpose: "?:" binds looser than "||", so without
+	     * the parentheses the conditional would take the preceding calls as
+	     * part of its condition and the rest of the chain as its else arm.
+	     */
+	    (numa_available() ? 0 :
+		ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1)) ||
ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count))
return KSFT_FAIL;
diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c
index 3ee0e8275600..715a42e8e2cd 100644
--- a/tools/testing/selftests/vm/madv_populate.c
+++ b/tools/testing/selftests/vm/madv_populate.c
@@ -18,6 +18,7 @@
#include <sys/mman.h>
#include "../kselftest.h"
+#include "vm_util.h"
/*
* For now, we're using 2 MiB of private anonymous memory for all tests.
@@ -26,18 +27,6 @@
static size_t pagesize;
-static uint64_t pagemap_get_entry(int fd, char *start)
-{
- const unsigned long pfn = (unsigned long)start / pagesize;
- uint64_t entry;
- int ret;
-
- ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry));
- if (ret != sizeof(entry))
- ksft_exit_fail_msg("reading pagemap failed\n");
- return entry;
-}
-
static bool pagemap_is_populated(int fd, char *start)
{
uint64_t entry = pagemap_get_entry(fd, start);
@@ -46,13 +35,6 @@ static bool pagemap_is_populated(int fd, char *start)
return entry & 0xc000000000000000ull;
}
-static bool pagemap_is_softdirty(int fd, char *start)
-{
- uint64_t entry = pagemap_get_entry(fd, start);
-
- return entry & 0x0080000000000000ull;
-}
-
static void sense_support(void)
{
char *addr;
@@ -258,20 +240,6 @@ static bool range_is_not_softdirty(char *start, ssize_t size)
return ret;
}
-static void clear_softdirty(void)
-{
- int fd = open("/proc/self/clear_refs", O_WRONLY);
- const char *ctrl = "4";
- int ret;
-
- if (fd < 0)
- ksft_exit_fail_msg("opening clear_refs failed\n");
- ret = write(fd, ctrl, strlen(ctrl));
- if (ret != strlen(ctrl))
- ksft_exit_fail_msg("writing clear_refs failed\n");
- close(fd);
-}
-
static void test_softdirty(void)
{
char *addr;
diff --git a/tools/testing/selftests/vm/migration.c b/tools/testing/selftests/vm/migration.c
new file mode 100644
index 000000000000..1cec8425e3ca
--- /dev/null
+++ b/tools/testing/selftests/vm/migration.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The main purpose of the tests here is to exercise the migration entry code
+ * paths in the kernel.
+ */
+
+#include "../kselftest_harness.h"
+#include <strings.h>
+#include <pthread.h>
+#include <numa.h>
+#include <numaif.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <time.h>
+
+#define TWOMEG (2<<20)
+#define RUNTIME (60)
+
+#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+
+/*
+ * State shared by the migration tests. It is filled in by
+ * FIXTURE_SETUP(migration) and released by FIXTURE_TEARDOWN(migration).
+ */
+FIXTURE(migration)
+{
+ /* Array of nthreads thread handles, used by the pthread-based tests. */
+ pthread_t *threads;
+ /* Array of nthreads child pids, used by the fork-based test. */
+ pid_t *pids;
+ /* Set to numa_num_task_cpus() - 1 by the setup. */
+ int nthreads;
+ /* First node found set in numa_all_nodes_ptr, or -1 if none was found. */
+ int n1;
+ /* Second node found set in numa_all_nodes_ptr, or -1 if none was found. */
+ int n2;
+};
+
+/*
+ * Prepares the fixture: records the worker count, picks the two nodes the
+ * tests migrate between, and allocates the thread and pid arrays.
+ */
+FIXTURE_SETUP(migration)
+{
+ int n;
+
+ /* The fixture requires numa_available() to return 0. */
+ ASSERT_EQ(numa_available(), 0);
+ self->nthreads = numa_num_task_cpus() - 1;
+ /* -1 means "no node chosen"; the tests skip when either stays -1. */
+ self->n1 = -1;
+ self->n2 = -1;
+
+ /*
+ * Take the first two node numbers whose bit is set in
+ * numa_all_nodes_ptr and stop as soon as the second one is found.
+ */
+ for (n = 0; n < numa_max_possible_node(); n++)
+ if (numa_bitmask_isbitset(numa_all_nodes_ptr, n)) {
+ if (self->n1 == -1) {
+ self->n1 = n;
+ } else {
+ self->n2 = n;
+ break;
+ }
+ }
+
+ /* One slot per worker in each array; both are freed by the teardown. */
+ self->threads = malloc(self->nthreads * sizeof(*self->threads));
+ ASSERT_NE(self->threads, NULL);
+ self->pids = malloc(self->nthreads * sizeof(*self->pids));
+ ASSERT_NE(self->pids, NULL);
+};
+
+/* Releases the thread-handle and pid arrays held by the fixture. */
+FIXTURE_TEARDOWN(migration)
+{
+	free(self->pids);
+	free(self->threads);
+}
+
+/*
+ * Repeatedly calls move_pages() on the single page at @ptr, alternating the
+ * target node between @n2 and @n1 (starting with @n2), until RUNTIME seconds
+ * have elapsed on CLOCK_MONOTONIC.
+ *
+ * Returns 0 once the time is up, -1 when clock_gettime() fails and -2 when
+ * move_pages() returns a non-zero value.
+ */
+int migrate(uint64_t *ptr, int n1, int n2)
+{
+	struct timespec start, now;
+	int page_status = 0;
+	int rc;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &start) != 0)
+		return -1;
+
+	for (;;) {
+		int previous_target;
+
+		if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
+			return -1;
+
+		if (now.tv_sec - start.tv_sec >= RUNTIME)
+			return 0;
+
+		rc = move_pages(0, 1, (void **) &ptr, &n2, &page_status,
+				MPOL_MF_MOVE_ALL);
+		if (rc > 0) {
+			printf("Didn't migrate %d pages\n", rc);
+			return -2;
+		}
+		if (rc < 0) {
+			perror("Couldn't migrate pages");
+			return -2;
+		}
+
+		/* Exchange the two nodes so the next pass moves the page back. */
+		previous_target = n2;
+		n2 = n1;
+		n1 = previous_target;
+	}
+}
+
+/*
+ * Worker body: reads the uint64_t at @ptr through a volatile pointer in an
+ * endless loop, calling pthread_testcancel() before every read. The loop has
+ * no exit condition of its own.
+ */
+void *access_mem(void *ptr)
+{
+	volatile uint64_t *cell = ptr;
+	uint64_t sum = 0;
+
+	for (;;) {
+		pthread_testcancel();
+		sum += *cell;
+	}
+
+	return NULL;
+}
+
+/*
+ * Basic migration entry testing. One thread will move pages back and forth
+ * between nodes whilst other threads try and access them triggering the
+ * migration entry wait paths in the kernel.
+ */
+TEST_F_TIMEOUT(migration, private_anon, 2*RUNTIME)
+{
+	uint64_t *ptr;
+	int i, migrated;
+	int nr_started = 0, cancel_failures = 0;
+
+	if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+		SKIP(return, "Not enough threads or NUMA nodes available");
+
+	ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(ptr, MAP_FAILED);
+
+	memset(ptr, 0xde, TWOMEG);
+	/*
+	 * Only threads that were really created get a slot, so the cancel
+	 * loop below never touches an uninitialized pthread_t.
+	 */
+	for (i = 0; i < self->nthreads - 1; i++) {
+		if (pthread_create(&self->threads[nr_started], NULL, access_mem, ptr))
+			perror("Couldn't create thread");
+		else
+			nr_started++;
+	}
+
+	migrated = migrate(ptr, self->n1, self->n2);
+
+	/* Cancel every reader before asserting, whatever migrate() returned. */
+	for (i = 0; i < nr_started; i++)
+		if (pthread_cancel(self->threads[i]))
+			cancel_failures++;
+
+	ASSERT_EQ(migrated, 0);
+	ASSERT_EQ(cancel_failures, 0);
+}
+
+/*
+ * Same as the previous test but with shared memory: the readers are forked
+ * children that access a MAP_SHARED mapping instead of threads.
+ */
+TEST_F_TIMEOUT(migration, shared_anon, 2*RUNTIME)
+{
+	pid_t pid;
+	uint64_t *ptr;
+	int i, migrated;
+	int nr_started = 0, kill_failures = 0;
+
+	if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+		SKIP(return, "Not enough threads or NUMA nodes available");
+
+	ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+		MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(ptr, MAP_FAILED);
+
+	memset(ptr, 0xde, TWOMEG);
+	for (i = 0; i < self->nthreads - 1; i++) {
+		pid = fork();
+		if (pid < 0) {
+			/*
+			 * Never record -1 as a child: kill(-1, SIGTERM) would
+			 * signal every process the caller is allowed to signal.
+			 */
+			perror("Couldn't fork");
+			continue;
+		}
+		if (!pid)
+			access_mem(ptr);
+		else
+			self->pids[nr_started++] = pid;
+	}
+
+	migrated = migrate(ptr, self->n1, self->n2);
+
+	/* Terminate every child before asserting, whatever migrate() returned. */
+	for (i = 0; i < nr_started; i++)
+		if (kill(self->pids[i], SIGTERM))
+			kill_failures++;
+
+	ASSERT_EQ(migrated, 0);
+	ASSERT_EQ(kill_failures, 0);
+}
+
+/*
+ * Tests the pmd migration entry paths. The readers and the migrating thread
+ * work on a TWOMEG-aligned range that has been madvise()d with MADV_HUGEPAGE.
+ */
+TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME)
+{
+	uint64_t *ptr;
+	int i, migrated;
+	int nr_started = 0, cancel_failures = 0;
+
+	if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+		SKIP(return, "Not enough threads or NUMA nodes available");
+
+	/* Map twice the size so that a TWOMEG-aligned TWOMEG range fits inside. */
+	ptr = mmap(NULL, 2*TWOMEG, PROT_READ | PROT_WRITE,
+		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(ptr, MAP_FAILED);
+
+	ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG);
+	ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0);
+	memset(ptr, 0xde, TWOMEG);
+	/*
+	 * Only threads that were really created get a slot, so the cancel
+	 * loop below never touches an uninitialized pthread_t.
+	 */
+	for (i = 0; i < self->nthreads - 1; i++) {
+		if (pthread_create(&self->threads[nr_started], NULL, access_mem, ptr))
+			perror("Couldn't create thread");
+		else
+			nr_started++;
+	}
+
+	migrated = migrate(ptr, self->n1, self->n2);
+
+	/* Cancel every reader before asserting, whatever migrate() returned. */
+	for (i = 0; i < nr_started; i++)
+		if (pthread_cancel(self->threads[i]))
+			cancel_failures++;
+
+	ASSERT_EQ(migrated, 0);
+	ASSERT_EQ(cancel_failures, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/vm/mrelease_test.c b/tools/testing/selftests/vm/mrelease_test.c
new file mode 100644
index 000000000000..96671c2f7d48
--- /dev/null
+++ b/tools/testing/selftests/vm/mrelease_test.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2022 Google LLC
+ */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "util.h"
+
+#include "../kselftest.h"
+
+#ifndef __NR_pidfd_open
+#define __NR_pidfd_open -1
+#endif
+
+#ifndef __NR_process_mrelease
+#define __NR_process_mrelease -1
+#endif
+
+/* Convert a size in megabytes to bytes; the argument is parenthesized so
+ * that expressions such as MB(a + b) expand correctly.
+ */
+#define MB(x) ((x) << 20)
+#define MAX_SIZE_MB 1024
+
+/*
+ * Child-side helper: map nr_pages anonymous pages, write to each of them,
+ * notify the parent by writing one byte to pipefd, then poll once a second
+ * until the parent pid changes or the 10 second timeout expires.
+ *
+ * Returns KSFT_PASS if the wait ended before the timeout ran out, KSFT_FAIL
+ * if mmap() or write() failed or the timeout expired.
+ */
+static int alloc_noexit(unsigned long nr_pages, int pipefd)
+{
+	int ppid = getppid();
+	int timeout = 10; /* 10sec timeout to get killed */
+	size_t len = nr_pages * PAGE_SIZE;
+	unsigned long i;
+	char *buf;
+
+	/* Portable anonymous mappings pass fd == -1, see mmap(2) */
+	buf = (char *)mmap(NULL, len, PROT_READ | PROT_WRITE,
+			   MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (buf == MAP_FAILED) {
+		perror("mmap failed, halting the test");
+		return KSFT_FAIL;
+	}
+
+	/* Write to every page so that it is actually faulted in */
+	for (i = 0; i < nr_pages; i++)
+		*((unsigned long *)(buf + (i * PAGE_SIZE))) = i;
+
+	/* Signal the parent that the child is ready */
+	if (write(pipefd, "", 1) < 0) {
+		perror("write");
+		munmap(buf, len);
+		return KSFT_FAIL;
+	}
+
+	/* Wait to be killed (when reparenting happens) */
+	while (getppid() == ppid && timeout > 0) {
+		sleep(1);
+		timeout--;
+	}
+
+	munmap(buf, len);
+
+	return (timeout > 0) ? KSFT_PASS : KSFT_FAIL;
+}
+
+/*
+ * The process_mrelease calls in this test are expected to fail.
+ *
+ * pidfd must refer to a live process with no pending SIGKILL. On an
+ * unexpected result the test exits: KSFT_SKIP when the syscall is not
+ * implemented (ENOSYS), KSFT_FAIL otherwise.
+ */
+static void run_negative_tests(int pidfd)
+{
+	int err;
+
+	/* Test invalid flags. Expect to fail with EINVAL error code. */
+	if (!syscall(__NR_process_mrelease, pidfd, (unsigned int)-1) ||
+	    errno != EINVAL) {
+		/* perror() may modify errno, so latch it before reporting */
+		err = errno;
+		perror("process_mrelease with wrong flags");
+		exit(err == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+	}
+	/*
+	 * Test reaping while process is alive with no pending SIGKILL.
+	 * Expect to fail with EINVAL error code.
+	 */
+	if (!syscall(__NR_process_mrelease, pidfd, 0) || errno != EINVAL) {
+		err = errno;
+		perror("process_mrelease on a live process");
+		exit(err == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+	}
+}
+
+/*
+ * Entry point of the forked child: drop the unused read end of the pipe,
+ * allocate and touch `size` megabytes while waiting to be killed, then close
+ * the write end and hand back the KSFT_* status from alloc_noexit().
+ */
+static int child_main(int pipefd[], size_t size)
+{
+	const int rd_end = pipefd[0];
+	const int wr_end = pipefd[1];
+	int status;
+
+	close(rd_end);
+	/* Allocate and fault-in memory and wait to be killed */
+	status = alloc_noexit(MB(size) / PAGE_SIZE, wr_end);
+	close(wr_end);
+
+	return status;
+}
+
+/*
+ * Test driver for process_mrelease().
+ *
+ * After a negative test with an invalid pidfd, fork a child that allocates
+ * `size` MB, run the negative tests against the live child, SIGKILL it and
+ * try to reap its memory with process_mrelease(). If the child is already
+ * gone (ESRCH), the child size is doubled and the attempt is repeated while
+ * the size that just failed does not exceed MAX_SIZE_MB.
+ *
+ * Exits with KSFT_PASS, KSFT_FAIL, or KSFT_SKIP when the syscall reports
+ * ENOSYS.
+ */
+int main(void)
+{
+	int pipefd[2], pidfd;
+	bool success, retry = false;
+	size_t size;
+	pid_t pid;
+	char byte;
+	int res;
+	int err;
+
+	/* Test a wrong pidfd */
+	if (!syscall(__NR_process_mrelease, -1, 0) || errno != EBADF) {
+		/* perror() may modify errno, so latch it before reporting */
+		err = errno;
+		perror("process_mrelease with wrong pidfd");
+		exit(err == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+	}
+
+	/* Start the test with 1MB child memory allocation */
+	size = 1;
+retry:
+	/*
+	 * Pipe for the child to signal when it's done allocating
+	 * memory
+	 */
+	if (pipe(pipefd)) {
+		perror("pipe");
+		exit(KSFT_FAIL);
+	}
+	pid = fork();
+	if (pid < 0) {
+		perror("fork");
+		close(pipefd[0]);
+		close(pipefd[1]);
+		exit(KSFT_FAIL);
+	}
+
+	if (pid == 0) {
+		/* Child main routine */
+		res = child_main(pipefd, size);
+		exit(res);
+	}
+
+	/*
+	 * Parent main routine:
+	 * Wait for the child to finish allocations, then kill and reap
+	 */
+	close(pipefd[1]);
+	/* Block until the child is ready */
+	res = read(pipefd[0], &byte, 1);
+	close(pipefd[0]);
+	if (res < 0) {
+		perror("read");
+		if (!kill(pid, SIGKILL))
+			waitpid(pid, NULL, 0);
+		exit(KSFT_FAIL);
+	}
+
+	pidfd = syscall(__NR_pidfd_open, pid, 0);
+	if (pidfd < 0) {
+		perror("pidfd_open");
+		if (!kill(pid, SIGKILL))
+			waitpid(pid, NULL, 0);
+		exit(KSFT_FAIL);
+	}
+
+	/* Run negative tests which require a live child */
+	run_negative_tests(pidfd);
+
+	if (kill(pid, SIGKILL)) {
+		err = errno;
+		perror("kill");
+		exit(err == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+	}
+
+	success = (syscall(__NR_process_mrelease, pidfd, 0) == 0);
+	if (!success) {
+		/* Latch errno: perror() and waitpid() below may change it */
+		err = errno;
+		/*
+		 * ESRCH means we failed to reap because the child exited
+		 * too soon, before we could call process_mrelease. In that
+		 * case double the child's memory, which makes it spend more
+		 * time on cleanup and increases our chances of reaping its
+		 * memory before it exits.
+		 * Retry until we succeed or reach MAX_SIZE_MB.
+		 */
+		if (err == ESRCH) {
+			retry = (size <= MAX_SIZE_MB);
+		} else {
+			perror("process_mrelease");
+			waitpid(pid, NULL, 0);
+			exit(err == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+		}
+	}
+
+	/* Cleanup to prevent zombies */
+	if (waitpid(pid, NULL, 0) < 0) {
+		perror("waitpid");
+		exit(KSFT_FAIL);
+	}
+	close(pidfd);
+
+	if (!success) {
+		if (retry) {
+			size *= 2;
+			goto retry;
+		}
+		printf("All process_mrelease attempts failed!\n");
+		exit(KSFT_FAIL);
+	}
+
+	printf("Success reaping a child with %zuMB of memory allocations\n",
+	       size);
+	return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 2d0ae88665db..291bc1e07842 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -1523,7 +1523,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
/*
* Reset the shadow, assuming that the above mprotect()
* correctly changed PKRU, but to an unknown value since
- * the actual alllocated pkey is unknown.
+ * the actual allocated pkey is unknown.
*/
shadow_pkey_reg = __read_pkey_reg();
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 352ba00cf26b..41fce8bea929 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -9,12 +9,12 @@ mnt=./huge
exitcode=0
#get huge pagesize and freepages from /proc/meminfo
-while read name size unit; do
+while read -r name size unit; do
if [ "$name" = "HugePages_Free:" ]; then
- freepgs=$size
+ freepgs="$size"
fi
if [ "$name" = "Hugepagesize:" ]; then
- hpgsize_KB=$size
+ hpgsize_KB="$size"
fi
done < /proc/meminfo
@@ -30,27 +30,26 @@ needmem_KB=$((half_ufd_size_MB * 2 * 1024))
#set proper nr_hugepages
if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
- nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
+ nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
needpgs=$((needmem_KB / hpgsize_KB))
tries=2
- while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do
- lackpgs=$(( $needpgs - $freepgs ))
+ while [ "$tries" -gt 0 ] && [ "$freepgs" -lt "$needpgs" ]; do
+ lackpgs=$((needpgs - freepgs))
echo 3 > /proc/sys/vm/drop_caches
- echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
- if [ $? -ne 0 ]; then
+ if ! echo $((lackpgs + nr_hugepgs)) > /proc/sys/vm/nr_hugepages; then
echo "Please run this test as root"
exit $ksft_skip
fi
- while read name size unit; do
+ while read -r name size unit; do
if [ "$name" = "HugePages_Free:" ]; then
freepgs=$size
fi
done < /proc/meminfo
tries=$((tries - 1))
done
- if [ $freepgs -lt $needpgs ]; then
+ if [ "$freepgs" -lt "$needpgs" ]; then
printf "Not enough huge pages available (%d < %d)\n" \
- $freepgs $needpgs
+ "$freepgs" "$needpgs"
exit 1
fi
else
@@ -60,458 +59,124 @@ fi
#filter 64bit architectures
ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64"
-if [ -z $ARCH ]; then
- ARCH=`uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/'`
+if [ -z "$ARCH" ]; then
+ ARCH=$(uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/')
fi
VADDR64=0
-echo "$ARCH64STR" | grep $ARCH && VADDR64=1
-
-mkdir $mnt
-mount -t hugetlbfs none $mnt
-
-echo "---------------------"
-echo "running hugepage-mmap"
-echo "---------------------"
-./hugepage-mmap
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+echo "$ARCH64STR" | grep "$ARCH" && VADDR64=1
+
+# Usage: run_test [test binary] [arbitrary test arguments...]
+#
+# Prints a banner with the command line, runs the command and reports
+# [PASS], [SKIP] or [FAIL] based on its exit status. The result is folded
+# into the global $exitcode: any failure sets it to 1, and a skip only
+# sets it to $ksft_skip while no failure has been recorded, so that a
+# later skipped test cannot hide an earlier failure.
+run_test() {
+	local title="running $*"
+	local sep
+	# Separator line of dashes, as long as the title
+	sep=$(echo -n "$title" | tr "[:graph:][:space:]" -)
+	printf "%s\n%s\n%s\n" "$sep" "$title" "$sep"
+
+	"$@"
+	local ret=$?
+	if [ $ret -eq 0 ]; then
+		echo "[PASS]"
+	elif [ $ret -eq $ksft_skip ]; then
+		echo "[SKIP]"
+		if [ "$exitcode" -eq 0 ]; then
+			exitcode=$ksft_skip
+		fi
+	else
+		echo "[FAIL]"
+		exitcode=1
+	fi
+}
-shmmax=`cat /proc/sys/kernel/shmmax`
-shmall=`cat /proc/sys/kernel/shmall`
+mkdir "$mnt"
+mount -t hugetlbfs none "$mnt"
+
+run_test ./hugepage-mmap
+
+shmmax=$(cat /proc/sys/kernel/shmmax)
+shmall=$(cat /proc/sys/kernel/shmall)
echo 268435456 > /proc/sys/kernel/shmmax
echo 4194304 > /proc/sys/kernel/shmall
-echo "--------------------"
-echo "running hugepage-shm"
-echo "--------------------"
-./hugepage-shm
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-echo $shmmax > /proc/sys/kernel/shmmax
-echo $shmall > /proc/sys/kernel/shmall
-
-echo "-------------------"
-echo "running map_hugetlb"
-echo "-------------------"
-./map_hugetlb
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./hugepage-shm
+echo "$shmmax" > /proc/sys/kernel/shmmax
+echo "$shmall" > /proc/sys/kernel/shmall
-echo "-----------------------"
-echo "running hugepage-mremap"
-echo "-----------------------"
-./hugepage-mremap $mnt/huge_mremap
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-rm -f $mnt/huge_mremap
-
-echo "------------------------"
-echo "running hugepage-vmemmap"
-echo "------------------------"
-./hugepage-vmemmap
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./map_hugetlb
-echo "-----------------------"
-echo "running hugetlb-madvise"
-echo "-----------------------"
-./hugetlb-madvise $mnt/madvise-test
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-rm -f $mnt/madvise-test
+run_test ./hugepage-mremap "$mnt"/huge_mremap
+rm -f "$mnt"/huge_mremap
+
+run_test ./hugepage-vmemmap
+
+run_test ./hugetlb-madvise "$mnt"/madvise-test
+rm -f "$mnt"/madvise-test
echo "NOTE: The above hugetlb tests provide minimal coverage. Use"
echo " https://github.com/libhugetlbfs/libhugetlbfs.git for"
echo " hugetlb regression testing."
-echo "---------------------------"
-echo "running map_fixed_noreplace"
-echo "---------------------------"
-./map_fixed_noreplace
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./map_fixed_noreplace
-echo "------------------------------------------------------"
-echo "running: gup_test -u # get_user_pages_fast() benchmark"
-echo "------------------------------------------------------"
-./gup_test -u
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+# get_user_pages_fast() benchmark
+run_test ./gup_test -u
+# pin_user_pages_fast() benchmark
+run_test ./gup_test -a
+# Dump pages 0, 19, and 4096, using pin_user_pages:
+run_test ./gup_test -ct -F 0x1 0 19 0x1000
-echo "------------------------------------------------------"
-echo "running: gup_test -a # pin_user_pages_fast() benchmark"
-echo "------------------------------------------------------"
-./gup_test -a
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "------------------------------------------------------------"
-echo "# Dump pages 0, 19, and 4096, using pin_user_pages:"
-echo "running: gup_test -ct -F 0x1 0 19 0x1000 # dump_page() test"
-echo "------------------------------------------------------------"
-./gup_test -ct -F 0x1 0 19 0x1000
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "-------------------"
-echo "running userfaultfd"
-echo "-------------------"
-./userfaultfd anon 20 16
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "---------------------------"
-echo "running userfaultfd_hugetlb"
-echo "---------------------------"
+run_test ./userfaultfd anon 20 16
# Test requires source and destination huge pages. Size of source
# (half_ufd_size_MB) is passed as argument to test.
-./userfaultfd hugetlb $half_ufd_size_MB 32
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "-------------------------"
-echo "running userfaultfd_shmem"
-echo "-------------------------"
-./userfaultfd shmem 20 16
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./userfaultfd hugetlb "$half_ufd_size_MB" 32
+run_test ./userfaultfd shmem 20 16
#cleanup
-umount $mnt
-rm -rf $mnt
-echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
-
-echo "-----------------------"
-echo "running compaction_test"
-echo "-----------------------"
-./compaction_test
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "----------------------"
-echo "running on-fault-limit"
-echo "----------------------"
-sudo -u nobody ./on-fault-limit
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "--------------------"
-echo "running map_populate"
-echo "--------------------"
-./map_populate
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+umount "$mnt"
+rm -rf "$mnt"
+echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
-echo "-------------------------"
-echo "running mlock-random-test"
-echo "-------------------------"
-./mlock-random-test
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./compaction_test
-echo "--------------------"
-echo "running mlock2-tests"
-echo "--------------------"
-./mlock2-tests
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test sudo -u nobody ./on-fault-limit
-echo "-------------------"
-echo "running mremap_test"
-echo "-------------------"
-./mremap_test
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
-
-echo "-----------------"
-echo "running thuge-gen"
-echo "-----------------"
-./thuge-gen
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-if [ $VADDR64 -ne 0 ]; then
-echo "-----------------------------"
-echo "running virtual_address_range"
-echo "-----------------------------"
-./virtual_address_range
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./map_populate
-echo "-----------------------------"
-echo "running virtual address 128TB switch test"
-echo "-----------------------------"
-./va_128TBswitch
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-fi # VADDR64
+run_test ./mlock-random-test
-echo "------------------------------------"
-echo "running vmalloc stability smoke test"
-echo "------------------------------------"
-./test_vmalloc.sh smoke
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
+run_test ./mlock2-tests
-echo "------------------------------------"
-echo "running MREMAP_DONTUNMAP smoke test"
-echo "------------------------------------"
-./mremap_dontunmap
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
+run_test ./mrelease_test
-echo "running HMM smoke test"
-echo "------------------------------------"
-./test_hmm.sh smoke
-ret_val=$?
+run_test ./mremap_test
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
+run_test ./thuge-gen
-echo "--------------------------------------------------------"
-echo "running MADV_POPULATE_READ and MADV_POPULATE_WRITE tests"
-echo "--------------------------------------------------------"
-./madv_populate
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
+if [ $VADDR64 -ne 0 ]; then
+ run_test ./virtual_address_range
-echo "running memfd_secret test"
-echo "------------------------------------"
-./memfd_secret
-ret_val=$?
+ # virtual address 128TB switch test
+ run_test ./va_128TBswitch
+fi # VADDR64
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
+# vmalloc stability smoke test
+run_test ./test_vmalloc.sh smoke
-echo "-------------------------------------------------------"
-echo "running KSM MADV_MERGEABLE test with 10 identical pages"
-echo "-------------------------------------------------------"
-./ksm_tests -M -p 10
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
+run_test ./mremap_dontunmap
-echo "------------------------"
-echo "running KSM unmerge test"
-echo "------------------------"
-./ksm_tests -U
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
+run_test ./test_hmm.sh smoke
-echo "----------------------------------------------------------"
-echo "running KSM test with 10 zero pages and use_zero_pages = 0"
-echo "----------------------------------------------------------"
-./ksm_tests -Z -p 10 -z 0
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
-
-echo "----------------------------------------------------------"
-echo "running KSM test with 10 zero pages and use_zero_pages = 1"
-echo "----------------------------------------------------------"
-./ksm_tests -Z -p 10 -z 1
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
-
-echo "-------------------------------------------------------------"
-echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 1"
-echo "-------------------------------------------------------------"
-./ksm_tests -N -m 1
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
+# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
+run_test ./madv_populate
-echo "-------------------------------------------------------------"
-echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 0"
-echo "-------------------------------------------------------------"
-./ksm_tests -N -m 0
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
+run_test ./memfd_secret
-exit $exitcode
+# KSM MADV_MERGEABLE test with 10 identical pages
+run_test ./ksm_tests -M -p 10
+# KSM unmerge test
+run_test ./ksm_tests -U
+# KSM test with 10 zero pages and use_zero_pages = 0
+run_test ./ksm_tests -Z -p 10 -z 0
+# KSM test with 10 zero pages and use_zero_pages = 1
+run_test ./ksm_tests -Z -p 10 -z 1
+# KSM test with 2 NUMA nodes and merge_across_nodes = 1
+run_test ./ksm_tests -N -m 1
+# KSM test with 2 NUMA nodes and merge_across_nodes = 0
+run_test ./ksm_tests -N -m 0
exit $exitcode
diff --git a/tools/testing/selftests/vm/settings b/tools/testing/selftests/vm/settings
new file mode 100644
index 000000000000..9abfc60e9e6f
--- /dev/null
+++ b/tools/testing/selftests/vm/settings
@@ -0,0 +1 @@
+timeout=45
diff --git a/tools/testing/selftests/vm/soft-dirty.c b/tools/testing/selftests/vm/soft-dirty.c
new file mode 100644
index 000000000000..08ab62a4a9d0
--- /dev/null
+++ b/tools/testing/selftests/vm/soft-dirty.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <sys/mman.h>
+#include "../kselftest.h"
+#include "vm_util.h"
+
+#define PAGEMAP_FILE_PATH "/proc/self/pagemap"
+#define TEST_ITERATIONS 10000
+
+/*
+ * Soft-dirty check on one page-aligned heap page: for TEST_ITERATIONS
+ * rounds, verify that the soft-dirty bit reads 0 after clear_softdirty()
+ * and 1 after a write to the page. Reports a single test result, which
+ * passes only if every round completed.
+ */
+static void test_simple(int pagemap_fd, int pagesize)
+{
+	int i;
+	char *map;
+
+	map = aligned_alloc(pagesize, pagesize);
+	if (!map)
+		ksft_exit_fail_msg("aligned_alloc failed\n");
+
+	clear_softdirty();
+
+	for (i = 0 ; i < TEST_ITERATIONS; i++) {
+		if (pagemap_is_softdirty(pagemap_fd, map) == 1) {
+			ksft_print_msg("dirty bit was 1, but should be 0 (i=%d)\n", i);
+			break;
+		}
+
+		clear_softdirty();
+		// Write something to the page to get the dirty bit enabled on the page
+		map[0]++;
+
+		if (pagemap_is_softdirty(pagemap_fd, map) == 0) {
+			ksft_print_msg("dirty bit was 0, but should be 1 (i=%d)\n", i);
+			break;
+		}
+
+		clear_softdirty();
+	}
+	free(map);
+
+	// An early break leaves i < TEST_ITERATIONS and fails the test
+	ksft_test_result(i == TEST_ITERATIONS, "Test %s\n", __func__);
+}
+
+/*
+ * Check that a freshly mmap()ed anonymous page reads as soft-dirty, also
+ * when the mapping lands on an address that was unmapped just before (after
+ * the soft-dirty state had been cleared). Reports two test results; the
+ * second is skipped if the second mmap() returns a different address.
+ */
+static void test_vma_reuse(int pagemap_fd, int pagesize)
+{
+	char *map, *map2;
+
+	map = mmap(NULL, pagesize, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANON), -1, 0);
+	if (map == MAP_FAILED)
+		ksft_exit_fail_msg("mmap failed\n");
+
+	// The kernel always marks new regions as soft dirty
+	ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+			 "Test %s dirty bit of allocated page\n", __func__);
+
+	clear_softdirty();
+	munmap(map, pagesize);
+
+	map2 = mmap(NULL, pagesize, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANON), -1, 0);
+	if (map2 == MAP_FAILED)
+		ksft_exit_fail_msg("mmap failed\n");
+
+	// Dirty bit is set for new regions even if they are reused
+	if (map == map2)
+		ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1,
+				 "Test %s dirty bit of reused address page\n", __func__);
+	else
+		ksft_test_result_skip("Test %s dirty bit of reused address page\n", __func__);
+
+	munmap(map2, pagesize);
+}
+
+/*
+ * Soft-dirty check on a transparent huge page: allocate a PMD-sized,
+ * PMD-aligned buffer, request MADV_HUGEPAGE and touch every byte. If
+ * check_huge() reports a huge page, run the same clear/write/check rounds as
+ * the simple test on its first byte. Reports two test results; both are
+ * skipped when no huge page was obtained.
+ */
+static void test_hugepage(int pagemap_fd, int pagesize)
+{
+	char *map;
+	int i, ret;
+	size_t off;
+	size_t hpage_len = read_pmd_pagesize();
+
+	map = memalign(hpage_len, hpage_len);
+	if (!map)
+		ksft_exit_fail_msg("memalign failed\n");
+
+	ret = madvise(map, hpage_len, MADV_HUGEPAGE);
+	if (ret)
+		ksft_exit_fail_msg("madvise failed %d\n", ret);
+
+	// Index with size_t so the bound check is not a signed/unsigned compare
+	for (off = 0; off < hpage_len; off++)
+		map[off] = (char)off;
+
+	if (check_huge(map)) {
+		ksft_test_result_pass("Test %s huge page allocation\n", __func__);
+
+		clear_softdirty();
+		for (i = 0 ; i < TEST_ITERATIONS ; i++) {
+			if (pagemap_is_softdirty(pagemap_fd, map) == 1) {
+				ksft_print_msg("dirty bit was 1, but should be 0 (i=%d)\n", i);
+				break;
+			}
+
+			clear_softdirty();
+			// Write something to the page to get the dirty bit enabled on the page
+			map[0]++;
+
+			if (pagemap_is_softdirty(pagemap_fd, map) == 0) {
+				ksft_print_msg("dirty bit was 0, but should be 1 (i=%d)\n", i);
+				break;
+			}
+			clear_softdirty();
+		}
+
+		ksft_test_result(i == TEST_ITERATIONS, "Test %s huge page dirty bit\n", __func__);
+	} else {
+		// hugepage allocation failed. skip these tests
+		ksft_test_result_skip("Test %s huge page allocation\n", __func__);
+		ksft_test_result_skip("Test %s huge page dirty bit\n", __func__);
+	}
+	free(map);
+}
+
+/*
+ * Runs the three soft-dirty test groups against /proc/self/pagemap. The plan
+ * of 5 matches the results they report: one from test_simple() and two each
+ * from test_vma_reuse() and test_hugepage().
+ */
+int main(int argc, char **argv)
+{
+	int fd, psize;
+
+	ksft_print_header();
+	ksft_set_plan(5);
+
+	fd = open(PAGEMAP_FILE_PATH, O_RDONLY);
+	if (fd < 0)
+		ksft_exit_fail_msg("Failed to open %s\n", PAGEMAP_FILE_PATH);
+
+	psize = getpagesize();
+
+	test_simple(fd, psize);
+	test_vma_reuse(fd, psize);
+	test_hugepage(fd, psize);
+
+	close(fd);
+
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c
index 52497b7b9f1d..6aa2b8253aed 100644
--- a/tools/testing/selftests/vm/split_huge_page_test.c
+++ b/tools/testing/selftests/vm/split_huge_page_test.c
@@ -16,14 +16,13 @@
#include <sys/mount.h>
#include <malloc.h>
#include <stdbool.h>
+#include "vm_util.h"
uint64_t pagesize;
unsigned int pageshift;
uint64_t pmd_pagesize;
-#define PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
-#define SMAP_PATH "/proc/self/smaps"
#define INPUT_MAX 80
#define PID_FMT "%d,0x%lx,0x%lx"
@@ -51,30 +50,6 @@ int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
return 0;
}
-
-static uint64_t read_pmd_pagesize(void)
-{
- int fd;
- char buf[20];
- ssize_t num_read;
-
- fd = open(PMD_SIZE_PATH, O_RDONLY);
- if (fd == -1) {
- perror("Open hpage_pmd_size failed");
- exit(EXIT_FAILURE);
- }
- num_read = read(fd, buf, 19);
- if (num_read < 1) {
- close(fd);
- perror("Read hpage_pmd_size failed");
- exit(EXIT_FAILURE);
- }
- buf[num_read] = '\0';
- close(fd);
-
- return strtoul(buf, NULL, 10);
-}
-
static int write_file(const char *path, const char *buf, size_t buflen)
{
int fd;
@@ -113,58 +88,6 @@ static void write_debugfs(const char *fmt, ...)
}
}
-#define MAX_LINE_LENGTH 500
-
-static bool check_for_pattern(FILE *fp, const char *pattern, char *buf)
-{
- while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
- if (!strncmp(buf, pattern, strlen(pattern)))
- return true;
- }
- return false;
-}
-
-static uint64_t check_huge(void *addr)
-{
- uint64_t thp = 0;
- int ret;
- FILE *fp;
- char buffer[MAX_LINE_LENGTH];
- char addr_pattern[MAX_LINE_LENGTH];
-
- ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
- (unsigned long) addr);
- if (ret >= MAX_LINE_LENGTH) {
- printf("%s: Pattern is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
-
-
- fp = fopen(SMAP_PATH, "r");
- if (!fp) {
- printf("%s: Failed to open file %s\n", __func__, SMAP_PATH);
- exit(EXIT_FAILURE);
- }
- if (!check_for_pattern(fp, addr_pattern, buffer))
- goto err_out;
-
- /*
- * Fetch the AnonHugePages: in the same block and check the number of
- * hugepages.
- */
- if (!check_for_pattern(fp, "AnonHugePages:", buffer))
- goto err_out;
-
- if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) {
- printf("Reading smap error\n");
- exit(EXIT_FAILURE);
- }
-
-err_out:
- fclose(fp);
- return thp;
-}
-
void split_pmd_thp(void)
{
char *one_page;
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 92a4516f8f0d..0bdfc1955229 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -82,7 +82,7 @@ static int test_type;
static volatile bool test_uffdio_copy_eexist = true;
static volatile bool test_uffdio_zeropage_eexist = true;
/* Whether to test uffd write-protection */
-static bool test_uffdio_wp = false;
+static bool test_uffdio_wp = true;
/* Whether to test uffd minor faults */
static bool test_uffdio_minor = false;
@@ -1422,7 +1422,6 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize)
static int userfaultfd_stress(void)
{
void *area;
- char *tmp_area;
unsigned long nr;
struct uffdio_register uffdio_register;
struct uffd_stats uffd_stats[nr_cpus];
@@ -1533,13 +1532,9 @@ static int userfaultfd_stress(void)
count_verify[nr], nr);
/* prepare next bounce */
- tmp_area = area_src;
- area_src = area_dst;
- area_dst = tmp_area;
+ swap(area_src, area_dst);
- tmp_area = area_src_alias;
- area_src_alias = area_dst_alias;
- area_dst_alias = tmp_area;
+ swap(area_src_alias, area_dst_alias);
uffd_stats_report(uffd_stats, nr_cpus);
}
@@ -1594,8 +1589,6 @@ static void set_test_type(const char *type)
if (!strcmp(type, "anon")) {
test_type = TEST_ANON;
uffd_test_ops = &anon_uffd_test_ops;
- /* Only enable write-protect test for anonymous test */
- test_uffdio_wp = true;
} else if (!strcmp(type, "hugetlb")) {
test_type = TEST_HUGETLB;
uffd_test_ops = &hugetlb_uffd_test_ops;
diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c
new file mode 100644
index 000000000000..b58ab11a7a30
--- /dev/null
+++ b/tools/testing/selftests/vm/vm_util.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <fcntl.h>
+#include "../kselftest.h"
+#include "vm_util.h"
+
+#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
+#define SMAP_FILE_PATH "/proc/self/smaps"
+#define MAX_LINE_LENGTH 500
+
+/*
+ * Read the 64-bit pagemap entry of the page containing @start from the
+ * pagemap file descriptor @fd. The file is indexed by virtual page number,
+ * one entry per page. A short or failed read is reported through
+ * ksft_exit_fail_msg().
+ */
+uint64_t pagemap_get_entry(int fd, char *start)
+{
+	uint64_t entry;
+	const unsigned long page_idx = (unsigned long)start / getpagesize();
+
+	if (pread(fd, &entry, sizeof(entry), page_idx * sizeof(entry)) !=
+	    sizeof(entry))
+		ksft_exit_fail_msg("reading pagemap failed\n");
+
+	return entry;
+}
+
+/* Tell whether the pagemap entry of the page at @start has bit 55 set. */
+bool pagemap_is_softdirty(int fd, char *start)
+{
+	// Bit 55 of a pagemap entry is the soft-dirty flag
+	return (pagemap_get_entry(fd, start) >> 55) & 1;
+}
+
+/*
+ * Write "4" to /proc/self/clear_refs. Failure to open the file or to write
+ * the whole string is reported through ksft_exit_fail_msg().
+ */
+void clear_softdirty(void)
+{
+	static const char cmd[] = "4";
+	const size_t cmd_len = sizeof(cmd) - 1;
+	ssize_t written;
+	int fd;
+
+	fd = open("/proc/self/clear_refs", O_WRONLY);
+	if (fd < 0)
+		ksft_exit_fail_msg("opening clear_refs failed\n");
+
+	written = write(fd, cmd, cmd_len);
+	close(fd);
+	if (written != (ssize_t)cmd_len)
+		ksft_exit_fail_msg("writing clear_refs failed\n");
+}
+
+/*
+ * Read lines from @fp into @buf (at most MAX_LINE_LENGTH - 1 characters per
+ * read) until one starts with @pattern. Returns true with the matching line
+ * left in @buf, or false once fgets() reports end of file or an error.
+ */
+static bool check_for_pattern(FILE *fp, const char *pattern, char *buf)
+{
+	const size_t prefix_len = strlen(pattern);
+
+	for (;;) {
+		if (!fgets(buf, MAX_LINE_LENGTH, fp))
+			return false;
+		if (strncmp(buf, pattern, prefix_len) == 0)
+			return true;
+	}
+}
+
+/*
+ * Return the decimal number stored in PMD_SIZE_FILE_PATH. Failure to open or
+ * read the file is reported through ksft_exit_fail_msg().
+ */
+uint64_t read_pmd_pagesize(void)
+{
+	char text[20];
+	ssize_t len;
+	int fd = open(PMD_SIZE_FILE_PATH, O_RDONLY);
+
+	if (fd == -1)
+		ksft_exit_fail_msg("Open hpage_pmd_size failed\n");
+
+	/* Leave room for the terminating NUL */
+	len = read(fd, text, sizeof(text) - 1);
+	if (len < 1) {
+		close(fd);
+		ksft_exit_fail_msg("Read hpage_pmd_size failed\n");
+	}
+	close(fd);
+	text[len] = '\0';
+
+	return strtoul(text, NULL, 10);
+}
+
+/*
+ * Look up the mapping that starts at @addr in /proc/self/smaps and return
+ * the value of the first "AnonHugePages:" line that follows its header line
+ * (the number printed before "kB").
+ *
+ * Returns 0 when the mapping or the AnonHugePages line is not found. An
+ * unreadable smaps file or an unparsable line is reported through
+ * ksft_exit_fail_msg().
+ */
+uint64_t check_huge(void *addr)
+{
+	/* unsigned long matches the %lu conversion used below */
+	unsigned long thp = 0;
+	int ret;
+	FILE *fp;
+	char buffer[MAX_LINE_LENGTH];
+	char addr_pattern[MAX_LINE_LENGTH];
+
+	/* A mapping's header line starts with "<start address>-" */
+	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
+		       (unsigned long) addr);
+	if (ret >= MAX_LINE_LENGTH)
+		ksft_exit_fail_msg("%s: Pattern is too long\n", __func__);
+
+	fp = fopen(SMAP_FILE_PATH, "r");
+	if (!fp)
+		ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH);
+
+	if (!check_for_pattern(fp, addr_pattern, buffer))
+		goto err_out;
+
+	/*
+	 * Fetch the AnonHugePages: in the same block and check the number of
+	 * hugepages. The search continues from the current file position.
+	 */
+	if (!check_for_pattern(fp, "AnonHugePages:", buffer))
+		goto err_out;
+
+	if (sscanf(buffer, "AnonHugePages:%10lu kB", &thp) != 1)
+		ksft_exit_fail_msg("Reading smap error\n");
+
+err_out:
+	fclose(fp);
+	return thp;
+}
diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h
new file mode 100644
index 000000000000..2e512bd57ae1
--- /dev/null
+++ b/tools/testing/selftests/vm/vm_util.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdint.h>
+#include <stdbool.h>
+
+/* Read the 64-bit pagemap entry for the page containing start from fd. */
+uint64_t pagemap_get_entry(int fd, char *start);
+/* True if bit 55 (soft-dirty) is set in the pagemap entry for start. */
+bool pagemap_is_softdirty(int fd, char *start);
+/* Write "4" to /proc/self/clear_refs. */
+void clear_softdirty(void);
+/* Value read from /sys/kernel/mm/transparent_hugepage/hpage_pmd_size. */
+uint64_t read_pmd_pagesize(void);
+/*
+ * AnonHugePages value from /proc/self/smaps for the mapping starting at
+ * addr; 0 if the mapping or the field is not found.
+ */
+uint64_t check_huge(void *addr);
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index b1ed76d9a979..381dcc00cb62 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -80,9 +80,10 @@
#define KPF_SOFTDIRTY 40
#define KPF_ARCH_2 41
-/* [48-] take some arbitrary free slots for expanding overloaded flags
+/* [47-] take some arbitrary free slots for expanding overloaded flags
* not part of kernel API
*/
+#define KPF_ANON_EXCLUSIVE 47
#define KPF_READAHEAD 48
#define KPF_SLOB_FREE 49
#define KPF_SLUB_FROZEN 50
@@ -138,6 +139,7 @@ static const char * const page_flag_names[] = {
[KPF_SOFTDIRTY] = "f:softdirty",
[KPF_ARCH_2] = "H:arch_2",
+ [KPF_ANON_EXCLUSIVE] = "d:anon_exclusive",
[KPF_READAHEAD] = "I:readahead",
[KPF_SLOB_FREE] = "P:slob_free",
[KPF_SLUB_FROZEN] = "A:slub_frozen",
@@ -472,6 +474,10 @@ static int bit_mask_ok(uint64_t flags)
static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme)
{
+ /* Anonymous pages overload PG_mappedtodisk */
+ if ((flags & BIT(ANON)) && (flags & BIT(MAPPEDTODISK)))
+ flags ^= BIT(MAPPEDTODISK) | BIT(ANON_EXCLUSIVE);
+
/* SLOB/SLUB overload several page flags */
if (flags & BIT(SLAB)) {
if (flags & BIT(PRIVATE))
diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c
index 7d98e76c2291..c149427eb1c9 100644
--- a/tools/vm/page_owner_sort.c
+++ b/tools/vm/page_owner_sort.c
@@ -39,6 +39,7 @@ struct block_list {
int page_num;
pid_t pid;
pid_t tgid;
+ int allocator;
};
enum FILTER_BIT {
FILTER_UNRELEASE = 1<<1,
@@ -51,14 +52,39 @@ enum CULL_BIT {
CULL_PID = 1<<2,
CULL_TGID = 1<<3,
CULL_COMM = 1<<4,
- CULL_STACKTRACE = 1<<5
+ CULL_STACKTRACE = 1<<5,
+ CULL_ALLOCATOR = 1<<6
+};
+enum ALLOCATOR_BIT { /* bit flags that get_allocator() ORs into its result */
+ ALLOCATOR_CMA = 1<<1, /* migrate_info contains "CMA" */
+ ALLOCATOR_SLAB = 1<<2, /* migrate_info contains "slab" */
+ ALLOCATOR_VMALLOC = 1<<3, /* "alloc_pages" seen just ahead of "__vmalloc_node_range" */
+ ALLOCATOR_OTHERS = 1<<4 /* fallback when none of the bits above was set */
+};
+enum ARG_TYPE { /* key kinds; get_arg_type() maps --cull/--sort key names onto these */
+ ARG_TXT, ARG_COMM, ARG_STACKTRACE, ARG_ALLOC_TS, ARG_FREE_TS,
+ ARG_CULL_TIME, ARG_PAGE_NUM, ARG_PID, ARG_TGID, ARG_UNKNOWN, ARG_FREE, /* NOTE(review): get_arg_type() never returns ARG_CULL_TIME or ARG_PAGE_NUM - confirm they are needed */
+ ARG_ALLOCATOR
+};
+enum SORT_ORDER { /* multiplier applied to a comparator result in compare_sort_condition() */
+ SORT_ASC = 1, /* keep the comparator's own ordering */
+ SORT_DESC = -1, /* invert the comparator's ordering */
};
struct filter_condition {
- pid_t tgid;
- pid_t pid;
- char comm[TASK_COMM_LEN];
+ pid_t *pids;
+ pid_t *tgids;
+ char **comms;
+ int pids_size;
+ int tgids_size;
+ int comms_size;
+};
+struct sort_condition { /* ordered list of sort keys consumed by compare_sort_condition() */
+ int (**cmps)(const void *, const void *); /* array of `size` qsort-style comparators, tried in index order */
+ int *signs; /* per-key SORT_ASC / SORT_DESC multiplier, parallel to cmps */
+ int size; /* number of valid entries in cmps and signs */
};
static struct filter_condition fc;
+static struct sort_condition sc;
static regex_t order_pattern;
static regex_t pid_pattern;
static regex_t tgid_pattern;
@@ -70,16 +96,22 @@ static int list_size;
static int max_size;
static int cull;
static int filter;
+static bool debug_on;
-int read_block(char *buf, int buf_size, FILE *fin)
+static void set_single_cmp(int (*cmp)(const void *, const void *), int sign);
+
+int read_block(char *buf, char *ext_buf, int buf_size, FILE *fin)
{
char *curr = buf, *const buf_end = buf + buf_size;
while (buf_end - curr > 1 && fgets(curr, buf_end - curr, fin)) {
- if (*curr == '\n') /* empty line */
+ if (*curr == '\n') { /* empty line */
return curr - buf;
- if (!strncmp(curr, "PFN", 3))
+ }
+ if (!strncmp(curr, "PFN", 3)) {
+ strcpy(ext_buf, curr);
continue;
+ }
curr += strlen(curr);
}
@@ -104,14 +136,14 @@ static int compare_num(const void *p1, const void *p2)
{
const struct block_list *l1 = p1, *l2 = p2;
- return l2->num - l1->num;
+ return l1->num - l2->num;
}
static int compare_page_num(const void *p1, const void *p2)
{
const struct block_list *l1 = p1, *l2 = p2;
- return l2->page_num - l1->page_num;
+ return l1->page_num - l2->page_num;
}
static int compare_pid(const void *p1, const void *p2)
@@ -128,6 +160,13 @@ static int compare_tgid(const void *p1, const void *p2)
return l1->tgid - l2->tgid;
}
+/*
+ * qsort-style comparator that orders block_list entries by their allocator
+ * bitmask. The result is negative, zero or positive when p1's mask is
+ * smaller than, equal to or larger than p2's.
+ */
+static int compare_allocator(const void *p1, const void *p2)
+{
+	const struct block_list *lhs = p1;
+	const struct block_list *rhs = p2;
+	int lhs_allocator = lhs->allocator;
+	int rhs_allocator = rhs->allocator;
+
+	return lhs_allocator - rhs_allocator;
+}
+
static int compare_comm(const void *p1, const void *p2)
{
const struct block_list *l1 = p1, *l2 = p2;
@@ -149,7 +188,6 @@ static int compare_free_ts(const void *p1, const void *p2)
return l1->free_ts_nsec < l2->free_ts_nsec ? -1 : 1;
}
-
static int compare_release(const void *p1, const void *p2)
{
const struct block_list *l1 = p1, *l2 = p2;
@@ -161,7 +199,6 @@ static int compare_release(const void *p1, const void *p2)
return l1->free_ts_nsec ? 1 : -1;
}
-
static int compare_cull_condition(const void *p1, const void *p2)
{
if (cull == 0)
@@ -176,9 +213,21 @@ static int compare_cull_condition(const void *p1, const void *p2)
return compare_comm(p1, p2);
if ((cull & CULL_UNRELEASE) && compare_release(p1, p2))
return compare_release(p1, p2);
+ if ((cull & CULL_ALLOCATOR) && compare_allocator(p1, p2))
+ return compare_allocator(p1, p2);
return 0;
}
+/*
+ * Composite qsort comparator: try the keys stored in sc in index order and
+ * return the first non-zero comparator result multiplied by that key's
+ * sign. Returns 0 when every key compares equal or sc holds no keys.
+ */
+static int compare_sort_condition(const void *p1, const void *p2)
+{
+	int key;
+
+	for (key = 0; key < sc.size; key++) {
+		int result = sc.signs[key] * sc.cmps[key](p1, p2);
+
+		if (result != 0)
+			return result;
+	}
+	return 0;
+}
+
static int search_pattern(regex_t *pattern, char *pattern_str, char *buf)
{
int err, val_len;
@@ -186,7 +235,8 @@ static int search_pattern(regex_t *pattern, char *pattern_str, char *buf)
err = regexec(pattern, buf, 2, pmatch, REG_NOTBOL);
if (err != 0 || pmatch[1].rm_so == -1) {
- printf("no matching pattern in %s\n", buf);
+ if (debug_on)
+ fprintf(stderr, "no matching pattern in %s\n", buf);
return -1;
}
val_len = pmatch[1].rm_eo - pmatch[1].rm_so;
@@ -202,7 +252,7 @@ static void check_regcomp(regex_t *pattern, const char *regex)
err = regcomp(pattern, regex, REG_EXTENDED | REG_NEWLINE);
if (err != 0 || pattern->re_nsub != 1) {
- printf("Invalid pattern %s code %d\n", regex, err);
+ fprintf(stderr, "Invalid pattern %s code %d\n", regex, err);
exit(1);
}
}
@@ -251,7 +301,8 @@ static int get_page_num(char *buf)
errno = 0;
order_val = strtol(order_str, &endptr, 10);
if (order_val > 64 || errno != 0 || endptr == order_str || *endptr != '\0') {
- printf("wrong order in follow buf:\n%s\n", buf);
+ if (debug_on)
+ fprintf(stderr, "wrong order in follow buf:\n%s\n", buf);
return 0;
}
@@ -268,7 +319,8 @@ static pid_t get_pid(char *buf)
errno = 0;
pid = strtol(pid_str, &endptr, 10);
if (errno != 0 || endptr == pid_str || *endptr != '\0') {
- printf("wrong/invalid pid in follow buf:\n%s\n", buf);
+ if (debug_on)
+ fprintf(stderr, "wrong/invalid pid in follow buf:\n%s\n", buf);
return -1;
}
@@ -286,7 +338,8 @@ static pid_t get_tgid(char *buf)
errno = 0;
tgid = strtol(tgid_str, &endptr, 10);
if (errno != 0 || endptr == tgid_str || *endptr != '\0') {
- printf("wrong/invalid tgid in follow buf:\n%s\n", buf);
+ if (debug_on)
+ fprintf(stderr, "wrong/invalid tgid in follow buf:\n%s\n", buf);
return -1;
}
@@ -304,7 +357,8 @@ static __u64 get_ts_nsec(char *buf)
errno = 0;
ts_nsec = strtoull(ts_nsec_str, &endptr, 10);
if (errno != 0 || endptr == ts_nsec_str || *endptr != '\0') {
- printf("wrong ts_nsec in follow buf:\n%s\n", buf);
+ if (debug_on)
+ fprintf(stderr, "wrong ts_nsec in follow buf:\n%s\n", buf);
return -1;
}
@@ -321,7 +375,8 @@ static __u64 get_free_ts_nsec(char *buf)
errno = 0;
free_ts_nsec = strtoull(free_ts_nsec_str, &endptr, 10);
if (errno != 0 || endptr == free_ts_nsec_str || *endptr != '\0') {
- printf("wrong free_ts_nsec in follow buf:\n%s\n", buf);
+ if (debug_on)
+ fprintf(stderr, "wrong free_ts_nsec in follow buf:\n%s\n", buf);
return -1;
}
@@ -337,33 +392,104 @@ static char *get_comm(char *buf)
search_pattern(&comm_pattern, comm_str, buf);
errno = 0;
if (errno != 0) {
- printf("wrong comm in follow buf:\n%s\n", buf);
+ if (debug_on)
+ fprintf(stderr, "wrong comm in follow buf:\n%s\n", buf);
return NULL;
}
return comm_str;
}
+/*
+ * Map one key name, as written after --cull or --sort, to its ARG_TYPE
+ * value. Every key has a long and a short spelling; any other string
+ * yields ARG_UNKNOWN.
+ */
+static int get_arg_type(const char *arg)
+{
+	static const struct {
+		const char *long_name;
+		const char *short_name;
+		int type;
+	} keys[] = {
+		{ "pid",        "p",    ARG_PID },
+		{ "tgid",       "tg",   ARG_TGID },
+		{ "name",       "n",    ARG_COMM },
+		{ "stacktrace", "st",   ARG_STACKTRACE },
+		{ "free",       "f",    ARG_FREE },
+		{ "txt",        "T",    ARG_TXT },
+		{ "free_ts",    "ft",   ARG_FREE_TS },
+		{ "alloc_ts",   "at",   ARG_ALLOC_TS },
+		{ "allocator",  "ator", ARG_ALLOCATOR },
+	};
+	size_t k;
+
+	for (k = 0; k < sizeof(keys) / sizeof(keys[0]); k++) {
+		if (!strcmp(arg, keys[k].long_name) ||
+		    !strcmp(arg, keys[k].short_name))
+			return keys[k].type;
+	}
+	return ARG_UNKNOWN;
+}
+
+/*
+ * Classify which allocator produced a page_owner record.
+ *
+ * @buf:          text of one record as collected by read_block()
+ * @migrate_info: the line starting with "PFN" that read_block() copied aside
+ *
+ * Returns an OR of ALLOCATOR_* bits:
+ *   ALLOCATOR_CMA     - migrate_info contains "CMA"
+ *   ALLOCATOR_SLAB    - migrate_info contains "slab"
+ *   ALLOCATOR_VMALLOC - "alloc_pages" occurs between the start of the line
+ *                       preceding "__vmalloc_node_range" and that symbol
+ *   ALLOCATOR_OTHERS  - none of the above matched
+ */
+static int get_allocator(const char *buf, const char *migrate_info)
+{
+	const char *vmalloc_sym, *line, *prev_line, *hit;
+	int allocator = 0;
+
+	if (strstr(migrate_info, "CMA"))
+		allocator |= ALLOCATOR_CMA;
+	if (strstr(migrate_info, "slab"))
+		allocator |= ALLOCATOR_SLAB;
+
+	vmalloc_sym = strstr(buf, "__vmalloc_node_range");
+	if (vmalloc_sym) {
+		/* Find the start of the line holding the symbol, never before buf. */
+		line = vmalloc_sym;
+		while (line > buf && line[-1] != '\n')
+			line--;
+		/*
+		 * Only step back further when a preceding line exists. An
+		 * unbounded backward scan would read in front of buf whenever
+		 * the symbol sits on the first or second line of the record.
+		 */
+		if (line > buf) {
+			prev_line = line - 1;
+			while (prev_line > buf && prev_line[-1] != '\n')
+				prev_line--;
+			hit = strstr(prev_line, "alloc_pages");
+			if (hit && hit < vmalloc_sym)
+				allocator |= ALLOCATOR_VMALLOC;
+		}
+	}
+	if (allocator == 0)
+		allocator = ALLOCATOR_OTHERS;
+	return allocator;
+}
+
+/* Tell whether @num occurs among the first @list_size entries of @list. */
+static bool match_num_list(int num, int *list, int list_size)
+{
+	int idx = 0;
+
+	while (idx < list_size) {
+		if (list[idx] == num)
+			return true;
+		idx++;
+	}
+	return false;
+}
+
+/*
+ * Tell whether @str is exactly equal (strcmp) to one of the first
+ * @list_size strings in @list.
+ */
+static bool match_str_list(const char *str, char **list, int list_size)
+{
+	bool found = false;
+	int idx;
+
+	for (idx = 0; idx < list_size && !found; idx++)
+		found = (strcmp(list[idx], str) == 0);
+	return found;
+}
+
static bool is_need(char *buf)
{
if ((filter & FILTER_UNRELEASE) && get_free_ts_nsec(buf) != 0)
return false;
- if ((filter & FILTER_PID) && get_pid(buf) != fc.pid)
+ if ((filter & FILTER_PID) && !match_num_list(get_pid(buf), fc.pids, fc.pids_size))
return false;
- if ((filter & FILTER_TGID) && get_tgid(buf) != fc.tgid)
+ if ((filter & FILTER_TGID) &&
+ !match_num_list(get_tgid(buf), fc.tgids, fc.tgids_size))
return false;
char *comm = get_comm(buf);
if ((filter & FILTER_COMM) &&
- strncmp(comm, fc.comm, TASK_COMM_LEN) != 0) {
+ !match_str_list(comm, fc.comms, fc.comms_size)) {
free(comm);
return false;
}
+ free(comm);
return true;
}
-static void add_list(char *buf, int len)
+static void add_list(char *buf, int len, char *ext_buf)
{
if (list_size != 0 &&
len == list[list_size-1].len &&
@@ -373,7 +499,7 @@ static void add_list(char *buf, int len)
return;
}
if (list_size == max_size) {
- printf("max_size too small??\n");
+ fprintf(stderr, "max_size too small??\n");
exit(1);
}
if (!is_need(buf))
@@ -383,7 +509,7 @@ static void add_list(char *buf, int len)
list[list_size].comm = get_comm(buf);
list[list_size].txt = malloc(len+1);
if (!list[list_size].txt) {
- printf("Out of memory\n");
+ fprintf(stderr, "Out of memory\n");
exit(1);
}
memcpy(list[list_size].txt, buf, len);
@@ -397,6 +523,7 @@ static void add_list(char *buf, int len)
list[list_size].stacktrace++;
list[list_size].ts_nsec = get_ts_nsec(buf);
list[list_size].free_ts_nsec = get_free_ts_nsec(buf);
+ list[list_size].allocator = get_allocator(buf, ext_buf);
list_size++;
if (list_size % 1000 == 0) {
printf("loaded %d\r", list_size);
@@ -409,25 +536,130 @@ static bool parse_cull_args(const char *arg_str)
int size = 0;
char **args = explode(',', arg_str, &size);
- for (int i = 0; i < size; ++i)
- if (!strcmp(args[i], "pid") || !strcmp(args[i], "p"))
+ for (int i = 0; i < size; ++i) {
+ int arg_type = get_arg_type(args[i]);
+
+ if (arg_type == ARG_PID)
cull |= CULL_PID;
- else if (!strcmp(args[i], "tgid") || !strcmp(args[i], "tg"))
+ else if (arg_type == ARG_TGID)
cull |= CULL_TGID;
- else if (!strcmp(args[i], "name") || !strcmp(args[i], "n"))
+ else if (arg_type == ARG_COMM)
cull |= CULL_COMM;
- else if (!strcmp(args[i], "stacktrace") || !strcmp(args[i], "st"))
+ else if (arg_type == ARG_STACKTRACE)
cull |= CULL_STACKTRACE;
- else if (!strcmp(args[i], "free") || !strcmp(args[i], "f"))
+ else if (arg_type == ARG_FREE)
cull |= CULL_UNRELEASE;
+ else if (arg_type == ARG_ALLOCATOR)
+ cull |= CULL_ALLOCATOR;
else {
free_explode(args, size);
return false;
}
+ }
free_explode(args, size);
+ if (sc.size == 0)
+ set_single_cmp(compare_num, SORT_DESC);
return true;
}
+/*
+ * Make @cmp, applied with direction @sign (SORT_ASC or SORT_DESC), the only
+ * sort key held in sc.
+ *
+ * Arrays that already hold at least one slot are reused; otherwise any
+ * stale array is released and a one-element array is allocated. Prints
+ * "Out of memory" and exits if an allocation fails.
+ */
+static void set_single_cmp(int (*cmp)(const void *, const void *), int sign)
+{
+	if (sc.signs == NULL || sc.size < 1) {
+		free(sc.signs);
+		sc.signs = calloc(1, sizeof(*sc.signs));
+	}
+	if (sc.cmps == NULL || sc.size < 1) {
+		free(sc.cmps);
+		/* elements are function pointers, so size them from the array itself */
+		sc.cmps = calloc(1, sizeof(*sc.cmps));
+	}
+	if (!sc.signs || !sc.cmps) {
+		fprintf(stderr, "Out of memory\n");
+		exit(1);
+	}
+	sc.signs[0] = sign;
+	sc.cmps[0] = cmp;
+	sc.size = 1;
+}
+
+/*
+ * Parse the argument of --sort, a comma-separated list of keys each
+ * optionally prefixed with '+' (ascending, the default) or '-'
+ * (descending), and store the resulting comparators and signs in sc.
+ *
+ * Any previously configured sort condition is discarded first. Returns
+ * true on success. On an unrecognised key it returns false and leaves sc
+ * empty (size 0, no arrays). Prints "Out of memory" and exits if an
+ * allocation fails.
+ */
+static bool parse_sort_args(const char *arg_str)
+{
+	int size = 0;
+	char **args;
+	bool ok = true;
+
+	/* reset sort_condition */
+	free(sc.signs);
+	free(sc.cmps);
+	sc.signs = NULL;
+	sc.cmps = NULL;
+	sc.size = 0;
+
+	args = explode(',', arg_str, &size);
+
+	sc.signs = calloc(size, sizeof(*sc.signs));
+	/* elements are function pointers, so size them from the array itself */
+	sc.cmps = calloc(size, sizeof(*sc.cmps));
+	/* calloc(0, ...) may legitimately return NULL, so only fail for size > 0 */
+	if (size > 0 && (!sc.signs || !sc.cmps)) {
+		fprintf(stderr, "Out of memory\n");
+		exit(1);
+	}
+
+	for (int i = 0; i < size && ok; ++i) {
+		const char *key = args[i];
+
+		sc.signs[i] = SORT_ASC;
+		if (key[0] == '-' || key[0] == '+') {
+			if (key[0] == '-')
+				sc.signs[i] = SORT_DESC;
+			key++;
+		}
+
+		switch (get_arg_type(key)) {
+		case ARG_PID:
+			sc.cmps[i] = compare_pid;
+			break;
+		case ARG_TGID:
+			sc.cmps[i] = compare_tgid;
+			break;
+		case ARG_COMM:
+			sc.cmps[i] = compare_comm;
+			break;
+		case ARG_STACKTRACE:
+			sc.cmps[i] = compare_stacktrace;
+			break;
+		case ARG_ALLOC_TS:
+			sc.cmps[i] = compare_ts;
+			break;
+		case ARG_FREE_TS:
+			sc.cmps[i] = compare_free_ts;
+			break;
+		case ARG_TXT:
+			sc.cmps[i] = compare_txt;
+			break;
+		case ARG_ALLOCATOR:
+			sc.cmps[i] = compare_allocator;
+			break;
+		default:
+			ok = false;
+			break;
+		}
+	}
+	free_explode(args, size);
+
+	if (!ok) {
+		/* do not leave half-filled arrays behind */
+		free(sc.signs);
+		free(sc.cmps);
+		sc.signs = NULL;
+		sc.cmps = NULL;
+		return false;
+	}
+	sc.size = size;
+	return true;
+}
+
+/*
+ * Parse a comma-separated list of decimal integers.
+ *
+ * @arg_str:   the list as given on the command line
+ * @list_size: receives the number of parsed entries on success; left
+ *             untouched on failure
+ *
+ * Returns a calloc()ed array the caller owns, or NULL when an element is
+ * empty, has trailing characters, overflows strtol() or does not fit in an
+ * int. Prints "Out of memory" and exits if the array cannot be allocated.
+ */
+static int *parse_nums_list(char *arg_str, int *list_size)
+{
+	int size = 0;
+	char **args = explode(',', arg_str, &size);
+	int *list = calloc(size, sizeof(*list));
+
+	/* calloc(0, ...) may legitimately return NULL, so only fail for size > 0 */
+	if (!list && size > 0) {
+		fprintf(stderr, "Out of memory\n");
+		exit(1);
+	}
+
+	for (int i = 0; i < size; ++i) {
+		char *endptr = NULL;
+		long val;
+
+		errno = 0;
+		val = strtol(args[i], &endptr, 10);
+		/* the round-trip through int rejects values that would be truncated */
+		if (errno != 0 || endptr == args[i] || *endptr != '\0' ||
+		    val != (int)val) {
+			free(list);
+			free_explode(args, size);
+			return NULL;
+		}
+		list[i] = (int)val;
+	}
+	*list_size = size;
+	free_explode(args, size);
+	return list;
+}
+
+/*
+ * Write "allocated by " to @out followed by one label, each with a
+ * trailing space, for every ALLOCATOR_* bit set in @allocator, in the
+ * order CMA, SLAB, VMALLOC, OTHERS.
+ */
+static void print_allocator(FILE *out, int allocator)
+{
+	static const struct {
+		int bit;
+		const char *label;
+	} names[] = {
+		{ ALLOCATOR_CMA,     "CMA " },
+		{ ALLOCATOR_SLAB,    "SLAB " },
+		{ ALLOCATOR_VMALLOC, "VMALLOC " },
+		{ ALLOCATOR_OTHERS,  "OTHERS " },
+	};
+	size_t n;
+
+	fputs("allocated by ", out);
+	for (n = 0; n < sizeof(names) / sizeof(names[0]); n++) {
+		if (allocator & names[n].bit)
+			fputs(names[n].label, out);
+	}
+}
+
#define BUF_SIZE (128 * 1024)
static void usage(void)
@@ -442,19 +674,20 @@ static void usage(void)
"-a\t\tSort by memory allocate time.\n"
"-r\t\tSort by memory release time.\n"
"-f\t\tFilter out the information of blocks whose memory has been released.\n"
- "--pid <PID>\tSelect by pid. This selects the information of blocks whose process ID number equals to <PID>.\n"
- "--tgid <TGID>\tSelect by tgid. This selects the information of blocks whose Thread Group ID number equals to <TGID>.\n"
- "--name <command>\n\t\tSelect by command name. This selects the information of blocks whose command name identical to <command>.\n"
- "--cull <rules>\tCull by user-defined rules. <rules> is a single argument in the form of a comma-separated list with some common fields predefined\n"
+ "-d\t\tPrint debug information.\n"
+ "--pid <pidlist>\tSelect by pid. This selects the information of blocks whose process ID numbers appear in <pidlist>.\n"
+ "--tgid <tgidlist>\tSelect by tgid. This selects the information of blocks whose Thread Group ID numbers appear in <tgidlist>.\n"
+ "--name <cmdlist>\n\t\tSelect by command name. This selects the information of blocks whose command name appears in <cmdlist>.\n"
+ "--cull <rules>\tCull by user-defined rules.<rules> is a single argument in the form of a comma-separated list with some common fields predefined\n"
+ "--sort <order>\tSpecify sort order as: [+|-]key[,[+|-]key[,...]]\n"
);
}
int main(int argc, char **argv)
{
- int (*cmp)(const void *, const void *) = compare_num;
FILE *fin, *fout;
- char *buf, *endptr;
- int ret, i, count;
+ char *buf, *ext_buf;
+ int i, count;
struct stat st;
int opt;
struct option longopts[] = {
@@ -462,64 +695,74 @@ int main(int argc, char **argv)
{ "tgid", required_argument, NULL, 2 },
{ "name", required_argument, NULL, 3 },
{ "cull", required_argument, NULL, 4 },
+ { "sort", required_argument, NULL, 5 },
{ 0, 0, 0, 0},
};
- while ((opt = getopt_long(argc, argv, "afmnprstP", longopts, NULL)) != -1)
+ while ((opt = getopt_long(argc, argv, "adfmnprstP", longopts, NULL)) != -1)
switch (opt) {
case 'a':
- cmp = compare_ts;
+ set_single_cmp(compare_ts, SORT_ASC);
+ break;
+ case 'd':
+ debug_on = true;
break;
case 'f':
filter = filter | FILTER_UNRELEASE;
break;
case 'm':
- cmp = compare_page_num;
+ set_single_cmp(compare_page_num, SORT_DESC);
break;
case 'p':
- cmp = compare_pid;
+ set_single_cmp(compare_pid, SORT_ASC);
break;
case 'r':
- cmp = compare_free_ts;
+ set_single_cmp(compare_free_ts, SORT_ASC);
break;
case 's':
- cmp = compare_stacktrace;
+ set_single_cmp(compare_stacktrace, SORT_ASC);
break;
case 't':
- cmp = compare_num;
+ set_single_cmp(compare_num, SORT_DESC);
break;
case 'P':
- cmp = compare_tgid;
+ set_single_cmp(compare_tgid, SORT_ASC);
break;
case 'n':
- cmp = compare_comm;
+ set_single_cmp(compare_comm, SORT_ASC);
break;
case 1:
filter = filter | FILTER_PID;
- errno = 0;
- fc.pid = strtol(optarg, &endptr, 10);
- if (errno != 0 || endptr == optarg || *endptr != '\0') {
- printf("wrong/invalid pid in from the command line:%s\n", optarg);
+ fc.pids = parse_nums_list(optarg, &fc.pids_size);
+ if (fc.pids == NULL) {
+ fprintf(stderr, "wrong/invalid pid in from the command line:%s\n",
+ optarg);
exit(1);
}
break;
case 2:
filter = filter | FILTER_TGID;
- errno = 0;
- fc.tgid = strtol(optarg, &endptr, 10);
- if (errno != 0 || endptr == optarg || *endptr != '\0') {
- printf("wrong/invalid tgid in from the command line:%s\n", optarg);
+ fc.tgids = parse_nums_list(optarg, &fc.tgids_size);
+ if (fc.tgids == NULL) {
+ fprintf(stderr, "wrong/invalid tgid in from the command line:%s\n",
+ optarg);
exit(1);
}
break;
case 3:
filter = filter | FILTER_COMM;
- strncpy(fc.comm, optarg, TASK_COMM_LEN);
- fc.comm[TASK_COMM_LEN-1] = '\0';
+ fc.comms = explode(',', optarg, &fc.comms_size);
break;
case 4:
if (!parse_cull_args(optarg)) {
- printf("wrong argument after --cull in from the command line:%s\n",
+ fprintf(stderr, "wrong argument after --cull option:%s\n",
+ optarg);
+ exit(1);
+ }
+ break;
+ case 5:
+ if (!parse_sort_args(optarg)) {
+ fprintf(stderr, "wrong argument after --sort option:%s\n",
optarg);
exit(1);
}
@@ -553,17 +796,18 @@ int main(int argc, char **argv)
list = malloc(max_size * sizeof(*list));
buf = malloc(BUF_SIZE);
- if (!list || !buf) {
- printf("Out of memory\n");
+ ext_buf = malloc(BUF_SIZE);
+ if (!list || !buf || !ext_buf) {
+ fprintf(stderr, "Out of memory\n");
exit(1);
}
for ( ; ; ) {
- ret = read_block(buf, BUF_SIZE, fin);
- if (ret < 0)
- break;
+ int buf_len = read_block(buf, ext_buf, BUF_SIZE, fin);
- add_list(buf, ret);
+ if (buf_len < 0)
+ break;
+ add_list(buf, buf_len, ext_buf);
}
printf("loaded %d\n", list_size);
@@ -584,12 +828,14 @@ int main(int argc, char **argv)
}
}
- qsort(list, count, sizeof(list[0]), cmp);
+ qsort(list, count, sizeof(list[0]), compare_sort_condition);
for (i = 0; i < count; i++) {
- if (cull == 0)
- fprintf(fout, "%d times, %d pages:\n%s\n",
- list[i].num, list[i].page_num, list[i].txt);
+ if (cull == 0) {
+ fprintf(fout, "%d times, %d pages, ", list[i].num, list[i].page_num);
+ print_allocator(fout, list[i].allocator);
+ fprintf(fout, ":\n%s\n", list[i].txt);
+ }
else {
fprintf(fout, "%d times, %d pages",
list[i].num, list[i].page_num);
@@ -599,6 +845,10 @@ int main(int argc, char **argv)
fprintf(fout, ", TGID %d", list[i].pid);
if (cull & CULL_COMM || filter & FILTER_COMM)
fprintf(fout, ", task_comm_name: %s", list[i].comm);
+ if (cull & CULL_ALLOCATOR) {
+ fprintf(fout, ", ");
+ print_allocator(fout, list[i].allocator);
+ }
if (cull & CULL_UNRELEASE)
fprintf(fout, " (%s)",
list[i].free_ts_nsec ? "UNRELEASED" : "RELEASED");