aboutsummaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests/vm
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--tools/testing/selftests/vm/.gitignore6
-rw-r--r--tools/testing/selftests/vm/Makefile59
-rw-r--r--tools/testing/selftests/vm/charge_reserved_hugetlb.sh34
-rw-r--r--tools/testing/selftests/vm/check_config.sh31
-rw-r--r--tools/testing/selftests/vm/config2
-rw-r--r--tools/testing/selftests/vm/gup_test.c31
-rw-r--r--tools/testing/selftests/vm/hmm-tests.c526
-rw-r--r--tools/testing/selftests/vm/hugepage-mremap.c66
-rw-r--r--tools/testing/selftests/vm/hugepage-vmemmap.c144
-rw-r--r--tools/testing/selftests/vm/hugetlb-madvise.c411
-rw-r--r--tools/testing/selftests/vm/hugetlb_reparenting_test.sh21
-rw-r--r--tools/testing/selftests/vm/khugepaged.c1195
-rw-r--r--tools/testing/selftests/vm/ksm_tests.c51
-rw-r--r--tools/testing/selftests/vm/madv_populate.c34
-rw-r--r--tools/testing/selftests/vm/map_fixed_noreplace.c49
-rw-r--r--tools/testing/selftests/vm/memfd_secret.c2
-rw-r--r--tools/testing/selftests/vm/migration.c193
-rw-r--r--tools/testing/selftests/vm/mrelease_test.c206
-rw-r--r--tools/testing/selftests/vm/mremap_test.c135
-rw-r--r--tools/testing/selftests/vm/pkey-helpers.h3
-rw-r--r--tools/testing/selftests/vm/pkey-x86.h21
-rw-r--r--tools/testing/selftests/vm/protection_keys.c2
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests.sh513
-rw-r--r--tools/testing/selftests/vm/settings1
-rw-r--r--tools/testing/selftests/vm/soft-dirty.c210
-rw-r--r--tools/testing/selftests/vm/split_huge_page_test.c91
-rwxr-xr-xtools/testing/selftests/vm/test_hmm.sh24
-rw-r--r--tools/testing/selftests/vm/transhuge-stress.c72
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c361
-rw-r--r--tools/testing/selftests/vm/util.h69
-rw-r--r--tools/testing/selftests/vm/va_128TBswitch.c10
-rwxr-xr-xtools/testing/selftests/vm/va_128TBswitch.sh54
-rw-r--r--tools/testing/selftests/vm/vm_util.c126
-rw-r--r--tools/testing/selftests/vm/vm_util.h12
-rw-r--r--tools/testing/selftests/vm/write_hugetlb_memory.sh2
35 files changed, 3508 insertions, 1259 deletions
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 2e7e86e85282..7b9dc2426f18 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -2,12 +2,16 @@
hugepage-mmap
hugepage-mremap
hugepage-shm
+hugepage-vmemmap
+hugetlb-madvise
khugepaged
map_hugetlb
map_populate
thuge-gen
compaction_test
+migration
mlock2-tests
+mrelease_test
mremap_dontunmap
mremap_test
on-fault-limit
@@ -26,6 +30,6 @@ map_fixed_noreplace
write_to_hugetlbfs
hmm-tests
memfd_secret
-local_config.*
+soft-dirty
split_huge_page_test
ksm_tests
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 1607322a112c..163c2fde3cb3 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for vm selftests
-include local_config.mk
+LOCAL_HDRS += $(top_srcdir)/mm/gup_test.h
uname_M := $(shell uname -m 2>/dev/null || echo not)
MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/')
@@ -23,39 +23,44 @@ MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/p
# LDLIBS.
MAKEFLAGS += --no-builtin-rules
-CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
+CFLAGS = -Wall -I $(top_srcdir) -I $(top_srcdir)/usr/include $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
LDLIBS = -lrt -lpthread
TEST_GEN_FILES = compaction_test
TEST_GEN_FILES += gup_test
TEST_GEN_FILES += hmm-tests
+TEST_GEN_FILES += hugetlb-madvise
TEST_GEN_FILES += hugepage-mmap
TEST_GEN_FILES += hugepage-mremap
TEST_GEN_FILES += hugepage-shm
+TEST_GEN_FILES += hugepage-vmemmap
TEST_GEN_FILES += khugepaged
-TEST_GEN_FILES += madv_populate
+TEST_GEN_PROGS = madv_populate
TEST_GEN_FILES += map_fixed_noreplace
TEST_GEN_FILES += map_hugetlb
TEST_GEN_FILES += map_populate
TEST_GEN_FILES += memfd_secret
+TEST_GEN_FILES += migration
TEST_GEN_FILES += mlock-random-test
TEST_GEN_FILES += mlock2-tests
+TEST_GEN_FILES += mrelease_test
TEST_GEN_FILES += mremap_dontunmap
TEST_GEN_FILES += mremap_test
TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += userfaultfd
-TEST_GEN_FILES += split_huge_page_test
+TEST_GEN_PROGS += soft-dirty
+TEST_GEN_PROGS += split_huge_page_test
TEST_GEN_FILES += ksm_tests
ifeq ($(MACHINE),x86_64)
-CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c)
-CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie)
+CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
-TARGETS := protection_keys
-BINARIES_32 := $(TARGETS:%=%_32)
-BINARIES_64 := $(TARGETS:%=%_64)
+VMTARGETS := protection_keys
+BINARIES_32 := $(VMTARGETS:%=%_32)
+BINARIES_64 := $(VMTARGETS:%=%_64)
ifeq ($(CAN_BUILD_WITH_NOPIE),1)
CFLAGS += -no-pie
@@ -85,10 +90,17 @@ endif
TEST_PROGS := run_vmtests.sh
TEST_FILES := test_vmalloc.sh
+TEST_FILES += test_hmm.sh
+TEST_FILES += va_128TBswitch.sh
-KSFT_KHDR_INSTALL := 1
include ../lib.mk
+$(OUTPUT)/khugepaged: vm_util.c
+$(OUTPUT)/madv_populate: vm_util.c
+$(OUTPUT)/soft-dirty: vm_util.c
+$(OUTPUT)/split_huge_page_test: vm_util.c
+$(OUTPUT)/userfaultfd: vm_util.c
+
ifeq ($(MACHINE),x86_64)
BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
@@ -108,7 +120,7 @@ $(BINARIES_32): CFLAGS += -m32 -mxsave
$(BINARIES_32): LDLIBS += -lrt -ldl -lm
$(BINARIES_32): $(OUTPUT)/%_32: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-32,$(t))))
endif
ifeq ($(CAN_BUILD_X86_64),1)
@@ -116,7 +128,7 @@ $(BINARIES_64): CFLAGS += -m64 -mxsave
$(BINARIES_64): LDLIBS += -lrt -ldl
$(BINARIES_64): $(OUTPUT)/%_64: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-64,$(t))))
endif
# x86_64 users should be encouraged to install 32-bit libraries
@@ -140,25 +152,6 @@ endif
$(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
-$(OUTPUT)/gup_test: ../../../../mm/gup_test.h
-
-$(OUTPUT)/hmm-tests: local_config.h
-
-# HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
-$(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
-
$(OUTPUT)/ksm_tests: LDLIBS += -lnuma
-local_config.mk local_config.h: check_config.sh
- /bin/sh ./check_config.sh $(CC)
-
-EXTRA_CLEAN += local_config.mk local_config.h
-
-ifeq ($(HMM_EXTRA_LIBS),)
-all: warn_missing_hugelibs
-
-warn_missing_hugelibs:
- @echo ; \
- echo "Warning: missing libhugetlbfs support. Some HMM tests will be skipped." ; \
- echo
-endif
+$(OUTPUT)/migration: LDLIBS += -lnuma
diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
index fe8fcfb334e0..a5cb4b09a46c 100644
--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
@@ -24,19 +24,23 @@ if [[ "$1" == "-cgroup-v2" ]]; then
reservation_usage_file=rsvd.current
fi
-cgroup_path=/dev/cgroup/memory
-if [[ ! -e $cgroup_path ]]; then
- mkdir -p $cgroup_path
- if [[ $cgroup2 ]]; then
+if [[ $cgroup2 ]]; then
+ cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=/dev/cgroup/memory
mount -t cgroup2 none $cgroup_path
- else
+ do_umount=1
+ fi
+ echo "+hugetlb" >$cgroup_path/cgroup.subtree_control
+else
+ cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $cgroup_path
+ do_umount=1
fi
fi
-
-if [[ $cgroup2 ]]; then
- echo "+hugetlb" >/dev/cgroup/memory/cgroup.subtree_control
-fi
+export cgroup_path
function cleanup() {
if [[ $cgroup2 ]]; then
@@ -108,7 +112,7 @@ function setup_cgroup() {
function wait_for_hugetlb_memory_to_get_depleted() {
local cgroup="$1"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
# Wait for hugetlbfs memory to get depleted.
while [ $(cat $path) != 0 ]; do
echo Waiting for hugetlb memory to get depleted.
@@ -121,7 +125,7 @@ function wait_for_hugetlb_memory_to_get_reserved() {
local cgroup="$1"
local size="$2"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
# Wait for hugetlbfs memory to get written.
while [ $(cat $path) != $size ]; do
echo Waiting for hugetlb memory reservation to reach size $size.
@@ -134,7 +138,7 @@ function wait_for_hugetlb_memory_to_get_written() {
local cgroup="$1"
local size="$2"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
# Wait for hugetlbfs memory to get written.
while [ $(cat $path) != $size ]; do
echo Waiting for hugetlb memory to reach size $size.
@@ -574,5 +578,7 @@ for populate in "" "-o"; do
done # populate
done # method
-umount $cgroup_path
-rmdir $cgroup_path
+if [[ $do_umount ]]; then
+ umount $cgroup_path
+ rmdir $cgroup_path
+fi
diff --git a/tools/testing/selftests/vm/check_config.sh b/tools/testing/selftests/vm/check_config.sh
deleted file mode 100644
index 079c8a40b85d..000000000000
--- a/tools/testing/selftests/vm/check_config.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# Probe for libraries and create header files to record the results. Both C
-# header files and Makefile include fragments are created.
-
-OUTPUT_H_FILE=local_config.h
-OUTPUT_MKFILE=local_config.mk
-
-# libhugetlbfs
-tmpname=$(mktemp)
-tmpfile_c=${tmpname}.c
-tmpfile_o=${tmpname}.o
-
-echo "#include <sys/types.h>" > $tmpfile_c
-echo "#include <hugetlbfs.h>" >> $tmpfile_c
-echo "int func(void) { return 0; }" >> $tmpfile_c
-
-CC=${1:?"Usage: $0 <compiler> # example compiler: gcc"}
-$CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1
-
-if [ -f $tmpfile_o ]; then
- echo "#define LOCAL_CONFIG_HAVE_LIBHUGETLBFS 1" > $OUTPUT_H_FILE
- echo "HMM_EXTRA_LIBS = -lhugetlbfs" > $OUTPUT_MKFILE
-else
- echo "// No libhugetlbfs support found" > $OUTPUT_H_FILE
- echo "# No libhugetlbfs support found, so:" > $OUTPUT_MKFILE
- echo "HMM_EXTRA_LIBS = " >> $OUTPUT_MKFILE
-fi
-
-rm ${tmpname}.*
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
index 60e82da0de85..be087c4bc396 100644
--- a/tools/testing/selftests/vm/config
+++ b/tools/testing/selftests/vm/config
@@ -4,3 +4,5 @@ CONFIG_TEST_VMALLOC=m
CONFIG_DEVICE_PRIVATE=y
CONFIG_TEST_HMM=m
CONFIG_GUP_TEST=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_MEM_SOFT_DIRTY=y
diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index fe043f67798b..e43879291dac 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -1,22 +1,28 @@
#include <fcntl.h>
+#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
+#include <dirent.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <pthread.h>
#include <assert.h>
-#include "../../../../mm/gup_test.h"
+#include <mm/gup_test.h>
+#include "../kselftest.h"
+
+#include "util.h"
#define MB (1UL << 20)
-#define PAGE_SIZE sysconf(_SC_PAGESIZE)
/* Just the flags we need, copied from mm.h: */
#define FOLL_WRITE 0x01 /* check pte is writable */
#define FOLL_TOUCH 0x02 /* mark page accessed */
+#define GUP_TEST_FILE "/sys/kernel/debug/gup_test"
+
static unsigned long cmd = GUP_FAST_BENCHMARK;
static int gup_fd, repeats = 1;
static unsigned long size = 128 * MB;
@@ -203,10 +209,25 @@ int main(int argc, char **argv)
if (write)
gup.gup_flags |= FOLL_WRITE;
- gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ gup_fd = open(GUP_TEST_FILE, O_RDWR);
if (gup_fd == -1) {
- perror("open");
- exit(1);
+ switch (errno) {
+ case EACCES:
+ if (getuid())
+ printf("Please run this test as root\n");
+ break;
+ case ENOENT:
+ if (opendir("/sys/kernel/debug") == NULL) {
+ printf("mount debugfs at /sys/kernel/debug\n");
+ break;
+ }
+ printf("check if CONFIG_GUP_TEST is enabled in kernel config\n");
+ break;
+ default:
+ perror("failed to open " GUP_TEST_FILE);
+ break;
+ }
+ exit(KSFT_SKIP);
}
p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 864f126ffd78..4adaad1b822f 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -26,16 +26,13 @@
#include <sys/mman.h>
#include <sys/ioctl.h>
-#include "./local_config.h"
-#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS
-#include <hugetlbfs.h>
-#endif
/*
* This is a private UAPI to the kernel test module so it isn't exported
* in the usual include/uapi/... directory.
*/
-#include "../../../../lib/test_hmm_uapi.h"
+#include <lib/test_hmm_uapi.h>
+#include <mm/gup_test.h>
struct hmm_buffer {
void *ptr;
@@ -46,12 +43,22 @@ struct hmm_buffer {
uint64_t faults;
};
+enum {
+ HMM_PRIVATE_DEVICE_ONE,
+ HMM_PRIVATE_DEVICE_TWO,
+ HMM_COHERENCE_DEVICE_ONE,
+ HMM_COHERENCE_DEVICE_TWO,
+};
+
#define TWOMEG (1 << 21)
#define HMM_BUFFER_SIZE (1024 << 12)
#define HMM_PATH_MAX 64
#define NTIMES 10
#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+/* Just the flags we need, copied from mm.h: */
+#define FOLL_WRITE 0x01 /* check pte is writable */
+#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite */
FIXTURE(hmm)
{
@@ -60,6 +67,21 @@ FIXTURE(hmm)
unsigned int page_shift;
};
+FIXTURE_VARIANT(hmm)
+{
+ int device_number;
+};
+
+FIXTURE_VARIANT_ADD(hmm, hmm_device_private)
+{
+ .device_number = HMM_PRIVATE_DEVICE_ONE,
+};
+
+FIXTURE_VARIANT_ADD(hmm, hmm_device_coherent)
+{
+ .device_number = HMM_COHERENCE_DEVICE_ONE,
+};
+
FIXTURE(hmm2)
{
int fd0;
@@ -68,6 +90,24 @@ FIXTURE(hmm2)
unsigned int page_shift;
};
+FIXTURE_VARIANT(hmm2)
+{
+ int device_number0;
+ int device_number1;
+};
+
+FIXTURE_VARIANT_ADD(hmm2, hmm2_device_private)
+{
+ .device_number0 = HMM_PRIVATE_DEVICE_ONE,
+ .device_number1 = HMM_PRIVATE_DEVICE_TWO,
+};
+
+FIXTURE_VARIANT_ADD(hmm2, hmm2_device_coherent)
+{
+ .device_number0 = HMM_COHERENCE_DEVICE_ONE,
+ .device_number1 = HMM_COHERENCE_DEVICE_TWO,
+};
+
static int hmm_open(int unit)
{
char pathname[HMM_PATH_MAX];
@@ -81,12 +121,19 @@ static int hmm_open(int unit)
return fd;
}
+static bool hmm_is_coherent_type(int dev_num)
+{
+ return (dev_num >= HMM_COHERENCE_DEVICE_ONE);
+}
+
FIXTURE_SETUP(hmm)
{
self->page_size = sysconf(_SC_PAGE_SIZE);
self->page_shift = ffs(self->page_size) - 1;
- self->fd = hmm_open(0);
+ self->fd = hmm_open(variant->device_number);
+ if (self->fd < 0 && hmm_is_coherent_type(variant->device_number))
+ SKIP(exit(0), "DEVICE_COHERENT not available");
ASSERT_GE(self->fd, 0);
}
@@ -95,9 +142,11 @@ FIXTURE_SETUP(hmm2)
self->page_size = sysconf(_SC_PAGE_SIZE);
self->page_shift = ffs(self->page_size) - 1;
- self->fd0 = hmm_open(0);
+ self->fd0 = hmm_open(variant->device_number0);
+ if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0))
+ SKIP(exit(0), "DEVICE_COHERENT not available");
ASSERT_GE(self->fd0, 0);
- self->fd1 = hmm_open(1);
+ self->fd1 = hmm_open(variant->device_number1);
ASSERT_GE(self->fd1, 0);
}
@@ -211,6 +260,20 @@ static void hmm_nanosleep(unsigned int n)
nanosleep(&t, NULL);
}
+static int hmm_migrate_sys_to_dev(int fd,
+ struct hmm_buffer *buffer,
+ unsigned long npages)
+{
+ return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_DEV, buffer, npages);
+}
+
+static int hmm_migrate_dev_to_sys(int fd,
+ struct hmm_buffer *buffer,
+ unsigned long npages)
+{
+ return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_SYS, buffer, npages);
+}
+
/*
* Simple NULL test of device open/close.
*/
@@ -666,7 +729,54 @@ TEST_F(hmm, anon_write_huge)
hmm_buffer_free(buffer);
}
-#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS
+/*
+ * Read numeric data from raw and tagged kernel status files. Used to read
+ * /proc and /sys data (without a tag) and from /proc/meminfo (with a tag).
+ */
+static long file_read_ulong(char *file, const char *tag)
+{
+ int fd;
+ char buf[2048];
+ int len;
+ char *p, *q;
+ long val;
+
+ fd = open(file, O_RDONLY);
+ if (fd < 0) {
+ /* Error opening the file */
+ return -1;
+ }
+
+ len = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (len < 0) {
+ /* Error in reading the file */
+ return -1;
+ }
+ if (len == sizeof(buf)) {
+ /* Error file is too large */
+ return -1;
+ }
+ buf[len] = '\0';
+
+ /* Search for a tag if provided */
+ if (tag) {
+ p = strstr(buf, tag);
+ if (!p)
+ return -1; /* looks like the line we want isn't there */
+ p += strlen(tag);
+ } else
+ p = buf;
+
+ val = strtol(p, &q, 0);
+ if (*q != ' ') {
+ /* Error parsing the file */
+ return -1;
+ }
+
+ return val;
+}
+
/*
* Write huge TLBFS page.
*/
@@ -675,29 +785,27 @@ TEST_F(hmm, anon_write_hugetlbfs)
struct hmm_buffer *buffer;
unsigned long npages;
unsigned long size;
+ unsigned long default_hsize;
unsigned long i;
int *ptr;
int ret;
- long pagesizes[4];
- int n, idx;
-
- /* Skip test if we can't allocate a hugetlbfs page. */
- n = gethugepagesizes(pagesizes, 4);
- if (n <= 0)
+ default_hsize = file_read_ulong("/proc/meminfo", "Hugepagesize:");
+ if (default_hsize < 0 || default_hsize*1024 < default_hsize)
SKIP(return, "Huge page size could not be determined");
- for (idx = 0; --n > 0; ) {
- if (pagesizes[n] < pagesizes[idx])
- idx = n;
- }
- size = ALIGN(TWOMEG, pagesizes[idx]);
+ default_hsize = default_hsize*1024; /* KB to B */
+
+ size = ALIGN(TWOMEG, default_hsize);
npages = size >> self->page_shift;
buffer = malloc(sizeof(*buffer));
ASSERT_NE(buffer, NULL);
- buffer->ptr = get_hugepage_region(size, GHR_STRICT);
- if (buffer->ptr == NULL) {
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (buffer->ptr == MAP_FAILED) {
free(buffer);
SKIP(return, "Huge page could not be allocated");
}
@@ -721,11 +829,10 @@ TEST_F(hmm, anon_write_hugetlbfs)
for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
- free_hugepage_region(buffer->ptr);
+ munmap(buffer->ptr, buffer->size);
buffer->ptr = NULL;
hmm_buffer_free(buffer);
}
-#endif /* LOCAL_CONFIG_HAVE_LIBHUGETLBFS */
/*
* Read mmap'ed file memory.
@@ -875,7 +982,7 @@ TEST_F(hmm, migrate)
ptr[i] = i;
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -923,7 +1030,7 @@ TEST_F(hmm, migrate_fault)
ptr[i] = i;
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -936,7 +1043,48 @@ TEST_F(hmm, migrate_fault)
ASSERT_EQ(ptr[i], i);
/* Migrate memory to the device again. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+TEST_F(hmm, migrate_release)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -944,6 +1092,14 @@ TEST_F(hmm, migrate_fault)
for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
+ /* Release device memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_RELEASE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i)
+ ASSERT_EQ(ptr[i], i);
+
hmm_buffer_free(buffer);
}
@@ -976,7 +1132,7 @@ TEST_F(hmm, migrate_shared)
ASSERT_NE(buffer->ptr, MAP_FAILED);
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, -ENOENT);
hmm_buffer_free(buffer);
@@ -1015,7 +1171,7 @@ TEST_F(hmm2, migrate_mixed)
p = buffer->ptr;
/* Migrating a protected area should be an error. */
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages);
ASSERT_EQ(ret, -EINVAL);
/* Punch a hole after the first page address. */
@@ -1023,7 +1179,7 @@ TEST_F(hmm2, migrate_mixed)
ASSERT_EQ(ret, 0);
/* We expect an error if the vma doesn't cover the range. */
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 3);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 3);
ASSERT_EQ(ret, -EINVAL);
/* Page 2 will be a read-only zero page. */
@@ -1055,13 +1211,13 @@ TEST_F(hmm2, migrate_mixed)
/* Now try to migrate pages 2-5 to device 1. */
buffer->ptr = p + 2 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 4);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 4);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, 4);
/* Page 5 won't be migrated to device 0 because it's on device 1. */
buffer->ptr = p + 5 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);
ASSERT_EQ(ret, -ENOENT);
buffer->ptr = p;
@@ -1070,8 +1226,12 @@ TEST_F(hmm2, migrate_mixed)
}
/*
- * Migrate anonymous memory to device private memory and fault it back to system
- * memory multiple times.
+ * Migrate anonymous memory to device memory and back to system memory
+ * multiple times. In case of private zone configuration, this is done
+ * through fault pages accessed by CPU. In case of coherent zone configuration,
+ * the pages from the device should be explicitly migrated back to system memory.
+ * The reason is Coherent device zone has coherent access by CPU, therefore
+ * it will not generate any page fault.
*/
TEST_F(hmm, migrate_multiple)
{
@@ -1107,8 +1267,7 @@ TEST_F(hmm, migrate_multiple)
ptr[i] = i;
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer,
- npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -1116,7 +1275,13 @@ TEST_F(hmm, migrate_multiple)
for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
- /* Fault pages back to system memory and check them. */
+ /* Migrate back to system memory and check them. */
+ if (hmm_is_coherent_type(variant->device_number)) {
+ ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ }
+
for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
@@ -1251,6 +1416,48 @@ TEST_F(hmm, anon_teardown)
/*
* Test memory snapshot without faulting in pages accessed by the device.
*/
+TEST_F(hmm, mixedmap)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned char *m;
+ int ret;
+
+ npages = 1;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ self->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
TEST_F(hmm2, snapshot)
{
struct hmm_buffer *buffer;
@@ -1312,13 +1519,13 @@ TEST_F(hmm2, snapshot)
/* Page 5 will be migrated to device 0. */
buffer->ptr = p + 5 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, 1);
/* Page 6 will be migrated to device 1. */
buffer->ptr = p + 6 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 1);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, 1);
@@ -1335,14 +1542,20 @@ TEST_F(hmm2, snapshot)
ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ);
ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ);
ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE);
- ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
- HMM_DMIRROR_PROT_WRITE);
- ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
+ if (!hmm_is_coherent_type(variant->device_number0)) {
+ ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
+ HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
+ } else {
+ ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL |
+ HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[6], HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE |
+ HMM_DMIRROR_PROT_WRITE);
+ }
hmm_buffer_free(buffer);
}
-#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS
/*
* Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that
* should be mapped by a large page table entry.
@@ -1352,30 +1565,30 @@ TEST_F(hmm, compound)
struct hmm_buffer *buffer;
unsigned long npages;
unsigned long size;
+ unsigned long default_hsize;
int *ptr;
unsigned char *m;
int ret;
- long pagesizes[4];
- int n, idx;
unsigned long i;
/* Skip test if we can't allocate a hugetlbfs page. */
- n = gethugepagesizes(pagesizes, 4);
- if (n <= 0)
- return;
- for (idx = 0; --n > 0; ) {
- if (pagesizes[n] < pagesizes[idx])
- idx = n;
- }
- size = ALIGN(TWOMEG, pagesizes[idx]);
+ default_hsize = file_read_ulong("/proc/meminfo", "Hugepagesize:");
+ if (default_hsize < 0 || default_hsize*1024 < default_hsize)
+ SKIP(return, "Huge page size could not be determined");
+ default_hsize = default_hsize*1024; /* KB to B */
+
+ size = ALIGN(TWOMEG, default_hsize);
npages = size >> self->page_shift;
buffer = malloc(sizeof(*buffer));
ASSERT_NE(buffer, NULL);
- buffer->ptr = get_hugepage_region(size, GHR_STRICT);
- if (buffer->ptr == NULL) {
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (buffer->ptr == MAP_FAILED) {
free(buffer);
return;
}
@@ -1414,11 +1627,10 @@ TEST_F(hmm, compound)
ASSERT_EQ(m[i], HMM_DMIRROR_PROT_READ |
HMM_DMIRROR_PROT_PMD);
- free_hugepage_region(buffer->ptr);
+ munmap(buffer->ptr, buffer->size);
buffer->ptr = NULL;
hmm_buffer_free(buffer);
}
-#endif /* LOCAL_CONFIG_HAVE_LIBHUGETLBFS */
/*
* Test two devices reading the same memory (double mapped).
@@ -1478,9 +1690,19 @@ TEST_F(hmm2, double_map)
for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
- /* Punch a hole after the first page address. */
- ret = munmap(buffer->ptr + self->page_size, self->page_size);
+ /* Migrate pages to device 1 and try to read from device 0. */
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages);
ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what device 0 read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
hmm_buffer_free(buffer);
}
@@ -1643,4 +1865,190 @@ TEST_F(hmm, exclusive_cow)
hmm_buffer_free(buffer);
}
+static int gup_test_exec(int gup_fd, unsigned long addr, int cmd,
+ int npages, int size, int flags)
+{
+ struct gup_test gup = {
+ .nr_pages_per_call = npages,
+ .addr = addr,
+ .gup_flags = FOLL_WRITE | flags,
+ .size = size,
+ };
+
+ if (ioctl(gup_fd, cmd, &gup)) {
+ perror("ioctl on error\n");
+ return errno;
+ }
+
+ return 0;
+}
+
+/*
+ * Test get user device pages through gup_test. Setting PIN_LONGTERM flag.
+ * This should trigger a migration back to system memory for both, private
+ * and coherent type pages.
+ * This test makes use of gup_test module. Make sure GUP_TEST_CONFIG is added
+ * to your configuration before you run it.
+ */
+TEST_F(hmm, hmm_gup_test)
+{
+ struct hmm_buffer *buffer;
+ int gup_fd;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ unsigned char *m;
+
+ gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ if (gup_fd == -1)
+ SKIP(return, "Skipping test, could not find gup_test driver");
+
+ npages = 4;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr,
+ GUP_BASIC_TEST, 1, self->page_size, 0), 0);
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr + 1 * self->page_size,
+ GUP_FAST_BENCHMARK, 1, self->page_size, 0), 0);
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr + 2 * self->page_size,
+ PIN_FAST_BENCHMARK, 1, self->page_size, FOLL_LONGTERM), 0);
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr + 3 * self->page_size,
+ PIN_LONGTERM_BENCHMARK, 1, self->page_size, 0), 0);
+
+ /* Take snapshot to CPU pagetables */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ m = buffer->mirror;
+ if (hmm_is_coherent_type(variant->device_number)) {
+ ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[0]);
+ ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[1]);
+ } else {
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[0]);
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[1]);
+ }
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[2]);
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[3]);
+ /*
+ * Check again the content on the pages. Make sure there's no
+ * corrupted data.
+ */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ close(gup_fd);
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test copy-on-write in device pages.
+ * In case of writing to COW private page(s), a page fault will migrate pages
+ * back to system memory first. Then, these pages will be duplicated. In case
+ * of COW device coherent type, pages are duplicated directly from device
+ * memory.
+ */
+TEST_F(hmm, hmm_cow_in_device)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ unsigned char *m;
+ pid_t pid;
+ int status;
+
+ npages = 4;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ pid = fork();
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (!pid) {
+ /* Child process waitd for SIGTERM from the parent. */
+ while (1) {
+ }
+ perror("Should not reach this\n");
+ exit(0);
+ }
+ /* Parent process writes to COW pages(s) and gets a
+ * new copy in system. In case of device private pages,
+ * this write causes a migration to system mem first.
+ */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Terminate child and wait */
+ EXPECT_EQ(0, kill(pid, SIGTERM));
+ EXPECT_EQ(pid, waitpid(pid, &status, 0));
+ EXPECT_NE(0, WIFSIGNALED(status));
+ EXPECT_EQ(SIGTERM, WTERMSIG(status));
+
+ /* Take snapshot to CPU pagetables */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ m = buffer->mirror;
+ for (i = 0; i < npages; i++)
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[i]);
+
+ hmm_buffer_free(buffer);
+}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c
index 257df94697a5..e63a0214f639 100644
--- a/tools/testing/selftests/vm/hugepage-mremap.c
+++ b/tools/testing/selftests/vm/hugepage-mremap.c
@@ -3,8 +3,13 @@
* hugepage-mremap:
*
* Example of remapping huge page memory in a user application using the
- * mremap system call. Code assumes a hugetlbfs filesystem is mounted
- * at './huge'. The code will use 10MB worth of huge pages.
+ * mremap system call. The path to a file in a hugetlbfs filesystem must
+ * be passed as the last argument to this test. The amount of memory used
+ * by this test in MBs can optionally be passed as an argument. If no memory
+ * amount is passed, the default amount is 10MB.
+ *
+ * To make sure the test triggers pmd sharing and goes through the 'unshare'
+ * path in the mremap code use 1GB (1024) or more.
*/
#define _GNU_SOURCE
@@ -18,7 +23,8 @@
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
-#define LENGTH (1UL * 1024 * 1024 * 1024)
+#define DEFAULT_LENGTH_MB 10UL
+#define MB_TO_BYTES(x) (x * 1024 * 1024)
#define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC)
#define FLAGS (MAP_SHARED | MAP_ANONYMOUS)
@@ -28,20 +34,20 @@ static void check_bytes(char *addr)
printf("First hex is %x\n", *((unsigned int *)addr));
}
-static void write_bytes(char *addr)
+static void write_bytes(char *addr, size_t len)
{
unsigned long i;
- for (i = 0; i < LENGTH; i++)
+ for (i = 0; i < len; i++)
*(addr + i) = (char)i;
}
-static int read_bytes(char *addr)
+static int read_bytes(char *addr, size_t len)
{
unsigned long i;
check_bytes(addr);
- for (i = 0; i < LENGTH; i++)
+ for (i = 0; i < len; i++)
if (*(addr + i) != (char)i) {
printf("Mismatch at %lu\n", i);
return 1;
@@ -99,11 +105,28 @@ static void register_region_with_uffd(char *addr, size_t len)
}
}
-int main(void)
+int main(int argc, char *argv[])
{
+ size_t length = 0;
+
+ if (argc != 2 && argc != 3) {
+ printf("Usage: %s [length_in_MB] <hugetlb_file>\n", argv[0]);
+ exit(1);
+ }
+
+ /* Read memory length as the first arg if valid, otherwise fallback to
+ * the default length.
+ */
+ if (argc == 3)
+ length = argc > 2 ? (size_t)atoi(argv[1]) : 0UL;
+
+ length = length > 0 ? length : DEFAULT_LENGTH_MB;
+ length = MB_TO_BYTES(length);
+
int ret = 0;
- int fd = open("/huge/test", O_CREAT | O_RDWR, 0755);
+ /* last arg is the hugetlb file name */
+ int fd = open(argv[argc-1], O_CREAT | O_RDWR, 0755);
if (fd < 0) {
perror("Open failed");
@@ -112,7 +135,7 @@ int main(void)
/* mmap to a PUD aligned address to hopefully trigger pmd sharing. */
unsigned long suggested_addr = 0x7eaa40000000;
- void *haddr = mmap((void *)suggested_addr, LENGTH, PROTECTION,
+ void *haddr = mmap((void *)suggested_addr, length, PROTECTION,
MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
printf("Map haddr: Returned address is %p\n", haddr);
if (haddr == MAP_FAILED) {
@@ -122,7 +145,7 @@ int main(void)
/* mmap again to a dummy address to hopefully trigger pmd sharing. */
suggested_addr = 0x7daa40000000;
- void *daddr = mmap((void *)suggested_addr, LENGTH, PROTECTION,
+ void *daddr = mmap((void *)suggested_addr, length, PROTECTION,
MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
printf("Map daddr: Returned address is %p\n", daddr);
if (daddr == MAP_FAILED) {
@@ -132,16 +155,16 @@ int main(void)
suggested_addr = 0x7faa40000000;
void *vaddr =
- mmap((void *)suggested_addr, LENGTH, PROTECTION, FLAGS, -1, 0);
+ mmap((void *)suggested_addr, length, PROTECTION, FLAGS, -1, 0);
printf("Map vaddr: Returned address is %p\n", vaddr);
if (vaddr == MAP_FAILED) {
perror("mmap2");
exit(1);
}
- register_region_with_uffd(haddr, LENGTH);
+ register_region_with_uffd(haddr, length);
- void *addr = mremap(haddr, LENGTH, LENGTH,
+ void *addr = mremap(haddr, length, length,
MREMAP_MAYMOVE | MREMAP_FIXED, vaddr);
if (addr == MAP_FAILED) {
perror("mremap");
@@ -150,10 +173,19 @@ int main(void)
printf("Mremap: Returned address is %p\n", addr);
check_bytes(addr);
- write_bytes(addr);
- ret = read_bytes(addr);
+ write_bytes(addr, length);
+ ret = read_bytes(addr, length);
+
+ munmap(addr, length);
+
+ addr = mremap(addr, length, length, 0);
+ if (addr != MAP_FAILED) {
+ printf("mremap: Expected failure, but call succeeded\n");
+ exit(1);
+ }
- munmap(addr, LENGTH);
+ close(fd);
+ unlink(argv[argc-1]);
return ret;
}
diff --git a/tools/testing/selftests/vm/hugepage-vmemmap.c b/tools/testing/selftests/vm/hugepage-vmemmap.c
new file mode 100644
index 000000000000..557bdbd4f87e
--- /dev/null
+++ b/tools/testing/selftests/vm/hugepage-vmemmap.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test case of using hugepage memory in a user application using the
+ * mmap system call with MAP_HUGETLB flag. Before running this program
+ * make sure the administrator has allocated enough default sized huge
+ * pages to cover the 2 MB allocation.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#define MAP_LENGTH (2UL * 1024 * 1024)
+
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0x40000 /* arch specific */
+#endif
+
+#define PAGE_SIZE 4096
+
+#define PAGE_COMPOUND_HEAD (1UL << 15)
+#define PAGE_COMPOUND_TAIL (1UL << 16)
+#define PAGE_HUGE (1UL << 17)
+
+#define HEAD_PAGE_FLAGS (PAGE_COMPOUND_HEAD | PAGE_HUGE)
+#define TAIL_PAGE_FLAGS (PAGE_COMPOUND_TAIL | PAGE_HUGE)
+
+#define PM_PFRAME_BITS 55
+#define PM_PFRAME_MASK ~((1UL << PM_PFRAME_BITS) - 1)
+
+/*
+ * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages.
+ * That means the addresses starting with 0x800000... will need to be
+ * specified. Specifying a fixed address is not required on ppc64, i386
+ * or x86_64.
+ */
+#ifdef __ia64__
+#define MAP_ADDR (void *)(0x8000000000000000UL)
+#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED)
+#else
+#define MAP_ADDR NULL
+#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
+#endif
+
+static void write_bytes(char *addr, size_t length)
+{
+ unsigned long i;
+
+ for (i = 0; i < length; i++)
+ *(addr + i) = (char)i;
+}
+
+static unsigned long virt_to_pfn(void *addr)
+{
+ int fd;
+ unsigned long pagemap;
+
+ fd = open("/proc/self/pagemap", O_RDONLY);
+ if (fd < 0)
+ return -1UL;
+
+ lseek(fd, (unsigned long)addr / PAGE_SIZE * sizeof(pagemap), SEEK_SET);
+ read(fd, &pagemap, sizeof(pagemap));
+ close(fd);
+
+ return pagemap & ~PM_PFRAME_MASK;
+}
+
+static int check_page_flags(unsigned long pfn)
+{
+ int fd, i;
+ unsigned long pageflags;
+
+ fd = open("/proc/kpageflags", O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ lseek(fd, pfn * sizeof(pageflags), SEEK_SET);
+
+ read(fd, &pageflags, sizeof(pageflags));
+ if ((pageflags & HEAD_PAGE_FLAGS) != HEAD_PAGE_FLAGS) {
+ close(fd);
+ printf("Head page flags (%lx) is invalid\n", pageflags);
+ return -1;
+ }
+
+ /*
+ * pages other than the first page must be tail and shouldn't be head;
+ * this also verifies kernel has correctly set the fake page_head to tail
+ * while hugetlb_free_vmemmap is enabled.
+ */
+ for (i = 1; i < MAP_LENGTH / PAGE_SIZE; i++) {
+ read(fd, &pageflags, sizeof(pageflags));
+ if ((pageflags & TAIL_PAGE_FLAGS) != TAIL_PAGE_FLAGS ||
+ (pageflags & HEAD_PAGE_FLAGS) == HEAD_PAGE_FLAGS) {
+ close(fd);
+ printf("Tail page flags (%lx) is invalid\n", pageflags);
+ return -1;
+ }
+ }
+
+ close(fd);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ void *addr;
+ unsigned long pfn;
+
+ addr = mmap(MAP_ADDR, MAP_LENGTH, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* Trigger allocation of HugeTLB page. */
+ write_bytes(addr, MAP_LENGTH);
+
+ pfn = virt_to_pfn(addr);
+ if (pfn == -1UL) {
+ munmap(addr, MAP_LENGTH);
+ perror("virt_to_pfn");
+ exit(1);
+ }
+
+ printf("Returned address is %p whose pfn is %lx\n", addr, pfn);
+
+ if (check_page_flags(pfn) < 0) {
+ munmap(addr, MAP_LENGTH);
+ perror("check_page_flags");
+ exit(1);
+ }
+
+ /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
+ if (munmap(addr, MAP_LENGTH)) {
+ perror("munmap");
+ exit(1);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/hugetlb-madvise.c b/tools/testing/selftests/vm/hugetlb-madvise.c
new file mode 100644
index 000000000000..3c9943131881
--- /dev/null
+++ b/tools/testing/selftests/vm/hugetlb-madvise.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hugepage-madvise:
+ *
+ * Basic functional testing of madvise MADV_DONTNEED and MADV_REMOVE
+ * on hugetlb mappings.
+ *
+ * Before running this test, make sure the administrator has pre-allocated
+ * at least MIN_FREE_PAGES hugetlb pages and they are free. In addition,
+ * the test takes an argument that is the path to a file in a hugetlbfs
+ * filesystem. Therefore, a hugetlbfs filesystem must be mounted on some
+ * directory.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#define __USE_GNU
+#include <fcntl.h>
+
+#define USAGE "USAGE: %s <hugepagefile_name>\n"
+#define MIN_FREE_PAGES 20
+#define NR_HUGE_PAGES 10 /* common number of pages to map/allocate */
+
+#define validate_free_pages(exp_free) \
+ do { \
+ int fhp = get_free_hugepages(); \
+ if (fhp != (exp_free)) { \
+ printf("Unexpected number of free huge " \
+ "pages line %d\n", __LINE__); \
+ exit(1); \
+ } \
+ } while (0)
+
+unsigned long huge_page_size;
+unsigned long base_page_size;
+
+/*
+ * default_huge_page_size copied from mlock2-tests.c
+ */
+unsigned long default_huge_page_size(void)
+{
+ unsigned long hps = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+
+ if (!f)
+ return 0;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
+ hps <<= 10;
+ break;
+ }
+ }
+
+ free(line);
+ fclose(f);
+ return hps;
+}
+
+unsigned long get_free_hugepages(void)
+{
+ unsigned long fhp = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+
+ if (!f)
+ return fhp;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1)
+ break;
+ }
+
+ free(line);
+ fclose(f);
+ return fhp;
+}
+
+void write_fault_pages(void *addr, unsigned long nr_pages)
+{
+ unsigned long i;
+
+ for (i = 0; i < nr_pages; i++)
+ *((unsigned long *)(addr + (i * huge_page_size))) = i;
+}
+
+void read_fault_pages(void *addr, unsigned long nr_pages)
+{
+ unsigned long dummy = 0;
+ unsigned long i;
+
+ for (i = 0; i < nr_pages; i++)
+ dummy += *((unsigned long *)(addr + (i * huge_page_size)));
+}
+
+int main(int argc, char **argv)
+{
+ unsigned long free_hugepages;
+ void *addr, *addr2;
+ int fd;
+ int ret;
+
+ if (argc != 2) {
+ printf(USAGE, argv[0]);
+ exit(1);
+ }
+
+ huge_page_size = default_huge_page_size();
+ if (!huge_page_size) {
+ printf("Unable to determine huge page size, exiting!\n");
+ exit(1);
+ }
+ base_page_size = sysconf(_SC_PAGE_SIZE);
+ if (!huge_page_size) {
+ printf("Unable to determine base page size, exiting!\n");
+ exit(1);
+ }
+
+ free_hugepages = get_free_hugepages();
+ if (free_hugepages < MIN_FREE_PAGES) {
+ printf("Not enough free huge pages to test, exiting!\n");
+ exit(1);
+ }
+
+ fd = open(argv[1], O_CREAT | O_RDWR, 0755);
+ if (fd < 0) {
+ perror("Open failed");
+ exit(1);
+ }
+
+ /*
+ * Test validity of MADV_DONTNEED addr and length arguments. mmap
+ * size is NR_HUGE_PAGES + 2. One page at the beginning and end of
+ * the mapping will be unmapped so we KNOW there is nothing mapped
+ * there.
+ */
+ addr = mmap(NULL, (NR_HUGE_PAGES + 2) * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+ if (munmap(addr, huge_page_size) ||
+ munmap(addr + (NR_HUGE_PAGES + 1) * huge_page_size,
+ huge_page_size)) {
+ perror("munmap");
+ exit(1);
+ }
+ addr = addr + huge_page_size;
+
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* addr before mapping should fail */
+ ret = madvise(addr - base_page_size, NR_HUGE_PAGES * huge_page_size,
+ MADV_DONTNEED);
+ if (!ret) {
+ printf("Unexpected success of madvise call with invalid addr line %d\n",
+ __LINE__);
+ exit(1);
+ }
+
+ /* addr + length after mapping should fail */
+ ret = madvise(addr, (NR_HUGE_PAGES * huge_page_size) + base_page_size,
+ MADV_DONTNEED);
+ if (!ret) {
+ printf("Unexpected success of madvise call with invalid length line %d\n",
+ __LINE__);
+ exit(1);
+ }
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+ /*
+ * Test alignment of MADV_DONTNEED addr and length arguments
+ */
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* addr is not huge page size aligned and should fail */
+ ret = madvise(addr + base_page_size,
+ NR_HUGE_PAGES * huge_page_size - base_page_size,
+ MADV_DONTNEED);
+ if (!ret) {
+ printf("Unexpected success of madvise call with unaligned start address %d\n",
+ __LINE__);
+ exit(1);
+ }
+
+ /* addr + length should be aligned up to huge page size */
+ if (madvise(addr,
+ ((NR_HUGE_PAGES - 1) * huge_page_size) + base_page_size,
+ MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+
+ /* should free all pages in mapping */
+ validate_free_pages(free_hugepages);
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+ /*
+ * Test MADV_DONTNEED on anonymous private mapping
+ */
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+
+ /* should free all pages in mapping */
+ validate_free_pages(free_hugepages);
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+ /*
+ * Test MADV_DONTNEED on private mapping of hugetlb file
+ */
+ if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
+ perror("fallocate");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE, fd, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* read should not consume any pages */
+ read_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* madvise should not free any pages */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* writes should allocate private pages */
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /* madvise should free private pages */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* writes should allocate private pages */
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /*
+ * The fallocate below certainly should free the pages associated
+ * with the file. However, pages in the private mapping are also
+ * freed. This is not the 'correct' behavior, but is expected
+ * because this is how it has worked since the initial hugetlb
+ * implementation.
+ */
+ if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ 0, NR_HUGE_PAGES * huge_page_size)) {
+ perror("fallocate");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages);
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+ /*
+ * Test MADV_DONTNEED on shared mapping of hugetlb file
+ */
+ if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
+ perror("fallocate");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* write should not consume any pages */
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* madvise should not free any pages */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /*
+ * Test MADV_REMOVE on shared mapping of hugetlb file
+ *
+ * madvise is same as hole punch and should free all pages.
+ */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages);
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+ /*
+ * Test MADV_REMOVE on shared and private mapping of hugetlb file
+ */
+ if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
+ perror("fallocate");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* shared write should not consume any additional pages */
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ addr2 = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE, fd, 0);
+ if (addr2 == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* private read should not consume any pages */
+ read_fault_pages(addr2, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* private write should consume additional pages */
+ write_fault_pages(addr2, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /* madvise of shared mapping should not free any pages */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /* madvise of private mapping should free private pages */
+ if (madvise(addr2, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* private write should consume additional pages again */
+ write_fault_pages(addr2, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /*
+ * madvise should free both file and private pages although this is
+ * not correct. private pages should not be freed, but this is
+ * expected. See comment associated with FALLOC_FL_PUNCH_HOLE call.
+ */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages);
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+ (void)munmap(addr2, NR_HUGE_PAGES * huge_page_size);
+
+ close(fd);
+ unlink(argv[1]);
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
index 4a9a3afe9fd4..bf2d2a684edf 100644
--- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
@@ -18,19 +18,24 @@ if [[ "$1" == "-cgroup-v2" ]]; then
usage_file=current
fi
-CGROUP_ROOT='/dev/cgroup/memory'
-MNT='/mnt/huge/'
-if [[ ! -e $CGROUP_ROOT ]]; then
- mkdir -p $CGROUP_ROOT
- if [[ $cgroup2 ]]; then
+if [[ $cgroup2 ]]; then
+ CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup2 none $CGROUP_ROOT
- sleep 1
- echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
- else
+ do_umount=1
+ fi
+ echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
+else
+ CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $CGROUP_ROOT
+ do_umount=1
fi
fi
+MNT='/mnt/huge/'
function get_machine_hugepage_size() {
hpz=$(grep -i hugepagesize /proc/meminfo)
diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c
index 155120b67a16..64126c8cd561 100644
--- a/tools/testing/selftests/vm/khugepaged.c
+++ b/tools/testing/selftests/vm/khugepaged.c
@@ -1,6 +1,9 @@
#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
#include <fcntl.h>
#include <limits.h>
+#include <dirent.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
@@ -10,10 +13,24 @@
#include <sys/mman.h>
#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/vfs.h>
+
+#include "linux/magic.h"
+
+#include "vm_util.h"
#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21
#endif
+#ifndef MADV_POPULATE_READ
+#define MADV_POPULATE_READ 22
+#endif
+#ifndef MADV_COLLAPSE
+#define MADV_COLLAPSE 25
+#endif
#define BASE_ADDR ((void *)(1UL << 30))
static unsigned long hpage_pmd_size;
@@ -22,6 +39,47 @@ static int hpage_pmd_nr;
#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
#define PID_SMAPS "/proc/self/smaps"
+#define TEST_FILE "collapse_test_file"
+
+#define MAX_LINE_LENGTH 500
+
+enum vma_type {
+ VMA_ANON,
+ VMA_FILE,
+ VMA_SHMEM,
+};
+
+struct mem_ops {
+ void *(*setup_area)(int nr_hpages);
+ void (*cleanup_area)(void *p, unsigned long size);
+ void (*fault)(void *p, unsigned long start, unsigned long end);
+ bool (*check_huge)(void *addr, int nr_hpages);
+ const char *name;
+};
+
+static struct mem_ops *file_ops;
+static struct mem_ops *anon_ops;
+static struct mem_ops *shmem_ops;
+
+struct collapse_context {
+ void (*collapse)(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect);
+ bool enforce_pte_scan_limits;
+ const char *name;
+};
+
+static struct collapse_context *khugepaged_context;
+static struct collapse_context *madvise_context;
+
+struct file_info {
+ const char *dir;
+ char path[PATH_MAX];
+ enum vma_type type;
+ int fd;
+ char dev_queue_read_ahead_path[PATH_MAX];
+};
+
+static struct file_info finfo;
enum thp_enabled {
THP_ALWAYS,
@@ -88,18 +146,7 @@ struct settings {
enum shmem_enabled shmem_enabled;
bool use_zero_page;
struct khugepaged_settings khugepaged;
-};
-
-static struct settings default_settings = {
- .thp_enabled = THP_MADVISE,
- .thp_defrag = THP_DEFRAG_ALWAYS,
- .shmem_enabled = SHMEM_NEVER,
- .use_zero_page = 0,
- .khugepaged = {
- .defrag = 1,
- .alloc_sleep_millisecs = 10,
- .scan_sleep_millisecs = 10,
- },
+ unsigned long read_ahead_kb;
};
static struct settings saved_settings;
@@ -118,6 +165,11 @@ static void fail(const char *msg)
exit_status++;
}
+static void skip(const char *msg)
+{
+ printf(" \e[33m%s\e[0m\n", msg);
+}
+
static int read_file(const char *path, char *buf, size_t buflen)
{
int fd;
@@ -145,13 +197,19 @@ static int write_file(const char *path, const char *buf, size_t buflen)
ssize_t numwritten;
fd = open(path, O_WRONLY);
- if (fd == -1)
+ if (fd == -1) {
+ printf("open(%s)\n", path);
+ exit(EXIT_FAILURE);
return 0;
+ }
numwritten = write(fd, buf, buflen - 1);
close(fd);
- if (numwritten < 1)
+ if (numwritten < 1) {
+ printf("write(%s)\n", buf);
+ exit(EXIT_FAILURE);
return 0;
+ }
return (unsigned int) numwritten;
}
@@ -218,20 +276,11 @@ static void write_string(const char *name, const char *val)
}
}
-static const unsigned long read_num(const char *name)
+static const unsigned long _read_num(const char *path)
{
- char path[PATH_MAX];
char buf[21];
- int ret;
-
- ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
- if (ret >= PATH_MAX) {
- printf("%s: Pathname is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
- ret = read_file(path, buf, sizeof(buf));
- if (ret < 0) {
+ if (read_file(path, buf, sizeof(buf)) < 0) {
perror("read_file(read_num)");
exit(EXIT_FAILURE);
}
@@ -239,10 +288,9 @@ static const unsigned long read_num(const char *name)
return strtoul(buf, NULL, 10);
}
-static void write_num(const char *name, unsigned long num)
+static const unsigned long read_num(const char *name)
{
char path[PATH_MAX];
- char buf[21];
int ret;
ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
@@ -250,6 +298,12 @@ static void write_num(const char *name, unsigned long num)
printf("%s: Pathname is too long\n", __func__);
exit(EXIT_FAILURE);
}
+ return _read_num(path);
+}
+
+static void _write_num(const char *path, unsigned long num)
+{
+ char buf[21];
sprintf(buf, "%ld", num);
if (!write_file(path, buf, strlen(buf) + 1)) {
@@ -258,6 +312,19 @@ static void write_num(const char *name, unsigned long num)
}
}
+static void write_num(const char *name, unsigned long num)
+{
+ char path[PATH_MAX];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ _write_num(path, num);
+}
+
static void write_settings(struct settings *settings)
{
struct khugepaged_settings *khugepaged = &settings->khugepaged;
@@ -277,6 +344,43 @@ static void write_settings(struct settings *settings)
write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
+
+ if (file_ops && finfo.type == VMA_FILE)
+ _write_num(finfo.dev_queue_read_ahead_path,
+ settings->read_ahead_kb);
+}
+
+#define MAX_SETTINGS_DEPTH 4
+static struct settings settings_stack[MAX_SETTINGS_DEPTH];
+static int settings_index;
+
+static struct settings *current_settings(void)
+{
+ if (!settings_index) {
+ printf("Fail: No settings set");
+ exit(EXIT_FAILURE);
+ }
+ return settings_stack + settings_index - 1;
+}
+
+static void push_settings(struct settings *settings)
+{
+ if (settings_index >= MAX_SETTINGS_DEPTH) {
+ printf("Fail: Settings stack exceeded");
+ exit(EXIT_FAILURE);
+ }
+ settings_stack[settings_index++] = *settings;
+ write_settings(current_settings());
+}
+
+static void pop_settings(void)
+{
+ if (settings_index <= 0) {
+ printf("Fail: Settings stack empty");
+ exit(EXIT_FAILURE);
+ }
+ --settings_index;
+ write_settings(current_settings());
}
static void restore_settings(int sig)
@@ -314,6 +418,10 @@ static void save_settings(void)
.max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
.pages_to_scan = read_num("khugepaged/pages_to_scan"),
};
+ if (file_ops && finfo.type == VMA_FILE)
+ saved_settings.read_ahead_kb =
+ _read_num(finfo.dev_queue_read_ahead_path);
+
success("OK");
signal(SIGTERM, restore_settings);
@@ -322,72 +430,90 @@ static void save_settings(void)
signal(SIGQUIT, restore_settings);
}
-static void adjust_settings(void)
+static void get_finfo(const char *dir)
{
+ struct stat path_stat;
+ struct statfs fs;
+ char buf[1 << 10];
+ char path[PATH_MAX];
+ char *str, *end;
- printf("Adjust settings...");
- write_settings(&default_settings);
- success("OK");
-}
-
-#define MAX_LINE_LENGTH 500
-
-static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
-{
- while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
- if (!strncmp(buf, pattern, strlen(pattern)))
- return true;
+ finfo.dir = dir;
+ stat(finfo.dir, &path_stat);
+ if (!S_ISDIR(path_stat.st_mode)) {
+ printf("%s: Not a directory (%s)\n", __func__, finfo.dir);
+ exit(EXIT_FAILURE);
}
- return false;
-}
-
-static bool check_huge(void *addr)
-{
- bool thp = false;
- int ret;
- FILE *fp;
- char buffer[MAX_LINE_LENGTH];
- char addr_pattern[MAX_LINE_LENGTH];
-
- ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
- (unsigned long) addr);
- if (ret >= MAX_LINE_LENGTH) {
- printf("%s: Pattern is too long\n", __func__);
+ if (snprintf(finfo.path, sizeof(finfo.path), "%s/" TEST_FILE,
+ finfo.dir) >= sizeof(finfo.path)) {
+ printf("%s: Pathname is too long\n", __func__);
exit(EXIT_FAILURE);
}
-
-
- fp = fopen(PID_SMAPS, "r");
- if (!fp) {
- printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
+ if (statfs(finfo.dir, &fs)) {
+ perror("statfs()");
exit(EXIT_FAILURE);
}
- if (!check_for_pattern(fp, addr_pattern, buffer))
- goto err_out;
-
- ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
- hpage_pmd_size >> 10);
- if (ret >= MAX_LINE_LENGTH) {
- printf("%s: Pattern is too long\n", __func__);
+ finfo.type = fs.f_type == TMPFS_MAGIC ? VMA_SHMEM : VMA_FILE;
+ if (finfo.type == VMA_SHMEM)
+ return;
+
+ /* Find owning device's queue/read_ahead_kb control */
+ if (snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/uevent",
+ major(path_stat.st_dev), minor(path_stat.st_dev))
+ >= sizeof(path)) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ if (read_file(path, buf, sizeof(buf)) < 0) {
+ perror("read_file(read_num)");
+ exit(EXIT_FAILURE);
+ }
+ if (strstr(buf, "DEVTYPE=disk")) {
+ /* Found it */
+ if (snprintf(finfo.dev_queue_read_ahead_path,
+ sizeof(finfo.dev_queue_read_ahead_path),
+ "/sys/dev/block/%d:%d/queue/read_ahead_kb",
+ major(path_stat.st_dev), minor(path_stat.st_dev))
+ >= sizeof(finfo.dev_queue_read_ahead_path)) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ return;
+ }
+ if (!strstr(buf, "DEVTYPE=partition")) {
+ printf("%s: Unknown device type: %s\n", __func__, path);
exit(EXIT_FAILURE);
}
/*
- * Fetch the AnonHugePages: in the same block and check whether it got
- * the expected number of hugeepages next.
+ * Partition of block device - need to find actual device.
+ * Using naming convention that devnameN is partition of
+ * device devname.
*/
- if (!check_for_pattern(fp, "AnonHugePages:", buffer))
- goto err_out;
-
- if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
- goto err_out;
-
- thp = true;
-err_out:
- fclose(fp);
- return thp;
+ str = strstr(buf, "DEVNAME=");
+ if (!str) {
+ printf("%s: Could not read: %s", __func__, path);
+ exit(EXIT_FAILURE);
+ }
+ str += 8;
+ end = str;
+ while (*end) {
+ if (isdigit(*end)) {
+ *end = '\0';
+ if (snprintf(finfo.dev_queue_read_ahead_path,
+ sizeof(finfo.dev_queue_read_ahead_path),
+ "/sys/block/%s/queue/read_ahead_kb",
+ str) >= sizeof(finfo.dev_queue_read_ahead_path)) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ return;
+ }
+ ++end;
+ }
+ printf("%s: Could not read: %s\n", __func__, path);
+ exit(EXIT_FAILURE);
}
-
static bool check_swap(void *addr, unsigned long size)
{
bool swap = false;
@@ -409,7 +535,7 @@ static bool check_swap(void *addr, unsigned long size)
printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
exit(EXIT_FAILURE);
}
- if (!check_for_pattern(fp, addr_pattern, buffer))
+ if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
goto err_out;
ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
@@ -422,7 +548,7 @@ static bool check_swap(void *addr, unsigned long size)
* Fetch the Swap: in the same block and check whether it got
* the expected number of hugeepages next.
*/
- if (!check_for_pattern(fp, "Swap:", buffer))
+ if (!check_for_pattern(fp, "Swap:", buffer, sizeof(buffer)))
goto err_out;
if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
@@ -434,12 +560,12 @@ err_out:
return swap;
}
-static void *alloc_mapping(void)
+static void *alloc_mapping(int nr)
{
void *p;
- p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (p != BASE_ADDR) {
printf("Failed to allocate VMA at %p\n", BASE_ADDR);
exit(EXIT_FAILURE);
@@ -456,6 +582,60 @@ static void fill_memory(int *p, unsigned long start, unsigned long end)
p[i * page_size / sizeof(*p)] = i + 0xdead0000;
}
+/*
+ * MADV_COLLAPSE is a best-effort request and may fail if an internal
+ * resource is temporarily unavailable, in which case it will set errno to
+ * EAGAIN. In such a case, immediately reattempt the operation one more
+ * time.
+ */
+static int madvise_collapse_retry(void *p, unsigned long size)
+{
+ bool retry = true;
+ int ret;
+
+retry:
+ ret = madvise(p, size, MADV_COLLAPSE);
+ if (ret && errno == EAGAIN && retry) {
+ retry = false;
+ goto retry;
+ }
+ return ret;
+}
+
+/*
+ * Returns pmd-mapped hugepage in VMA marked VM_HUGEPAGE, filled with
+ * validate_memory()'able contents.
+ */
+static void *alloc_hpage(struct mem_ops *ops)
+{
+ void *p = ops->setup_area(1);
+
+ ops->fault(p, 0, hpage_pmd_size);
+
+ /*
+ * VMA should be neither VM_HUGEPAGE nor VM_NOHUGEPAGE.
+ * The latter is ineligible for collapse by MADV_COLLAPSE
+ * while the former might cause MADV_COLLAPSE to race with
+ * khugepaged on low-load system (like a test machine), which
+ * would cause MADV_COLLAPSE to fail with EAGAIN.
+ */
+ printf("Allocate huge page...");
+ if (madvise_collapse_retry(p, hpage_pmd_size)) {
+ perror("madvise(MADV_COLLAPSE)");
+ exit(EXIT_FAILURE);
+ }
+ if (!ops->check_huge(p, 1)) {
+ perror("madvise(MADV_COLLAPSE)");
+ exit(EXIT_FAILURE);
+ }
+ if (madvise(p, hpage_pmd_size, MADV_HUGEPAGE)) {
+ perror("madvise(MADV_HUGEPAGE)");
+ exit(EXIT_FAILURE);
+ }
+ success("OK");
+ return p;
+}
+
static void validate_memory(int *p, unsigned long start, unsigned long end)
{
int i;
@@ -469,26 +649,216 @@ static void validate_memory(int *p, unsigned long start, unsigned long end)
}
}
+static void *anon_setup_area(int nr_hpages)
+{
+ return alloc_mapping(nr_hpages);
+}
+
+static void anon_cleanup_area(void *p, unsigned long size)
+{
+ munmap(p, size);
+}
+
+static void anon_fault(void *p, unsigned long start, unsigned long end)
+{
+ fill_memory(p, start, end);
+}
+
+static bool anon_check_huge(void *addr, int nr_hpages)
+{
+ return check_huge_anon(addr, nr_hpages, hpage_pmd_size);
+}
+
+static void *file_setup_area(int nr_hpages)
+{
+ int fd;
+ void *p;
+ unsigned long size;
+
+ unlink(finfo.path); /* Cleanup from previous failed tests */
+ printf("Creating %s for collapse%s...", finfo.path,
+ finfo.type == VMA_SHMEM ? " (tmpfs)" : "");
+ fd = open(finfo.path, O_DSYNC | O_CREAT | O_RDWR | O_TRUNC | O_EXCL,
+ 777);
+ if (fd < 0) {
+ perror("open()");
+ exit(EXIT_FAILURE);
+ }
+
+ size = nr_hpages * hpage_pmd_size;
+ p = alloc_mapping(nr_hpages);
+ fill_memory(p, 0, size);
+ write(fd, p, size);
+ close(fd);
+ munmap(p, size);
+ success("OK");
+
+ printf("Opening %s read only for collapse...", finfo.path);
+ finfo.fd = open(finfo.path, O_RDONLY, 777);
+ if (finfo.fd < 0) {
+ perror("open()");
+ exit(EXIT_FAILURE);
+ }
+ p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC,
+ MAP_PRIVATE, finfo.fd, 0);
+ if (p == MAP_FAILED || p != BASE_ADDR) {
+ perror("mmap()");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Drop page cache */
+ write_file("/proc/sys/vm/drop_caches", "3", 2);
+ success("OK");
+ return p;
+}
+
+static void file_cleanup_area(void *p, unsigned long size)
+{
+ munmap(p, size);
+ close(finfo.fd);
+ unlink(finfo.path);
+}
+
+static void file_fault(void *p, unsigned long start, unsigned long end)
+{
+ if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ)) {
+ perror("madvise(MADV_POPULATE_READ");
+ exit(EXIT_FAILURE);
+ }
+}
+
+static bool file_check_huge(void *addr, int nr_hpages)
+{
+ switch (finfo.type) {
+ case VMA_FILE:
+ return check_huge_file(addr, nr_hpages, hpage_pmd_size);
+ case VMA_SHMEM:
+ return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
+ default:
+ exit(EXIT_FAILURE);
+ return false;
+ }
+}
+
+static void *shmem_setup_area(int nr_hpages)
+{
+ void *p;
+ unsigned long size = nr_hpages * hpage_pmd_size;
+
+ finfo.fd = memfd_create("khugepaged-selftest-collapse-shmem", 0);
+ if (finfo.fd < 0) {
+ perror("memfd_create()");
+ exit(EXIT_FAILURE);
+ }
+ if (ftruncate(finfo.fd, size)) {
+ perror("ftruncate()");
+ exit(EXIT_FAILURE);
+ }
+ p = mmap(BASE_ADDR, size, PROT_READ | PROT_WRITE, MAP_SHARED, finfo.fd,
+ 0);
+ if (p != BASE_ADDR) {
+ perror("mmap()");
+ exit(EXIT_FAILURE);
+ }
+ return p;
+}
+
+static void shmem_cleanup_area(void *p, unsigned long size)
+{
+ munmap(p, size);
+ close(finfo.fd);
+}
+
+static bool shmem_check_huge(void *addr, int nr_hpages)
+{
+ return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
+}
+
+static struct mem_ops __anon_ops = {
+ .setup_area = &anon_setup_area,
+ .cleanup_area = &anon_cleanup_area,
+ .fault = &anon_fault,
+ .check_huge = &anon_check_huge,
+ .name = "anon",
+};
+
+static struct mem_ops __file_ops = {
+ .setup_area = &file_setup_area,
+ .cleanup_area = &file_cleanup_area,
+ .fault = &file_fault,
+ .check_huge = &file_check_huge,
+ .name = "file",
+};
+
+static struct mem_ops __shmem_ops = {
+ .setup_area = &shmem_setup_area,
+ .cleanup_area = &shmem_cleanup_area,
+ .fault = &anon_fault,
+ .check_huge = &shmem_check_huge,
+ .name = "shmem",
+};
+
+static void __madvise_collapse(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect)
+{
+ int ret;
+ struct settings settings = *current_settings();
+
+ printf("%s...", msg);
+
+ /*
+ * Prevent khugepaged interference and tests that MADV_COLLAPSE
+ * ignores /sys/kernel/mm/transparent_hugepage/enabled
+ */
+ settings.thp_enabled = THP_NEVER;
+ settings.shmem_enabled = SHMEM_NEVER;
+ push_settings(&settings);
+
+ /* Clear VM_NOHUGEPAGE */
+ madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
+ ret = madvise_collapse_retry(p, nr_hpages * hpage_pmd_size);
+ if (((bool)ret) == expect)
+ fail("Fail: Bad return value");
+ else if (!ops->check_huge(p, expect ? nr_hpages : 0))
+ fail("Fail: check_huge()");
+ else
+ success("OK");
+
+ pop_settings();
+}
+
+static void madvise_collapse(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect)
+{
+ /* Sanity check */
+ if (!ops->check_huge(p, 0)) {
+ printf("Unexpected huge page\n");
+ exit(EXIT_FAILURE);
+ }
+ __madvise_collapse(msg, p, nr_hpages, ops, expect);
+}
+
#define TICK 500000
-static bool wait_for_scan(const char *msg, char *p)
+static bool wait_for_scan(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops)
{
int full_scans;
int timeout = 6; /* 3 seconds */
/* Sanity check */
- if (check_huge(p)) {
+ if (!ops->check_huge(p, 0)) {
printf("Unexpected huge page\n");
exit(EXIT_FAILURE);
}
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+ madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
/* Wait until the second full_scan completed */
full_scans = read_num("khugepaged/full_scans") + 2;
printf("%s...", msg);
while (timeout--) {
- if (check_huge(p))
+ if (ops->check_huge(p, nr_hpages))
break;
if (read_num("khugepaged/full_scans") >= full_scans)
break;
@@ -496,122 +866,155 @@ static bool wait_for_scan(const char *msg, char *p)
usleep(TICK);
}
- madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+ madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE);
return timeout == -1;
}
+static void khugepaged_collapse(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect)
+{
+ if (wait_for_scan(msg, p, nr_hpages, ops)) {
+ if (expect)
+ fail("Timeout");
+ else
+ success("OK");
+ return;
+ }
+
+ /*
+ * For file and shmem memory, khugepaged only retracts pte entries after
+ * putting the new hugepage in the page cache. The hugepage must be
+ * subsequently refaulted to install the pmd mapping for the mm.
+ */
+ if (ops != &__anon_ops)
+ ops->fault(p, 0, nr_hpages * hpage_pmd_size);
+
+ if (ops->check_huge(p, expect ? nr_hpages : 0))
+ success("OK");
+ else
+ fail("Fail");
+}
+
+static struct collapse_context __khugepaged_context = {
+ .collapse = &khugepaged_collapse,
+ .enforce_pte_scan_limits = true,
+ .name = "khugepaged",
+};
+
+static struct collapse_context __madvise_context = {
+ .collapse = &madvise_collapse,
+ .enforce_pte_scan_limits = false,
+ .name = "madvise",
+};
+
+static bool is_tmpfs(struct mem_ops *ops)
+{
+ return ops == &__file_ops && finfo.type == VMA_SHMEM;
+}
+
static void alloc_at_fault(void)
{
- struct settings settings = default_settings;
+ struct settings settings = *current_settings();
char *p;
settings.thp_enabled = THP_ALWAYS;
- write_settings(&settings);
+ push_settings(&settings);
- p = alloc_mapping();
+ p = alloc_mapping(1);
*p = 1;
printf("Allocate huge page on fault...");
- if (check_huge(p))
+ if (check_huge_anon(p, 1, hpage_pmd_size))
success("OK");
else
fail("Fail");
- write_settings(&default_settings);
+ pop_settings();
madvise(p, page_size, MADV_DONTNEED);
printf("Split huge PMD on MADV_DONTNEED...");
- if (!check_huge(p))
+ if (check_huge_anon(p, 0, hpage_pmd_size))
success("OK");
else
fail("Fail");
munmap(p, hpage_pmd_size);
}
-static void collapse_full(void)
+static void collapse_full(struct collapse_context *c, struct mem_ops *ops)
{
void *p;
-
- p = alloc_mapping();
- fill_memory(p, 0, hpage_pmd_size);
- if (wait_for_scan("Collapse fully populated PTE table", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
+ int nr_hpages = 4;
+ unsigned long size = nr_hpages * hpage_pmd_size;
+
+ p = ops->setup_area(nr_hpages);
+ ops->fault(p, 0, size);
+ c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages,
+ ops, true);
+ validate_memory(p, 0, size);
+ ops->cleanup_area(p, size);
}
-static void collapse_empty(void)
+static void collapse_empty(struct collapse_context *c, struct mem_ops *ops)
{
void *p;
- p = alloc_mapping();
- if (wait_for_scan("Do not collapse empty PTE table", p))
- fail("Timeout");
- else if (check_huge(p))
- fail("Fail");
- else
- success("OK");
- munmap(p, hpage_pmd_size);
+ p = ops->setup_area(1);
+ c->collapse("Do not collapse empty PTE table", p, 1, ops, false);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_single_pte_entry(void)
+static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops *ops)
{
void *p;
- p = alloc_mapping();
- fill_memory(p, 0, page_size);
- if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, page_size);
- munmap(p, hpage_pmd_size);
+ p = ops->setup_area(1);
+ ops->fault(p, 0, page_size);
+ c->collapse("Collapse PTE table with single PTE entry present", p,
+ 1, ops, true);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_max_ptes_none(void)
+static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops)
{
int max_ptes_none = hpage_pmd_nr / 2;
- struct settings settings = default_settings;
+ struct settings settings = *current_settings();
void *p;
settings.khugepaged.max_ptes_none = max_ptes_none;
- write_settings(&settings);
+ push_settings(&settings);
- p = alloc_mapping();
+ p = ops->setup_area(1);
- fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
- if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
- fail("Timeout");
- else if (check_huge(p))
- fail("Fail");
- else
- success("OK");
- validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
+ if (is_tmpfs(ops)) {
+ /* shmem pages always in the page cache */
+ printf("tmpfs...");
+ skip("Skip");
+ goto skip;
+ }
- fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
- if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
+ c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1,
+ ops, !c->enforce_pte_scan_limits);
+ validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
- munmap(p, hpage_pmd_size);
- write_settings(&default_settings);
+ if (c->enforce_pte_scan_limits) {
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
+ c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, ops,
+ true);
+ validate_memory(p, 0,
+ (hpage_pmd_nr - max_ptes_none) * page_size);
+ }
+skip:
+ ops->cleanup_area(p, hpage_pmd_size);
+ pop_settings();
}
-static void collapse_swapin_single_pte(void)
+static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops)
{
void *p;
- p = alloc_mapping();
- fill_memory(p, 0, hpage_pmd_size);
+
+ p = ops->setup_area(1);
+ ops->fault(p, 0, hpage_pmd_size);
printf("Swapout one page...");
if (madvise(p, page_size, MADV_PAGEOUT)) {
@@ -625,25 +1028,21 @@ static void collapse_swapin_single_pte(void)
goto out;
}
- if (wait_for_scan("Collapse with swapping in single PTE entry", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
+ c->collapse("Collapse with swapping in single PTE entry", p, 1, ops,
+ true);
validate_memory(p, 0, hpage_pmd_size);
out:
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_max_ptes_swap(void)
+static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops)
{
int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
void *p;
- p = alloc_mapping();
+ p = ops->setup_area(1);
+ ops->fault(p, 0, hpage_pmd_size);
- fill_memory(p, 0, hpage_pmd_size);
printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
perror("madvise(MADV_PAGEOUT)");
@@ -656,115 +1055,93 @@ static void collapse_max_ptes_swap(void)
goto out;
}
- if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
- fail("Timeout");
- else if (check_huge(p))
- fail("Fail");
- else
- success("OK");
+ c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, ops,
+ !c->enforce_pte_scan_limits);
validate_memory(p, 0, hpage_pmd_size);
- fill_memory(p, 0, hpage_pmd_size);
- printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
- if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
- perror("madvise(MADV_PAGEOUT)");
- exit(EXIT_FAILURE);
- }
- if (check_swap(p, max_ptes_swap * page_size)) {
- success("OK");
- } else {
- fail("Fail");
- goto out;
- }
+ if (c->enforce_pte_scan_limits) {
+ ops->fault(p, 0, hpage_pmd_size);
+ printf("Swapout %d of %d pages...", max_ptes_swap,
+ hpage_pmd_nr);
+ if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
+ perror("madvise(MADV_PAGEOUT)");
+ exit(EXIT_FAILURE);
+ }
+ if (check_swap(p, max_ptes_swap * page_size)) {
+ success("OK");
+ } else {
+ fail("Fail");
+ goto out;
+ }
- if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, hpage_pmd_size);
+ c->collapse("Collapse with max_ptes_swap pages swapped out", p,
+ 1, ops, true);
+ validate_memory(p, 0, hpage_pmd_size);
+ }
out:
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_single_pte_entry_compound(void)
+static void collapse_single_pte_entry_compound(struct collapse_context *c, struct mem_ops *ops)
{
void *p;
- p = alloc_mapping();
+ p = alloc_hpage(ops);
- printf("Allocate huge page...");
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(p, 0, hpage_pmd_size);
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+ if (is_tmpfs(ops)) {
+ /* MADV_DONTNEED won't evict tmpfs pages */
+ printf("tmpfs...");
+ skip("Skip");
+ goto skip;
+ }
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
printf("Split huge page leaving single PTE mapping compound page...");
madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
- if (!check_huge(p))
+ if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
- if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
+ c->collapse("Collapse PTE table with single PTE mapping compound page",
+ p, 1, ops, true);
validate_memory(p, 0, page_size);
- munmap(p, hpage_pmd_size);
+skip:
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_full_of_compound(void)
+static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops *ops)
{
void *p;
- p = alloc_mapping();
-
- printf("Allocate huge page...");
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(p, 0, hpage_pmd_size);
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
+ p = alloc_hpage(ops);
printf("Split huge page leaving single PTE page table full of compound pages...");
madvise(p, page_size, MADV_NOHUGEPAGE);
madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
- if (!check_huge(p))
+ if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
- if (wait_for_scan("Collapse PTE table full of compound pages", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
+ c->collapse("Collapse PTE table full of compound pages", p, 1, ops,
+ true);
validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_compound_extreme(void)
+static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops *ops)
{
void *p;
int i;
- p = alloc_mapping();
+ p = ops->setup_area(1);
for (i = 0; i < hpage_pmd_nr; i++) {
printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
i + 1, hpage_pmd_nr);
madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(BASE_ADDR, 0, hpage_pmd_size);
- if (!check_huge(BASE_ADDR)) {
+ ops->fault(BASE_ADDR, 0, hpage_pmd_size);
+ if (!ops->check_huge(BASE_ADDR, 1)) {
printf("Failed to allocate huge page\n");
exit(EXIT_FAILURE);
}
@@ -791,34 +1168,30 @@ static void collapse_compound_extreme(void)
}
}
- munmap(BASE_ADDR, hpage_pmd_size);
- fill_memory(p, 0, hpage_pmd_size);
- if (!check_huge(p))
+ ops->cleanup_area(BASE_ADDR, hpage_pmd_size);
+ ops->fault(p, 0, hpage_pmd_size);
+ if (!ops->check_huge(p, 1))
success("OK");
else
fail("Fail");
- if (wait_for_scan("Collapse PTE table full of different compound pages", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
+ c->collapse("Collapse PTE table full of different compound pages", p, 1,
+ ops, true);
validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_fork(void)
+static void collapse_fork(struct collapse_context *c, struct mem_ops *ops)
{
int wstatus;
void *p;
- p = alloc_mapping();
+ p = ops->setup_area(1);
printf("Allocate small page...");
- fill_memory(p, 0, page_size);
- if (!check_huge(p))
+ ops->fault(p, 0, page_size);
+ if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
@@ -829,22 +1202,17 @@ static void collapse_fork(void)
skip_settings_restore = true;
exit_status = 0;
- if (!check_huge(p))
+ if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
- fill_memory(p, page_size, 2 * page_size);
-
- if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
+ ops->fault(p, page_size, 2 * page_size);
+ c->collapse("Collapse PTE table with single page shared with parent process",
+ p, 1, ops, true);
validate_memory(p, 0, page_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
exit(exit_status);
}
@@ -852,36 +1220,27 @@ static void collapse_fork(void)
exit_status += WEXITSTATUS(wstatus);
printf("Check if parent still has small page...");
- if (!check_huge(p))
+ if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
validate_memory(p, 0, page_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_fork_compound(void)
+static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *ops)
{
int wstatus;
void *p;
- p = alloc_mapping();
-
- printf("Allocate huge page...");
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(p, 0, hpage_pmd_size);
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
+ p = alloc_hpage(ops);
printf("Share huge page over fork()...");
if (!fork()) {
/* Do not touch settings on child exit */
skip_settings_restore = true;
exit_status = 0;
- if (check_huge(p))
+ if (ops->check_huge(p, 1))
success("OK");
else
fail("Fail");
@@ -889,24 +1248,20 @@ static void collapse_fork_compound(void)
printf("Split huge page PMD in child process...");
madvise(p, page_size, MADV_NOHUGEPAGE);
madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
- if (!check_huge(p))
+ if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
- fill_memory(p, 0, page_size);
+ ops->fault(p, 0, page_size);
write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
- if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
+ c->collapse("Collapse PTE table full of compound pages in child",
+ p, 1, ops, true);
write_num("khugepaged/max_ptes_shared",
- default_settings.khugepaged.max_ptes_shared);
+ current_settings()->khugepaged.max_ptes_shared);
validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
exit(exit_status);
}
@@ -914,74 +1269,59 @@ static void collapse_fork_compound(void)
exit_status += WEXITSTATUS(wstatus);
printf("Check if parent still has huge page...");
- if (check_huge(p))
+ if (ops->check_huge(p, 1))
success("OK");
else
fail("Fail");
validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_max_ptes_shared()
+static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops)
{
int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
int wstatus;
void *p;
- p = alloc_mapping();
-
- printf("Allocate huge page...");
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(p, 0, hpage_pmd_size);
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
+ p = alloc_hpage(ops);
printf("Share huge page over fork()...");
if (!fork()) {
/* Do not touch settings on child exit */
skip_settings_restore = true;
exit_status = 0;
- if (check_huge(p))
+ if (ops->check_huge(p, 1))
success("OK");
else
fail("Fail");
printf("Trigger CoW on page %d of %d...",
hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
- fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
- if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
- fail("Timeout");
- else if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- printf("Trigger CoW on page %d of %d...",
- hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
- fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
- if (!check_huge(p))
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
+ if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
-
- if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
+ c->collapse("Maybe collapse with max_ptes_shared exceeded", p,
+ 1, ops, !c->enforce_pte_scan_limits);
+
+ if (c->enforce_pte_scan_limits) {
+ printf("Trigger CoW on page %d of %d...",
+ hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared) *
+ page_size);
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+
+ c->collapse("Collapse with max_ptes_shared PTEs shared",
+ p, 1, ops, true);
+ }
validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
exit(exit_status);
}
@@ -989,20 +1329,153 @@ static void collapse_max_ptes_shared()
exit_status += WEXITSTATUS(wstatus);
printf("Check if parent still has huge page...");
- if (check_huge(p))
+ if (ops->check_huge(p, 1))
success("OK");
else
fail("Fail");
validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-int main(void)
+static void madvise_collapse_existing_thps(struct collapse_context *c,
+ struct mem_ops *ops)
{
+ void *p;
+
+ p = ops->setup_area(1);
+ ops->fault(p, 0, hpage_pmd_size);
+ c->collapse("Collapse fully populated PTE table...", p, 1, ops, true);
+ validate_memory(p, 0, hpage_pmd_size);
+
+ /* c->collapse() will find a hugepage and complain - call directly. */
+ __madvise_collapse("Re-collapse PMD-mapped hugepage", p, 1, ops, true);
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+/*
+ * Test race with khugepaged where page tables have been retracted and
+ * pmd cleared.
+ */
+static void madvise_retracted_page_tables(struct collapse_context *c,
+ struct mem_ops *ops)
+{
+ void *p;
+ int nr_hpages = 1;
+ unsigned long size = nr_hpages * hpage_pmd_size;
+
+ p = ops->setup_area(nr_hpages);
+ ops->fault(p, 0, size);
+
+ /* Let khugepaged collapse and leave pmd cleared */
+ if (wait_for_scan("Collapse and leave PMD cleared", p, nr_hpages,
+ ops)) {
+ fail("Timeout");
+ return;
+ }
+ success("OK");
+ c->collapse("Install huge PMD from page cache", p, nr_hpages, ops,
+ true);
+ validate_memory(p, 0, size);
+ ops->cleanup_area(p, size);
+}
+
+static void usage(void)
+{
+ fprintf(stderr, "\nUsage: ./khugepaged <test type> [dir]\n\n");
+ fprintf(stderr, "\t<test type>\t: <context>:<mem_type>\n");
+ fprintf(stderr, "\t<context>\t: [all|khugepaged|madvise]\n");
+ fprintf(stderr, "\t<mem_type>\t: [all|anon|file|shmem]\n");
+ fprintf(stderr, "\n\t\"file,all\" mem_type requires [dir] argument\n");
+ fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n");
+ fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n");
+ fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n");
+ fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n");
+ exit(1);
+}
+
+static void parse_test_type(int argc, const char **argv)
+{
+ char *buf;
+ const char *token;
+
+ if (argc == 1) {
+ /* Backwards compatibility */
+ khugepaged_context = &__khugepaged_context;
+ madvise_context = &__madvise_context;
+ anon_ops = &__anon_ops;
+ return;
+ }
+
+ buf = strdup(argv[1]);
+ token = strsep(&buf, ":");
+
+ if (!strcmp(token, "all")) {
+ khugepaged_context = &__khugepaged_context;
+ madvise_context = &__madvise_context;
+ } else if (!strcmp(token, "khugepaged")) {
+ khugepaged_context = &__khugepaged_context;
+ } else if (!strcmp(token, "madvise")) {
+ madvise_context = &__madvise_context;
+ } else {
+ usage();
+ }
+
+ if (!buf)
+ usage();
+
+ if (!strcmp(buf, "all")) {
+ file_ops = &__file_ops;
+ anon_ops = &__anon_ops;
+ shmem_ops = &__shmem_ops;
+ } else if (!strcmp(buf, "anon")) {
+ anon_ops = &__anon_ops;
+ } else if (!strcmp(buf, "file")) {
+ file_ops = &__file_ops;
+ } else if (!strcmp(buf, "shmem")) {
+ shmem_ops = &__shmem_ops;
+ } else {
+ usage();
+ }
+
+ if (!file_ops)
+ return;
+
+ if (argc != 3)
+ usage();
+}
+
+int main(int argc, const char **argv)
+{
+ struct settings default_settings = {
+ .thp_enabled = THP_MADVISE,
+ .thp_defrag = THP_DEFRAG_ALWAYS,
+ .shmem_enabled = SHMEM_ADVISE,
+ .use_zero_page = 0,
+ .khugepaged = {
+ .defrag = 1,
+ .alloc_sleep_millisecs = 10,
+ .scan_sleep_millisecs = 10,
+ },
+ /*
+ * When testing file-backed memory, the collapse path
+ * looks at how many pages are found in the page cache, not
+ * what pages are mapped. Disable read ahead optimization so
+ * pages don't find their way into the page cache unless
+ * we mem_ops->fault() them in.
+ */
+ .read_ahead_kb = 0,
+ };
+
+ parse_test_type(argc, argv);
+
+ if (file_ops)
+ get_finfo(argv[2]);
+
setbuf(stdout, NULL);
page_size = getpagesize();
- hpage_pmd_size = read_num("hpage_pmd_size");
+ hpage_pmd_size = read_pmd_pagesize();
hpage_pmd_nr = hpage_pmd_size / page_size;
default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
@@ -1011,21 +1484,75 @@ int main(void)
default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
save_settings();
- adjust_settings();
+ push_settings(&default_settings);
alloc_at_fault();
- collapse_full();
- collapse_empty();
- collapse_single_pte_entry();
- collapse_max_ptes_none();
- collapse_swapin_single_pte();
- collapse_max_ptes_swap();
- collapse_single_pte_entry_compound();
- collapse_full_of_compound();
- collapse_compound_extreme();
- collapse_fork();
- collapse_fork_compound();
- collapse_max_ptes_shared();
+
+#define TEST(t, c, o) do { \
+ if (c && o) { \
+ printf("\nRun test: " #t " (%s:%s)\n", c->name, o->name); \
+ t(c, o); \
+ } \
+ } while (0)
+
+ TEST(collapse_full, khugepaged_context, anon_ops);
+ TEST(collapse_full, khugepaged_context, file_ops);
+ TEST(collapse_full, khugepaged_context, shmem_ops);
+ TEST(collapse_full, madvise_context, anon_ops);
+ TEST(collapse_full, madvise_context, file_ops);
+ TEST(collapse_full, madvise_context, shmem_ops);
+
+ TEST(collapse_empty, khugepaged_context, anon_ops);
+ TEST(collapse_empty, madvise_context, anon_ops);
+
+ TEST(collapse_single_pte_entry, khugepaged_context, anon_ops);
+ TEST(collapse_single_pte_entry, khugepaged_context, file_ops);
+ TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops);
+ TEST(collapse_single_pte_entry, madvise_context, anon_ops);
+ TEST(collapse_single_pte_entry, madvise_context, file_ops);
+ TEST(collapse_single_pte_entry, madvise_context, shmem_ops);
+
+ TEST(collapse_max_ptes_none, khugepaged_context, anon_ops);
+ TEST(collapse_max_ptes_none, khugepaged_context, file_ops);
+ TEST(collapse_max_ptes_none, madvise_context, anon_ops);
+ TEST(collapse_max_ptes_none, madvise_context, file_ops);
+
+ TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops);
+ TEST(collapse_single_pte_entry_compound, khugepaged_context, file_ops);
+ TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops);
+ TEST(collapse_single_pte_entry_compound, madvise_context, file_ops);
+
+ TEST(collapse_full_of_compound, khugepaged_context, anon_ops);
+ TEST(collapse_full_of_compound, khugepaged_context, file_ops);
+ TEST(collapse_full_of_compound, khugepaged_context, shmem_ops);
+ TEST(collapse_full_of_compound, madvise_context, anon_ops);
+ TEST(collapse_full_of_compound, madvise_context, file_ops);
+ TEST(collapse_full_of_compound, madvise_context, shmem_ops);
+
+ TEST(collapse_compound_extreme, khugepaged_context, anon_ops);
+ TEST(collapse_compound_extreme, madvise_context, anon_ops);
+
+ TEST(collapse_swapin_single_pte, khugepaged_context, anon_ops);
+ TEST(collapse_swapin_single_pte, madvise_context, anon_ops);
+
+ TEST(collapse_max_ptes_swap, khugepaged_context, anon_ops);
+ TEST(collapse_max_ptes_swap, madvise_context, anon_ops);
+
+ TEST(collapse_fork, khugepaged_context, anon_ops);
+ TEST(collapse_fork, madvise_context, anon_ops);
+
+ TEST(collapse_fork_compound, khugepaged_context, anon_ops);
+ TEST(collapse_fork_compound, madvise_context, anon_ops);
+
+ TEST(collapse_max_ptes_shared, khugepaged_context, anon_ops);
+ TEST(collapse_max_ptes_shared, madvise_context, anon_ops);
+
+ TEST(madvise_collapse_existing_thps, madvise_context, anon_ops);
+ TEST(madvise_collapse_existing_thps, madvise_context, file_ops);
+ TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops);
+
+ TEST(madvise_retracted_page_tables, madvise_context, file_ops);
+ TEST(madvise_retracted_page_tables, madvise_context, shmem_ops);
restore_settings(0);
}
diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c
index 1436e1a9a3d3..0d85be2350fa 100644
--- a/tools/testing/selftests/vm/ksm_tests.c
+++ b/tools/testing/selftests/vm/ksm_tests.c
@@ -11,7 +11,8 @@
#include <err.h>
#include "../kselftest.h"
-#include "../../../../include/vdso/time64.h"
+#include <include/vdso/time64.h>
+#include "util.h"
#define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/"
#define KSM_FP(s) (KSM_SYSFS_PATH s)
@@ -22,15 +23,6 @@
#define KSM_MERGE_ACROSS_NODES_DEFAULT true
#define MB (1ul << 20)
-#define PAGE_SHIFT 12
-#define HPAGE_SHIFT 21
-
-#define PAGE_SIZE (1 << PAGE_SHIFT)
-#define HPAGE_SIZE (1 << HPAGE_SHIFT)
-
-#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0)
-#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1))
-
struct ksm_sysfs {
unsigned long max_page_sharing;
unsigned long merge_across_nodes;
@@ -62,6 +54,7 @@ static int ksm_write_sysfs(const char *file_path, unsigned long val)
}
if (fprintf(f, "%lu", val) < 0) {
perror("fprintf");
+ fclose(f);
return 1;
}
fclose(f);
@@ -80,6 +73,7 @@ static int ksm_read_sysfs(const char *file_path, unsigned long *val)
}
if (fscanf(f, "%lu", val) != 1) {
perror("fscanf");
+ fclose(f);
return 1;
}
fclose(f);
@@ -229,7 +223,8 @@ static bool assert_ksm_pages_count(long dupl_page_count)
static int ksm_save_def(struct ksm_sysfs *ksm_sysfs)
{
if (ksm_read_sysfs(KSM_FP("max_page_sharing"), &ksm_sysfs->max_page_sharing) ||
- ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) ||
+ numa_available() ? 0 :
+ ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) ||
ksm_read_sysfs(KSM_FP("sleep_millisecs"), &ksm_sysfs->sleep_millisecs) ||
ksm_read_sysfs(KSM_FP("pages_to_scan"), &ksm_sysfs->pages_to_scan) ||
ksm_read_sysfs(KSM_FP("run"), &ksm_sysfs->run) ||
@@ -244,7 +239,8 @@ static int ksm_save_def(struct ksm_sysfs *ksm_sysfs)
static int ksm_restore(struct ksm_sysfs *ksm_sysfs)
{
if (ksm_write_sysfs(KSM_FP("max_page_sharing"), ksm_sysfs->max_page_sharing) ||
- ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) ||
+ numa_available() ? 0 :
+ ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) ||
ksm_write_sysfs(KSM_FP("pages_to_scan"), ksm_sysfs->pages_to_scan) ||
ksm_write_sysfs(KSM_FP("run"), ksm_sysfs->run) ||
ksm_write_sysfs(KSM_FP("sleep_millisecs"), ksm_sysfs->sleep_millisecs) ||
@@ -456,34 +452,6 @@ err_out:
return KSFT_FAIL;
}
-int64_t allocate_transhuge(void *ptr, int pagemap_fd)
-{
- uint64_t ent[2];
-
- /* drop pmd */
- if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANONYMOUS |
- MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
- errx(2, "mmap transhuge");
-
- if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
- err(2, "MADV_HUGEPAGE");
-
- /* allocate transparent huge page */
- *(volatile void **)ptr = ptr;
-
- if (pread(pagemap_fd, ent, sizeof(ent),
- (uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent))
- err(2, "read pagemap");
-
- if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
- PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
- !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1)))
- return PAGEMAP_PFN(ent[0]);
-
- return -1;
-}
-
static int ksm_merge_hugepages_time(int mapping, int prot, int timeout, size_t map_size)
{
void *map_ptr, *map_ptr_orig;
@@ -756,7 +724,8 @@ int main(int argc, char *argv[])
if (ksm_write_sysfs(KSM_FP("run"), 2) ||
ksm_write_sysfs(KSM_FP("sleep_millisecs"), 0) ||
- ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) ||
+ numa_available() ? 0 :
+ ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) ||
ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count))
return KSFT_FAIL;
diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c
index 3ee0e8275600..715a42e8e2cd 100644
--- a/tools/testing/selftests/vm/madv_populate.c
+++ b/tools/testing/selftests/vm/madv_populate.c
@@ -18,6 +18,7 @@
#include <sys/mman.h>
#include "../kselftest.h"
+#include "vm_util.h"
/*
* For now, we're using 2 MiB of private anonymous memory for all tests.
@@ -26,18 +27,6 @@
static size_t pagesize;
-static uint64_t pagemap_get_entry(int fd, char *start)
-{
- const unsigned long pfn = (unsigned long)start / pagesize;
- uint64_t entry;
- int ret;
-
- ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry));
- if (ret != sizeof(entry))
- ksft_exit_fail_msg("reading pagemap failed\n");
- return entry;
-}
-
static bool pagemap_is_populated(int fd, char *start)
{
uint64_t entry = pagemap_get_entry(fd, start);
@@ -46,13 +35,6 @@ static bool pagemap_is_populated(int fd, char *start)
return entry & 0xc000000000000000ull;
}
-static bool pagemap_is_softdirty(int fd, char *start)
-{
- uint64_t entry = pagemap_get_entry(fd, start);
-
- return entry & 0x0080000000000000ull;
-}
-
static void sense_support(void)
{
char *addr;
@@ -258,20 +240,6 @@ static bool range_is_not_softdirty(char *start, ssize_t size)
return ret;
}
-static void clear_softdirty(void)
-{
- int fd = open("/proc/self/clear_refs", O_WRONLY);
- const char *ctrl = "4";
- int ret;
-
- if (fd < 0)
- ksft_exit_fail_msg("opening clear_refs failed\n");
- ret = write(fd, ctrl, strlen(ctrl));
- if (ret != strlen(ctrl))
- ksft_exit_fail_msg("writing clear_refs failed\n");
- close(fd);
-}
-
static void test_softdirty(void)
{
char *addr;
diff --git a/tools/testing/selftests/vm/map_fixed_noreplace.c b/tools/testing/selftests/vm/map_fixed_noreplace.c
index d91bde511268..eed44322d1a6 100644
--- a/tools/testing/selftests/vm/map_fixed_noreplace.c
+++ b/tools/testing/selftests/vm/map_fixed_noreplace.c
@@ -17,9 +17,6 @@
#define MAP_FIXED_NOREPLACE 0x100000
#endif
-#define BASE_ADDRESS (256ul * 1024 * 1024)
-
-
static void dump_maps(void)
{
char cmd[32];
@@ -28,18 +25,46 @@ static void dump_maps(void)
system(cmd);
}
+static unsigned long find_base_addr(unsigned long size)
+{
+ void *addr;
+ unsigned long flags;
+
+ flags = MAP_PRIVATE | MAP_ANONYMOUS;
+ addr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
+ if (addr == MAP_FAILED) {
+ printf("Error: couldn't map the space we need for the test\n");
+ return 0;
+ }
+
+ if (munmap(addr, size) != 0) {
+ printf("Error: couldn't map the space we need for the test\n");
+ return 0;
+ }
+ return (unsigned long)addr;
+}
+
int main(void)
{
+ unsigned long base_addr;
unsigned long flags, addr, size, page_size;
char *p;
page_size = sysconf(_SC_PAGE_SIZE);
+ //let's find a base addr that is free before we start the tests
+ size = 5 * page_size;
+ base_addr = find_base_addr(size);
+ if (!base_addr) {
+ printf("Error: couldn't map the space we need for the test\n");
+ return 1;
+ }
+
flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
// Check we can map all the areas we need below
errno = 0;
- addr = BASE_ADDRESS;
+ addr = base_addr;
size = 5 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
@@ -60,7 +85,7 @@ int main(void)
printf("unmap() successful\n");
errno = 0;
- addr = BASE_ADDRESS + page_size;
+ addr = base_addr + page_size;
size = 3 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
@@ -80,7 +105,7 @@ int main(void)
* +4 | free | new
*/
errno = 0;
- addr = BASE_ADDRESS;
+ addr = base_addr;
size = 5 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
@@ -101,7 +126,7 @@ int main(void)
* +4 | free |
*/
errno = 0;
- addr = BASE_ADDRESS + (2 * page_size);
+ addr = base_addr + (2 * page_size);
size = page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
@@ -121,7 +146,7 @@ int main(void)
* +4 | free | new
*/
errno = 0;
- addr = BASE_ADDRESS + (3 * page_size);
+ addr = base_addr + (3 * page_size);
size = 2 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
@@ -141,7 +166,7 @@ int main(void)
* +4 | free |
*/
errno = 0;
- addr = BASE_ADDRESS;
+ addr = base_addr;
size = 2 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
@@ -161,7 +186,7 @@ int main(void)
* +4 | free |
*/
errno = 0;
- addr = BASE_ADDRESS;
+ addr = base_addr;
size = page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
@@ -181,7 +206,7 @@ int main(void)
* +4 | free | new
*/
errno = 0;
- addr = BASE_ADDRESS + (4 * page_size);
+ addr = base_addr + (4 * page_size);
size = page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
@@ -192,7 +217,7 @@ int main(void)
return 1;
}
- addr = BASE_ADDRESS;
+ addr = base_addr;
size = 5 * page_size;
if (munmap((void *)addr, size) != 0) {
dump_maps();
diff --git a/tools/testing/selftests/vm/memfd_secret.c b/tools/testing/selftests/vm/memfd_secret.c
index 93e7e7ffed33..957b9e18c729 100644
--- a/tools/testing/selftests/vm/memfd_secret.c
+++ b/tools/testing/selftests/vm/memfd_secret.c
@@ -282,7 +282,7 @@ int main(int argc, char *argv[])
close(fd);
- ksft_exit(!ksft_get_fail_cnt());
+ ksft_finished();
}
#else /* __NR_memfd_secret */
diff --git a/tools/testing/selftests/vm/migration.c b/tools/testing/selftests/vm/migration.c
new file mode 100644
index 000000000000..1cec8425e3ca
--- /dev/null
+++ b/tools/testing/selftests/vm/migration.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The main purpose of the tests here is to exercise the migration entry code
+ * paths in the kernel.
+ */
+
+#include "../kselftest_harness.h"
+#include <strings.h>
+#include <pthread.h>
+#include <numa.h>
+#include <numaif.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <time.h>
+
+#define TWOMEG (2<<20)
+#define RUNTIME (60)
+
+#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+
+FIXTURE(migration)
+{
+ pthread_t *threads;
+ pid_t *pids;
+ int nthreads;
+ int n1;
+ int n2;
+};
+
+FIXTURE_SETUP(migration)
+{
+ int n;
+
+ ASSERT_EQ(numa_available(), 0);
+ self->nthreads = numa_num_task_cpus() - 1;
+ self->n1 = -1;
+ self->n2 = -1;
+
+ for (n = 0; n < numa_max_possible_node(); n++)
+ if (numa_bitmask_isbitset(numa_all_nodes_ptr, n)) {
+ if (self->n1 == -1) {
+ self->n1 = n;
+ } else {
+ self->n2 = n;
+ break;
+ }
+ }
+
+ self->threads = malloc(self->nthreads * sizeof(*self->threads));
+ ASSERT_NE(self->threads, NULL);
+ self->pids = malloc(self->nthreads * sizeof(*self->pids));
+ ASSERT_NE(self->pids, NULL);
+};
+
+FIXTURE_TEARDOWN(migration)
+{
+ free(self->threads);
+ free(self->pids);
+}
+
+int migrate(uint64_t *ptr, int n1, int n2)
+{
+ int ret, tmp;
+ int status = 0;
+ struct timespec ts1, ts2;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &ts1))
+ return -1;
+
+ while (1) {
+ if (clock_gettime(CLOCK_MONOTONIC, &ts2))
+ return -1;
+
+ if (ts2.tv_sec - ts1.tv_sec >= RUNTIME)
+ return 0;
+
+ ret = move_pages(0, 1, (void **) &ptr, &n2, &status,
+ MPOL_MF_MOVE_ALL);
+ if (ret) {
+ if (ret > 0)
+ printf("Didn't migrate %d pages\n", ret);
+ else
+ perror("Couldn't migrate pages");
+ return -2;
+ }
+
+ tmp = n2;
+ n2 = n1;
+ n1 = tmp;
+ }
+
+ return 0;
+}
+
+void *access_mem(void *ptr)
+{
+ uint64_t y = 0;
+ volatile uint64_t *x = ptr;
+
+ while (1) {
+ pthread_testcancel();
+ y += *x;
+ }
+
+ return NULL;
+}
+
+/*
+ * Basic migration entry testing. One thread will move pages back and forth
+ * between nodes whilst other threads try and access them triggering the
+ * migration entry wait paths in the kernel.
+ */
+TEST_F_TIMEOUT(migration, private_anon, 2*RUNTIME)
+{
+ uint64_t *ptr;
+ int i;
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++)
+ if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
+ perror("Couldn't create thread");
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
+}
+
+/*
+ * Same as the previous test but with shared memory.
+ */
+TEST_F_TIMEOUT(migration, shared_anon, 2*RUNTIME)
+{
+ pid_t pid;
+ uint64_t *ptr;
+ int i;
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++) {
+ pid = fork();
+ if (!pid)
+ access_mem(ptr);
+ else
+ self->pids[i] = pid;
+ }
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(kill(self->pids[i], SIGTERM), 0);
+}
+
+/*
+ * Tests the pmd migration entry paths.
+ */
+TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME)
+{
+ uint64_t *ptr;
+ int i;
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, 2*TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG);
+ ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0);
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++)
+ if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
+ perror("Couldn't create thread");
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/vm/mrelease_test.c b/tools/testing/selftests/vm/mrelease_test.c
new file mode 100644
index 000000000000..6c62966ab5db
--- /dev/null
+++ b/tools/testing/selftests/vm/mrelease_test.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2022 Google LLC
+ */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "util.h"
+
+#include "../kselftest.h"
+
+#ifndef __NR_pidfd_open
+#define __NR_pidfd_open -1
+#endif
+
+#ifndef __NR_process_mrelease
+#define __NR_process_mrelease -1
+#endif
+
+#define MB(x) (x << 20)
+#define MAX_SIZE_MB 1024
+
+static int alloc_noexit(unsigned long nr_pages, int pipefd)
+{
+ int ppid = getppid();
+ int timeout = 10; /* 10sec timeout to get killed */
+ unsigned long i;
+ char *buf;
+
+ buf = (char *)mmap(NULL, nr_pages * PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, 0, 0);
+ if (buf == MAP_FAILED) {
+ perror("mmap failed, halting the test");
+ return KSFT_FAIL;
+ }
+
+ for (i = 0; i < nr_pages; i++)
+ *((unsigned long *)(buf + (i * PAGE_SIZE))) = i;
+
+ /* Signal the parent that the child is ready */
+ if (write(pipefd, "", 1) < 0) {
+ perror("write");
+ return KSFT_FAIL;
+ }
+
+ /* Wait to be killed (when reparenting happens) */
+ while (getppid() == ppid && timeout > 0) {
+ sleep(1);
+ timeout--;
+ }
+
+ munmap(buf, nr_pages * PAGE_SIZE);
+
+ return (timeout > 0) ? KSFT_PASS : KSFT_FAIL;
+}
+
+/* The process_mrelease calls in this test are expected to fail */
+static void run_negative_tests(int pidfd)
+{
+ int res;
+ /* Test invalid flags. Expect to fail with EINVAL error code. */
+ if (!syscall(__NR_process_mrelease, pidfd, (unsigned int)-1) ||
+ errno != EINVAL) {
+ res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+ perror("process_mrelease with wrong flags");
+ exit(res);
+ }
+ /*
+ * Test reaping while process is alive with no pending SIGKILL.
+ * Expect to fail with EINVAL error code.
+ */
+ if (!syscall(__NR_process_mrelease, pidfd, 0) || errno != EINVAL) {
+ res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+ perror("process_mrelease on a live process");
+ exit(res);
+ }
+}
+
+static int child_main(int pipefd[], size_t size)
+{
+ int res;
+
+ /* Allocate and fault-in memory and wait to be killed */
+ close(pipefd[0]);
+ res = alloc_noexit(MB(size) / PAGE_SIZE, pipefd[1]);
+ close(pipefd[1]);
+ return res;
+}
+
+int main(void)
+{
+ int pipefd[2], pidfd;
+ bool success, retry;
+ size_t size;
+ pid_t pid;
+ char byte;
+ int res;
+
+ /* Test a wrong pidfd */
+ if (!syscall(__NR_process_mrelease, -1, 0) || errno != EBADF) {
+ res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+ perror("process_mrelease with wrong pidfd");
+ exit(res);
+ }
+
+ /* Start the test with 1MB child memory allocation */
+ size = 1;
+retry:
+ /*
+ * Pipe for the child to signal when it's done allocating
+ * memory
+ */
+ if (pipe(pipefd)) {
+ perror("pipe");
+ exit(KSFT_FAIL);
+ }
+ pid = fork();
+ if (pid < 0) {
+ perror("fork");
+ close(pipefd[0]);
+ close(pipefd[1]);
+ exit(KSFT_FAIL);
+ }
+
+ if (pid == 0) {
+ /* Child main routine */
+ res = child_main(pipefd, size);
+ exit(res);
+ }
+
+ /*
+ * Parent main routine:
+ * Wait for the child to finish allocations, then kill and reap
+ */
+ close(pipefd[1]);
+ /* Block until the child is ready */
+ res = read(pipefd[0], &byte, 1);
+ close(pipefd[0]);
+ if (res < 0) {
+ perror("read");
+ if (!kill(pid, SIGKILL))
+ waitpid(pid, NULL, 0);
+ exit(KSFT_FAIL);
+ }
+
+ pidfd = syscall(__NR_pidfd_open, pid, 0);
+ if (pidfd < 0) {
+ perror("pidfd_open");
+ if (!kill(pid, SIGKILL))
+ waitpid(pid, NULL, 0);
+ exit(KSFT_FAIL);
+ }
+
+ /* Run negative tests which require a live child */
+ run_negative_tests(pidfd);
+
+ if (kill(pid, SIGKILL)) {
+ res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+ perror("kill");
+ exit(res);
+ }
+
+ success = (syscall(__NR_process_mrelease, pidfd, 0) == 0);
+ if (!success) {
+ /*
+ * If we failed to reap because the child exited too soon,
+ * before we could call process_mrelease. Double child's memory
+ * which causes it to spend more time on cleanup and increases
+ * our chances of reaping its memory before it exits.
+ * Retry until we succeed or reach MAX_SIZE_MB.
+ */
+ if (errno == ESRCH) {
+ retry = (size <= MAX_SIZE_MB);
+ } else {
+ res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+ perror("process_mrelease");
+ waitpid(pid, NULL, 0);
+ exit(res);
+ }
+ }
+
+ /* Cleanup to prevent zombies */
+ if (waitpid(pid, NULL, 0) < 0) {
+ perror("waitpid");
+ exit(KSFT_FAIL);
+ }
+ close(pidfd);
+
+ if (!success) {
+ if (retry) {
+ size *= 2;
+ goto retry;
+ }
+ printf("All process_mrelease attempts failed!\n");
+ exit(KSFT_FAIL);
+ }
+
+ printf("Success reaping a child with %zuMB of memory allocations\n",
+ size);
+ return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
index 0624d1bd71b5..9496346973d4 100644
--- a/tools/testing/selftests/vm/mremap_test.c
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -6,9 +6,11 @@
#include <errno.h>
#include <stdlib.h>
+#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
+#include <stdbool.h>
#include "../kselftest.h"
@@ -20,7 +22,6 @@
#define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */
#define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
struct config {
@@ -65,6 +66,103 @@ enum {
}
/*
+ * Returns false if the requested remap region overlaps with an
+ * existing mapping (e.g text, stack) else returns true.
+ */
+static bool is_remap_region_valid(void *addr, unsigned long long size)
+{
+ void *remap_addr = NULL;
+ bool ret = true;
+
+ /* Use MAP_FIXED_NOREPLACE flag to ensure region is not mapped */
+ remap_addr = mmap(addr, size, PROT_READ | PROT_WRITE,
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
+
+ if (remap_addr == MAP_FAILED) {
+ if (errno == EEXIST)
+ ret = false;
+ } else {
+ munmap(remap_addr, size);
+ }
+
+ return ret;
+}
+
+/* Returns mmap_min_addr sysctl tunable from procfs */
+static unsigned long long get_mmap_min_addr(void)
+{
+ FILE *fp;
+ int n_matched;
+ static unsigned long long addr;
+
+ if (addr)
+ return addr;
+
+ fp = fopen("/proc/sys/vm/mmap_min_addr", "r");
+ if (fp == NULL) {
+ ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n",
+ strerror(errno));
+ exit(KSFT_SKIP);
+ }
+
+ n_matched = fscanf(fp, "%llu", &addr);
+ if (n_matched != 1) {
+ ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n",
+ strerror(errno));
+ fclose(fp);
+ exit(KSFT_SKIP);
+ }
+
+ fclose(fp);
+ return addr;
+}
+
+/*
+ * This test validates that merge is called when expanding a mapping.
+ * Mapping containing three pages is created, middle page is unmapped
+ * and then the mapping containing the first page is expanded so that
+ * it fills the created hole. The two parts should merge creating
+ * single mapping with three pages.
+ */
+static void mremap_expand_merge(unsigned long page_size)
+{
+ char *test_name = "mremap expand merge";
+ FILE *fp;
+ char *line = NULL;
+ size_t len = 0;
+ bool success = false;
+ char *start = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ munmap(start + page_size, page_size);
+ mremap(start, page_size, 2 * page_size, 0);
+
+ fp = fopen("/proc/self/maps", "r");
+ if (fp == NULL) {
+ ksft_test_result_fail("%s\n", test_name);
+ return;
+ }
+
+ while (getline(&line, &len, fp) != -1) {
+ char *first = strtok(line, "- ");
+ void *first_val = (void *)strtol(first, NULL, 16);
+ char *second = strtok(NULL, "- ");
+ void *second_val = (void *) strtol(second, NULL, 16);
+
+ if (first_val == start && second_val == start + 3 * page_size) {
+ success = true;
+ break;
+ }
+ }
+ if (success)
+ ksft_test_result_pass("%s\n", test_name);
+ else
+ ksft_test_result_fail("%s\n", test_name);
+ fclose(fp);
+}
+
+/*
* Returns the start address of the mapping on success, else returns
* NULL on failure.
*/
@@ -72,11 +170,18 @@ static void *get_source_mapping(struct config c)
{
unsigned long long addr = 0ULL;
void *src_addr = NULL;
+ unsigned long long mmap_min_addr;
+
+ mmap_min_addr = get_mmap_min_addr();
+
retry:
addr += c.src_alignment;
+ if (addr < mmap_min_addr)
+ goto retry;
+
src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
- MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
- -1, 0);
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
if (src_addr == MAP_FAILED) {
if (errno == EPERM || errno == EEXIST)
goto retry;
@@ -91,8 +196,10 @@ retry:
* alignment in the tests.
*/
if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
- !((unsigned long long) src_addr & c.src_alignment))
+ !((unsigned long long) src_addr & c.src_alignment)) {
+ munmap(src_addr, c.region_size);
goto retry;
+ }
if (!src_addr)
goto error;
@@ -141,9 +248,20 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
if (!((unsigned long long) addr & c.dest_alignment))
addr = (void *) ((unsigned long long) addr | c.dest_alignment);
+ /* Don't destroy existing mappings unless expected to overlap */
+ while (!is_remap_region_valid(addr, c.region_size) && !c.overlapping) {
+ /* Check for unsigned overflow */
+ if (addr + c.dest_alignment < addr) {
+ ksft_print_msg("Couldn't find a valid region to remap to\n");
+ ret = -1;
+ goto out;
+ }
+ addr += c.dest_alignment;
+ }
+
clock_gettime(CLOCK_MONOTONIC, &t_start);
dest_addr = mremap(src_addr, c.region_size, c.region_size,
- MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
+ MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
clock_gettime(CLOCK_MONOTONIC, &t_end);
if (dest_addr == MAP_FAILED) {
@@ -194,7 +312,7 @@ static void run_mremap_test_case(struct test test_case, int *failures,
if (remap_time < 0) {
if (test_case.expect_failure)
- ksft_test_result_pass("%s\n\tExpected mremap failure\n",
+ ksft_test_result_xfail("%s\n\tExpected mremap failure\n",
test_case.name);
else {
ksft_test_result_fail("%s\n", test_case.name);
@@ -262,6 +380,7 @@ int main(int argc, char **argv)
int i, run_perf_tests;
unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
unsigned int pattern_seed;
+ int num_expand_tests = 1;
struct test test_cases[MAX_TEST];
struct test perf_test_cases[MAX_PERF_TEST];
int page_size;
@@ -333,12 +452,14 @@ int main(int argc, char **argv)
(threshold_mb * _1MB >= _1GB);
ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ?
- ARRAY_SIZE(perf_test_cases) : 0));
+ ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests);
for (i = 0; i < ARRAY_SIZE(test_cases); i++)
run_mremap_test_case(test_cases[i], &failures, threshold_mb,
pattern_seed);
+ mremap_expand_merge(page_size);
+
if (run_perf_tests) {
ksft_print_msg("\n%s\n",
"mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index 622a85848f61..92f3be3dd8e5 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -13,6 +13,8 @@
#include <ucontext.h>
#include <sys/mman.h>
+#include "../kselftest.h"
+
/* Define some kernel-like types */
#define u8 __u8
#define u16 __u16
@@ -175,7 +177,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write)
dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
}
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
#define ALIGN_PTR_UP(p, ptr_align_to) \
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
index e4a4ce2b826d..b078ce9c6d2a 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -80,19 +80,6 @@ static inline void __write_pkey_reg(u64 pkey_reg)
assert(pkey_reg == __read_pkey_reg());
}
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
@@ -104,9 +91,7 @@ static inline int cpu_has_pkeys(void)
unsigned int ecx;
unsigned int edx;
- eax = 0x7;
- ecx = 0x0;
- __cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid_count(0x7, 0x0, eax, ebx, ecx, edx);
if (!(ecx & X86_FEATURE_PKU)) {
dprintf2("cpu does not have PKU\n");
@@ -142,9 +127,7 @@ int pkey_reg_xstate_offset(void)
/* assume that XSTATE_PKEY is set in XCR0 */
leaf = XSTATE_PKEY_BIT;
{
- eax = XSTATE_CPUID;
- ecx = leaf;
- __cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid_count(XSTATE_CPUID, leaf, eax, ebx, ecx, edx);
if (leaf == XSTATE_PKEY_BIT) {
xstate_offset = ebx;
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 2d0ae88665db..291bc1e07842 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -1523,7 +1523,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
/*
* Reset the shadow, assuming that the above mprotect()
* correctly changed PKRU, but to an unknown value since
- * the actual alllocated pkey is unknown.
+ * the actual allocated pkey is unknown.
*/
shadow_pkey_reg = __read_pkey_reg();
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index a24d30af3094..e780e76c26b8 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -9,12 +9,12 @@ mnt=./huge
exitcode=0
#get huge pagesize and freepages from /proc/meminfo
-while read name size unit; do
+while read -r name size unit; do
if [ "$name" = "HugePages_Free:" ]; then
- freepgs=$size
+ freepgs="$size"
fi
if [ "$name" = "Hugepagesize:" ]; then
- hpgsize_KB=$size
+ hpgsize_KB="$size"
fi
done < /proc/meminfo
@@ -30,27 +30,26 @@ needmem_KB=$((half_ufd_size_MB * 2 * 1024))
#set proper nr_hugepages
if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
- nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
+ nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
needpgs=$((needmem_KB / hpgsize_KB))
tries=2
- while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do
- lackpgs=$(( $needpgs - $freepgs ))
+ while [ "$tries" -gt 0 ] && [ "$freepgs" -lt "$needpgs" ]; do
+ lackpgs=$((needpgs - freepgs))
echo 3 > /proc/sys/vm/drop_caches
- echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
- if [ $? -ne 0 ]; then
+ if ! echo $((lackpgs + nr_hugepgs)) > /proc/sys/vm/nr_hugepages; then
echo "Please run this test as root"
exit $ksft_skip
fi
- while read name size unit; do
+ while read -r name size unit; do
if [ "$name" = "HugePages_Free:" ]; then
freepgs=$size
fi
done < /proc/meminfo
tries=$((tries - 1))
done
- if [ $freepgs -lt $needpgs ]; then
+ if [ "$freepgs" -lt "$needpgs" ]; then
printf "Not enough huge pages available (%d < %d)\n" \
- $freepgs $needpgs
+ "$freepgs" "$needpgs"
exit 1
fi
else
@@ -60,430 +59,142 @@ fi
#filter 64bit architectures
ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64"
-if [ -z $ARCH ]; then
- ARCH=`uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/'`
+if [ -z "$ARCH" ]; then
+ ARCH=$(uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/')
fi
VADDR64=0
-echo "$ARCH64STR" | grep $ARCH && VADDR64=1
-
-mkdir $mnt
-mount -t hugetlbfs none $mnt
-
-echo "---------------------"
-echo "running hugepage-mmap"
-echo "---------------------"
-./hugepage-mmap
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+echo "$ARCH64STR" | grep "$ARCH" && VADDR64=1
+
+# Usage: run_test [test binary] [arbitrary test arguments...]
+run_test() {
+ local title="running $*"
+ local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -)
+ printf "%s\n%s\n%s\n" "$sep" "$title" "$sep"
+
+ "$@"
+ local ret=$?
+ if [ $ret -eq 0 ]; then
+ echo "[PASS]"
+ elif [ $ret -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+ else
+ echo "[FAIL]"
+ exitcode=1
+ fi
+}
+
+mkdir "$mnt"
+mount -t hugetlbfs none "$mnt"
+
+run_test ./hugepage-mmap
-shmmax=`cat /proc/sys/kernel/shmmax`
-shmall=`cat /proc/sys/kernel/shmall`
+shmmax=$(cat /proc/sys/kernel/shmmax)
+shmall=$(cat /proc/sys/kernel/shmall)
echo 268435456 > /proc/sys/kernel/shmmax
echo 4194304 > /proc/sys/kernel/shmall
-echo "--------------------"
-echo "running hugepage-shm"
-echo "--------------------"
-./hugepage-shm
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-echo $shmmax > /proc/sys/kernel/shmmax
-echo $shmall > /proc/sys/kernel/shmall
-
-echo "-------------------"
-echo "running map_hugetlb"
-echo "-------------------"
-./map_hugetlb
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./hugepage-shm
+echo "$shmmax" > /proc/sys/kernel/shmmax
+echo "$shmall" > /proc/sys/kernel/shmall
-echo "-----------------------"
-echo "running hugepage-mremap"
-echo "-----------------------"
-./hugepage-mremap
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./map_hugetlb
+
+run_test ./hugepage-mremap "$mnt"/huge_mremap
+rm -f "$mnt"/huge_mremap
+
+run_test ./hugepage-vmemmap
+
+run_test ./hugetlb-madvise "$mnt"/madvise-test
+rm -f "$mnt"/madvise-test
echo "NOTE: The above hugetlb tests provide minimal coverage. Use"
echo " https://github.com/libhugetlbfs/libhugetlbfs.git for"
echo " hugetlb regression testing."
-echo "---------------------------"
-echo "running map_fixed_noreplace"
-echo "---------------------------"
-./map_fixed_noreplace
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./map_fixed_noreplace
+
+# get_user_pages_fast() benchmark
+run_test ./gup_test -u
+# pin_user_pages_fast() benchmark
+run_test ./gup_test -a
+# Dump pages 0, 19, and 4096, using pin_user_pages:
+run_test ./gup_test -ct -F 0x1 0 19 0x1000
+
+uffd_mods=("" ":dev")
+for mod in "${uffd_mods[@]}"; do
+ run_test ./userfaultfd anon${mod} 20 16
+ # Hugetlb tests require source and destination huge pages. Pass in half
+ # the size ($half_ufd_size_MB), which is used for *each*.
+ run_test ./userfaultfd hugetlb${mod} "$half_ufd_size_MB" 32
+ run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32 "$mnt"/uffd-test
+ rm -f "$mnt"/uffd-test
+ run_test ./userfaultfd shmem${mod} 20 16
+done
-echo "------------------------------------------------------"
-echo "running: gup_test -u # get_user_pages_fast() benchmark"
-echo "------------------------------------------------------"
-./gup_test -u
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+#cleanup
+umount "$mnt"
+rm -rf "$mnt"
+echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
-echo "------------------------------------------------------"
-echo "running: gup_test -a # pin_user_pages_fast() benchmark"
-echo "------------------------------------------------------"
-./gup_test -a
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./compaction_test
-echo "------------------------------------------------------------"
-echo "# Dump pages 0, 19, and 4096, using pin_user_pages:"
-echo "running: gup_test -ct -F 0x1 0 19 0x1000 # dump_page() test"
-echo "------------------------------------------------------------"
-./gup_test -ct -F 0x1 0 19 0x1000
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test sudo -u nobody ./on-fault-limit
-echo "-------------------"
-echo "running userfaultfd"
-echo "-------------------"
-./userfaultfd anon 20 16
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./map_populate
-echo "---------------------------"
-echo "running userfaultfd_hugetlb"
-echo "---------------------------"
-# Test requires source and destination huge pages. Size of source
-# (half_ufd_size_MB) is passed as argument to test.
-./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-rm -f $mnt/ufd_test_file
-
-echo "-------------------------"
-echo "running userfaultfd_shmem"
-echo "-------------------------"
-./userfaultfd shmem 20 16
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./mlock-random-test
-#cleanup
-umount $mnt
-rm -rf $mnt
-echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
-
-echo "-----------------------"
-echo "running compaction_test"
-echo "-----------------------"
-./compaction_test
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./mlock2-tests
-echo "----------------------"
-echo "running on-fault-limit"
-echo "----------------------"
-sudo -u nobody ./on-fault-limit
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./mrelease_test
-echo "--------------------"
-echo "running map_populate"
-echo "--------------------"
-./map_populate
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./mremap_test
-echo "-------------------------"
-echo "running mlock-random-test"
-echo "-------------------------"
-./mlock-random-test
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "--------------------"
-echo "running mlock2-tests"
-echo "--------------------"
-./mlock2-tests
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "-------------------"
-echo "running mremap_test"
-echo "-------------------"
-./mremap_test
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "-----------------"
-echo "running thuge-gen"
-echo "-----------------"
-./thuge-gen
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+run_test ./thuge-gen
if [ $VADDR64 -ne 0 ]; then
-echo "-----------------------------"
-echo "running virtual_address_range"
-echo "-----------------------------"
-./virtual_address_range
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+ run_test ./virtual_address_range
-echo "-----------------------------"
-echo "running virtual address 128TB switch test"
-echo "-----------------------------"
-./va_128TBswitch
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
+ # virtual address 128TB switch test
+ run_test ./va_128TBswitch.sh
fi # VADDR64
-echo "------------------------------------"
-echo "running vmalloc stability smoke test"
-echo "------------------------------------"
-./test_vmalloc.sh smoke
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
-
-echo "------------------------------------"
-echo "running MREMAP_DONTUNMAP smoke test"
-echo "------------------------------------"
-./mremap_dontunmap
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
-
-echo "running HMM smoke test"
-echo "------------------------------------"
-./test_hmm.sh smoke
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
+# vmalloc stability smoke test
+run_test ./test_vmalloc.sh smoke
-echo "--------------------------------------------------------"
-echo "running MADV_POPULATE_READ and MADV_POPULATE_WRITE tests"
-echo "--------------------------------------------------------"
-./madv_populate
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
+run_test ./mremap_dontunmap
-echo "running memfd_secret test"
-echo "------------------------------------"
-./memfd_secret
-ret_val=$?
+run_test ./test_hmm.sh smoke
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
+# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
+run_test ./madv_populate
-echo "-------------------------------------------------------"
-echo "running KSM MADV_MERGEABLE test with 10 identical pages"
-echo "-------------------------------------------------------"
-./ksm_tests -M -p 10
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
+run_test ./memfd_secret
-echo "------------------------"
-echo "running KSM unmerge test"
-echo "------------------------"
-./ksm_tests -U
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
+# KSM MADV_MERGEABLE test with 10 identical pages
+run_test ./ksm_tests -M -p 10
+# KSM unmerge test
+run_test ./ksm_tests -U
+# KSM test with 10 zero pages and use_zero_pages = 0
+run_test ./ksm_tests -Z -p 10 -z 0
+# KSM test with 10 zero pages and use_zero_pages = 1
+run_test ./ksm_tests -Z -p 10 -z 1
+# KSM test with 2 NUMA nodes and merge_across_nodes = 1
+run_test ./ksm_tests -N -m 1
+# KSM test with 2 NUMA nodes and merge_across_nodes = 0
+run_test ./ksm_tests -N -m 0
-echo "----------------------------------------------------------"
-echo "running KSM test with 10 zero pages and use_zero_pages = 0"
-echo "----------------------------------------------------------"
-./ksm_tests -Z -p 10 -z 0
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
+# protection_keys tests
+if [ -x ./protection_keys_32 ]
+then
+ run_test ./protection_keys_32
fi
-echo "----------------------------------------------------------"
-echo "running KSM test with 10 zero pages and use_zero_pages = 1"
-echo "----------------------------------------------------------"
-./ksm_tests -Z -p 10 -z 1
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
-
-echo "-------------------------------------------------------------"
-echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 1"
-echo "-------------------------------------------------------------"
-./ksm_tests -N -m 1
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
+if [ -x ./protection_keys_64 ]
+then
+ run_test ./protection_keys_64
fi
-echo "-------------------------------------------------------------"
-echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 0"
-echo "-------------------------------------------------------------"
-./ksm_tests -N -m 0
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
-
-exit $exitcode
+run_test ./soft-dirty
exit $exitcode
diff --git a/tools/testing/selftests/vm/settings b/tools/testing/selftests/vm/settings
new file mode 100644
index 000000000000..9abfc60e9e6f
--- /dev/null
+++ b/tools/testing/selftests/vm/settings
@@ -0,0 +1 @@
+timeout=45
diff --git a/tools/testing/selftests/vm/soft-dirty.c b/tools/testing/selftests/vm/soft-dirty.c
new file mode 100644
index 000000000000..21d8830c5f24
--- /dev/null
+++ b/tools/testing/selftests/vm/soft-dirty.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <sys/mman.h>
+#include "../kselftest.h"
+#include "vm_util.h"
+
+#define PAGEMAP_FILE_PATH "/proc/self/pagemap"
+#define TEST_ITERATIONS 10000
+
+static void test_simple(int pagemap_fd, int pagesize)
+{
+ int i;
+ char *map;
+
+ map = aligned_alloc(pagesize, pagesize);
+ if (!map)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ clear_softdirty();
+
+ for (i = 0 ; i < TEST_ITERATIONS; i++) {
+ if (pagemap_is_softdirty(pagemap_fd, map) == 1) {
+ ksft_print_msg("dirty bit was 1, but should be 0 (i=%d)\n", i);
+ break;
+ }
+
+ clear_softdirty();
+ // Write something to the page to get the dirty bit enabled on the page
+ map[0]++;
+
+ if (pagemap_is_softdirty(pagemap_fd, map) == 0) {
+ ksft_print_msg("dirty bit was 0, but should be 1 (i=%d)\n", i);
+ break;
+ }
+
+ clear_softdirty();
+ }
+ free(map);
+
+ ksft_test_result(i == TEST_ITERATIONS, "Test %s\n", __func__);
+}
+
+static void test_vma_reuse(int pagemap_fd, int pagesize)
+{
+ char *map, *map2;
+
+ map = mmap(NULL, pagesize, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANON), -1, 0);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed");
+
+ // The kernel always marks new regions as soft dirty
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s dirty bit of allocated page\n", __func__);
+
+ clear_softdirty();
+ munmap(map, pagesize);
+
+ map2 = mmap(NULL, pagesize, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANON), -1, 0);
+ if (map2 == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed");
+
+ // Dirty bit is set for new regions even if they are reused
+ if (map == map2)
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1,
+ "Test %s dirty bit of reused address page\n", __func__);
+ else
+ ksft_test_result_skip("Test %s dirty bit of reused address page\n", __func__);
+
+ munmap(map2, pagesize);
+}
+
+static void test_hugepage(int pagemap_fd, int pagesize)
+{
+ char *map;
+ int i, ret;
+ size_t hpage_len = read_pmd_pagesize();
+
+ map = memalign(hpage_len, hpage_len);
+ if (!map)
+ ksft_exit_fail_msg("memalign failed\n");
+
+ ret = madvise(map, hpage_len, MADV_HUGEPAGE);
+ if (ret)
+ ksft_exit_fail_msg("madvise failed %d\n", ret);
+
+ for (i = 0; i < hpage_len; i++)
+ map[i] = (char)i;
+
+ if (check_huge_anon(map, 1, hpage_len)) {
+ ksft_test_result_pass("Test %s huge page allocation\n", __func__);
+
+ clear_softdirty();
+ for (i = 0 ; i < TEST_ITERATIONS ; i++) {
+ if (pagemap_is_softdirty(pagemap_fd, map) == 1) {
+ ksft_print_msg("dirty bit was 1, but should be 0 (i=%d)\n", i);
+ break;
+ }
+
+ clear_softdirty();
+ // Write something to the page to get the dirty bit enabled on the page
+ map[0]++;
+
+ if (pagemap_is_softdirty(pagemap_fd, map) == 0) {
+ ksft_print_msg("dirty bit was 0, but should be 1 (i=%d)\n", i);
+ break;
+ }
+ clear_softdirty();
+ }
+
+ ksft_test_result(i == TEST_ITERATIONS, "Test %s huge page dirty bit\n", __func__);
+ } else {
+ // hugepage allocation failed. skip these tests
+ ksft_test_result_skip("Test %s huge page allocation\n", __func__);
+ ksft_test_result_skip("Test %s huge page dirty bit\n", __func__);
+ }
+ free(map);
+}
+
+static void test_mprotect(int pagemap_fd, int pagesize, bool anon)
+{
+ const char *type[] = {"file", "anon"};
+ const char *fname = "./soft-dirty-test-file";
+ int test_fd;
+ char *map;
+
+ if (anon) {
+ map = mmap(NULL, pagesize, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (!map)
+ ksft_exit_fail_msg("anon mmap failed\n");
+ } else {
+ test_fd = open(fname, O_RDWR | O_CREAT);
+ if (test_fd < 0) {
+ ksft_test_result_skip("Test %s open() file failed\n", __func__);
+ return;
+ }
+ unlink(fname);
+ ftruncate(test_fd, pagesize);
+ map = mmap(NULL, pagesize, PROT_READ|PROT_WRITE,
+ MAP_SHARED, test_fd, 0);
+ if (!map)
+ ksft_exit_fail_msg("file mmap failed\n");
+ }
+
+ *map = 1;
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s-%s dirty bit of new written page\n",
+ __func__, type[anon]);
+ clear_softdirty();
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0,
+ "Test %s-%s soft-dirty clear after clear_refs\n",
+ __func__, type[anon]);
+ mprotect(map, pagesize, PROT_READ);
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0,
+ "Test %s-%s soft-dirty clear after marking RO\n",
+ __func__, type[anon]);
+ mprotect(map, pagesize, PROT_READ|PROT_WRITE);
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0,
+ "Test %s-%s soft-dirty clear after marking RW\n",
+ __func__, type[anon]);
+ *map = 2;
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s-%s soft-dirty after rewritten\n",
+ __func__, type[anon]);
+
+ munmap(map, pagesize);
+
+ if (!anon)
+ close(test_fd);
+}
+
+static void test_mprotect_anon(int pagemap_fd, int pagesize)
+{
+ test_mprotect(pagemap_fd, pagesize, true);
+}
+
+static void test_mprotect_file(int pagemap_fd, int pagesize)
+{
+ test_mprotect(pagemap_fd, pagesize, false);
+}
+
+int main(int argc, char **argv)
+{
+ int pagemap_fd;
+ int pagesize;
+
+ ksft_print_header();
+ ksft_set_plan(15);
+
+ pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_fail_msg("Failed to open %s\n", PAGEMAP_FILE_PATH);
+
+ pagesize = getpagesize();
+
+ test_simple(pagemap_fd, pagesize);
+ test_vma_reuse(pagemap_fd, pagesize);
+ test_hugepage(pagemap_fd, pagesize);
+ test_mprotect_anon(pagemap_fd, pagesize);
+ test_mprotect_file(pagemap_fd, pagesize);
+
+ close(pagemap_fd);
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c
index 52497b7b9f1d..76e1c36dd9e5 100644
--- a/tools/testing/selftests/vm/split_huge_page_test.c
+++ b/tools/testing/selftests/vm/split_huge_page_test.c
@@ -16,14 +16,13 @@
#include <sys/mount.h>
#include <malloc.h>
#include <stdbool.h>
+#include "vm_util.h"
uint64_t pagesize;
unsigned int pageshift;
uint64_t pmd_pagesize;
-#define PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
-#define SMAP_PATH "/proc/self/smaps"
#define INPUT_MAX 80
#define PID_FMT "%d,0x%lx,0x%lx"
@@ -51,30 +50,6 @@ int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
return 0;
}
-
-static uint64_t read_pmd_pagesize(void)
-{
- int fd;
- char buf[20];
- ssize_t num_read;
-
- fd = open(PMD_SIZE_PATH, O_RDONLY);
- if (fd == -1) {
- perror("Open hpage_pmd_size failed");
- exit(EXIT_FAILURE);
- }
- num_read = read(fd, buf, 19);
- if (num_read < 1) {
- close(fd);
- perror("Read hpage_pmd_size failed");
- exit(EXIT_FAILURE);
- }
- buf[num_read] = '\0';
- close(fd);
-
- return strtoul(buf, NULL, 10);
-}
-
static int write_file(const char *path, const char *buf, size_t buflen)
{
int fd;
@@ -113,63 +88,10 @@ static void write_debugfs(const char *fmt, ...)
}
}
-#define MAX_LINE_LENGTH 500
-
-static bool check_for_pattern(FILE *fp, const char *pattern, char *buf)
-{
- while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
- if (!strncmp(buf, pattern, strlen(pattern)))
- return true;
- }
- return false;
-}
-
-static uint64_t check_huge(void *addr)
-{
- uint64_t thp = 0;
- int ret;
- FILE *fp;
- char buffer[MAX_LINE_LENGTH];
- char addr_pattern[MAX_LINE_LENGTH];
-
- ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
- (unsigned long) addr);
- if (ret >= MAX_LINE_LENGTH) {
- printf("%s: Pattern is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
-
-
- fp = fopen(SMAP_PATH, "r");
- if (!fp) {
- printf("%s: Failed to open file %s\n", __func__, SMAP_PATH);
- exit(EXIT_FAILURE);
- }
- if (!check_for_pattern(fp, addr_pattern, buffer))
- goto err_out;
-
- /*
- * Fetch the AnonHugePages: in the same block and check the number of
- * hugepages.
- */
- if (!check_for_pattern(fp, "AnonHugePages:", buffer))
- goto err_out;
-
- if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) {
- printf("Reading smap error\n");
- exit(EXIT_FAILURE);
- }
-
-err_out:
- fclose(fp);
- return thp;
-}
-
void split_pmd_thp(void)
{
char *one_page;
size_t len = 4 * pmd_pagesize;
- uint64_t thp_size;
size_t i;
one_page = memalign(pmd_pagesize, len);
@@ -184,8 +106,7 @@ void split_pmd_thp(void)
for (i = 0; i < len; i++)
one_page[i] = (char)i;
- thp_size = check_huge(one_page);
- if (!thp_size) {
+ if (!check_huge_anon(one_page, 1, pmd_pagesize)) {
printf("No THP is allocated\n");
exit(EXIT_FAILURE);
}
@@ -201,9 +122,8 @@ void split_pmd_thp(void)
}
- thp_size = check_huge(one_page);
- if (thp_size) {
- printf("Still %ld kB AnonHugePages not split\n", thp_size);
+ if (check_huge_anon(one_page, 0, pmd_pagesize)) {
+ printf("Still AnonHugePages not split\n");
exit(EXIT_FAILURE);
}
@@ -249,8 +169,7 @@ void split_pte_mapped_thp(void)
for (i = 0; i < len; i++)
one_page[i] = (char)i;
- thp_size = check_huge(one_page);
- if (!thp_size) {
+ if (!check_huge_anon(one_page, 1, pmd_pagesize)) {
printf("No THP is allocated\n");
exit(EXIT_FAILURE);
}
diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/vm/test_hmm.sh
index 0647b525a625..46e19b5d648d 100755
--- a/tools/testing/selftests/vm/test_hmm.sh
+++ b/tools/testing/selftests/vm/test_hmm.sh
@@ -40,25 +40,30 @@ check_test_requirements()
load_driver()
{
- modprobe $DRIVER > /dev/null 2>&1
- if [ $? == 0 ]; then
- major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
- mknod /dev/hmm_dmirror0 c $major 0
- mknod /dev/hmm_dmirror1 c $major 1
+ if [ $# -eq 0 ]; then
+ modprobe $DRIVER > /dev/null 2>&1
+ else
+ if [ $# -eq 2 ]; then
+ modprobe $DRIVER spm_addr_dev0=$1 spm_addr_dev1=$2
+ > /dev/null 2>&1
+ else
+ echo "Missing module parameters. Make sure pass"\
+ "spm_addr_dev0 and spm_addr_dev1"
+ usage
+ fi
fi
}
unload_driver()
{
modprobe -r $DRIVER > /dev/null 2>&1
- rm -f /dev/hmm_dmirror?
}
run_smoke()
{
echo "Running smoke test. Note, this test provides basic coverage."
- load_driver
+ load_driver $1 $2
$(dirname "${BASH_SOURCE[0]}")/hmm-tests
unload_driver
}
@@ -75,6 +80,9 @@ usage()
echo "# Smoke testing"
echo "./${TEST_NAME}.sh smoke"
echo
+ echo "# Smoke testing with SPM enabled"
+ echo "./${TEST_NAME}.sh smoke <spm_addr_dev0> <spm_addr_dev1>"
+ echo
exit 0
}
@@ -84,7 +92,7 @@ function run_test()
usage
else
if [ "$1" = "smoke" ]; then
- run_smoke
+ run_smoke $2 $3
else
usage
fi
diff --git a/tools/testing/selftests/vm/transhuge-stress.c b/tools/testing/selftests/vm/transhuge-stress.c
index 5e4c036f6ad3..e3f00adb1b82 100644
--- a/tools/testing/selftests/vm/transhuge-stress.c
+++ b/tools/testing/selftests/vm/transhuge-stress.c
@@ -15,67 +15,46 @@
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
+#include "util.h"
-#define PAGE_SHIFT 12
-#define HPAGE_SHIFT 21
-
-#define PAGE_SIZE (1 << PAGE_SHIFT)
-#define HPAGE_SIZE (1 << HPAGE_SHIFT)
-
-#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0)
-#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1))
-
-int pagemap_fd;
-
-int64_t allocate_transhuge(void *ptr)
-{
- uint64_t ent[2];
-
- /* drop pmd */
- if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANONYMOUS |
- MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
- errx(2, "mmap transhuge");
-
- if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
- err(2, "MADV_HUGEPAGE");
-
- /* allocate transparent huge page */
- *(volatile void **)ptr = ptr;
-
- if (pread(pagemap_fd, ent, sizeof(ent),
- (uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent))
- err(2, "read pagemap");
-
- if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
- PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
- !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1)))
- return PAGEMAP_PFN(ent[0]);
-
- return -1;
-}
+int backing_fd = -1;
+int mmap_flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE;
+#define PROT_RW (PROT_READ | PROT_WRITE)
int main(int argc, char **argv)
{
size_t ram, len;
void *ptr, *p;
struct timespec a, b;
+ int i = 0;
+ char *name = NULL;
double s;
uint8_t *map;
size_t map_len;
+ int pagemap_fd;
ram = sysconf(_SC_PHYS_PAGES);
if (ram > SIZE_MAX / sysconf(_SC_PAGESIZE) / 4)
ram = SIZE_MAX / 4;
else
ram *= sysconf(_SC_PAGESIZE);
+ len = ram;
+
+ while (++i < argc) {
+ if (!strcmp(argv[i], "-h"))
+ errx(1, "usage: %s [size in MiB]", argv[0]);
+ else if (!strcmp(argv[i], "-f"))
+ name = argv[++i];
+ else
+ len = atoll(argv[i]) << 20;
+ }
- if (argc == 1)
- len = ram;
- else if (!strcmp(argv[1], "-h"))
- errx(1, "usage: %s [size in MiB]", argv[0]);
- else
- len = atoll(argv[1]) << 20;
+ if (name) {
+ backing_fd = open(name, O_RDWR);
+ if (backing_fd == -1)
+ errx(2, "open %s", name);
+ mmap_flags = MAP_SHARED;
+ }
warnx("allocate %zd transhuge pages, using %zd MiB virtual memory"
" and %zd MiB of ram", len >> HPAGE_SHIFT, len >> 20,
@@ -86,8 +65,7 @@ int main(int argc, char **argv)
err(2, "open pagemap");
len -= len % HPAGE_SIZE;
- ptr = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0);
+ ptr = mmap(NULL, len + HPAGE_SIZE, PROT_RW, mmap_flags, backing_fd, 0);
if (ptr == MAP_FAILED)
err(2, "initial mmap");
ptr += HPAGE_SIZE - (uintptr_t)ptr % HPAGE_SIZE;
@@ -109,7 +87,7 @@ int main(int argc, char **argv)
for (p = ptr; p < ptr + len; p += HPAGE_SIZE) {
int64_t pfn;
- pfn = allocate_transhuge(p);
+ pfn = allocate_transhuge(p, pagemap_fd);
if (pfn < 0) {
nr_failed++;
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 8a09057d2f22..297f250c1d95 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -46,6 +46,7 @@
#include <signal.h>
#include <poll.h>
#include <string.h>
+#include <linux/mman.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
@@ -60,10 +61,11 @@
#include <sys/random.h>
#include "../kselftest.h"
+#include "vm_util.h"
#ifdef __NR_userfaultfd
-static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
+static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size, hpage_size;
#define BOUNCE_RANDOM (1<<0)
#define BOUNCE_RACINGFAULTS (1<<1)
@@ -76,25 +78,32 @@ static int bounces;
#define TEST_SHMEM 3
static int test_type;
+#define UFFD_FLAGS (O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY)
+
+#define BASE_PMD_ADDR ((void *)(1UL << 30))
+
+/* test using /dev/userfaultfd, instead of userfaultfd(2) */
+static bool test_dev_userfaultfd;
+
/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */
#define ALARM_INTERVAL_SECS 10
static volatile bool test_uffdio_copy_eexist = true;
static volatile bool test_uffdio_zeropage_eexist = true;
/* Whether to test uffd write-protection */
-static bool test_uffdio_wp = false;
+static bool test_uffdio_wp = true;
/* Whether to test uffd minor faults */
static bool test_uffdio_minor = false;
static bool map_shared;
static int shm_fd;
static int huge_fd;
-static char *huge_fd_off0;
static unsigned long long *count_verify;
static int uffd = -1;
static int uffd_flags, finished, *pipefd;
-static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
+static char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap;
static char *zeropage;
pthread_attr_t attr;
+static bool test_collapse;
/* Userfaultfd test statistics */
struct uffd_stats {
@@ -119,14 +128,21 @@ struct uffd_stats {
~(unsigned long)(sizeof(unsigned long long) \
- 1)))
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+#define factor_of_2(x) ((x) ^ ((x) & ((x) - 1)))
+
const char *examples =
"# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
"./userfaultfd anon 100 99999\n\n"
+ "# Run the same anonymous memory test, but using /dev/userfaultfd:\n"
+ "./userfaultfd anon:dev 100 99999\n\n"
"# Run share memory test on 1GiB region with 99 bounces:\n"
"./userfaultfd shmem 1000 99\n\n"
- "# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n"
- "./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n"
- "# Run the same hugetlb test but using shmem:\n"
+ "# Run hugetlb memory test on 256MiB region with 50 bounces:\n"
+ "./userfaultfd hugetlb 256 50\n\n"
+ "# Run the same hugetlb test but using shared file:\n"
"./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n"
"# 10MiB-~6GiB 999 bounces anonymous test, "
"continue forever unless an error triggers\n"
@@ -138,6 +154,16 @@ static void usage(void)
"[hugetlbfs_file]\n\n");
fprintf(stderr, "Supported <test type>: anon, hugetlb, "
"hugetlb_shared, shmem\n\n");
+ fprintf(stderr, "'Test mods' can be joined to the test type string with a ':'. "
+ "Supported mods:\n");
+ fprintf(stderr, "\tsyscall - Use userfaultfd(2) (default)\n");
+ fprintf(stderr, "\tdev - Use /dev/userfaultfd instead of userfaultfd(2)\n");
+ fprintf(stderr, "\tcollapse - Test MADV_COLLAPSE of UFFDIO_REGISTER_MODE_MINOR\n"
+ "memory\n");
+ fprintf(stderr, "\nExample test mod usage:\n");
+ fprintf(stderr, "# Run anonymous memory test with /dev/userfaultfd:\n");
+ fprintf(stderr, "./userfaultfd anon:dev 100 99999\n\n");
+
fprintf(stderr, "Examples:\n\n");
fprintf(stderr, "%s", examples);
exit(1);
@@ -151,12 +177,14 @@ static void usage(void)
ret, __LINE__); \
} while (0)
-#define err(fmt, ...) \
+#define errexit(exitcode, fmt, ...) \
do { \
_err(fmt, ##__VA_ARGS__); \
- exit(1); \
+ exit(exitcode); \
} while (0)
+#define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__)
+
static void uffd_stats_reset(struct uffd_stats *uffd_stats,
unsigned long n_cpus)
{
@@ -209,12 +237,10 @@ static void anon_release_pages(char *rel_area)
err("madvise(MADV_DONTNEED) failed");
}
-static void anon_allocate_area(void **alloc_area)
+static void anon_allocate_area(void **alloc_area, bool is_src)
{
*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (*alloc_area == MAP_FAILED)
- err("mmap of anonymous memory failed");
}
static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
@@ -223,36 +249,51 @@ static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
static void hugetlb_release_pages(char *rel_area)
{
- if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- rel_area == huge_fd_off0 ? 0 : nr_pages * page_size,
- nr_pages * page_size))
- err("fallocate() failed");
+ if (!map_shared) {
+ if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED))
+ err("madvise(MADV_DONTNEED) failed");
+ } else {
+ if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE))
+ err("madvise(MADV_REMOVE) failed");
+ }
}
-static void hugetlb_allocate_area(void **alloc_area)
+static void hugetlb_allocate_area(void **alloc_area, bool is_src)
{
void *area_alias = NULL;
char **alloc_area_alias;
- *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- (map_shared ? MAP_SHARED : MAP_PRIVATE) |
- MAP_HUGETLB,
- huge_fd, *alloc_area == area_src ? 0 :
- nr_pages * page_size);
+ if (!map_shared)
+ *alloc_area = mmap(NULL,
+ nr_pages * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB |
+ (is_src ? 0 : MAP_NORESERVE),
+ -1,
+ 0);
+ else
+ *alloc_area = mmap(NULL,
+ nr_pages * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED |
+ (is_src ? 0 : MAP_NORESERVE),
+ huge_fd,
+ is_src ? 0 : nr_pages * page_size);
if (*alloc_area == MAP_FAILED)
err("mmap of hugetlbfs file failed");
if (map_shared) {
- area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_HUGETLB,
- huge_fd, *alloc_area == area_src ? 0 :
- nr_pages * page_size);
+ area_alias = mmap(NULL,
+ nr_pages * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ huge_fd,
+ is_src ? 0 : nr_pages * page_size);
if (area_alias == MAP_FAILED)
err("mmap of hugetlb file alias failed");
}
- if (*alloc_area == area_src) {
- huge_fd_off0 = *alloc_area;
+ if (is_src) {
alloc_area_alias = &area_src_alias;
} else {
alloc_area_alias = &area_dst_alias;
@@ -265,12 +306,7 @@ static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset
{
if (!map_shared)
return;
- /*
- * We can't zap just the pagetable with hugetlbfs because
- * MADV_DONTEED won't work. So exercise -EEXIST on a alias
- * mapping where the pagetables are not established initially,
- * this way we'll exercise the -EEXEC at the fs level.
- */
+
*start = (unsigned long) area_dst_alias + offset;
}
@@ -280,21 +316,36 @@ static void shmem_release_pages(char *rel_area)
err("madvise(MADV_REMOVE) failed");
}
-static void shmem_allocate_area(void **alloc_area)
+static void shmem_allocate_area(void **alloc_area, bool is_src)
{
void *area_alias = NULL;
- bool is_src = alloc_area == (void **)&area_src;
- unsigned long offset = is_src ? 0 : nr_pages * page_size;
+ size_t bytes = nr_pages * page_size;
+ unsigned long offset = is_src ? 0 : bytes;
+ char *p = NULL, *p_alias = NULL;
+
+ if (test_collapse) {
+ p = BASE_PMD_ADDR;
+ if (!is_src)
+ /* src map + alias + interleaved hpages */
+ p += 2 * (bytes + hpage_size);
+ p_alias = p;
+ p_alias += bytes;
+ p_alias += hpage_size; /* Prevent src/dst VMA merge */
+ }
- *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- MAP_SHARED, shm_fd, offset);
+ *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
+ shm_fd, offset);
if (*alloc_area == MAP_FAILED)
err("mmap of memfd failed");
+ if (test_collapse && *alloc_area != p)
+ err("mmap of memfd failed at %p", p);
- area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- MAP_SHARED, shm_fd, offset);
+ area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
+ shm_fd, offset);
if (area_alias == MAP_FAILED)
err("mmap of memfd alias failed");
+ if (test_collapse && area_alias != p_alias)
+ err("mmap of anonymous memory failed at %p", p_alias);
if (is_src)
area_src_alias = area_alias;
@@ -307,28 +358,39 @@ static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset)
*start = (unsigned long)area_dst_alias + offset;
}
+static void shmem_check_pmd_mapping(void *p, int expect_nr_hpages)
+{
+ if (!check_huge_shmem(area_dst_alias, expect_nr_hpages, hpage_size))
+ err("Did not find expected %d number of hugepages",
+ expect_nr_hpages);
+}
+
struct uffd_test_ops {
- void (*allocate_area)(void **alloc_area);
+ void (*allocate_area)(void **alloc_area, bool is_src);
void (*release_pages)(char *rel_area);
void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
+ void (*check_pmd_mapping)(void *p, int expect_nr_hpages);
};
static struct uffd_test_ops anon_uffd_test_ops = {
.allocate_area = anon_allocate_area,
.release_pages = anon_release_pages,
.alias_mapping = noop_alias_mapping,
+ .check_pmd_mapping = NULL,
};
static struct uffd_test_ops shmem_uffd_test_ops = {
.allocate_area = shmem_allocate_area,
.release_pages = shmem_release_pages,
.alias_mapping = shmem_alias_mapping,
+ .check_pmd_mapping = shmem_check_pmd_mapping,
};
static struct uffd_test_ops hugetlb_uffd_test_ops = {
.allocate_area = hugetlb_allocate_area,
.release_pages = hugetlb_release_pages,
.alias_mapping = hugetlb_alias_mapping,
+ .check_pmd_mapping = NULL,
};
static struct uffd_test_ops *uffd_test_ops;
@@ -370,13 +432,34 @@ static void assert_expected_ioctls_present(uint64_t mode, uint64_t ioctls)
}
}
+static int __userfaultfd_open_dev(void)
+{
+ int fd, _uffd;
+
+ fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ errexit(KSFT_SKIP, "opening /dev/userfaultfd failed");
+
+ _uffd = ioctl(fd, USERFAULTFD_IOC_NEW, UFFD_FLAGS);
+ if (_uffd < 0)
+ errexit(errno == ENOTTY ? KSFT_SKIP : 1,
+ "creating userfaultfd failed");
+ close(fd);
+ return _uffd;
+}
+
static void userfaultfd_open(uint64_t *features)
{
struct uffdio_api uffdio_api;
- uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
- if (uffd < 0)
- err("userfaultfd syscall not available in this kernel");
+ if (test_dev_userfaultfd)
+ uffd = __userfaultfd_open_dev();
+ else {
+ uffd = syscall(__NR_userfaultfd, UFFD_FLAGS);
+ if (uffd < 0)
+ errexit(errno == ENOSYS ? KSFT_SKIP : 1,
+ "creating userfaultfd failed");
+ }
uffd_flags = fcntl(uffd, F_GETFD, NULL);
uffdio_api.api = UFFD_API;
@@ -423,11 +506,11 @@ static void uffd_test_ctx_clear(void)
uffd = -1;
}
- huge_fd_off0 = NULL;
munmap_area((void **)&area_src);
munmap_area((void **)&area_src_alias);
munmap_area((void **)&area_dst);
munmap_area((void **)&area_dst_alias);
+ munmap_area((void **)&area_remap);
}
static void uffd_test_ctx_init(uint64_t features)
@@ -436,8 +519,8 @@ static void uffd_test_ctx_init(uint64_t features)
uffd_test_ctx_clear();
- uffd_test_ops->allocate_area((void **)&area_src);
- uffd_test_ops->allocate_area((void **)&area_dst);
+ uffd_test_ops->allocate_area((void **)&area_src, true);
+ uffd_test_ops->allocate_area((void **)&area_dst, false);
userfaultfd_open(&features);
@@ -538,7 +621,7 @@ static void continue_range(int ufd, __u64 start, __u64 len)
static void *locking_thread(void *arg)
{
unsigned long cpu = (unsigned long) arg;
- unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */
+ unsigned long page_nr;
unsigned long long count;
if (!(bounces & BOUNCE_RANDOM)) {
@@ -644,7 +727,7 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg)
if (ret != sizeof(*msg)) {
if (ret < 0) {
- if (errno == EAGAIN)
+ if (errno == EAGAIN || errno == EINTR)
return 1;
err("blocking read error");
} else {
@@ -691,7 +774,27 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
continue_range(uffd, msg->arg.pagefault.address, page_size);
stats->minor_faults++;
} else {
- /* Missing page faults */
+ /*
+ * Missing page faults.
+ *
+ * Here we force a write check for each of the missing mode
+ * faults. It's guaranteed because the only threads that
+ * will trigger uffd faults are the locking threads, and
+ * their first instruction to touch the missing page will
+ * always be pthread_mutex_lock().
+ *
+ * Note that here we relied on an NPTL glibc impl detail to
+ * always read the lock type at the entry of the lock op
+ * (pthread_mutex_t.__data.__type, offset 0x10) before
+ * doing any locking operations to guarantee that. It's
+ * actually not good to rely on this impl detail because
+ * logically a pthread-compatible lib can implement the
+ * locks without types and we can fail when linking with
+ * them. However since we used to find bugs with this
+ * strict check we still keep it around. Hopefully this
+ * could be a good hint when it fails again. If one day
+ * it'll break on some other impl of glibc we'll revisit.
+ */
if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
err("unexpected write fault");
@@ -720,8 +823,11 @@ static void *uffd_poll_thread(void *arg)
for (;;) {
ret = poll(pollfd, 2, -1);
- if (ret <= 0)
+ if (ret <= 0) {
+ if (errno == EINTR || errno == EAGAIN)
+ continue;
err("poll error: %d", ret);
+ }
if (pollfd[1].revents & POLLIN) {
if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
err("read pipefd error");
@@ -751,6 +857,7 @@ static void *uffd_poll_thread(void *arg)
err("remove failure");
break;
case UFFD_EVENT_REMAP:
+ area_remap = area_dst; /* save for later unmap */
area_dst = (char *)(unsigned long)msg.arg.remap.to;
break;
}
@@ -845,7 +952,7 @@ static int stress(struct uffd_stats *uffd_stats)
/*
* Be strict and immediately zap area_src, the whole area has
* been transferred already by the background treads. The
- * area_src could then be faulted in in a racy way by still
+ * area_src could then be faulted in a racy way by still
* running uffdio_threads reading zeropages after we zapped
* area_src (but they're guaranteed to get -EEXIST from
* UFFDIO_COPY without writing zero pages into area_dst
@@ -916,12 +1023,9 @@ static int faulting_process(int signal_test)
unsigned long split_nr_pages;
unsigned long lastnr;
struct sigaction act;
- unsigned long signalled = 0;
+ volatile unsigned long signalled = 0;
- if (test_type != TEST_HUGETLB)
- split_nr_pages = (nr_pages + 1) / 2;
- else
- split_nr_pages = nr_pages;
+ split_nr_pages = (nr_pages + 1) / 2;
if (signal_test) {
sigbuf = &jbuf;
@@ -934,7 +1038,7 @@ static int faulting_process(int signal_test)
}
for (nr = 0; nr < split_nr_pages; nr++) {
- int steps = 1;
+ volatile int steps = 1;
unsigned long offset = nr * page_size;
if (signal_test) {
@@ -978,9 +1082,6 @@ static int faulting_process(int signal_test)
if (signal_test)
return signalled != split_nr_pages;
- if (test_type == TEST_HUGETLB)
- return 0;
-
area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
if (area_dst == MAP_FAILED)
@@ -1209,13 +1310,30 @@ static int userfaultfd_sig_test(void)
return userfaults != 0;
}
+void check_memory_contents(char *p)
+{
+ unsigned long i;
+ uint8_t expected_byte;
+ void *expected_page;
+
+ if (posix_memalign(&expected_page, page_size, page_size))
+ err("out of memory");
+
+ for (i = 0; i < nr_pages; ++i) {
+ expected_byte = ~((uint8_t)(i % ((uint8_t)-1)));
+ memset(expected_page, expected_byte, page_size);
+ if (my_bcmp(expected_page, p + (i * page_size), page_size))
+ err("unexpected page contents after minor fault");
+ }
+
+ free(expected_page);
+}
+
static int userfaultfd_minor_test(void)
{
- struct uffdio_register uffdio_register;
unsigned long p;
+ struct uffdio_register uffdio_register;
pthread_t uffd_mon;
- uint8_t expected_byte;
- void *expected_page;
char c;
struct uffd_stats stats = { 0 };
@@ -1254,17 +1372,7 @@ static int userfaultfd_minor_test(void)
* fault. uffd_poll_thread will resolve the fault by bit-flipping the
* page's contents, and then issuing a CONTINUE ioctl.
*/
-
- if (posix_memalign(&expected_page, page_size, page_size))
- err("out of memory");
-
- for (p = 0; p < nr_pages; ++p) {
- expected_byte = ~((uint8_t)(p % ((uint8_t)-1)));
- memset(expected_page, expected_byte, page_size);
- if (my_bcmp(expected_page, area_dst_alias + (p * page_size),
- page_size))
- err("unexpected page contents after minor fault");
- }
+ check_memory_contents(area_dst_alias);
if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
err("pipe write");
@@ -1273,6 +1381,23 @@ static int userfaultfd_minor_test(void)
uffd_stats_report(&stats, 1);
+ if (test_collapse) {
+ printf("testing collapse of uffd memory into PMD-mapped THPs:");
+ if (madvise(area_dst_alias, nr_pages * page_size,
+ MADV_COLLAPSE))
+ err("madvise(MADV_COLLAPSE)");
+
+ uffd_test_ops->check_pmd_mapping(area_dst,
+ nr_pages * page_size /
+ hpage_size);
+ /*
+ * This won't cause uffd-fault - it purely just makes sure there
+ * was no corruption.
+ */
+ check_memory_contents(area_dst_alias);
+ printf(" done.\n");
+ }
+
return stats.missing_faults != 0 || stats.minor_faults != nr_pages;
}
@@ -1413,7 +1538,6 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize)
static int userfaultfd_stress(void)
{
void *area;
- char *tmp_area;
unsigned long nr;
struct uffdio_register uffdio_register;
struct uffd_stats uffd_stats[nr_cpus];
@@ -1524,13 +1648,9 @@ static int userfaultfd_stress(void)
count_verify[nr], nr);
/* prepare next bounce */
- tmp_area = area_src;
- area_src = area_dst;
- area_dst = tmp_area;
+ swap(area_src, area_dst);
- tmp_area = area_src_alias;
- area_src_alias = area_dst_alias;
- area_dst_alias = tmp_area;
+ swap(area_src_alias, area_dst_alias);
uffd_stats_report(uffd_stats, nr_cpus);
}
@@ -1580,13 +1700,9 @@ unsigned long default_huge_page_size(void)
static void set_test_type(const char *type)
{
- uint64_t features = UFFD_API_FEATURES;
-
if (!strcmp(type, "anon")) {
test_type = TEST_ANON;
uffd_test_ops = &anon_uffd_test_ops;
- /* Only enable write-protect test for anonymous test */
- test_uffdio_wp = true;
} else if (!strcmp(type, "hugetlb")) {
test_type = TEST_HUGETLB;
uffd_test_ops = &hugetlb_uffd_test_ops;
@@ -1601,12 +1717,37 @@ static void set_test_type(const char *type)
test_type = TEST_SHMEM;
uffd_test_ops = &shmem_uffd_test_ops;
test_uffdio_minor = true;
- } else {
- err("Unknown test type: %s", type);
+ }
+}
+
+static void parse_test_type_arg(const char *raw_type)
+{
+ char *buf = strdup(raw_type);
+ uint64_t features = UFFD_API_FEATURES;
+
+ while (buf) {
+ const char *token = strsep(&buf, ":");
+
+ if (!test_type)
+ set_test_type(token);
+ else if (!strcmp(token, "dev"))
+ test_dev_userfaultfd = true;
+ else if (!strcmp(token, "syscall"))
+ test_dev_userfaultfd = false;
+ else if (!strcmp(token, "collapse"))
+ test_collapse = true;
+ else
+ err("unrecognized test mod '%s'", token);
}
+ if (!test_type)
+ err("failed to parse test type argument: '%s'", raw_type);
+
+ if (test_collapse && test_type != TEST_SHMEM)
+ err("Unsupported test: %s", raw_type);
+
if (test_type == TEST_HUGETLB)
- page_size = default_huge_page_size();
+ page_size = hpage_size;
else
page_size = sysconf(_SC_PAGE_SIZE);
@@ -1644,6 +1785,8 @@ static void sigalrm(int sig)
int main(int argc, char **argv)
{
+ size_t bytes;
+
if (argc < 4)
usage();
@@ -1651,11 +1794,41 @@ int main(int argc, char **argv)
err("failed to arm SIGALRM");
alarm(ALARM_INTERVAL_SECS);
- set_test_type(argv[1]);
+ hpage_size = default_huge_page_size();
+ parse_test_type_arg(argv[1]);
+ bytes = atol(argv[2]) * 1024 * 1024;
+
+ if (test_collapse && bytes & (hpage_size - 1))
+ err("MiB must be multiple of %lu if :collapse mod set",
+ hpage_size >> 20);
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
- nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size /
- nr_cpus;
+
+ if (test_collapse) {
+ /* nr_cpus must divide (bytes / page_size), otherwise,
+ * area allocations of (nr_pages * paze_size) won't be a
+ * multiple of hpage_size, even if bytes is a multiple of
+ * hpage_size.
+ *
+ * This means that nr_cpus must divide (N * (2 << (H-P))
+ * where:
+ * bytes = hpage_size * N
+ * hpage_size = 2 << H
+ * page_size = 2 << P
+ *
+ * And we want to chose nr_cpus to be the largest value
+ * satisfying this constraint, not larger than the number
+ * of online CPUs. Unfortunately, prime factorization of
+ * N and nr_cpus may be arbitrary, so have to search for it.
+ * Instead, just use the highest power of 2 dividing both
+ * nr_cpus and (bytes / page_size).
+ */
+ int x = factor_of_2(nr_cpus);
+ int y = factor_of_2(bytes / page_size);
+
+ nr_cpus = x < y ? x : y;
+ }
+ nr_pages_per_cpu = bytes / page_size / nr_cpus;
if (!nr_pages_per_cpu) {
_err("invalid MiB");
usage();
@@ -1668,7 +1841,7 @@ int main(int argc, char **argv)
}
nr_pages = nr_pages_per_cpu * nr_cpus;
- if (test_type == TEST_HUGETLB) {
+ if (test_type == TEST_HUGETLB && map_shared) {
if (argc < 5)
usage();
huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
diff --git a/tools/testing/selftests/vm/util.h b/tools/testing/selftests/vm/util.h
new file mode 100644
index 000000000000..b27d26199334
--- /dev/null
+++ b/tools/testing/selftests/vm/util.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __KSELFTEST_VM_UTIL_H
+#define __KSELFTEST_VM_UTIL_H
+
+#include <stdint.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <string.h> /* ffsl() */
+#include <unistd.h> /* _SC_PAGESIZE */
+
+static unsigned int __page_size;
+static unsigned int __page_shift;
+
+static inline unsigned int page_size(void)
+{
+ if (!__page_size)
+ __page_size = sysconf(_SC_PAGESIZE);
+ return __page_size;
+}
+
+static inline unsigned int page_shift(void)
+{
+ if (!__page_shift)
+ __page_shift = (ffsl(page_size()) - 1);
+ return __page_shift;
+}
+
+#define PAGE_SHIFT (page_shift())
+#define PAGE_SIZE (page_size())
+/*
+ * On ppc64 this will only work with radix 2M hugepage size
+ */
+#define HPAGE_SHIFT 21
+#define HPAGE_SIZE (1 << HPAGE_SHIFT)
+
+#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0)
+#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1))
+
+
+static inline int64_t allocate_transhuge(void *ptr, int pagemap_fd)
+{
+ uint64_t ent[2];
+
+ /* drop pmd */
+ if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANONYMOUS |
+ MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
+ errx(2, "mmap transhuge");
+
+ if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
+ err(2, "MADV_HUGEPAGE");
+
+ /* allocate transparent huge page */
+ *(volatile void **)ptr = ptr;
+
+ if (pread(pagemap_fd, ent, sizeof(ent),
+ (uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent))
+ err(2, "read pagemap");
+
+ if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
+ PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
+ !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1)))
+ return PAGEMAP_PFN(ent[0]);
+
+ return -1;
+}
+
+#endif
diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c
index 83acdff26a13..1d2068989883 100644
--- a/tools/testing/selftests/vm/va_128TBswitch.c
+++ b/tools/testing/selftests/vm/va_128TBswitch.c
@@ -9,7 +9,7 @@
#include <sys/mman.h>
#include <string.h>
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#include "../kselftest.h"
#ifdef __powerpc64__
#define PAGE_SIZE (64 << 10)
@@ -231,7 +231,7 @@ static struct testcase hugetlb_testcases[] = {
static int run_test(struct testcase *test, int count)
{
void *p;
- int i, ret = 0;
+ int i, ret = KSFT_PASS;
for (i = 0; i < count; i++) {
struct testcase *t = test + i;
@@ -242,13 +242,13 @@ static int run_test(struct testcase *test, int count)
if (p == MAP_FAILED) {
printf("FAILED\n");
- ret = 1;
+ ret = KSFT_FAIL;
continue;
}
if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) {
printf("FAILED\n");
- ret = 1;
+ ret = KSFT_FAIL;
} else {
/*
* Do a dereference of the address returned so that we catch
@@ -280,7 +280,7 @@ int main(int argc, char **argv)
int ret;
if (!supported_arch())
- return 0;
+ return KSFT_SKIP;
ret = run_test(testcases, ARRAY_SIZE(testcases));
if (argc == 2 && !strcmp(argv[1], "--run-hugetlb"))
diff --git a/tools/testing/selftests/vm/va_128TBswitch.sh b/tools/testing/selftests/vm/va_128TBswitch.sh
new file mode 100755
index 000000000000..41580751dc51
--- /dev/null
+++ b/tools/testing/selftests/vm/va_128TBswitch.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Adam Sindelar (Meta) <adam@wowsignal.io>
+#
+# This is a test for mmap behavior with 5-level paging. This script wraps the
+# real test to check that the kernel is configured to support at least 5
+# pagetable levels.
+
+# 1 means the test failed
+exitcode=1
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+fail()
+{
+ echo "$1"
+ exit $exitcode
+}
+
+check_supported_x86_64()
+{
+ local config="/proc/config.gz"
+ [[ -f "${config}" ]] || config="/boot/config-$(uname -r)"
+ [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot"
+
+ # gzip -dcfq automatically handles both compressed and plaintext input.
+ # See man 1 gzip under '-f'.
+ local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2)
+
+ if [[ "${pg_table_levels}" -lt 5 ]]; then
+ echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
+ exit $ksft_skip
+ fi
+}
+
+check_test_requirements()
+{
+ # The test supports x86_64 and powerpc64. We currently have no useful
+ # eligibility check for powerpc64, and the test itself will reject other
+ # architectures.
+ case `uname -m` in
+ "x86_64")
+ check_supported_x86_64
+ ;;
+ *)
+ return 0
+ ;;
+ esac
+}
+
+check_test_requirements
+./va_128TBswitch
diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c
new file mode 100644
index 000000000000..f11f8adda521
--- /dev/null
+++ b/tools/testing/selftests/vm/vm_util.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <fcntl.h>
+#include "../kselftest.h"
+#include "vm_util.h"
+
+#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
+#define SMAP_FILE_PATH "/proc/self/smaps"
+#define MAX_LINE_LENGTH 500
+
+uint64_t pagemap_get_entry(int fd, char *start)
+{
+ const unsigned long pfn = (unsigned long)start / getpagesize();
+ uint64_t entry;
+ int ret;
+
+ ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry));
+ if (ret != sizeof(entry))
+ ksft_exit_fail_msg("reading pagemap failed\n");
+ return entry;
+}
+
+bool pagemap_is_softdirty(int fd, char *start)
+{
+ uint64_t entry = pagemap_get_entry(fd, start);
+
+ // Check if dirty bit (55th bit) is set
+ return entry & 0x0080000000000000ull;
+}
+
+void clear_softdirty(void)
+{
+ int ret;
+ const char *ctrl = "4";
+ int fd = open("/proc/self/clear_refs", O_WRONLY);
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening clear_refs failed\n");
+ ret = write(fd, ctrl, strlen(ctrl));
+ close(fd);
+ if (ret != strlen(ctrl))
+ ksft_exit_fail_msg("writing clear_refs failed\n");
+}
+
+bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len)
+{
+ while (fgets(buf, len, fp)) {
+ if (!strncmp(buf, pattern, strlen(pattern)))
+ return true;
+ }
+ return false;
+}
+
+uint64_t read_pmd_pagesize(void)
+{
+ int fd;
+ char buf[20];
+ ssize_t num_read;
+
+ fd = open(PMD_SIZE_FILE_PATH, O_RDONLY);
+ if (fd == -1)
+ ksft_exit_fail_msg("Open hpage_pmd_size failed\n");
+
+ num_read = read(fd, buf, 19);
+ if (num_read < 1) {
+ close(fd);
+ ksft_exit_fail_msg("Read hpage_pmd_size failed\n");
+ }
+ buf[num_read] = '\0';
+ close(fd);
+
+ return strtoul(buf, NULL, 10);
+}
+
+bool __check_huge(void *addr, char *pattern, int nr_hpages,
+ uint64_t hpage_size)
+{
+ uint64_t thp = -1;
+ int ret;
+ FILE *fp;
+ char buffer[MAX_LINE_LENGTH];
+ char addr_pattern[MAX_LINE_LENGTH];
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
+ (unsigned long) addr);
+ if (ret >= MAX_LINE_LENGTH)
+ ksft_exit_fail_msg("%s: Pattern is too long\n", __func__);
+
+ fp = fopen(SMAP_FILE_PATH, "r");
+ if (!fp)
+ ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH);
+
+ if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
+ goto err_out;
+
+ /*
+ * Fetch the pattern in the same block and check the number of
+ * hugepages.
+ */
+ if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer)))
+ goto err_out;
+
+ snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern);
+
+ if (sscanf(buffer, addr_pattern, &thp) != 1)
+ ksft_exit_fail_msg("Reading smap error\n");
+
+err_out:
+ fclose(fp);
+ return thp == (nr_hpages * (hpage_size >> 10));
+}
+
+bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size)
+{
+ return __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size);
+}
+
+bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size)
+{
+ return __check_huge(addr, "FilePmdMapped:", nr_hpages, hpage_size);
+}
+
+bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size)
+{
+ return __check_huge(addr, "ShmemPmdMapped:", nr_hpages, hpage_size);
+}
diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h
new file mode 100644
index 000000000000..5c35de454e08
--- /dev/null
+++ b/tools/testing/selftests/vm/vm_util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdint.h>
+#include <stdbool.h>
+
+uint64_t pagemap_get_entry(int fd, char *start);
+bool pagemap_is_softdirty(int fd, char *start);
+void clear_softdirty(void);
+bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len);
+uint64_t read_pmd_pagesize(void);
+bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size);
+bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size);
+bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size);
diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh
index d3d0d108924d..70a02301f4c2 100644
--- a/tools/testing/selftests/vm/write_hugetlb_memory.sh
+++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh
@@ -14,7 +14,7 @@ want_sleep=$8
reserve=$9
echo "Putting task in cgroup '$cgroup'"
-echo $$ > /dev/cgroup/memory/"$cgroup"/cgroup.procs
+echo $$ > ${cgroup_path:-/dev/cgroup/memory}/"$cgroup"/cgroup.procs
echo "Method is $method"