diff options
Diffstat (limited to '')
33 files changed, 410 insertions, 7108 deletions
diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index 18d33684faad..d680c00d2853 100644..100755 --- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -1,13 +1,18 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + set -e if [[ $(id -u) -ne 0 ]]; then echo "This test must be run as root. Skipping..." - exit 0 + exit $ksft_skip fi +nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) + fault_limit_file=limit_in_bytes reservation_limit_file=rsvd.limit_in_bytes fault_usage_file=usage_in_bytes @@ -21,19 +26,23 @@ if [[ "$1" == "-cgroup-v2" ]]; then reservation_usage_file=rsvd.current fi -cgroup_path=/dev/cgroup/memory -if [[ ! -e $cgroup_path ]]; then - mkdir -p $cgroup_path - if [[ $cgroup2 ]]; then +if [[ $cgroup2 ]]; then + cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}') + if [[ -z "$cgroup_path" ]]; then + cgroup_path=/dev/cgroup/memory mount -t cgroup2 none $cgroup_path - else + do_umount=1 + fi + echo "+hugetlb" >$cgroup_path/cgroup.subtree_control +else + cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') + if [[ -z "$cgroup_path" ]]; then + cgroup_path=/dev/cgroup/memory mount -t cgroup memory,hugetlb $cgroup_path + do_umount=1 fi fi - -if [[ $cgroup2 ]]; then - echo "+hugetlb" >/dev/cgroup/memory/cgroup.subtree_control -fi +export cgroup_path function cleanup() { if [[ $cgroup2 ]]; then @@ -105,7 +114,7 @@ function setup_cgroup() { function wait_for_hugetlb_memory_to_get_depleted() { local cgroup="$1" - local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file" + local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file" # Wait for hugetlbfs memory to get depleted. while [ $(cat $path) != 0 ]; do echo Waiting for hugetlb memory to get depleted. @@ -118,7 +127,7 @@ function wait_for_hugetlb_memory_to_get_reserved() { local cgroup="$1" local size="$2" - local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file" + local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file" # Wait for hugetlbfs memory to get written. while [ $(cat $path) != $size ]; do echo Waiting for hugetlb memory reservation to reach size $size. @@ -131,7 +140,7 @@ function wait_for_hugetlb_memory_to_get_written() { local cgroup="$1" local size="$2" - local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$fault_usage_file" + local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file" # Wait for hugetlbfs memory to get written. while [ $(cat $path) != $size ]; do echo Waiting for hugetlb memory to reach size $size. @@ -571,5 +580,9 @@ for populate in "" "-o"; do done # populate done # method -umount $cgroup_path -rmdir $cgroup_path +if [[ $do_umount ]]; then + umount $cgroup_path + rmdir $cgroup_path +fi + +echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages diff --git a/tools/testing/selftests/vm/hugepage-mmap.c b/tools/testing/selftests/mm/hugepage-mmap.c index 93f9e7b81331..267eea2e0e0b 100644 --- a/tools/testing/selftests/vm/hugepage-mmap.c +++ b/tools/testing/selftests/mm/hugepage-mmap.c @@ -16,14 +16,14 @@ * range. * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. */ - +#define _GNU_SOURCE #include <stdlib.h> #include <stdio.h> #include <unistd.h> #include <sys/mman.h> #include <fcntl.h> +#include "../kselftest.h" -#define FILE_NAME "huge/hugepagefile" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -38,7 +38,7 @@ static void check_bytes(char *addr) { - printf("First hex is %x\n", *((unsigned int *)addr)); + ksft_print_msg("First hex is %x\n", *((unsigned int *)addr)); } static void write_bytes(char *addr) @@ -56,7 +56,7 @@ static int read_bytes(char *addr) check_bytes(addr); for (i = 0; i < LENGTH; i++) if (*(addr + i) != (char)i) { - printf("Mismatch at %lu\n", i); + ksft_print_msg("Error: Mismatch at %lu\n", i); return 1; } return 0; @@ -67,27 +67,28 @@ int main(void) void *addr; int fd, ret; - fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755); - if (fd < 0) { - perror("Open failed"); - exit(1); - } + ksft_print_header(); + ksft_set_plan(1); + + fd = memfd_create("hugepage-mmap", MFD_HUGETLB); + if (fd < 0) + ksft_exit_fail_msg("memfd_create() failed: %s\n", strerror(errno)); addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); if (addr == MAP_FAILED) { - perror("mmap"); - unlink(FILE_NAME); - exit(1); + close(fd); + ksft_exit_fail_msg("mmap(): %s\n", strerror(errno)); } - printf("Returned address is %p\n", addr); + ksft_print_msg("Returned address is %p\n", addr); check_bytes(addr); write_bytes(addr); ret = read_bytes(addr); munmap(addr, LENGTH); close(fd); - unlink(FILE_NAME); - return ret; + ksft_test_result(!ret, "Read same data\n"); + + ksft_exit(!ret); } diff --git a/tools/testing/selftests/vm/hugepage-shm.c b/tools/testing/selftests/mm/hugepage-shm.c index e2527f32005b..478bb1e989e9 100644 --- a/tools/testing/selftests/vm/hugepage-shm.c +++ b/tools/testing/selftests/mm/hugepage-shm.c @@ -35,10 +35,6 @@ #include <sys/shm.h> #include <sys/mman.h> -#ifndef SHM_HUGETLB -#define SHM_HUGETLB 04000 -#endif - #define LENGTH (256UL*1024*1024) #define dprintf(x) printf(x) diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh index d11d1febccc3..11f9bbe7dc22 100644..100755 --- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh @@ -1,13 +1,17 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + set -e if [[ $(id -u) -ne 0 ]]; then echo "This test must be run as root. Skipping..." - exit 0 + exit $ksft_skip fi +nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) usage_file=usage_in_bytes if [[ "$1" == "-cgroup-v2" ]]; then @@ -15,19 +19,24 @@ if [[ "$1" == "-cgroup-v2" ]]; then usage_file=current fi -CGROUP_ROOT='/dev/cgroup/memory' -MNT='/mnt/huge/' -if [[ ! -e $CGROUP_ROOT ]]; then - mkdir -p $CGROUP_ROOT - if [[ $cgroup2 ]]; then +if [[ $cgroup2 ]]; then + CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}') + if [[ -z "$CGROUP_ROOT" ]]; then + CGROUP_ROOT=/dev/cgroup/memory mount -t cgroup2 none $CGROUP_ROOT - sleep 1 - echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control - else + do_umount=1 + fi + echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control +else + CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') + if [[ -z "$CGROUP_ROOT" ]]; then + CGROUP_ROOT=/dev/cgroup/memory mount -t cgroup memory,hugetlb $CGROUP_ROOT + do_umount=1 fi fi +MNT='/mnt/huge/' function get_machine_hugepage_size() { hpz=$(grep -i hugepagesize /proc/meminfo) @@ -240,5 +249,9 @@ cleanup echo ALL PASS -umount $CGROUP_ROOT -rm -rf $CGROUP_ROOT +if [[ $do_umount ]]; then + umount $CGROUP_ROOT + rm -rf $CGROUP_ROOT +fi + +echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages diff --git a/tools/testing/selftests/vm/mlock2.h b/tools/testing/selftests/mm/mlock2.h index 2a6e76c226bc..1e5731bab499 100644 --- a/tools/testing/selftests/vm/mlock2.h +++ b/tools/testing/selftests/mm/mlock2.h @@ -3,23 +3,11 @@ #include <errno.h> #include <stdio.h> #include <stdlib.h> - -#ifndef MLOCK_ONFAULT -#define MLOCK_ONFAULT 1 -#endif - -#ifndef MCL_ONFAULT -#define MCL_ONFAULT (MCL_FUTURE << 1) -#endif +#include <asm-generic/unistd.h> static int mlock2_(void *start, size_t len, int flags) { -#ifdef __NR_mlock2 return syscall(__NR_mlock2, start, len, flags); -#else - errno = ENOSYS; - return -1; -#endif } static FILE *seek_to_smaps_entry(unsigned long addr) @@ -35,10 +23,8 @@ static FILE *seek_to_smaps_entry(unsigned long addr) char path[BUFSIZ]; file = fopen("/proc/self/smaps", "r"); - if (!file) { - perror("fopen smaps"); - _exit(1); - } + if (!file) + ksft_exit_fail_msg("fopen smaps: %s\n", strerror(errno)); while (getline(&line, &size, file) > 0) { if (sscanf(line, "%lx-%lx %s %lx %s %lu %s\n", diff --git a/tools/testing/selftests/vm/mremap_dontunmap.c b/tools/testing/selftests/mm/mremap_dontunmap.c index 3a7b5ef0b0c6..1d75084b9ca5 100644 --- a/tools/testing/selftests/vm/mremap_dontunmap.c +++ b/tools/testing/selftests/mm/mremap_dontunmap.c @@ -7,6 +7,7 @@ */ #define _GNU_SOURCE #include <sys/mman.h> +#include <linux/mman.h> #include <errno.h> #include <stdio.h> #include <stdlib.h> @@ -15,10 +16,6 @@ #include "../kselftest.h" -#ifndef MREMAP_DONTUNMAP -#define MREMAP_DONTUNMAP 4 -#endif - unsigned long page_size; char *page_buffer; @@ -30,14 +27,14 @@ static void dump_maps(void) system(cmd); } -#define BUG_ON(condition, description) \ - do { \ - if (condition) { \ - fprintf(stderr, "[FAIL]\t%s():%d\t%s:%s\n", __func__, \ - __LINE__, (description), strerror(errno)); \ - dump_maps(); \ - exit(1); \ - } \ +#define BUG_ON(condition, description) \ + do { \ + if (condition) { \ + dump_maps(); \ + ksft_exit_fail_msg("[FAIL]\t%s:%d\t%s:%s\n", \ + __func__, __LINE__, (description), \ + strerror(errno)); \ + } \ } while (0) // Try a simple operation for to "test" for kernel support this prevents @@ -125,6 +122,59 @@ static void mremap_dontunmap_simple() "unable to unmap destination mapping"); BUG_ON(munmap(source_mapping, num_pages * page_size) == -1, "unable to unmap source mapping"); + ksft_test_result_pass("%s\n", __func__); +} + +// This test validates that MREMAP_DONTUNMAP on a shared mapping works as expected. +static void mremap_dontunmap_simple_shmem() +{ + unsigned long num_pages = 5; + + int mem_fd = memfd_create("memfd", MFD_CLOEXEC); + BUG_ON(mem_fd < 0, "memfd_create"); + + BUG_ON(ftruncate(mem_fd, num_pages * page_size) < 0, + "ftruncate"); + + void *source_mapping = + mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE, + MAP_FILE | MAP_SHARED, mem_fd, 0); + BUG_ON(source_mapping == MAP_FAILED, "mmap"); + + BUG_ON(close(mem_fd) < 0, "close"); + + memset(source_mapping, 'a', num_pages * page_size); + + // Try to just move the whole mapping anywhere (not fixed). + void *dest_mapping = + mremap(source_mapping, num_pages * page_size, num_pages * page_size, + MREMAP_DONTUNMAP | MREMAP_MAYMOVE, NULL); + if (dest_mapping == MAP_FAILED && errno == EINVAL) { + // Old kernel which doesn't support MREMAP_DONTUNMAP on shmem. + BUG_ON(munmap(source_mapping, num_pages * page_size) == -1, + "unable to unmap source mapping"); + return; + } + + BUG_ON(dest_mapping == MAP_FAILED, "mremap"); + + // Validate that the pages have been moved, we know they were moved if + // the dest_mapping contains a's. + BUG_ON(check_region_contains_byte + (dest_mapping, num_pages * page_size, 'a') != 0, + "pages did not migrate"); + + // Because the region is backed by shmem, we will actually see the same + // memory at the source location still. + BUG_ON(check_region_contains_byte + (source_mapping, num_pages * page_size, 'a') != 0, + "source should have no ptes"); + + BUG_ON(munmap(dest_mapping, num_pages * page_size) == -1, + "unable to unmap destination mapping"); + BUG_ON(munmap(source_mapping, num_pages * page_size) == -1, + "unable to unmap source mapping"); + ksft_test_result_pass("%s\n", __func__); } // This test validates MREMAP_DONTUNMAP will move page tables to a specific @@ -171,6 +221,7 @@ static void mremap_dontunmap_simple_fixed() "unable to unmap destination mapping"); BUG_ON(munmap(source_mapping, num_pages * page_size) == -1, "unable to unmap source mapping"); + ksft_test_result_pass("%s\n", __func__); } // This test validates that we can MREMAP_DONTUNMAP for a portion of an @@ -221,6 +272,7 @@ static void mremap_dontunmap_partial_mapping() "unable to unmap destination mapping"); BUG_ON(munmap(source_mapping, num_pages * page_size) == -1, "unable to unmap source mapping"); + ksft_test_result_pass("%s\n", __func__); } // This test validates that we can remap over only a portion of a mapping. @@ -280,19 +332,24 @@ static void mremap_dontunmap_partial_mapping_overwrite(void) "unable to unmap destination mapping"); BUG_ON(munmap(source_mapping, 5 * page_size) == -1, "unable to unmap source mapping"); + ksft_test_result_pass("%s\n", __func__); } int main(void) { + ksft_print_header(); + page_size = sysconf(_SC_PAGE_SIZE); // test for kernel support for MREMAP_DONTUNMAP skipping the test if // not. if (kernel_support_for_mremap_dontunmap() != 0) { - printf("No kernel support for MREMAP_DONTUNMAP\n"); - return KSFT_SKIP; + ksft_print_msg("No kernel support for MREMAP_DONTUNMAP\n"); + ksft_finished(); } + ksft_set_plan(5); + // Keep a page sized buffer around for when we need it. page_buffer = mmap(NULL, page_size, PROT_READ | PROT_WRITE, @@ -300,6 +357,7 @@ int main(void) BUG_ON(page_buffer == MAP_FAILED, "unable to mmap a page."); mremap_dontunmap_simple(); + mremap_dontunmap_simple_shmem(); mremap_dontunmap_simple_fixed(); mremap_dontunmap_partial_mapping(); mremap_dontunmap_partial_mapping_overwrite(); @@ -307,6 +365,5 @@ int main(void) BUG_ON(munmap(page_buffer, page_size) == -1, "unable to unmap page buffer"); - printf("OK\n"); - return 0; + ksft_finished(); } diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index 622a85848f61..1af3156a9db8 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -13,6 +13,8 @@ #include <ucontext.h> #include <sys/mman.h> +#include "../kselftest.h" + /* Define some kernel-like types */ #define u8 __u8 #define u16 __u16 @@ -32,7 +34,7 @@ extern int test_nr; extern int iteration_nr; #ifdef __GNUC__ -__attribute__((format(printf, 1, 2))) +__printf(1, 2) #endif static inline void sigsafe_printf(const char *format, ...) { @@ -175,7 +177,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write) dprintf4("pkey_reg now: %016llx\n", read_pkey_reg()); } -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) #define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) #define ALIGN_PTR_UP(p, ptr_align_to) \ diff --git a/tools/testing/selftests/vm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index 1ebb586b2fbc..ae5df26104e5 100644 --- a/tools/testing/selftests/vm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -3,9 +3,6 @@ #ifndef _PKEYS_POWERPC_H #define _PKEYS_POWERPC_H -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 386 -#endif #ifndef SYS_pkey_alloc # define SYS_pkey_alloc 384 # define SYS_pkey_free 385 diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h index 3be20f5d5275..814758e109c0 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/mm/pkey-x86.h @@ -5,29 +5,11 @@ #ifdef __i386__ -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 -#endif - -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 -#endif - #define REG_IP_IDX REG_EIP #define si_pkey_offset 0x14 #else -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 -#endif - -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 -#endif - #define REG_IP_IDX REG_RIP #define si_pkey_offset 0x20 @@ -80,19 +62,6 @@ static inline void __write_pkey_reg(u64 pkey_reg) assert(pkey_reg == __read_pkey_reg()); } -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile( - "cpuid;" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} - /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */ #define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ @@ -104,9 +73,7 @@ static inline int cpu_has_pkeys(void) unsigned int ecx; unsigned int edx; - eax = 0x7; - ecx = 0x0; - __cpuid(&eax, &ebx, &ecx, &edx); + __cpuid_count(0x7, 0x0, eax, ebx, ecx, edx); if (!(ecx & X86_FEATURE_PKU)) { dprintf2("cpu does not have PKU\n"); @@ -119,6 +86,18 @@ static inline int cpu_has_pkeys(void) return 1; } +static inline int cpu_max_xsave_size(void) +{ + unsigned long XSTATE_CPUID = 0xd; + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + __cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx); + return ecx; +} + static inline u32 pkey_bit_position(int pkey) { return pkey * PKEY_BITS_PER_PKEY; @@ -126,6 +105,7 @@ static inline u32 pkey_bit_position(int pkey) #define XSTATE_PKEY_BIT (9) #define XSTATE_PKEY 0x200 +#define XSTATE_BV_OFFSET 512 int pkey_reg_xstate_offset(void) { @@ -134,16 +114,14 @@ int pkey_reg_xstate_offset(void) unsigned int ecx; unsigned int edx; int xstate_offset; - int xstate_size; + int xstate_size = 0; unsigned long XSTATE_CPUID = 0xd; int leaf; /* assume that XSTATE_PKEY is set in XCR0 */ leaf = XSTATE_PKEY_BIT; { - eax = XSTATE_CPUID; - ecx = leaf; - __cpuid(&eax, &ebx, &ecx, &edx); + __cpuid_count(XSTATE_CPUID, leaf, eax, ebx, ecx, edx); if (leaf == XSTATE_PKEY_BIT) { xstate_offset = ebx; diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index fdbb602ecf32..eaa6d1fc5328 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -18,12 +18,13 @@ * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks * * Compile like this: - * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm */ #define _GNU_SOURCE #define __SANE_USERSPACE_TYPES__ #include <errno.h> +#include <linux/elf.h> #include <linux/futex.h> #include <time.h> #include <sys/time.h> @@ -41,7 +42,7 @@ #include <sys/wait.h> #include <sys/stat.h> #include <fcntl.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <sys/ptrace.h> #include <setjmp.h> @@ -97,7 +98,7 @@ int tracing_root_ok(void) void tracing_on(void) { #if CONTROL_TRACING > 0 -#define TRACEDIR "/sys/kernel/debug/tracing" +#define TRACEDIR "/sys/kernel/tracing" char pidstr[32]; if (!tracing_root_ok()) @@ -123,7 +124,7 @@ void tracing_off(void) #if CONTROL_TRACING > 0 if (!tracing_root_ok()) return; - cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on"); + cat_into_file("0", "/sys/kernel/tracing/tracing_on"); #endif } @@ -293,15 +294,6 @@ void pkey_access_deny(int pkey) pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); } -/* Failed address bound checks: */ -#ifndef SEGV_BNDERR -# define SEGV_BNDERR 3 -#endif - -#ifndef SEGV_PKUERR -# define SEGV_PKUERR 4 -#endif - static char *si_code_str(int si_code) { if (si_code == SEGV_MAPERR) @@ -475,7 +467,7 @@ int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, ptr, size, orig_prot, pkey); errno = 0; - sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); + sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey); if (errno) { dprintf2("SYS_mprotect_key sret: %d\n", sret); dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); @@ -510,7 +502,7 @@ int alloc_pkey(void) " shadow: 0x%016llx\n", __func__, __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); - if (ret) { + if (ret > 0) { /* clear both the bits: */ shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret, ~PKEY_MASK); @@ -561,7 +553,6 @@ int alloc_random_pkey(void) int nr_alloced = 0; int random_index; memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); - srand((unsigned int)time(NULL)); /* allocate every possible key and make a note of which ones we got */ max_nr_pkey_allocs = NR_PKEYS; @@ -1278,6 +1269,78 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) } } +void arch_force_pkey_reg_init(void) +{ +#if defined(__i386__) || defined(__x86_64__) /* arch */ + u64 *buf; + + /* + * All keys should be allocated and set to allow reads and + * writes, so the register should be all 0. If not, just + * skip the test. + */ + if (read_pkey_reg()) + return; + + /* + * Just allocate an absurd about of memory rather than + * doing the XSAVE size enumeration dance. + */ + buf = mmap(NULL, 1*MB, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + + /* These __builtins require compiling with -mxsave */ + + /* XSAVE to build a valid buffer: */ + __builtin_ia32_xsave(buf, XSTATE_PKEY); + /* Clear XSTATE_BV[PKRU]: */ + buf[XSTATE_BV_OFFSET/sizeof(u64)] &= ~XSTATE_PKEY; + /* XRSTOR will likely get PKRU back to the init state: */ + __builtin_ia32_xrstor(buf, XSTATE_PKEY); + + munmap(buf, 1*MB); +#endif +} + + +/* + * This is mostly useless on ppc for now. But it will not + * hurt anything and should give some better coverage as + * a long-running test that continually checks the pkey + * register. + */ +void test_pkey_init_state(int *ptr, u16 pkey) +{ + int err; + int allocated_pkeys[NR_PKEYS] = {0}; + int nr_allocated_pkeys = 0; + int i; + + for (i = 0; i < NR_PKEYS; i++) { + int new_pkey = alloc_pkey(); + + if (new_pkey < 0) + continue; + allocated_pkeys[nr_allocated_pkeys++] = new_pkey; + } + + dprintf3("%s()::%d\n", __func__, __LINE__); + + arch_force_pkey_reg_init(); + + /* + * Loop for a bit, hoping to get exercise the kernel + * context switch code. + */ + for (i = 0; i < 1000000; i++) + read_pkey_reg(); + + for (i = 0; i < nr_allocated_pkeys; i++) { + err = sys_pkey_free(allocated_pkeys[i]); + pkey_assert(!err); + read_pkey_reg(); /* for shadow checking */ + } +} + /* * pkey 0 is special. It is allocated by default, so you do not * have to call pkey_alloc() to use it first. Make sure that it @@ -1449,6 +1512,13 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); pkey_assert(!ret); + /* + * Reset the shadow, assuming that the above mprotect() + * correctly changed PKRU, but to an unknown value since + * the actual allocated pkey is unknown. + */ + shadow_pkey_reg = __read_pkey_reg(); + dprintf2("pkey_reg: %016llx\n", read_pkey_reg()); /* Make sure this is an *instruction* fault */ @@ -1472,6 +1542,129 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) do_not_expect_pkey_fault("plain read on recently PROT_EXEC area"); } +#if defined(__i386__) || defined(__x86_64__) +void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +{ + u32 new_pkru; + pid_t child; + int status, ret; + int pkey_offset = pkey_reg_xstate_offset(); + size_t xsave_size = cpu_max_xsave_size(); + void *xsave; + u32 *pkey_register; + u64 *xstate_bv; + struct iovec iov; + + new_pkru = ~read_pkey_reg(); + /* Don't make PROT_EXEC mappings inaccessible */ + new_pkru &= ~3; + + child = fork(); + pkey_assert(child >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), child); + if (!child) { + ptrace(PTRACE_TRACEME, 0, 0, 0); + /* Stop and allow the tracer to modify PKRU directly */ + raise(SIGSTOP); + + /* + * need __read_pkey_reg() version so we do not do shadow_pkey_reg + * checking + */ + if (__read_pkey_reg() != new_pkru) + exit(1); + + /* Stop and allow the tracer to clear XSTATE_BV for PKRU */ + raise(SIGSTOP); + + if (__read_pkey_reg() != 0) + exit(1); + + /* Stop and allow the tracer to examine PKRU */ + raise(SIGSTOP); + + exit(0); + } + + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + xsave = (void *)malloc(xsave_size); + pkey_assert(xsave > 0); + + /* Modify the PKRU register directly */ + iov.iov_base = xsave; + iov.iov_len = xsave_size; + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + pkey_register = (u32 *)(xsave + pkey_offset); + pkey_assert(*pkey_register == read_pkey_reg()); + + *pkey_register = new_pkru; + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == new_pkru); + + /* Execute the tracee */ + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value change */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == new_pkru); + + /* Clear the PKRU bit from XSTATE_BV */ + xstate_bv = (u64 *)(xsave + 512); + *xstate_bv &= ~(1 << 9); + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == 0); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value go to 0 */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == 0); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFEXITED(status)); + pkey_assert(WEXITSTATUS(status) == 0); + free(xsave); +} +#endif + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; @@ -1482,7 +1675,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) return; } - sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); + sret = syscall(__NR_pkey_mprotect, ptr, size, PROT_READ, pkey); pkey_assert(sret < 0); } @@ -1502,10 +1695,14 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_implicit_mprotect_exec_only_memory, test_mprotect_with_pkey_0, test_ptrace_of_child, + test_pkey_init_state, test_pkey_syscalls_on_non_allocated_pkey, test_pkey_syscalls_bad_args, test_pkey_alloc_exhaust, test_pkey_alloc_free_attach_pkey0, +#if defined(__i386__) || defined(__x86_64__) + test_ptrace_modifies_pkru, +#endif }; void run_tests_once(void) @@ -1552,6 +1749,8 @@ int main(void) int nr_iterations = 22; int pkeys_supported = is_pkeys_supported(); + srand((unsigned int)time(NULL)); + setup_handlers(); printf("has pkeys: %d\n", pkeys_supported); diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/mm/test_hmm.sh index 0647b525a625..46e19b5d648d 100755 --- a/tools/testing/selftests/vm/test_hmm.sh +++ b/tools/testing/selftests/mm/test_hmm.sh @@ -40,25 +40,30 @@ check_test_requirements() load_driver() { - modprobe $DRIVER > /dev/null 2>&1 - if [ $? == 0 ]; then - major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices) - mknod /dev/hmm_dmirror0 c $major 0 - mknod /dev/hmm_dmirror1 c $major 1 + if [ $# -eq 0 ]; then + modprobe $DRIVER > /dev/null 2>&1 + else + if [ $# -eq 2 ]; then + modprobe $DRIVER spm_addr_dev0=$1 spm_addr_dev1=$2 + > /dev/null 2>&1 + else + echo "Missing module parameters. Make sure pass"\ + "spm_addr_dev0 and spm_addr_dev1" + usage + fi fi } unload_driver() { modprobe -r $DRIVER > /dev/null 2>&1 - rm -f /dev/hmm_dmirror? } run_smoke() { echo "Running smoke test. Note, this test provides basic coverage." - load_driver + load_driver $1 $2 $(dirname "${BASH_SOURCE[0]}")/hmm-tests unload_driver } @@ -75,6 +80,9 @@ usage() echo "# Smoke testing" echo "./${TEST_NAME}.sh smoke" echo + echo "# Smoke testing with SPM enabled" + echo "./${TEST_NAME}.sh smoke <spm_addr_dev0> <spm_addr_dev1>" + echo exit 0 } @@ -84,7 +92,7 @@ function run_test() usage else if [ "$1" = "smoke" ]; then - run_smoke + run_smoke $2 $3 else usage fi diff --git a/tools/testing/selftests/vm/test_vmalloc.sh b/tools/testing/selftests/mm/test_vmalloc.sh index 06d2bb109f06..d73b846736f1 100755 --- a/tools/testing/selftests/vm/test_vmalloc.sh +++ b/tools/testing/selftests/mm/test_vmalloc.sh @@ -11,6 +11,7 @@ TEST_NAME="vmalloc" DRIVER="test_${TEST_NAME}" +NUM_CPUS=`grep -c ^processor /proc/cpuinfo` # 1 if fails exitcode=1 @@ -22,9 +23,9 @@ ksft_skip=4 # Static templates for performance, stressing and smoke tests. # Also it is possible to pass any supported parameters manualy. # -PERF_PARAM="single_cpu_test=1 sequential_test_order=1 test_repeat_count=3" -SMOKE_PARAM="single_cpu_test=1 test_loop_count=10000 test_repeat_count=10" -STRESS_PARAM="test_repeat_count=20" +PERF_PARAM="sequential_test_order=1 test_repeat_count=3" +SMOKE_PARAM="test_loop_count=10000 test_repeat_count=10" +STRESS_PARAM="nr_threads=$NUM_CPUS test_repeat_count=20" check_test_requirements() { @@ -58,8 +59,8 @@ run_perfformance_check() run_stability_check() { - echo "Run stability tests. In order to stress vmalloc subsystem we run" - echo "all available test cases on all available CPUs simultaneously." + echo "Run stability tests. In order to stress vmalloc subsystem all" + echo "available test cases are run by NUM_CPUS workers simultaneously." echo "It will take time, so be patient." modprobe $DRIVER $STRESS_PARAM > /dev/null 2>&1 @@ -92,17 +93,17 @@ usage() echo "# Shows help message" echo "./${DRIVER}.sh" echo - echo "# Runs 1 test(id_1), repeats it 5 times on all online CPUs" - echo "./${DRIVER}.sh run_test_mask=1 test_repeat_count=5" + echo "# Runs 1 test(id_1), repeats it 5 times by NUM_CPUS workers" + echo "./${DRIVER}.sh nr_threads=$NUM_CPUS run_test_mask=1 test_repeat_count=5" echo echo -n "# Runs 4 tests(id_1|id_2|id_4|id_16) on one CPU with " echo "sequential order" - echo -n "./${DRIVER}.sh single_cpu_test=1 sequential_test_order=1 " + echo -n "./${DRIVER}.sh sequential_test_order=1 " echo "run_test_mask=23" echo - echo -n "# Runs all tests on all online CPUs, shuffled order, repeats " + echo -n "# Runs all tests by NUM_CPUS workers, shuffled order, repeats " echo "20 times" - echo "./${DRIVER}.sh test_repeat_count=20" + echo "./${DRIVER}.sh nr_threads=$NUM_CPUS test_repeat_count=20" echo echo "# Performance analysis" echo "./${DRIVER}.sh performance" diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/mm/write_hugetlb_memory.sh index d3d0d108924d..3d2d2eb9d6ff 100644..100755 --- a/tools/testing/selftests/vm/write_hugetlb_memory.sh +++ b/tools/testing/selftests/mm/write_hugetlb_memory.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 set -e @@ -14,7 +14,7 @@ want_sleep=$8 reserve=$9 echo "Putting task in cgroup '$cgroup'" -echo $$ > /dev/cgroup/memory/"$cgroup"/cgroup.procs +echo $$ > ${cgroup_path:-/dev/cgroup/memory}/"$cgroup"/cgroup.procs echo "Method is $method" diff --git a/tools/testing/selftests/vm/write_to_hugetlbfs.c b/tools/testing/selftests/mm/write_to_hugetlbfs.c index 6a2caba19ee1..6a2caba19ee1 100644 --- a/tools/testing/selftests/vm/write_to_hugetlbfs.c +++ b/tools/testing/selftests/mm/write_to_hugetlbfs.c diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore deleted file mode 100644 index 849e8226395a..000000000000 --- a/tools/testing/selftests/vm/.gitignore +++ /dev/null @@ -1,22 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -hugepage-mmap -hugepage-shm -khugepaged -map_hugetlb -map_populate -thuge-gen -compaction_test -mlock2-tests -mremap_dontunmap -on-fault-limit -transhuge-stress -protection_keys -userfaultfd -mlock-intersect-test -mlock-random-test -virtual_address_range -gup_benchmark -va_128TBswitch -map_fixed_noreplace -write_to_hugetlbfs -hmm-tests diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile deleted file mode 100644 index a9026706d597..000000000000 --- a/tools/testing/selftests/vm/Makefile +++ /dev/null @@ -1,115 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# Makefile for vm selftests -uname_M := $(shell uname -m 2>/dev/null || echo not) -MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/') - -CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) -LDLIBS = -lrt -TEST_GEN_FILES = compaction_test -TEST_GEN_FILES += gup_benchmark -TEST_GEN_FILES += hmm-tests -TEST_GEN_FILES += hugepage-mmap -TEST_GEN_FILES += hugepage-shm -TEST_GEN_FILES += map_hugetlb -TEST_GEN_FILES += map_fixed_noreplace -TEST_GEN_FILES += map_populate -TEST_GEN_FILES += mlock-random-test -TEST_GEN_FILES += mlock2-tests -TEST_GEN_FILES += mremap_dontunmap -TEST_GEN_FILES += on-fault-limit -TEST_GEN_FILES += thuge-gen -TEST_GEN_FILES += transhuge-stress -TEST_GEN_FILES += userfaultfd -TEST_GEN_FILES += khugepaged - -ifeq ($(ARCH),x86_64) -CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) -CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c) -CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie) - -TARGETS := protection_keys -BINARIES_32 := $(TARGETS:%=%_32) -BINARIES_64 := $(TARGETS:%=%_64) - -ifeq ($(CAN_BUILD_WITH_NOPIE),1) -CFLAGS += -no-pie -endif - -ifeq ($(CAN_BUILD_I386),1) -TEST_GEN_FILES += $(BINARIES_32) -endif - -ifeq ($(CAN_BUILD_X86_64),1) -TEST_GEN_FILES += $(BINARIES_64) -endif -else -TEST_GEN_FILES += protection_keys -endif - -ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64)) -TEST_GEN_FILES += va_128TBswitch -TEST_GEN_FILES += virtual_address_range -TEST_GEN_FILES += write_to_hugetlbfs -endif - -TEST_PROGS := run_vmtests - -TEST_FILES := test_vmalloc.sh - -KSFT_KHDR_INSTALL := 1 -include ../lib.mk - -$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread - -ifeq ($(ARCH),x86_64) -BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) -BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) - -define gen-target-rule-32 -$(1) $(1)_32: $(OUTPUT)/$(1)_32 -.PHONY: $(1) $(1)_32 -endef - -define gen-target-rule-64 -$(1) $(1)_64: $(OUTPUT)/$(1)_64 -.PHONY: $(1) $(1)_64 -endef - -ifeq ($(CAN_BUILD_I386),1) -$(BINARIES_32): CFLAGS += -m32 -$(BINARIES_32): LDLIBS += -lrt -ldl -lm -$(BINARIES_32): %_32: %.c - $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ -$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t)))) -endif - -ifeq ($(CAN_BUILD_X86_64),1) -$(BINARIES_64): CFLAGS += -m64 -$(BINARIES_64): LDLIBS += -lrt -ldl -$(BINARIES_64): %_64: %.c - $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ -$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t)))) -endif - -# x86_64 users should be encouraged to install 32-bit libraries -ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01) -all: warn_32bit_failure - -warn_32bit_failure: - @echo "Warning: you seem to have a broken 32-bit build" 2>&1; \ - echo "environment. This will reduce test coverage of 64-bit" 2>&1; \ - echo "kernels. If you are using a Debian-like distribution," 2>&1; \ - echo "try:"; 2>&1; \ - echo ""; \ - echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \ - echo ""; \ - echo "If you are using a Fedora-like distribution, try:"; \ - echo ""; \ - echo " yum install glibc-devel.*i686"; \ - exit 0; -endif -endif - -$(OUTPUT)/userfaultfd: LDLIBS += -lpthread - -$(OUTPUT)/mlock-random-test: LDLIBS += -lcap diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c deleted file mode 100644 index bcec71250873..000000000000 --- a/tools/testing/selftests/vm/compaction_test.c +++ /dev/null @@ -1,230 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * - * A test for the patch "Allow compaction of unevictable pages". - * With this patch we should be able to allocate at least 1/4 - * of RAM in huge pages. Without the patch much less is - * allocated. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <sys/mman.h> -#include <sys/resource.h> -#include <fcntl.h> -#include <errno.h> -#include <unistd.h> -#include <string.h> - -#include "../kselftest.h" - -#define MAP_SIZE 1048576 - -struct map_list { - void *map; - struct map_list *next; -}; - -int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize) -{ - char buffer[256] = {0}; - char *cmd = "cat /proc/meminfo | grep -i memfree | grep -o '[0-9]*'"; - FILE *cmdfile = popen(cmd, "r"); - - if (!(fgets(buffer, sizeof(buffer), cmdfile))) { - perror("Failed to read meminfo\n"); - return -1; - } - - pclose(cmdfile); - - *memfree = atoll(buffer); - cmd = "cat /proc/meminfo | grep -i hugepagesize | grep -o '[0-9]*'"; - cmdfile = popen(cmd, "r"); - - if (!(fgets(buffer, sizeof(buffer), cmdfile))) { - perror("Failed to read meminfo\n"); - return -1; - } - - pclose(cmdfile); - *hugepagesize = atoll(buffer); - - return 0; -} - -int prereq(void) -{ - char allowed; - int fd; - - fd = open("/proc/sys/vm/compact_unevictable_allowed", - O_RDONLY | O_NONBLOCK); - if (fd < 0) { - perror("Failed to open\n" - "/proc/sys/vm/compact_unevictable_allowed\n"); - return -1; - } - - if (read(fd, &allowed, sizeof(char)) != sizeof(char)) { - perror("Failed to read from\n" - "/proc/sys/vm/compact_unevictable_allowed\n"); - close(fd); - return -1; - } - - close(fd); - if (allowed == '1') - return 0; - - return -1; -} - -int check_compaction(unsigned long mem_free, unsigned int hugepage_size) -{ - int fd; - int compaction_index = 0; - char initial_nr_hugepages[10] = {0}; - char nr_hugepages[10] = {0}; - - /* We want to test with 80% of available memory. Else, OOM killer comes - in to play */ - mem_free = mem_free * 0.8; - - fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK); - if (fd < 0) { - perror("Failed to open /proc/sys/vm/nr_hugepages"); - return -1; - } - - if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) { - perror("Failed to read from /proc/sys/vm/nr_hugepages"); - goto close_fd; - } - - /* Start with the initial condition of 0 huge pages*/ - if (write(fd, "0", sizeof(char)) != sizeof(char)) { - perror("Failed to write 0 to /proc/sys/vm/nr_hugepages\n"); - goto close_fd; - } - - lseek(fd, 0, SEEK_SET); - - /* Request a large number of huge pages. The Kernel will allocate - as much as it can */ - if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { - perror("Failed to write 100000 to /proc/sys/vm/nr_hugepages\n"); - goto close_fd; - } - - lseek(fd, 0, SEEK_SET); - - if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) { - perror("Failed to re-read from /proc/sys/vm/nr_hugepages\n"); - goto close_fd; - } - - /* We should have been able to request at least 1/3 rd of the memory in - huge pages */ - compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size); - - if (compaction_index > 3) { - printf("No of huge pages allocated = %d\n", - (atoi(nr_hugepages))); - fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n" - "as huge pages\n", compaction_index); - goto close_fd; - } - - printf("No of huge pages allocated = %d\n", - (atoi(nr_hugepages))); - - lseek(fd, 0, SEEK_SET); - - if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) - != strlen(initial_nr_hugepages)) { - perror("Failed to write value to /proc/sys/vm/nr_hugepages\n"); - goto close_fd; - } - - close(fd); - return 0; - - close_fd: - close(fd); - printf("Not OK. Compaction test failed."); - return -1; -} - - -int main(int argc, char **argv) -{ - struct rlimit lim; - struct map_list *list, *entry; - size_t page_size, i; - void *map = NULL; - unsigned long mem_free = 0; - unsigned long hugepage_size = 0; - unsigned long mem_fragmentable = 0; - - if (prereq() != 0) { - printf("Either the sysctl compact_unevictable_allowed is not\n" - "set to 1 or couldn't read the proc file.\n" - "Skipping the test\n"); - return KSFT_SKIP; - } - - lim.rlim_cur = RLIM_INFINITY; - lim.rlim_max = RLIM_INFINITY; - if (setrlimit(RLIMIT_MEMLOCK, &lim)) { - perror("Failed to set rlimit:\n"); - return -1; - } - - page_size = getpagesize(); - - list = NULL; - - if (read_memory_info(&mem_free, &hugepage_size) != 0) { - printf("ERROR: Cannot read meminfo\n"); - return -1; - } - - mem_fragmentable = mem_free * 0.8 / 1024; - - while (mem_fragmentable > 0) { - map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0); - if (map == MAP_FAILED) - break; - - entry = malloc(sizeof(struct map_list)); - if (!entry) { - munmap(map, MAP_SIZE); - break; - } - entry->map = map; - entry->next = list; - list = entry; - - /* Write something (in this case the address of the map) to - * ensure that KSM can't merge the mapped pages - */ - for (i = 0; i < MAP_SIZE; i += page_size) - *(unsigned long *)(map + i) = (unsigned long)map + i; - - mem_fragmentable--; - } - - for (entry = list; entry != NULL; entry = entry->next) { - munmap(entry->map, MAP_SIZE); - if (!entry->next) - break; - entry = entry->next; - } - - if (check_compaction(mem_free, hugepage_size) == 0) - return 0; - - return -1; -} diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config deleted file mode 100644 index 3ba674b64fa9..000000000000 --- a/tools/testing/selftests/vm/config +++ /dev/null @@ -1,5 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_USERFAULTFD=y -CONFIG_TEST_VMALLOC=m -CONFIG_DEVICE_PRIVATE=y -CONFIG_TEST_HMM=m diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c deleted file mode 100644 index 43b4dfe161a2..000000000000 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ /dev/null @@ -1,137 +0,0 @@ -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - -#include <sys/ioctl.h> -#include <sys/mman.h> -#include <sys/prctl.h> -#include <sys/stat.h> -#include <sys/types.h> - -#include <linux/types.h> - -#define MB (1UL << 20) -#define PAGE_SIZE sysconf(_SC_PAGESIZE) - -#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) -#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark) -#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark) - -/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */ -#define PIN_FAST_BENCHMARK _IOWR('g', 4, struct gup_benchmark) -#define PIN_BENCHMARK _IOWR('g', 5, struct gup_benchmark) - -/* Just the flags we need, copied from mm.h: */ -#define FOLL_WRITE 0x01 /* check pte is writable */ - -struct gup_benchmark { - __u64 get_delta_usec; - __u64 put_delta_usec; - __u64 addr; - __u64 size; - __u32 nr_pages_per_call; - __u32 flags; - __u64 expansion[10]; /* For future use */ -}; - -int main(int argc, char **argv) -{ - struct gup_benchmark gup; - unsigned long size = 128 * MB; - int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; - int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE; - char *file = "/dev/zero"; - char *p; - - while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) { - switch (opt) { - case 'a': - cmd = PIN_FAST_BENCHMARK; - break; - case 'b': - cmd = PIN_BENCHMARK; - break; - case 'm': - size = atoi(optarg) * MB; - break; - case 'r': - repeats = atoi(optarg); - break; - case 'n': - nr_pages = atoi(optarg); - break; - case 't': - thp = 1; - break; - case 'T': - thp = 0; - break; - case 'L': - cmd = GUP_LONGTERM_BENCHMARK; - break; - case 'U': - cmd = GUP_BENCHMARK; - break; - case 'u': - cmd = GUP_FAST_BENCHMARK; - break; - case 'w': - write = 1; - break; - case 'f': - file = optarg; - break; - case 'S': - flags &= ~MAP_PRIVATE; - flags |= MAP_SHARED; - break; - case 'H': - flags |= (MAP_HUGETLB | MAP_ANONYMOUS); - break; - default: - return -1; - } - } - - filed = open(file, O_RDWR|O_CREAT); - if (filed < 0) { - perror("open"); - exit(filed); - } - - gup.nr_pages_per_call = nr_pages; - if (write) - gup.flags |= FOLL_WRITE; - - fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR); - if (fd == -1) - perror("open"), exit(1); - - p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0); - if (p == MAP_FAILED) - perror("mmap"), exit(1); - gup.addr = (unsigned long)p; - - if (thp == 1) - madvise(p, size, MADV_HUGEPAGE); - else if (thp == 0) - madvise(p, size, MADV_NOHUGEPAGE); - - for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE) - p[0] = 0; - - for (i = 0; i < repeats; i++) { - gup.size = size; - if (ioctl(fd, cmd, &gup)) - perror("ioctl"), exit(1); - - printf("Time: get:%lld put:%lld us", gup.get_delta_usec, - gup.put_delta_usec); - if (gup.size != size) - printf(", truncated (size: %lld)", gup.size); - printf("\n"); - } - - return 0; -} diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c deleted file mode 100644 index 93fc5cadce61..000000000000 --- a/tools/testing/selftests/vm/hmm-tests.c +++ /dev/null @@ -1,1480 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * HMM stands for Heterogeneous Memory Management, it is a helper layer inside - * the linux kernel to help device drivers mirror a process address space in - * the device. This allows the device to use the same address space which - * makes communication and data exchange a lot easier. - * - * This framework's sole purpose is to exercise various code paths inside - * the kernel to make sure that HMM performs as expected and to flush out any - * bugs. - */ - -#include "../kselftest_harness.h" - -#include <errno.h> -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <unistd.h> -#include <strings.h> -#include <time.h> -#include <pthread.h> -#include <hugetlbfs.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/mman.h> -#include <sys/ioctl.h> - -/* - * This is a private UAPI to the kernel test module so it isn't exported - * in the usual include/uapi/... directory. - */ -#include "../../../../lib/test_hmm_uapi.h" - -struct hmm_buffer { - void *ptr; - void *mirror; - unsigned long size; - int fd; - uint64_t cpages; - uint64_t faults; -}; - -#define TWOMEG (1 << 21) -#define HMM_BUFFER_SIZE (1024 << 12) -#define HMM_PATH_MAX 64 -#define NTIMES 256 - -#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) - -FIXTURE(hmm) -{ - int fd; - unsigned int page_size; - unsigned int page_shift; -}; - -FIXTURE(hmm2) -{ - int fd0; - int fd1; - unsigned int page_size; - unsigned int page_shift; -}; - -static int hmm_open(int unit) -{ - char pathname[HMM_PATH_MAX]; - int fd; - - snprintf(pathname, sizeof(pathname), "/dev/hmm_dmirror%d", unit); - fd = open(pathname, O_RDWR, 0); - if (fd < 0) - fprintf(stderr, "could not open hmm dmirror driver (%s)\n", - pathname); - return fd; -} - -FIXTURE_SETUP(hmm) -{ - self->page_size = sysconf(_SC_PAGE_SIZE); - self->page_shift = ffs(self->page_size) - 1; - - self->fd = hmm_open(0); - ASSERT_GE(self->fd, 0); -} - -FIXTURE_SETUP(hmm2) -{ - self->page_size = sysconf(_SC_PAGE_SIZE); - self->page_shift = ffs(self->page_size) - 1; - - self->fd0 = hmm_open(0); - ASSERT_GE(self->fd0, 0); - self->fd1 = hmm_open(1); - ASSERT_GE(self->fd1, 0); -} - -FIXTURE_TEARDOWN(hmm) -{ - int ret = close(self->fd); - - ASSERT_EQ(ret, 0); - self->fd = -1; -} - -FIXTURE_TEARDOWN(hmm2) -{ - int ret = close(self->fd0); - - ASSERT_EQ(ret, 0); - self->fd0 = -1; - - ret = close(self->fd1); - ASSERT_EQ(ret, 0); - self->fd1 = -1; -} - -static int hmm_dmirror_cmd(int fd, - unsigned long request, - struct hmm_buffer *buffer, - unsigned long npages) -{ - struct hmm_dmirror_cmd cmd; - int ret; - - /* Simulate a device reading system memory. */ - cmd.addr = (__u64)buffer->ptr; - cmd.ptr = (__u64)buffer->mirror; - cmd.npages = npages; - - for (;;) { - ret = ioctl(fd, request, &cmd); - if (ret == 0) - break; - if (errno == EINTR) - continue; - return -errno; - } - buffer->cpages = cmd.cpages; - buffer->faults = cmd.faults; - - return 0; -} - -static void hmm_buffer_free(struct hmm_buffer *buffer) -{ - if (buffer == NULL) - return; - - if (buffer->ptr) - munmap(buffer->ptr, buffer->size); - free(buffer->mirror); - free(buffer); -} - -/* - * Create a temporary file that will be deleted on close. - */ -static int hmm_create_file(unsigned long size) -{ - char path[HMM_PATH_MAX]; - int fd; - - strcpy(path, "/tmp"); - fd = open(path, O_TMPFILE | O_EXCL | O_RDWR, 0600); - if (fd >= 0) { - int r; - - do { - r = ftruncate(fd, size); - } while (r == -1 && errno == EINTR); - if (!r) - return fd; - close(fd); - } - return -1; -} - -/* - * Return a random unsigned number. - */ -static unsigned int hmm_random(void) -{ - static int fd = -1; - unsigned int r; - - if (fd < 0) { - fd = open("/dev/urandom", O_RDONLY); - if (fd < 0) { - fprintf(stderr, "%s:%d failed to open /dev/urandom\n", - __FILE__, __LINE__); - return ~0U; - } - } - read(fd, &r, sizeof(r)); - return r; -} - -static void hmm_nanosleep(unsigned int n) -{ - struct timespec t; - - t.tv_sec = 0; - t.tv_nsec = n; - nanosleep(&t, NULL); -} - -/* - * Simple NULL test of device open/close. - */ -TEST_F(hmm, open_close) -{ -} - -/* - * Read private anonymous memory. - */ -TEST_F(hmm, anon_read) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - int *ptr; - int ret; - int val; - - npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; - ASSERT_NE(npages, 0); - size = npages << self->page_shift; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - buffer->ptr = mmap(NULL, size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* - * Initialize buffer in system memory but leave the first two pages - * zero (pte_none and pfn_zero). - */ - i = 2 * self->page_size / sizeof(*ptr); - for (ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - - /* Set buffer permission to read-only. */ - ret = mprotect(buffer->ptr, size, PROT_READ); - ASSERT_EQ(ret, 0); - - /* Populate the CPU page table with a special zero page. */ - val = *(int *)(buffer->ptr + self->page_size); - ASSERT_EQ(val, 0); - - /* Simulate a device reading system memory. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - ASSERT_EQ(buffer->faults, 1); - - /* Check what the device read. */ - ptr = buffer->mirror; - for (i = 0; i < 2 * self->page_size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], 0); - for (; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - hmm_buffer_free(buffer); -} - -/* - * Read private anonymous memory which has been protected with - * mprotect() PROT_NONE. - */ -TEST_F(hmm, anon_read_prot) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - int *ptr; - int ret; - - npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; - ASSERT_NE(npages, 0); - size = npages << self->page_shift; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - buffer->ptr = mmap(NULL, size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* Initialize buffer in system memory. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - - /* Initialize mirror buffer so we can verify it isn't written. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ptr[i] = -i; - - /* Protect buffer from reading. */ - ret = mprotect(buffer->ptr, size, PROT_NONE); - ASSERT_EQ(ret, 0); - - /* Simulate a device reading system memory. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); - ASSERT_EQ(ret, -EFAULT); - - /* Allow CPU to read the buffer so we can check it. */ - ret = mprotect(buffer->ptr, size, PROT_READ); - ASSERT_EQ(ret, 0); - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - /* Check what the device read. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], -i); - - hmm_buffer_free(buffer); -} - -/* - * Write private anonymous memory. - */ -TEST_F(hmm, anon_write) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - int *ptr; - int ret; - - npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; - ASSERT_NE(npages, 0); - size = npages << self->page_shift; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - buffer->ptr = mmap(NULL, size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* Initialize data that the device will write to buffer->ptr. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - - /* Simulate a device writing system memory. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - ASSERT_EQ(buffer->faults, 1); - - /* Check what the device wrote. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - hmm_buffer_free(buffer); -} - -/* - * Write private anonymous memory which has been protected with - * mprotect() PROT_READ. - */ -TEST_F(hmm, anon_write_prot) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - int *ptr; - int ret; - - npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; - ASSERT_NE(npages, 0); - size = npages << self->page_shift; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - buffer->ptr = mmap(NULL, size, - PROT_READ, - MAP_PRIVATE | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* Simulate a device reading a zero page of memory. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, 1); - ASSERT_EQ(buffer->faults, 1); - - /* Initialize data that the device will write to buffer->ptr. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - - /* Simulate a device writing system memory. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); - ASSERT_EQ(ret, -EPERM); - - /* Check what the device wrote. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], 0); - - /* Now allow writing and see that the zero page is replaced. */ - ret = mprotect(buffer->ptr, size, PROT_WRITE | PROT_READ); - ASSERT_EQ(ret, 0); - - /* Simulate a device writing system memory. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - ASSERT_EQ(buffer->faults, 1); - - /* Check what the device wrote. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - hmm_buffer_free(buffer); -} - -/* - * Check that a device writing an anonymous private mapping - * will copy-on-write if a child process inherits the mapping. - */ -TEST_F(hmm, anon_write_child) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - int *ptr; - pid_t pid; - int child_fd; - int ret; - - npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; - ASSERT_NE(npages, 0); - size = npages << self->page_shift; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - buffer->ptr = mmap(NULL, size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* Initialize buffer->ptr so we can tell if it is written. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - - /* Initialize data that the device will write to buffer->ptr. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ptr[i] = -i; - - pid = fork(); - if (pid == -1) - ASSERT_EQ(pid, 0); - if (pid != 0) { - waitpid(pid, &ret, 0); - ASSERT_EQ(WIFEXITED(ret), 1); - - /* Check that the parent's buffer did not change. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - return; - } - - /* Check that we see the parent's values. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], -i); - - /* The child process needs its own mirror to its own mm. */ - child_fd = hmm_open(0); - ASSERT_GE(child_fd, 0); - - /* Simulate a device writing system memory. */ - ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - ASSERT_EQ(buffer->faults, 1); - - /* Check what the device wrote. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], -i); - - close(child_fd); - exit(0); -} - -/* - * Check that a device writing an anonymous shared mapping - * will not copy-on-write if a child process inherits the mapping. - */ -TEST_F(hmm, anon_write_child_shared) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - int *ptr; - pid_t pid; - int child_fd; - int ret; - - npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; - ASSERT_NE(npages, 0); - size = npages << self->page_shift; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - buffer->ptr = mmap(NULL, size, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* Initialize buffer->ptr so we can tell if it is written. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - - /* Initialize data that the device will write to buffer->ptr. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ptr[i] = -i; - - pid = fork(); - if (pid == -1) - ASSERT_EQ(pid, 0); - if (pid != 0) { - waitpid(pid, &ret, 0); - ASSERT_EQ(WIFEXITED(ret), 1); - - /* Check that the parent's buffer did change. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], -i); - return; - } - - /* Check that we see the parent's values. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], -i); - - /* The child process needs its own mirror to its own mm. */ - child_fd = hmm_open(0); - ASSERT_GE(child_fd, 0); - - /* Simulate a device writing system memory. */ - ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - ASSERT_EQ(buffer->faults, 1); - - /* Check what the device wrote. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], -i); - - close(child_fd); - exit(0); -} - -/* - * Write private anonymous huge page. - */ -TEST_F(hmm, anon_write_huge) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - void *old_ptr; - void *map; - int *ptr; - int ret; - - size = 2 * TWOMEG; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - buffer->ptr = mmap(NULL, size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - size = TWOMEG; - npages = size >> self->page_shift; - map = (void *)ALIGN((uintptr_t)buffer->ptr, size); - ret = madvise(map, size, MADV_HUGEPAGE); - ASSERT_EQ(ret, 0); - old_ptr = buffer->ptr; - buffer->ptr = map; - - /* Initialize data that the device will write to buffer->ptr. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - - /* Simulate a device writing system memory. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - ASSERT_EQ(buffer->faults, 1); - - /* Check what the device wrote. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - buffer->ptr = old_ptr; - hmm_buffer_free(buffer); -} - -/* - * Write huge TLBFS page. - */ -TEST_F(hmm, anon_write_hugetlbfs) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - int *ptr; - int ret; - long pagesizes[4]; - int n, idx; - - /* Skip test if we can't allocate a hugetlbfs page. */ - - n = gethugepagesizes(pagesizes, 4); - if (n <= 0) - return; - for (idx = 0; --n > 0; ) { - if (pagesizes[n] < pagesizes[idx]) - idx = n; - } - size = ALIGN(TWOMEG, pagesizes[idx]); - npages = size >> self->page_shift; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->ptr = get_hugepage_region(size, GHR_STRICT); - if (buffer->ptr == NULL) { - free(buffer); - return; - } - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - /* Initialize data that the device will write to buffer->ptr. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - - /* Simulate a device writing system memory. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - ASSERT_EQ(buffer->faults, 1); - - /* Check what the device wrote. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - free_hugepage_region(buffer->ptr); - buffer->ptr = NULL; - hmm_buffer_free(buffer); -} - -/* - * Read mmap'ed file memory. - */ -TEST_F(hmm, file_read) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - int *ptr; - int ret; - int fd; - ssize_t len; - - npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; - ASSERT_NE(npages, 0); - size = npages << self->page_shift; - - fd = hmm_create_file(size); - ASSERT_GE(fd, 0); - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = fd; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - /* Write initial contents of the file. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - len = pwrite(fd, buffer->mirror, size, 0); - ASSERT_EQ(len, size); - memset(buffer->mirror, 0, size); - - buffer->ptr = mmap(NULL, size, - PROT_READ, - MAP_SHARED, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* Simulate a device reading system memory. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - ASSERT_EQ(buffer->faults, 1); - - /* Check what the device read. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - hmm_buffer_free(buffer); -} - -/* - * Write mmap'ed file memory. - */ -TEST_F(hmm, file_write) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - int *ptr; - int ret; - int fd; - ssize_t len; - - npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; - ASSERT_NE(npages, 0); - size = npages << self->page_shift; - - fd = hmm_create_file(size); - ASSERT_GE(fd, 0); - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = fd; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - buffer->ptr = mmap(NULL, size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* Initialize data that the device will write to buffer->ptr. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - - /* Simulate a device writing system memory. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - ASSERT_EQ(buffer->faults, 1); - - /* Check what the device wrote. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - /* Check that the device also wrote the file. */ - len = pread(fd, buffer->mirror, size, 0); - ASSERT_EQ(len, size); - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - hmm_buffer_free(buffer); -} - -/* - * Migrate anonymous memory to device private memory. - */ -TEST_F(hmm, migrate) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - int *ptr; - int ret; - - npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; - ASSERT_NE(npages, 0); - size = npages << self->page_shift; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - buffer->ptr = mmap(NULL, size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* Initialize buffer in system memory. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - - /* Migrate memory to device. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - - /* Check what the device read. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - hmm_buffer_free(buffer); -} - -/* - * Migrate anonymous memory to device private memory and fault some of it back - * to system memory, then try migrating the resulting mix of system and device - * private memory to the device. - */ -TEST_F(hmm, migrate_fault) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - int *ptr; - int ret; - - npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; - ASSERT_NE(npages, 0); - size = npages << self->page_shift; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - buffer->ptr = mmap(NULL, size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* Initialize buffer in system memory. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - - /* Migrate memory to device. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - - /* Check what the device read. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - /* Fault half the pages back to system memory and check them. */ - for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i) - ASSERT_EQ(ptr[i], i); - - /* Migrate memory to the device again. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - - /* Check what the device read. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - hmm_buffer_free(buffer); -} - -/* - * Migrate anonymous shared memory to device private memory. - */ -TEST_F(hmm, migrate_shared) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - int ret; - - npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; - ASSERT_NE(npages, 0); - size = npages << self->page_shift; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - buffer->ptr = mmap(NULL, size, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* Migrate memory to device. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); - ASSERT_EQ(ret, -ENOENT); - - hmm_buffer_free(buffer); -} - -/* - * Try to migrate various memory types to device private memory. - */ -TEST_F(hmm2, migrate_mixed) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - int *ptr; - unsigned char *p; - int ret; - int val; - - npages = 6; - size = npages << self->page_shift; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - /* Reserve a range of addresses. */ - buffer->ptr = mmap(NULL, size, - PROT_NONE, - MAP_PRIVATE | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - p = buffer->ptr; - - /* Migrating a protected area should be an error. */ - ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, npages); - ASSERT_EQ(ret, -EINVAL); - - /* Punch a hole after the first page address. */ - ret = munmap(buffer->ptr + self->page_size, self->page_size); - ASSERT_EQ(ret, 0); - - /* We expect an error if the vma doesn't cover the range. */ - ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 3); - ASSERT_EQ(ret, -EINVAL); - - /* Page 2 will be a read-only zero page. */ - ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size, - PROT_READ); - ASSERT_EQ(ret, 0); - ptr = (int *)(buffer->ptr + 2 * self->page_size); - val = *ptr + 3; - ASSERT_EQ(val, 3); - - /* Page 3 will be read-only. */ - ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, - PROT_READ | PROT_WRITE); - ASSERT_EQ(ret, 0); - ptr = (int *)(buffer->ptr + 3 * self->page_size); - *ptr = val; - ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, - PROT_READ); - ASSERT_EQ(ret, 0); - - /* Page 4-5 will be read-write. */ - ret = mprotect(buffer->ptr + 4 * self->page_size, 2 * self->page_size, - PROT_READ | PROT_WRITE); - ASSERT_EQ(ret, 0); - ptr = (int *)(buffer->ptr + 4 * self->page_size); - *ptr = val; - ptr = (int *)(buffer->ptr + 5 * self->page_size); - *ptr = val; - - /* Now try to migrate pages 2-5 to device 1. */ - buffer->ptr = p + 2 * self->page_size; - ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 4); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, 4); - - /* Page 5 won't be migrated to device 0 because it's on device 1. */ - buffer->ptr = p + 5 * self->page_size; - ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1); - ASSERT_EQ(ret, -ENOENT); - buffer->ptr = p; - - buffer->ptr = p; - hmm_buffer_free(buffer); -} - -/* - * Migrate anonymous memory to device private memory and fault it back to system - * memory multiple times. - */ -TEST_F(hmm, migrate_multiple) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - unsigned long c; - int *ptr; - int ret; - - npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; - ASSERT_NE(npages, 0); - size = npages << self->page_shift; - - for (c = 0; c < NTIMES; c++) { - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - buffer->ptr = mmap(NULL, size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* Initialize buffer in system memory. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - - /* Migrate memory to device. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, - npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - - /* Check what the device read. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - /* Fault pages back to system memory and check them. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - hmm_buffer_free(buffer); - } -} - -/* - * Read anonymous memory multiple times. - */ -TEST_F(hmm, anon_read_multiple) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - unsigned long c; - int *ptr; - int ret; - - npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; - ASSERT_NE(npages, 0); - size = npages << self->page_shift; - - for (c = 0; c < NTIMES; c++) { - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - buffer->ptr = mmap(NULL, size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* Initialize buffer in system memory. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ptr[i] = i + c; - - /* Simulate a device reading system memory. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, - npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - ASSERT_EQ(buffer->faults, 1); - - /* Check what the device read. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i + c); - - hmm_buffer_free(buffer); - } -} - -void *unmap_buffer(void *p) -{ - struct hmm_buffer *buffer = p; - - /* Delay for a bit and then unmap buffer while it is being read. */ - hmm_nanosleep(hmm_random() % 32000); - munmap(buffer->ptr + buffer->size / 2, buffer->size / 2); - buffer->ptr = NULL; - - return NULL; -} - -/* - * Try reading anonymous memory while it is being unmapped. - */ -TEST_F(hmm, anon_teardown) -{ - unsigned long npages; - unsigned long size; - unsigned long c; - void *ret; - - npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; - ASSERT_NE(npages, 0); - size = npages << self->page_shift; - - for (c = 0; c < NTIMES; ++c) { - pthread_t thread; - struct hmm_buffer *buffer; - unsigned long i; - int *ptr; - int rc; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(size); - ASSERT_NE(buffer->mirror, NULL); - - buffer->ptr = mmap(NULL, size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* Initialize buffer in system memory. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ptr[i] = i + c; - - rc = pthread_create(&thread, NULL, unmap_buffer, buffer); - ASSERT_EQ(rc, 0); - - /* Simulate a device reading system memory. */ - rc = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, - npages); - if (rc == 0) { - ASSERT_EQ(buffer->cpages, npages); - ASSERT_EQ(buffer->faults, 1); - - /* Check what the device read. */ - for (i = 0, ptr = buffer->mirror; - i < size / sizeof(*ptr); - ++i) - ASSERT_EQ(ptr[i], i + c); - } - - pthread_join(thread, &ret); - hmm_buffer_free(buffer); - } -} - -/* - * Test memory snapshot without faulting in pages accessed by the device. - */ -TEST_F(hmm2, snapshot) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - int *ptr; - unsigned char *p; - unsigned char *m; - int ret; - int val; - - npages = 7; - size = npages << self->page_shift; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(npages); - ASSERT_NE(buffer->mirror, NULL); - - /* Reserve a range of addresses. */ - buffer->ptr = mmap(NULL, size, - PROT_NONE, - MAP_PRIVATE | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - p = buffer->ptr; - - /* Punch a hole after the first page address. */ - ret = munmap(buffer->ptr + self->page_size, self->page_size); - ASSERT_EQ(ret, 0); - - /* Page 2 will be read-only zero page. */ - ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size, - PROT_READ); - ASSERT_EQ(ret, 0); - ptr = (int *)(buffer->ptr + 2 * self->page_size); - val = *ptr + 3; - ASSERT_EQ(val, 3); - - /* Page 3 will be read-only. */ - ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, - PROT_READ | PROT_WRITE); - ASSERT_EQ(ret, 0); - ptr = (int *)(buffer->ptr + 3 * self->page_size); - *ptr = val; - ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, - PROT_READ); - ASSERT_EQ(ret, 0); - - /* Page 4-6 will be read-write. */ - ret = mprotect(buffer->ptr + 4 * self->page_size, 3 * self->page_size, - PROT_READ | PROT_WRITE); - ASSERT_EQ(ret, 0); - ptr = (int *)(buffer->ptr + 4 * self->page_size); - *ptr = val; - - /* Page 5 will be migrated to device 0. */ - buffer->ptr = p + 5 * self->page_size; - ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, 1); - - /* Page 6 will be migrated to device 1. */ - buffer->ptr = p + 6 * self->page_size; - ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 1); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, 1); - - /* Simulate a device snapshotting CPU pagetables. */ - buffer->ptr = p; - ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_SNAPSHOT, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - - /* Check what the device saw. */ - m = buffer->mirror; - ASSERT_EQ(m[0], HMM_DMIRROR_PROT_ERROR); - ASSERT_EQ(m[1], HMM_DMIRROR_PROT_ERROR); - ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ); - ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ); - ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE); - ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL | - HMM_DMIRROR_PROT_WRITE); - ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE); - - hmm_buffer_free(buffer); -} - -/* - * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that - * should be mapped by a large page table entry. - */ -TEST_F(hmm, compound) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - int *ptr; - unsigned char *m; - int ret; - long pagesizes[4]; - int n, idx; - unsigned long i; - - /* Skip test if we can't allocate a hugetlbfs page. */ - - n = gethugepagesizes(pagesizes, 4); - if (n <= 0) - return; - for (idx = 0; --n > 0; ) { - if (pagesizes[n] < pagesizes[idx]) - idx = n; - } - size = ALIGN(TWOMEG, pagesizes[idx]); - npages = size >> self->page_shift; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->ptr = get_hugepage_region(size, GHR_STRICT); - if (buffer->ptr == NULL) { - free(buffer); - return; - } - - buffer->size = size; - buffer->mirror = malloc(npages); - ASSERT_NE(buffer->mirror, NULL); - - /* Initialize the pages the device will snapshot in buffer->ptr. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - - /* Simulate a device snapshotting CPU pagetables. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - - /* Check what the device saw. */ - m = buffer->mirror; - for (i = 0; i < npages; ++i) - ASSERT_EQ(m[i], HMM_DMIRROR_PROT_WRITE | - HMM_DMIRROR_PROT_PMD); - - /* Make the region read-only. */ - ret = mprotect(buffer->ptr, size, PROT_READ); - ASSERT_EQ(ret, 0); - - /* Simulate a device snapshotting CPU pagetables. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - - /* Check what the device saw. */ - m = buffer->mirror; - for (i = 0; i < npages; ++i) - ASSERT_EQ(m[i], HMM_DMIRROR_PROT_READ | - HMM_DMIRROR_PROT_PMD); - - free_hugepage_region(buffer->ptr); - buffer->ptr = NULL; - hmm_buffer_free(buffer); -} - -/* - * Test two devices reading the same memory (double mapped). - */ -TEST_F(hmm2, double_map) -{ - struct hmm_buffer *buffer; - unsigned long npages; - unsigned long size; - unsigned long i; - int *ptr; - int ret; - - npages = 6; - size = npages << self->page_shift; - - buffer = malloc(sizeof(*buffer)); - ASSERT_NE(buffer, NULL); - - buffer->fd = -1; - buffer->size = size; - buffer->mirror = malloc(npages); - ASSERT_NE(buffer->mirror, NULL); - - /* Reserve a range of addresses. */ - buffer->ptr = mmap(NULL, size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - buffer->fd, 0); - ASSERT_NE(buffer->ptr, MAP_FAILED); - - /* Initialize buffer in system memory. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) - ptr[i] = i; - - /* Make region read-only. */ - ret = mprotect(buffer->ptr, size, PROT_READ); - ASSERT_EQ(ret, 0); - - /* Simulate device 0 reading system memory. */ - ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - ASSERT_EQ(buffer->faults, 1); - - /* Check what the device read. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - /* Simulate device 1 reading system memory. */ - ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_READ, buffer, npages); - ASSERT_EQ(ret, 0); - ASSERT_EQ(buffer->cpages, npages); - ASSERT_EQ(buffer->faults, 1); - - /* Check what the device read. */ - for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) - ASSERT_EQ(ptr[i], i); - - /* Punch a hole after the first page address. */ - ret = munmap(buffer->ptr + self->page_size, self->page_size); - ASSERT_EQ(ret, 0); - - hmm_buffer_free(buffer); -} - -TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c deleted file mode 100644 index 8b75821302a7..000000000000 --- a/tools/testing/selftests/vm/khugepaged.c +++ /dev/null @@ -1,1035 +0,0 @@ -#define _GNU_SOURCE -#include <fcntl.h> -#include <limits.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdbool.h> -#include <string.h> -#include <unistd.h> - -#include <sys/mman.h> -#include <sys/wait.h> - -#ifndef MADV_PAGEOUT -#define MADV_PAGEOUT 21 -#endif - -#define BASE_ADDR ((void *)(1UL << 30)) -static unsigned long hpage_pmd_size; -static unsigned long page_size; -static int hpage_pmd_nr; - -#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" -#define PID_SMAPS "/proc/self/smaps" - -enum thp_enabled { - THP_ALWAYS, - THP_MADVISE, - THP_NEVER, -}; - -static const char *thp_enabled_strings[] = { - "always", - "madvise", - "never", - NULL -}; - -enum thp_defrag { - THP_DEFRAG_ALWAYS, - THP_DEFRAG_DEFER, - THP_DEFRAG_DEFER_MADVISE, - THP_DEFRAG_MADVISE, - THP_DEFRAG_NEVER, -}; - -static const char *thp_defrag_strings[] = { - "always", - "defer", - "defer+madvise", - "madvise", - "never", - NULL -}; - -enum shmem_enabled { - SHMEM_ALWAYS, - SHMEM_WITHIN_SIZE, - SHMEM_ADVISE, - SHMEM_NEVER, - SHMEM_DENY, - SHMEM_FORCE, -}; - -static const char *shmem_enabled_strings[] = { - "always", - "within_size", - "advise", - "never", - "deny", - "force", - NULL -}; - -struct khugepaged_settings { - bool defrag; - unsigned int alloc_sleep_millisecs; - unsigned int scan_sleep_millisecs; - unsigned int max_ptes_none; - unsigned int max_ptes_swap; - unsigned int max_ptes_shared; - unsigned long pages_to_scan; -}; - -struct settings { - enum thp_enabled thp_enabled; - enum thp_defrag thp_defrag; - enum shmem_enabled shmem_enabled; - bool debug_cow; - bool use_zero_page; - struct khugepaged_settings khugepaged; -}; - -static struct settings default_settings = { - .thp_enabled = THP_MADVISE, - .thp_defrag = THP_DEFRAG_ALWAYS, - .shmem_enabled = SHMEM_NEVER, - .debug_cow = 0, - .use_zero_page = 0, - .khugepaged = { - .defrag = 1, - .alloc_sleep_millisecs = 10, - .scan_sleep_millisecs = 10, - }, -}; - -static struct settings saved_settings; -static bool skip_settings_restore; - -static int exit_status; - -static void success(const char *msg) -{ - printf(" \e[32m%s\e[0m\n", msg); -} - -static void fail(const char *msg) -{ - printf(" \e[31m%s\e[0m\n", msg); - exit_status++; -} - -static int read_file(const char *path, char *buf, size_t buflen) -{ - int fd; - ssize_t numread; - - fd = open(path, O_RDONLY); - if (fd == -1) - return 0; - - numread = read(fd, buf, buflen - 1); - if (numread < 1) { - close(fd); - return 0; - } - - buf[numread] = '\0'; - close(fd); - - return (unsigned int) numread; -} - -static int write_file(const char *path, const char *buf, size_t buflen) -{ - int fd; - ssize_t numwritten; - - fd = open(path, O_WRONLY); - if (fd == -1) - return 0; - - numwritten = write(fd, buf, buflen - 1); - close(fd); - if (numwritten < 1) - return 0; - - return (unsigned int) numwritten; -} - -static int read_string(const char *name, const char *strings[]) -{ - char path[PATH_MAX]; - char buf[256]; - char *c; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - if (!read_file(path, buf, sizeof(buf))) { - perror(path); - exit(EXIT_FAILURE); - } - - c = strchr(buf, '['); - if (!c) { - printf("%s: Parse failure\n", __func__); - exit(EXIT_FAILURE); - } - - c++; - memmove(buf, c, sizeof(buf) - (c - buf)); - - c = strchr(buf, ']'); - if (!c) { - printf("%s: Parse failure\n", __func__); - exit(EXIT_FAILURE); - } - *c = '\0'; - - ret = 0; - while (strings[ret]) { - if (!strcmp(strings[ret], buf)) - return ret; - ret++; - } - - printf("Failed to parse %s\n", name); - exit(EXIT_FAILURE); -} - -static void write_string(const char *name, const char *val) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - if (!write_file(path, val, strlen(val) + 1)) { - perror(path); - exit(EXIT_FAILURE); - } -} - -static const unsigned long read_num(const char *name) -{ - char path[PATH_MAX]; - char buf[21]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - ret = read_file(path, buf, sizeof(buf)); - if (ret < 0) { - perror("read_file(read_num)"); - exit(EXIT_FAILURE); - } - - return strtoul(buf, NULL, 10); -} - -static void write_num(const char *name, unsigned long num) -{ - char path[PATH_MAX]; - char buf[21]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - sprintf(buf, "%ld", num); - if (!write_file(path, buf, strlen(buf) + 1)) { - perror(path); - exit(EXIT_FAILURE); - } -} - -static void write_settings(struct settings *settings) -{ - struct khugepaged_settings *khugepaged = &settings->khugepaged; - - write_string("enabled", thp_enabled_strings[settings->thp_enabled]); - write_string("defrag", thp_defrag_strings[settings->thp_defrag]); - write_string("shmem_enabled", - shmem_enabled_strings[settings->shmem_enabled]); - write_num("debug_cow", settings->debug_cow); - write_num("use_zero_page", settings->use_zero_page); - - write_num("khugepaged/defrag", khugepaged->defrag); - write_num("khugepaged/alloc_sleep_millisecs", - khugepaged->alloc_sleep_millisecs); - write_num("khugepaged/scan_sleep_millisecs", - khugepaged->scan_sleep_millisecs); - write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none); - write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); - write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared); - write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); -} - -static void restore_settings(int sig) -{ - if (skip_settings_restore) - goto out; - - printf("Restore THP and khugepaged settings..."); - write_settings(&saved_settings); - success("OK"); - if (sig) - exit(EXIT_FAILURE); -out: - exit(exit_status); -} - -static void save_settings(void) -{ - printf("Save THP and khugepaged settings..."); - saved_settings = (struct settings) { - .thp_enabled = read_string("enabled", thp_enabled_strings), - .thp_defrag = read_string("defrag", thp_defrag_strings), - .shmem_enabled = - read_string("shmem_enabled", shmem_enabled_strings), - .debug_cow = read_num("debug_cow"), - .use_zero_page = read_num("use_zero_page"), - }; - saved_settings.khugepaged = (struct khugepaged_settings) { - .defrag = read_num("khugepaged/defrag"), - .alloc_sleep_millisecs = - read_num("khugepaged/alloc_sleep_millisecs"), - .scan_sleep_millisecs = - read_num("khugepaged/scan_sleep_millisecs"), - .max_ptes_none = read_num("khugepaged/max_ptes_none"), - .max_ptes_swap = read_num("khugepaged/max_ptes_swap"), - .max_ptes_shared = read_num("khugepaged/max_ptes_shared"), - .pages_to_scan = read_num("khugepaged/pages_to_scan"), - }; - success("OK"); - - signal(SIGTERM, restore_settings); - signal(SIGINT, restore_settings); - signal(SIGHUP, restore_settings); - signal(SIGQUIT, restore_settings); -} - -static void adjust_settings(void) -{ - - printf("Adjust settings..."); - write_settings(&default_settings); - success("OK"); -} - -#define MAX_LINE_LENGTH 500 - -static bool check_for_pattern(FILE *fp, char *pattern, char *buf) -{ - while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) { - if (!strncmp(buf, pattern, strlen(pattern))) - return true; - } - return false; -} - -static bool check_huge(void *addr) -{ - bool thp = false; - int ret; - FILE *fp; - char buffer[MAX_LINE_LENGTH]; - char addr_pattern[MAX_LINE_LENGTH]; - - ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", - (unsigned long) addr); - if (ret >= MAX_LINE_LENGTH) { - printf("%s: Pattern is too long\n", __func__); - exit(EXIT_FAILURE); - } - - - fp = fopen(PID_SMAPS, "r"); - if (!fp) { - printf("%s: Failed to open file %s\n", __func__, PID_SMAPS); - exit(EXIT_FAILURE); - } - if (!check_for_pattern(fp, addr_pattern, buffer)) - goto err_out; - - ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB", - hpage_pmd_size >> 10); - if (ret >= MAX_LINE_LENGTH) { - printf("%s: Pattern is too long\n", __func__); - exit(EXIT_FAILURE); - } - /* - * Fetch the AnonHugePages: in the same block and check whether it got - * the expected number of hugeepages next. - */ - if (!check_for_pattern(fp, "AnonHugePages:", buffer)) - goto err_out; - - if (strncmp(buffer, addr_pattern, strlen(addr_pattern))) - goto err_out; - - thp = true; -err_out: - fclose(fp); - return thp; -} - - -static bool check_swap(void *addr, unsigned long size) -{ - bool swap = false; - int ret; - FILE *fp; - char buffer[MAX_LINE_LENGTH]; - char addr_pattern[MAX_LINE_LENGTH]; - - ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", - (unsigned long) addr); - if (ret >= MAX_LINE_LENGTH) { - printf("%s: Pattern is too long\n", __func__); - exit(EXIT_FAILURE); - } - - - fp = fopen(PID_SMAPS, "r"); - if (!fp) { - printf("%s: Failed to open file %s\n", __func__, PID_SMAPS); - exit(EXIT_FAILURE); - } - if (!check_for_pattern(fp, addr_pattern, buffer)) - goto err_out; - - ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB", - size >> 10); - if (ret >= MAX_LINE_LENGTH) { - printf("%s: Pattern is too long\n", __func__); - exit(EXIT_FAILURE); - } - /* - * Fetch the Swap: in the same block and check whether it got - * the expected number of hugeepages next. - */ - if (!check_for_pattern(fp, "Swap:", buffer)) - goto err_out; - - if (strncmp(buffer, addr_pattern, strlen(addr_pattern))) - goto err_out; - - swap = true; -err_out: - fclose(fp); - return swap; -} - -static void *alloc_mapping(void) -{ - void *p; - - p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (p != BASE_ADDR) { - printf("Failed to allocate VMA at %p\n", BASE_ADDR); - exit(EXIT_FAILURE); - } - - return p; -} - -static void fill_memory(int *p, unsigned long start, unsigned long end) -{ - int i; - - for (i = start / page_size; i < end / page_size; i++) - p[i * page_size / sizeof(*p)] = i + 0xdead0000; -} - -static void validate_memory(int *p, unsigned long start, unsigned long end) -{ - int i; - - for (i = start / page_size; i < end / page_size; i++) { - if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) { - printf("Page %d is corrupted: %#x\n", - i, p[i * page_size / sizeof(*p)]); - exit(EXIT_FAILURE); - } - } -} - -#define TICK 500000 -static bool wait_for_scan(const char *msg, char *p) -{ - int full_scans; - int timeout = 6; /* 3 seconds */ - - /* Sanity check */ - if (check_huge(p)) { - printf("Unexpected huge page\n"); - exit(EXIT_FAILURE); - } - - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - - /* Wait until the second full_scan completed */ - full_scans = read_num("khugepaged/full_scans") + 2; - - printf("%s...", msg); - while (timeout--) { - if (check_huge(p)) - break; - if (read_num("khugepaged/full_scans") >= full_scans) - break; - printf("."); - usleep(TICK); - } - - madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - - return timeout == -1; -} - -static void alloc_at_fault(void) -{ - struct settings settings = default_settings; - char *p; - - settings.thp_enabled = THP_ALWAYS; - write_settings(&settings); - - p = alloc_mapping(); - *p = 1; - printf("Allocate huge page on fault..."); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - - write_settings(&default_settings); - - madvise(p, page_size, MADV_DONTNEED); - printf("Split huge PMD on MADV_DONTNEED..."); - if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - munmap(p, hpage_pmd_size); -} - -static void collapse_full(void) -{ - void *p; - - p = alloc_mapping(); - fill_memory(p, 0, hpage_pmd_size); - if (wait_for_scan("Collapse fully populated PTE table", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); -} - -static void collapse_empty(void) -{ - void *p; - - p = alloc_mapping(); - if (wait_for_scan("Do not collapse empty PTE table", p)) - fail("Timeout"); - else if (check_huge(p)) - fail("Fail"); - else - success("OK"); - munmap(p, hpage_pmd_size); -} - -static void collapse_single_pte_entry(void) -{ - void *p; - - p = alloc_mapping(); - fill_memory(p, 0, page_size); - if (wait_for_scan("Collapse PTE table with single PTE entry present", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, page_size); - munmap(p, hpage_pmd_size); -} - -static void collapse_max_ptes_none(void) -{ - int max_ptes_none = hpage_pmd_nr / 2; - struct settings settings = default_settings; - void *p; - - settings.khugepaged.max_ptes_none = max_ptes_none; - write_settings(&settings); - - p = alloc_mapping(); - - fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); - if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p)) - fail("Timeout"); - else if (check_huge(p)) - fail("Fail"); - else - success("OK"); - validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); - - fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); - if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); - - munmap(p, hpage_pmd_size); - write_settings(&default_settings); -} - -static void collapse_swapin_single_pte(void) -{ - void *p; - p = alloc_mapping(); - fill_memory(p, 0, hpage_pmd_size); - - printf("Swapout one page..."); - if (madvise(p, page_size, MADV_PAGEOUT)) { - perror("madvise(MADV_PAGEOUT)"); - exit(EXIT_FAILURE); - } - if (check_swap(p, page_size)) { - success("OK"); - } else { - fail("Fail"); - goto out; - } - - if (wait_for_scan("Collapse with swapping in single PTE entry", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, hpage_pmd_size); -out: - munmap(p, hpage_pmd_size); -} - -static void collapse_max_ptes_swap(void) -{ - int max_ptes_swap = read_num("khugepaged/max_ptes_swap"); - void *p; - - p = alloc_mapping(); - - fill_memory(p, 0, hpage_pmd_size); - printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr); - if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) { - perror("madvise(MADV_PAGEOUT)"); - exit(EXIT_FAILURE); - } - if (check_swap(p, (max_ptes_swap + 1) * page_size)) { - success("OK"); - } else { - fail("Fail"); - goto out; - } - - if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p)) - fail("Timeout"); - else if (check_huge(p)) - fail("Fail"); - else - success("OK"); - validate_memory(p, 0, hpage_pmd_size); - - fill_memory(p, 0, hpage_pmd_size); - printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr); - if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) { - perror("madvise(MADV_PAGEOUT)"); - exit(EXIT_FAILURE); - } - if (check_swap(p, max_ptes_swap * page_size)) { - success("OK"); - } else { - fail("Fail"); - goto out; - } - - if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, hpage_pmd_size); -out: - munmap(p, hpage_pmd_size); -} - -static void collapse_single_pte_entry_compound(void) -{ - void *p; - - p = alloc_mapping(); - - printf("Allocate huge page..."); - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - - printf("Split huge page leaving single PTE mapping compound page..."); - madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED); - if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - - if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, page_size); - munmap(p, hpage_pmd_size); -} - -static void collapse_full_of_compound(void) -{ - void *p; - - p = alloc_mapping(); - - printf("Allocate huge page..."); - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - - printf("Split huge page leaving single PTE page table full of compound pages..."); - madvise(p, page_size, MADV_NOHUGEPAGE); - madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - - if (wait_for_scan("Collapse PTE table full of compound pages", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); -} - -static void collapse_compound_extreme(void) -{ - void *p; - int i; - - p = alloc_mapping(); - for (i = 0; i < hpage_pmd_nr; i++) { - printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...", - i + 1, hpage_pmd_nr); - - madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(BASE_ADDR, 0, hpage_pmd_size); - if (!check_huge(BASE_ADDR)) { - printf("Failed to allocate huge page\n"); - exit(EXIT_FAILURE); - } - madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE); - - p = mremap(BASE_ADDR - i * page_size, - i * page_size + hpage_pmd_size, - (i + 1) * page_size, - MREMAP_MAYMOVE | MREMAP_FIXED, - BASE_ADDR + 2 * hpage_pmd_size); - if (p == MAP_FAILED) { - perror("mremap+unmap"); - exit(EXIT_FAILURE); - } - - p = mremap(BASE_ADDR + 2 * hpage_pmd_size, - (i + 1) * page_size, - (i + 1) * page_size + hpage_pmd_size, - MREMAP_MAYMOVE | MREMAP_FIXED, - BASE_ADDR - (i + 1) * page_size); - if (p == MAP_FAILED) { - perror("mremap+alloc"); - exit(EXIT_FAILURE); - } - } - - munmap(BASE_ADDR, hpage_pmd_size); - fill_memory(p, 0, hpage_pmd_size); - if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - - if (wait_for_scan("Collapse PTE table full of different compound pages", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - - validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); -} - -static void collapse_fork(void) -{ - int wstatus; - void *p; - - p = alloc_mapping(); - - printf("Allocate small page..."); - fill_memory(p, 0, page_size); - if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - - printf("Share small page over fork()..."); - if (!fork()) { - /* Do not touch settings on child exit */ - skip_settings_restore = true; - exit_status = 0; - - if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - - fill_memory(p, page_size, 2 * page_size); - - if (wait_for_scan("Collapse PTE table with single page shared with parent process", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - - validate_memory(p, 0, page_size); - munmap(p, hpage_pmd_size); - exit(exit_status); - } - - wait(&wstatus); - exit_status += WEXITSTATUS(wstatus); - - printf("Check if parent still has small page..."); - if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, page_size); - munmap(p, hpage_pmd_size); -} - -static void collapse_fork_compound(void) -{ - int wstatus; - void *p; - - p = alloc_mapping(); - - printf("Allocate huge page..."); - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - - printf("Share huge page over fork()..."); - if (!fork()) { - /* Do not touch settings on child exit */ - skip_settings_restore = true; - exit_status = 0; - - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - - printf("Split huge page PMD in child process..."); - madvise(p, page_size, MADV_NOHUGEPAGE); - madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - fill_memory(p, 0, page_size); - - write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); - if (wait_for_scan("Collapse PTE table full of compound pages in child", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - write_num("khugepaged/max_ptes_shared", - default_settings.khugepaged.max_ptes_shared); - - validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); - exit(exit_status); - } - - wait(&wstatus); - exit_status += WEXITSTATUS(wstatus); - - printf("Check if parent still has huge page..."); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); -} - -static void collapse_max_ptes_shared() -{ - int max_ptes_shared = read_num("khugepaged/max_ptes_shared"); - int wstatus; - void *p; - - p = alloc_mapping(); - - printf("Allocate huge page..."); - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - - printf("Share huge page over fork()..."); - if (!fork()) { - /* Do not touch settings on child exit */ - skip_settings_restore = true; - exit_status = 0; - - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - - printf("Trigger CoW on page %d of %d...", - hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr); - fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size); - if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - - if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p)) - fail("Timeout"); - else if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - - printf("Trigger CoW on page %d of %d...", - hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr); - fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size); - if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - - - if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - - validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); - exit(exit_status); - } - - wait(&wstatus); - exit_status += WEXITSTATUS(wstatus); - - printf("Check if parent still has huge page..."); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); -} - -int main(void) -{ - setbuf(stdout, NULL); - - page_size = getpagesize(); - hpage_pmd_size = read_num("hpage_pmd_size"); - hpage_pmd_nr = hpage_pmd_size / page_size; - - default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1; - default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8; - default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2; - default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; - - save_settings(); - adjust_settings(); - - alloc_at_fault(); - collapse_full(); - collapse_empty(); - collapse_single_pte_entry(); - collapse_max_ptes_none(); - collapse_swapin_single_pte(); - collapse_max_ptes_swap(); - collapse_single_pte_entry_compound(); - collapse_full_of_compound(); - collapse_compound_extreme(); - collapse_fork(); - collapse_fork_compound(); - collapse_max_ptes_shared(); - - restore_settings(0); -} diff --git a/tools/testing/selftests/vm/map_fixed_noreplace.c b/tools/testing/selftests/vm/map_fixed_noreplace.c deleted file mode 100644 index d91bde511268..000000000000 --- a/tools/testing/selftests/vm/map_fixed_noreplace.c +++ /dev/null @@ -1,206 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Test that MAP_FIXED_NOREPLACE works. - * - * Copyright 2018, Jann Horn <jannh@google.com> - * Copyright 2018, Michael Ellerman, IBM Corporation. - */ - -#include <sys/mman.h> -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - -#ifndef MAP_FIXED_NOREPLACE -#define MAP_FIXED_NOREPLACE 0x100000 -#endif - -#define BASE_ADDRESS (256ul * 1024 * 1024) - - -static void dump_maps(void) -{ - char cmd[32]; - - snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", getpid()); - system(cmd); -} - -int main(void) -{ - unsigned long flags, addr, size, page_size; - char *p; - - page_size = sysconf(_SC_PAGE_SIZE); - - flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE; - - // Check we can map all the areas we need below - errno = 0; - addr = BASE_ADDRESS; - size = 5 * page_size; - p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); - - printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); - - if (p == MAP_FAILED) { - dump_maps(); - printf("Error: couldn't map the space we need for the test\n"); - return 1; - } - - errno = 0; - if (munmap((void *)addr, 5 * page_size) != 0) { - dump_maps(); - printf("Error: munmap failed!?\n"); - return 1; - } - printf("unmap() successful\n"); - - errno = 0; - addr = BASE_ADDRESS + page_size; - size = 3 * page_size; - p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); - printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); - - if (p == MAP_FAILED) { - dump_maps(); - printf("Error: first mmap() failed unexpectedly\n"); - return 1; - } - - /* - * Exact same mapping again: - * base | free | new - * +1 | mapped | new - * +2 | mapped | new - * +3 | mapped | new - * +4 | free | new - */ - errno = 0; - addr = BASE_ADDRESS; - size = 5 * page_size; - p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); - printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); - - if (p != MAP_FAILED) { - dump_maps(); - printf("Error:1: mmap() succeeded when it shouldn't have\n"); - return 1; - } - - /* - * Second mapping contained within first: - * - * base | free | - * +1 | mapped | - * +2 | mapped | new - * +3 | mapped | - * +4 | free | - */ - errno = 0; - addr = BASE_ADDRESS + (2 * page_size); - size = page_size; - p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); - printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); - - if (p != MAP_FAILED) { - dump_maps(); - printf("Error:2: mmap() succeeded when it shouldn't have\n"); - return 1; - } - - /* - * Overlap end of existing mapping: - * base | free | - * +1 | mapped | - * +2 | mapped | - * +3 | mapped | new - * +4 | free | new - */ - errno = 0; - addr = BASE_ADDRESS + (3 * page_size); - size = 2 * page_size; - p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); - printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); - - if (p != MAP_FAILED) { - dump_maps(); - printf("Error:3: mmap() succeeded when it shouldn't have\n"); - return 1; - } - - /* - * Overlap start of existing mapping: - * base | free | new - * +1 | mapped | new - * +2 | mapped | - * +3 | mapped | - * +4 | free | - */ - errno = 0; - addr = BASE_ADDRESS; - size = 2 * page_size; - p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); - printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); - - if (p != MAP_FAILED) { - dump_maps(); - printf("Error:4: mmap() succeeded when it shouldn't have\n"); - return 1; - } - - /* - * Adjacent to start of existing mapping: - * base | free | new - * +1 | mapped | - * +2 | mapped | - * +3 | mapped | - * +4 | free | - */ - errno = 0; - addr = BASE_ADDRESS; - size = page_size; - p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); - printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); - - if (p == MAP_FAILED) { - dump_maps(); - printf("Error:5: mmap() failed when it shouldn't have\n"); - return 1; - } - - /* - * Adjacent to end of existing mapping: - * base | free | - * +1 | mapped | - * +2 | mapped | - * +3 | mapped | - * +4 | free | new - */ - errno = 0; - addr = BASE_ADDRESS + (4 * page_size); - size = page_size; - p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); - printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); - - if (p == MAP_FAILED) { - dump_maps(); - printf("Error:6: mmap() failed when it shouldn't have\n"); - return 1; - } - - addr = BASE_ADDRESS; - size = 5 * page_size; - if (munmap((void *)addr, size) != 0) { - dump_maps(); - printf("Error: munmap failed!?\n"); - return 1; - } - printf("unmap() successful\n"); - - printf("OK\n"); - return 0; -} diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c deleted file mode 100644 index 312889edb84a..000000000000 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Example of using hugepage memory in a user application using the mmap - * system call with MAP_HUGETLB flag. Before running this program make - * sure the administrator has allocated enough default sized huge pages - * to cover the 256 MB allocation. - * - * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. - * That means the addresses starting with 0x800000... will need to be - * specified. Specifying a fixed address is not required on ppc64, i386 - * or x86_64. - */ -#include <stdlib.h> -#include <stdio.h> -#include <unistd.h> -#include <sys/mman.h> -#include <fcntl.h> - -#define LENGTH (256UL*1024*1024) -#define PROTECTION (PROT_READ | PROT_WRITE) - -#ifndef MAP_HUGETLB -#define MAP_HUGETLB 0x40000 /* arch specific */ -#endif - -#ifndef MAP_HUGE_SHIFT -#define MAP_HUGE_SHIFT 26 -#endif - -#ifndef MAP_HUGE_MASK -#define MAP_HUGE_MASK 0x3f -#endif - -/* Only ia64 requires this */ -#ifdef __ia64__ -#define ADDR (void *)(0x8000000000000000UL) -#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED) -#else -#define ADDR (void *)(0x0UL) -#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) -#endif - -static void check_bytes(char *addr) -{ - printf("First hex is %x\n", *((unsigned int *)addr)); -} - -static void write_bytes(char *addr, size_t length) -{ - unsigned long i; - - for (i = 0; i < length; i++) - *(addr + i) = (char)i; -} - -static int read_bytes(char *addr, size_t length) -{ - unsigned long i; - - check_bytes(addr); - for (i = 0; i < length; i++) - if (*(addr + i) != (char)i) { - printf("Mismatch at %lu\n", i); - return 1; - } - return 0; -} - -int main(int argc, char **argv) -{ - void *addr; - int ret; - size_t length = LENGTH; - int flags = FLAGS; - int shift = 0; - - if (argc > 1) - length = atol(argv[1]) << 20; - if (argc > 2) { - shift = atoi(argv[2]); - if (shift) - flags |= (shift & MAP_HUGE_MASK) << MAP_HUGE_SHIFT; - } - - if (shift) - printf("%u kB hugepages\n", 1 << (shift - 10)); - else - printf("Default size hugepages\n"); - printf("Mapping %lu Mbytes\n", (unsigned long)length >> 20); - - addr = mmap(ADDR, length, PROTECTION, flags, -1, 0); - if (addr == MAP_FAILED) { - perror("mmap"); - exit(1); - } - - printf("Returned address is %p\n", addr); - check_bytes(addr); - write_bytes(addr, length); - ret = read_bytes(addr, length); - - /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */ - if (munmap(addr, length)) { - perror("munmap"); - exit(1); - } - - return ret; -} diff --git a/tools/testing/selftests/vm/map_populate.c b/tools/testing/selftests/vm/map_populate.c deleted file mode 100644 index 6b8aeaa0bf7a..000000000000 --- a/tools/testing/selftests/vm/map_populate.c +++ /dev/null @@ -1,113 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2018 Dmitry Safonov, Arista Networks - * - * MAP_POPULATE | MAP_PRIVATE should COW VMA pages. - */ - -#define _GNU_SOURCE -#include <errno.h> -#include <fcntl.h> -#include <sys/mman.h> -#include <sys/socket.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#ifndef MMAP_SZ -#define MMAP_SZ 4096 -#endif - -#define BUG_ON(condition, description) \ - do { \ - if (condition) { \ - fprintf(stderr, "[FAIL]\t%s:%d\t%s:%s\n", __func__, \ - __LINE__, (description), strerror(errno)); \ - exit(1); \ - } \ - } while (0) - -static int parent_f(int sock, unsigned long *smap, int child) -{ - int status, ret; - - ret = read(sock, &status, sizeof(int)); - BUG_ON(ret <= 0, "read(sock)"); - - *smap = 0x22222BAD; - ret = msync(smap, MMAP_SZ, MS_SYNC); - BUG_ON(ret, "msync()"); - - ret = write(sock, &status, sizeof(int)); - BUG_ON(ret <= 0, "write(sock)"); - - waitpid(child, &status, 0); - BUG_ON(!WIFEXITED(status), "child in unexpected state"); - - return WEXITSTATUS(status); -} - -static int child_f(int sock, unsigned long *smap, int fd) -{ - int ret, buf = 0; - - smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_POPULATE, fd, 0); - BUG_ON(smap == MAP_FAILED, "mmap()"); - - BUG_ON(*smap != 0xdeadbabe, "MAP_PRIVATE | MAP_POPULATE changed file"); - - ret = write(sock, &buf, sizeof(int)); - BUG_ON(ret <= 0, "write(sock)"); - - ret = read(sock, &buf, sizeof(int)); - BUG_ON(ret <= 0, "read(sock)"); - - BUG_ON(*smap == 0x22222BAD, "MAP_POPULATE didn't COW private page"); - BUG_ON(*smap != 0xdeadbabe, "mapping was corrupted"); - - return 0; -} - -int main(int argc, char **argv) -{ - int sock[2], child, ret; - FILE *ftmp; - unsigned long *smap; - - ftmp = tmpfile(); - BUG_ON(ftmp == 0, "tmpfile()"); - - ret = ftruncate(fileno(ftmp), MMAP_SZ); - BUG_ON(ret, "ftruncate()"); - - smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE, - MAP_SHARED, fileno(ftmp), 0); - BUG_ON(smap == MAP_FAILED, "mmap()"); - - *smap = 0xdeadbabe; - /* Probably unnecessary, but let it be. */ - ret = msync(smap, MMAP_SZ, MS_SYNC); - BUG_ON(ret, "msync()"); - - ret = socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sock); - BUG_ON(ret, "socketpair()"); - - child = fork(); - BUG_ON(child == -1, "fork()"); - - if (child) { - ret = close(sock[0]); - BUG_ON(ret, "close()"); - - return parent_f(sock[1], smap, child); - } - - ret = close(sock[1]); - BUG_ON(ret, "close()"); - - return child_f(sock[0], smap, fileno(ftmp)); -} diff --git a/tools/testing/selftests/vm/mlock-random-test.c b/tools/testing/selftests/vm/mlock-random-test.c deleted file mode 100644 index ff4d72eb74b9..000000000000 --- a/tools/testing/selftests/vm/mlock-random-test.c +++ /dev/null @@ -1,294 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * It tests the mlock/mlock2() when they are invoked - * on randomly memory region. - */ -#include <unistd.h> -#include <sys/resource.h> -#include <sys/capability.h> -#include <sys/mman.h> -#include <fcntl.h> -#include <string.h> -#include <sys/ipc.h> -#include <sys/shm.h> -#include <time.h> -#include "mlock2.h" - -#define CHUNK_UNIT (128 * 1024) -#define MLOCK_RLIMIT_SIZE (CHUNK_UNIT * 2) -#define MLOCK_WITHIN_LIMIT_SIZE CHUNK_UNIT -#define MLOCK_OUTOF_LIMIT_SIZE (CHUNK_UNIT * 3) - -#define TEST_LOOP 100 -#define PAGE_ALIGN(size, ps) (((size) + ((ps) - 1)) & ~((ps) - 1)) - -int set_cap_limits(rlim_t max) -{ - struct rlimit new; - cap_t cap = cap_init(); - - new.rlim_cur = max; - new.rlim_max = max; - if (setrlimit(RLIMIT_MEMLOCK, &new)) { - perror("setrlimit() returns error\n"); - return -1; - } - - /* drop capabilities including CAP_IPC_LOCK */ - if (cap_set_proc(cap)) { - perror("cap_set_proc() returns error\n"); - return -2; - } - - return 0; -} - -int get_proc_locked_vm_size(void) -{ - FILE *f; - int ret = -1; - char line[1024] = {0}; - unsigned long lock_size = 0; - - f = fopen("/proc/self/status", "r"); - if (!f) { - perror("fopen"); - return -1; - } - - while (fgets(line, 1024, f)) { - if (strstr(line, "VmLck")) { - ret = sscanf(line, "VmLck:\t%8lu kB", &lock_size); - if (ret <= 0) { - printf("sscanf() on VmLck error: %s: %d\n", - line, ret); - fclose(f); - return -1; - } - fclose(f); - return (int)(lock_size << 10); - } - } - - perror("cann't parse VmLck in /proc/self/status\n"); - fclose(f); - return -1; -} - -/* - * Get the MMUPageSize of the memory region including input - * address from proc file. - * - * return value: on error case, 0 will be returned. - * Otherwise the page size(in bytes) is returned. - */ -int get_proc_page_size(unsigned long addr) -{ - FILE *smaps; - char *line; - unsigned long mmupage_size = 0; - size_t size; - - smaps = seek_to_smaps_entry(addr); - if (!smaps) { - printf("Unable to parse /proc/self/smaps\n"); - return 0; - } - - while (getline(&line, &size, smaps) > 0) { - if (!strstr(line, "MMUPageSize")) { - free(line); - line = NULL; - size = 0; - continue; - } - - /* found the MMUPageSize of this section */ - if (sscanf(line, "MMUPageSize: %8lu kB", - &mmupage_size) < 1) { - printf("Unable to parse smaps entry for Size:%s\n", - line); - break; - } - - } - free(line); - if (smaps) - fclose(smaps); - return mmupage_size << 10; -} - -/* - * Test mlock/mlock2() on provided memory chunk. - * It expects the mlock/mlock2() to be successful (within rlimit) - * - * With allocated memory chunk [p, p + alloc_size), this - * test will choose start/len randomly to perform mlock/mlock2 - * [start, start + len] memory range. The range is within range - * of the allocated chunk. - * - * The memory region size alloc_size is within the rlimit. - * So we always expect a success of mlock/mlock2. - * - * VmLck is assumed to be 0 before this test. - * - * return value: 0 - success - * else: failure - */ -int test_mlock_within_limit(char *p, int alloc_size) -{ - int i; - int ret = 0; - int locked_vm_size = 0; - struct rlimit cur; - int page_size = 0; - - getrlimit(RLIMIT_MEMLOCK, &cur); - if (cur.rlim_cur < alloc_size) { - printf("alloc_size[%d] < %u rlimit,lead to mlock failure\n", - alloc_size, (unsigned int)cur.rlim_cur); - return -1; - } - - srand(time(NULL)); - for (i = 0; i < TEST_LOOP; i++) { - /* - * - choose mlock/mlock2 randomly - * - choose lock_size randomly but lock_size < alloc_size - * - choose start_offset randomly but p+start_offset+lock_size - * < p+alloc_size - */ - int is_mlock = !!(rand() % 2); - int lock_size = rand() % alloc_size; - int start_offset = rand() % (alloc_size - lock_size); - - if (is_mlock) - ret = mlock(p + start_offset, lock_size); - else - ret = mlock2_(p + start_offset, lock_size, - MLOCK_ONFAULT); - - if (ret) { - printf("%s() failure at |%p(%d)| mlock:|%p(%d)|\n", - is_mlock ? "mlock" : "mlock2", - p, alloc_size, - p + start_offset, lock_size); - return ret; - } - } - - /* - * Check VmLck left by the tests. - */ - locked_vm_size = get_proc_locked_vm_size(); - page_size = get_proc_page_size((unsigned long)p); - if (page_size == 0) { - printf("cannot get proc MMUPageSize\n"); - return -1; - } - - if (locked_vm_size > PAGE_ALIGN(alloc_size, page_size) + page_size) { - printf("test_mlock_within_limit() left VmLck:%d on %d chunk\n", - locked_vm_size, alloc_size); - return -1; - } - - return 0; -} - - -/* - * We expect the mlock/mlock2() to be fail (outof limitation) - * - * With allocated memory chunk [p, p + alloc_size), this - * test will randomly choose start/len and perform mlock/mlock2 - * on [start, start+len] range. - * - * The memory region size alloc_size is above the rlimit. - * And the len to be locked is higher than rlimit. - * So we always expect a failure of mlock/mlock2. - * No locked page number should be increased as a side effect. - * - * return value: 0 - success - * else: failure - */ -int test_mlock_outof_limit(char *p, int alloc_size) -{ - int i; - int ret = 0; - int locked_vm_size = 0, old_locked_vm_size = 0; - struct rlimit cur; - - getrlimit(RLIMIT_MEMLOCK, &cur); - if (cur.rlim_cur >= alloc_size) { - printf("alloc_size[%d] >%u rlimit, violates test condition\n", - alloc_size, (unsigned int)cur.rlim_cur); - return -1; - } - - old_locked_vm_size = get_proc_locked_vm_size(); - srand(time(NULL)); - for (i = 0; i < TEST_LOOP; i++) { - int is_mlock = !!(rand() % 2); - int lock_size = (rand() % (alloc_size - cur.rlim_cur)) - + cur.rlim_cur; - int start_offset = rand() % (alloc_size - lock_size); - - if (is_mlock) - ret = mlock(p + start_offset, lock_size); - else - ret = mlock2_(p + start_offset, lock_size, - MLOCK_ONFAULT); - if (ret == 0) { - printf("%s() succeeds? on %p(%d) mlock%p(%d)\n", - is_mlock ? "mlock" : "mlock2", - p, alloc_size, - p + start_offset, lock_size); - return -1; - } - } - - locked_vm_size = get_proc_locked_vm_size(); - if (locked_vm_size != old_locked_vm_size) { - printf("tests leads to new mlocked page: old[%d], new[%d]\n", - old_locked_vm_size, - locked_vm_size); - return -1; - } - - return 0; -} - -int main(int argc, char **argv) -{ - char *p = NULL; - int ret = 0; - - if (set_cap_limits(MLOCK_RLIMIT_SIZE)) - return -1; - - p = malloc(MLOCK_WITHIN_LIMIT_SIZE); - if (p == NULL) { - perror("malloc() failure\n"); - return -1; - } - ret = test_mlock_within_limit(p, MLOCK_WITHIN_LIMIT_SIZE); - if (ret) - return ret; - munlock(p, MLOCK_WITHIN_LIMIT_SIZE); - free(p); - - - p = malloc(MLOCK_OUTOF_LIMIT_SIZE); - if (p == NULL) { - perror("malloc() failure\n"); - return -1; - } - ret = test_mlock_outof_limit(p, MLOCK_OUTOF_LIMIT_SIZE); - if (ret) - return ret; - munlock(p, MLOCK_OUTOF_LIMIT_SIZE); - free(p); - - return 0; -} diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c deleted file mode 100644 index 11b2301f3aa3..000000000000 --- a/tools/testing/selftests/vm/mlock2-tests.c +++ /dev/null @@ -1,520 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE -#include <sys/mman.h> -#include <stdint.h> -#include <unistd.h> -#include <string.h> -#include <sys/time.h> -#include <sys/resource.h> -#include <stdbool.h> -#include "mlock2.h" - -#include "../kselftest.h" - -struct vm_boundaries { - unsigned long start; - unsigned long end; -}; - -static int get_vm_area(unsigned long addr, struct vm_boundaries *area) -{ - FILE *file; - int ret = 1; - char line[1024] = {0}; - char *end_addr; - char *stop; - unsigned long start; - unsigned long end; - - if (!area) - return ret; - - file = fopen("/proc/self/maps", "r"); - if (!file) { - perror("fopen"); - return ret; - } - - memset(area, 0, sizeof(struct vm_boundaries)); - - while(fgets(line, 1024, file)) { - end_addr = strchr(line, '-'); - if (!end_addr) { - printf("cannot parse /proc/self/maps\n"); - goto out; - } - *end_addr = '\0'; - end_addr++; - stop = strchr(end_addr, ' '); - if (!stop) { - printf("cannot parse /proc/self/maps\n"); - goto out; - } - stop = '\0'; - - sscanf(line, "%lx", &start); - sscanf(end_addr, "%lx", &end); - - if (start <= addr && end > addr) { - area->start = start; - area->end = end; - ret = 0; - goto out; - } - } -out: - fclose(file); - return ret; -} - -#define VMFLAGS "VmFlags:" - -static bool is_vmflag_set(unsigned long addr, const char *vmflag) -{ - char *line = NULL; - char *flags; - size_t size = 0; - bool ret = false; - FILE *smaps; - - smaps = seek_to_smaps_entry(addr); - if (!smaps) { - printf("Unable to parse /proc/self/smaps\n"); - goto out; - } - - while (getline(&line, &size, smaps) > 0) { - if (!strstr(line, VMFLAGS)) { - free(line); - line = NULL; - size = 0; - continue; - } - - flags = line + strlen(VMFLAGS); - ret = (strstr(flags, vmflag) != NULL); - goto out; - } - -out: - free(line); - fclose(smaps); - return ret; -} - -#define SIZE "Size:" -#define RSS "Rss:" -#define LOCKED "lo" - -static unsigned long get_value_for_name(unsigned long addr, const char *name) -{ - char *line = NULL; - size_t size = 0; - char *value_ptr; - FILE *smaps = NULL; - unsigned long value = -1UL; - - smaps = seek_to_smaps_entry(addr); - if (!smaps) { - printf("Unable to parse /proc/self/smaps\n"); - goto out; - } - - while (getline(&line, &size, smaps) > 0) { - if (!strstr(line, name)) { - free(line); - line = NULL; - size = 0; - continue; - } - - value_ptr = line + strlen(name); - if (sscanf(value_ptr, "%lu kB", &value) < 1) { - printf("Unable to parse smaps entry for Size\n"); - goto out; - } - break; - } - -out: - if (smaps) - fclose(smaps); - free(line); - return value; -} - -static bool is_vma_lock_on_fault(unsigned long addr) -{ - bool locked; - unsigned long vma_size, vma_rss; - - locked = is_vmflag_set(addr, LOCKED); - if (!locked) - return false; - - vma_size = get_value_for_name(addr, SIZE); - vma_rss = get_value_for_name(addr, RSS); - - /* only one page is faulted in */ - return (vma_rss < vma_size); -} - -#define PRESENT_BIT 0x8000000000000000ULL -#define PFN_MASK 0x007FFFFFFFFFFFFFULL -#define UNEVICTABLE_BIT (1UL << 18) - -static int lock_check(unsigned long addr) -{ - bool locked; - unsigned long vma_size, vma_rss; - - locked = is_vmflag_set(addr, LOCKED); - if (!locked) - return false; - - vma_size = get_value_for_name(addr, SIZE); - vma_rss = get_value_for_name(addr, RSS); - - return (vma_rss == vma_size); -} - -static int unlock_lock_check(char *map) -{ - if (is_vmflag_set((unsigned long)map, LOCKED)) { - printf("VMA flag %s is present on page 1 after unlock\n", LOCKED); - return 1; - } - - return 0; -} - -static int test_mlock_lock() -{ - char *map; - int ret = 1; - unsigned long page_size = getpagesize(); - - map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (map == MAP_FAILED) { - perror("test_mlock_locked mmap"); - goto out; - } - - if (mlock2_(map, 2 * page_size, 0)) { - if (errno == ENOSYS) { - printf("Cannot call new mlock family, skipping test\n"); - _exit(KSFT_SKIP); - } - perror("mlock2(0)"); - goto unmap; - } - - if (!lock_check((unsigned long)map)) - goto unmap; - - /* Now unlock and recheck attributes */ - if (munlock(map, 2 * page_size)) { - perror("munlock()"); - goto unmap; - } - - ret = unlock_lock_check(map); - -unmap: - munmap(map, 2 * page_size); -out: - return ret; -} - -static int onfault_check(char *map) -{ - *map = 'a'; - if (!is_vma_lock_on_fault((unsigned long)map)) { - printf("VMA is not marked for lock on fault\n"); - return 1; - } - - return 0; -} - -static int unlock_onfault_check(char *map) -{ - unsigned long page_size = getpagesize(); - - if (is_vma_lock_on_fault((unsigned long)map) || - is_vma_lock_on_fault((unsigned long)map + page_size)) { - printf("VMA is still lock on fault after unlock\n"); - return 1; - } - - return 0; -} - -static int test_mlock_onfault() -{ - char *map; - int ret = 1; - unsigned long page_size = getpagesize(); - - map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (map == MAP_FAILED) { - perror("test_mlock_locked mmap"); - goto out; - } - - if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) { - if (errno == ENOSYS) { - printf("Cannot call new mlock family, skipping test\n"); - _exit(KSFT_SKIP); - } - perror("mlock2(MLOCK_ONFAULT)"); - goto unmap; - } - - if (onfault_check(map)) - goto unmap; - - /* Now unlock and recheck attributes */ - if (munlock(map, 2 * page_size)) { - if (errno == ENOSYS) { - printf("Cannot call new mlock family, skipping test\n"); - _exit(KSFT_SKIP); - } - perror("munlock()"); - goto unmap; - } - - ret = unlock_onfault_check(map); -unmap: - munmap(map, 2 * page_size); -out: - return ret; -} - -static int test_lock_onfault_of_present() -{ - char *map; - int ret = 1; - unsigned long page_size = getpagesize(); - - map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (map == MAP_FAILED) { - perror("test_mlock_locked mmap"); - goto out; - } - - *map = 'a'; - - if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) { - if (errno == ENOSYS) { - printf("Cannot call new mlock family, skipping test\n"); - _exit(KSFT_SKIP); - } - perror("mlock2(MLOCK_ONFAULT)"); - goto unmap; - } - - if (!is_vma_lock_on_fault((unsigned long)map) || - !is_vma_lock_on_fault((unsigned long)map + page_size)) { - printf("VMA with present pages is not marked lock on fault\n"); - goto unmap; - } - ret = 0; -unmap: - munmap(map, 2 * page_size); -out: - return ret; -} - -static int test_munlockall() -{ - char *map; - int ret = 1; - unsigned long page_size = getpagesize(); - - map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - - if (map == MAP_FAILED) { - perror("test_munlockall mmap"); - goto out; - } - - if (mlockall(MCL_CURRENT)) { - perror("mlockall(MCL_CURRENT)"); - goto out; - } - - if (!lock_check((unsigned long)map)) - goto unmap; - - if (munlockall()) { - perror("munlockall()"); - goto unmap; - } - - if (unlock_lock_check(map)) - goto unmap; - - munmap(map, 2 * page_size); - - map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - - if (map == MAP_FAILED) { - perror("test_munlockall second mmap"); - goto out; - } - - if (mlockall(MCL_CURRENT | MCL_ONFAULT)) { - perror("mlockall(MCL_CURRENT | MCL_ONFAULT)"); - goto unmap; - } - - if (onfault_check(map)) - goto unmap; - - if (munlockall()) { - perror("munlockall()"); - goto unmap; - } - - if (unlock_onfault_check(map)) - goto unmap; - - if (mlockall(MCL_CURRENT | MCL_FUTURE)) { - perror("mlockall(MCL_CURRENT | MCL_FUTURE)"); - goto out; - } - - if (!lock_check((unsigned long)map)) - goto unmap; - - if (munlockall()) { - perror("munlockall()"); - goto unmap; - } - - ret = unlock_lock_check(map); - -unmap: - munmap(map, 2 * page_size); -out: - munlockall(); - return ret; -} - -static int test_vma_management(bool call_mlock) -{ - int ret = 1; - void *map; - unsigned long page_size = getpagesize(); - struct vm_boundaries page1; - struct vm_boundaries page2; - struct vm_boundaries page3; - - map = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (map == MAP_FAILED) { - perror("mmap()"); - return ret; - } - - if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) { - if (errno == ENOSYS) { - printf("Cannot call new mlock family, skipping test\n"); - _exit(KSFT_SKIP); - } - perror("mlock(ONFAULT)\n"); - goto out; - } - - if (get_vm_area((unsigned long)map, &page1) || - get_vm_area((unsigned long)map + page_size, &page2) || - get_vm_area((unsigned long)map + page_size * 2, &page3)) { - printf("couldn't find mapping in /proc/self/maps\n"); - goto out; - } - - /* - * Before we unlock a portion, we need to that all three pages are in - * the same VMA. If they are not we abort this test (Note that this is - * not a failure) - */ - if (page1.start != page2.start || page2.start != page3.start) { - printf("VMAs are not merged to start, aborting test\n"); - ret = 0; - goto out; - } - - if (munlock(map + page_size, page_size)) { - perror("munlock()"); - goto out; - } - - if (get_vm_area((unsigned long)map, &page1) || - get_vm_area((unsigned long)map + page_size, &page2) || - get_vm_area((unsigned long)map + page_size * 2, &page3)) { - printf("couldn't find mapping in /proc/self/maps\n"); - goto out; - } - - /* All three VMAs should be different */ - if (page1.start == page2.start || page2.start == page3.start) { - printf("failed to split VMA for munlock\n"); - goto out; - } - - /* Now unlock the first and third page and check the VMAs again */ - if (munlock(map, page_size * 3)) { - perror("munlock()"); - goto out; - } - - if (get_vm_area((unsigned long)map, &page1) || - get_vm_area((unsigned long)map + page_size, &page2) || - get_vm_area((unsigned long)map + page_size * 2, &page3)) { - printf("couldn't find mapping in /proc/self/maps\n"); - goto out; - } - - /* Now all three VMAs should be the same */ - if (page1.start != page2.start || page2.start != page3.start) { - printf("failed to merge VMAs after munlock\n"); - goto out; - } - - ret = 0; -out: - munmap(map, 3 * page_size); - return ret; -} - -static int test_mlockall(int (test_function)(bool call_mlock)) -{ - int ret = 1; - - if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE)) { - perror("mlockall"); - return ret; - } - - ret = test_function(false); - munlockall(); - return ret; -} - -int main(int argc, char **argv) -{ - int ret = 0; - ret += test_mlock_lock(); - ret += test_mlock_onfault(); - ret += test_munlockall(); - ret += test_lock_onfault_of_present(); - ret += test_vma_management(true); - ret += test_mlockall(test_vma_management); - return ret; -} diff --git a/tools/testing/selftests/vm/on-fault-limit.c b/tools/testing/selftests/vm/on-fault-limit.c deleted file mode 100644 index 634d87dfb2a4..000000000000 --- a/tools/testing/selftests/vm/on-fault-limit.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <sys/mman.h> -#include <stdio.h> -#include <unistd.h> -#include <string.h> -#include <sys/time.h> -#include <sys/resource.h> - -#ifndef MCL_ONFAULT -#define MCL_ONFAULT (MCL_FUTURE << 1) -#endif - -static int test_limit(void) -{ - int ret = 1; - struct rlimit lims; - void *map; - - if (getrlimit(RLIMIT_MEMLOCK, &lims)) { - perror("getrlimit"); - return ret; - } - - if (mlockall(MCL_ONFAULT | MCL_FUTURE)) { - perror("mlockall"); - return ret; - } - - map = mmap(NULL, 2 * lims.rlim_max, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0); - if (map != MAP_FAILED) - printf("mmap should have failed, but didn't\n"); - else { - ret = 0; - munmap(map, 2 * lims.rlim_max); - } - - munlockall(); - return ret; -} - -int main(int argc, char **argv) -{ - int ret = 0; - - ret += test_limit(); - return ret; -} diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests deleted file mode 100755 index a3f4f30f0a2e..000000000000 --- a/tools/testing/selftests/vm/run_vmtests +++ /dev/null @@ -1,326 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -#please run as root - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -mnt=./huge -exitcode=0 - -#get huge pagesize and freepages from /proc/meminfo -while read name size unit; do - if [ "$name" = "HugePages_Free:" ]; then - freepgs=$size - fi - if [ "$name" = "Hugepagesize:" ]; then - hpgsize_KB=$size - fi -done < /proc/meminfo - -# Simple hugetlbfs tests have a hardcoded minimum requirement of -# huge pages totaling 256MB (262144KB) in size. The userfaultfd -# hugetlb test requires a minimum of 2 * nr_cpus huge pages. Take -# both of these requirements into account and attempt to increase -# number of huge pages available. -nr_cpus=$(nproc) -hpgsize_MB=$((hpgsize_KB / 1024)) -half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128)) -needmem_KB=$((half_ufd_size_MB * 2 * 1024)) - -#set proper nr_hugepages -if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then - nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` - needpgs=$((needmem_KB / hpgsize_KB)) - tries=2 - while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do - lackpgs=$(( $needpgs - $freepgs )) - echo 3 > /proc/sys/vm/drop_caches - echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages - if [ $? -ne 0 ]; then - echo "Please run this test as root" - exit $ksft_skip - fi - while read name size unit; do - if [ "$name" = "HugePages_Free:" ]; then - freepgs=$size - fi - done < /proc/meminfo - tries=$((tries - 1)) - done - if [ $freepgs -lt $needpgs ]; then - printf "Not enough huge pages available (%d < %d)\n" \ - $freepgs $needpgs - exit 1 - fi -else - echo "no hugetlbfs support in kernel?" - exit 1 -fi - -#filter 64bit architectures -ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64" -if [ -z $ARCH ]; then - ARCH=`uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/'` -fi -VADDR64=0 -echo "$ARCH64STR" | grep $ARCH && VADDR64=1 - -mkdir $mnt -mount -t hugetlbfs none $mnt - -echo "---------------------" -echo "running hugepage-mmap" -echo "---------------------" -./hugepage-mmap -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -shmmax=`cat /proc/sys/kernel/shmmax` -shmall=`cat /proc/sys/kernel/shmall` -echo 268435456 > /proc/sys/kernel/shmmax -echo 4194304 > /proc/sys/kernel/shmall -echo "--------------------" -echo "running hugepage-shm" -echo "--------------------" -./hugepage-shm -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi -echo $shmmax > /proc/sys/kernel/shmmax -echo $shmall > /proc/sys/kernel/shmall - -echo "-------------------" -echo "running map_hugetlb" -echo "-------------------" -./map_hugetlb -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "NOTE: The above hugetlb tests provide minimal coverage. Use" -echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" -echo " hugetlb regression testing." - -echo "---------------------------" -echo "running map_fixed_noreplace" -echo "---------------------------" -./map_fixed_noreplace -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "--------------------------------------------" -echo "running 'gup_benchmark -U' (normal/slow gup)" -echo "--------------------------------------------" -./gup_benchmark -U -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "------------------------------------------" -echo "running gup_benchmark -b (pin_user_pages)" -echo "------------------------------------------" -./gup_benchmark -b -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "-------------------" -echo "running userfaultfd" -echo "-------------------" -./userfaultfd anon 128 32 -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "---------------------------" -echo "running userfaultfd_hugetlb" -echo "---------------------------" -# Test requires source and destination huge pages. Size of source -# (half_ufd_size_MB) is passed as argument to test. -./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi -rm -f $mnt/ufd_test_file - -echo "-------------------------" -echo "running userfaultfd_shmem" -echo "-------------------------" -./userfaultfd shmem 128 32 -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -#cleanup -umount $mnt -rm -rf $mnt -echo $nr_hugepgs > /proc/sys/vm/nr_hugepages - -echo "-----------------------" -echo "running compaction_test" -echo "-----------------------" -./compaction_test -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "----------------------" -echo "running on-fault-limit" -echo "----------------------" -sudo -u nobody ./on-fault-limit -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "--------------------" -echo "running map_populate" -echo "--------------------" -./map_populate -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "-------------------------" -echo "running mlock-random-test" -echo "-------------------------" -./mlock-random-test -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "--------------------" -echo "running mlock2-tests" -echo "--------------------" -./mlock2-tests -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "-----------------" -echo "running thuge-gen" -echo "-----------------" -./thuge-gen -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -if [ $VADDR64 -ne 0 ]; then -echo "-----------------------------" -echo "running virtual_address_range" -echo "-----------------------------" -./virtual_address_range -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "-----------------------------" -echo "running virtual address 128TB switch test" -echo "-----------------------------" -./va_128TBswitch -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi -fi # VADDR64 - -echo "------------------------------------" -echo "running vmalloc stability smoke test" -echo "------------------------------------" -./test_vmalloc.sh smoke -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -echo "------------------------------------" -echo "running MREMAP_DONTUNMAP smoke test" -echo "------------------------------------" -./mremap_dontunmap -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -echo "running HMM smoke test" -echo "------------------------------------" -./test_hmm.sh smoke -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -exit $exitcode diff --git a/tools/testing/selftests/vm/thuge-gen.c b/tools/testing/selftests/vm/thuge-gen.c deleted file mode 100644 index 361ef7192cc6..000000000000 --- a/tools/testing/selftests/vm/thuge-gen.c +++ /dev/null @@ -1,257 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Test selecting other page sizes for mmap/shmget. - - Before running this huge pages for each huge page size must have been - reserved. - For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used. - Also shmmax must be increased. - And you need to run as root to work around some weird permissions in shm. - And nothing using huge pages should run in parallel. - When the program aborts you may need to clean up the shm segments with - ipcrm -m by hand, like this - sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m - (warning this will remove all if someone else uses them) */ - -#define _GNU_SOURCE 1 -#include <sys/mman.h> -#include <stdlib.h> -#include <stdio.h> -#include <sys/ipc.h> -#include <sys/shm.h> -#include <sys/stat.h> -#include <glob.h> -#include <assert.h> -#include <unistd.h> -#include <stdarg.h> -#include <string.h> - -#define err(x) perror(x), exit(1) - -#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) -#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) -#define MAP_HUGE_SHIFT 26 -#define MAP_HUGE_MASK 0x3f -#if !defined(MAP_HUGETLB) -#define MAP_HUGETLB 0x40000 -#endif - -#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ -#define SHM_HUGE_SHIFT 26 -#define SHM_HUGE_MASK 0x3f -#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT) -#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) - -#define NUM_PAGESIZES 5 - -#define NUM_PAGES 4 - -#define Dprintf(fmt...) // printf(fmt) - -unsigned long page_sizes[NUM_PAGESIZES]; -int num_page_sizes; - -int ilog2(unsigned long v) -{ - int l = 0; - while ((1UL << l) < v) - l++; - return l; -} - -void find_pagesizes(void) -{ - glob_t g; - int i; - glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g); - assert(g.gl_pathc <= NUM_PAGESIZES); - for (i = 0; i < g.gl_pathc; i++) { - sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB", - &page_sizes[i]); - page_sizes[i] <<= 10; - printf("Found %luMB\n", page_sizes[i] >> 20); - } - num_page_sizes = g.gl_pathc; - globfree(&g); -} - -unsigned long default_huge_page_size(void) -{ - unsigned long hps = 0; - char *line = NULL; - size_t linelen = 0; - FILE *f = fopen("/proc/meminfo", "r"); - if (!f) - return 0; - while (getline(&line, &linelen, f) > 0) { - if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { - hps <<= 10; - break; - } - } - free(line); - return hps; -} - -void show(unsigned long ps) -{ - char buf[100]; - if (ps == getpagesize()) - return; - printf("%luMB: ", ps >> 20); - fflush(stdout); - snprintf(buf, sizeof buf, - "cat /sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", - ps >> 10); - system(buf); -} - -unsigned long read_sysfs(int warn, char *fmt, ...) -{ - char *line = NULL; - size_t linelen = 0; - char buf[100]; - FILE *f; - va_list ap; - unsigned long val = 0; - - va_start(ap, fmt); - vsnprintf(buf, sizeof buf, fmt, ap); - va_end(ap); - - f = fopen(buf, "r"); - if (!f) { - if (warn) - printf("missing %s\n", buf); - return 0; - } - if (getline(&line, &linelen, f) > 0) { - sscanf(line, "%lu", &val); - } - fclose(f); - free(line); - return val; -} - -unsigned long read_free(unsigned long ps) -{ - return read_sysfs(ps != getpagesize(), - "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", - ps >> 10); -} - -void test_mmap(unsigned long size, unsigned flags) -{ - char *map; - unsigned long before, after; - int err; - - before = read_free(size); - map = mmap(NULL, size*NUM_PAGES, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, -1, 0); - - if (map == (char *)-1) err("mmap"); - memset(map, 0xff, size*NUM_PAGES); - after = read_free(size); - Dprintf("before %lu after %lu diff %ld size %lu\n", - before, after, before - after, size); - assert(size == getpagesize() || (before - after) == NUM_PAGES); - show(size); - err = munmap(map, size); - assert(!err); -} - -void test_shmget(unsigned long size, unsigned flags) -{ - int id; - unsigned long before, after; - int err; - - before = read_free(size); - id = shmget(IPC_PRIVATE, size * NUM_PAGES, IPC_CREAT|0600|flags); - if (id < 0) err("shmget"); - - struct shm_info i; - if (shmctl(id, SHM_INFO, (void *)&i) < 0) err("shmctl"); - Dprintf("alloc %lu res %lu\n", i.shm_tot, i.shm_rss); - - - Dprintf("id %d\n", id); - char *map = shmat(id, NULL, 0600); - if (map == (char*)-1) err("shmat"); - - shmctl(id, IPC_RMID, NULL); - - memset(map, 0xff, size*NUM_PAGES); - after = read_free(size); - - Dprintf("before %lu after %lu diff %ld size %lu\n", - before, after, before - after, size); - assert(size == getpagesize() || (before - after) == NUM_PAGES); - show(size); - err = shmdt(map); - assert(!err); -} - -void sanity_checks(void) -{ - int i; - unsigned long largest = getpagesize(); - - for (i = 0; i < num_page_sizes; i++) { - if (page_sizes[i] > largest) - largest = page_sizes[i]; - - if (read_free(page_sizes[i]) < NUM_PAGES) { - printf("Not enough huge pages for page size %lu MB, need %u\n", - page_sizes[i] >> 20, - NUM_PAGES); - exit(0); - } - } - - if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) { - printf("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES); - exit(0); - } - -#if defined(__x86_64__) - if (largest != 1U<<30) { - printf("No GB pages available on x86-64\n" - "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES); - exit(0); - } -#endif -} - -int main(void) -{ - int i; - unsigned default_hps = default_huge_page_size(); - - find_pagesizes(); - - sanity_checks(); - - for (i = 0; i < num_page_sizes; i++) { - unsigned long ps = page_sizes[i]; - int arg = ilog2(ps) << MAP_HUGE_SHIFT; - printf("Testing %luMB mmap with shift %x\n", ps >> 20, arg); - test_mmap(ps, MAP_HUGETLB | arg); - } - printf("Testing default huge mmap\n"); - test_mmap(default_hps, SHM_HUGETLB); - - puts("Testing non-huge shmget"); - test_shmget(getpagesize(), 0); - - for (i = 0; i < num_page_sizes; i++) { - unsigned long ps = page_sizes[i]; - int arg = ilog2(ps) << SHM_HUGE_SHIFT; - printf("Testing %luMB shmget with shift %x\n", ps >> 20, arg); - test_shmget(ps, SHM_HUGETLB | arg); - } - puts("default huge shmget"); - test_shmget(default_hps, SHM_HUGETLB); - - return 0; -} diff --git a/tools/testing/selftests/vm/transhuge-stress.c b/tools/testing/selftests/vm/transhuge-stress.c deleted file mode 100644 index fd7f1b4a96f9..000000000000 --- a/tools/testing/selftests/vm/transhuge-stress.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Stress test for transparent huge pages, memory compaction and migration. - * - * Authors: Konstantin Khlebnikov <koct9i@gmail.com> - * - * This is free and unencumbered software released into the public domain. - */ - -#include <stdlib.h> -#include <stdio.h> -#include <stdint.h> -#include <err.h> -#include <time.h> -#include <unistd.h> -#include <fcntl.h> -#include <string.h> -#include <sys/mman.h> - -#define PAGE_SHIFT 12 -#define HPAGE_SHIFT 21 - -#define PAGE_SIZE (1 << PAGE_SHIFT) -#define HPAGE_SIZE (1 << HPAGE_SHIFT) - -#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0) -#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1)) - -int pagemap_fd; - -int64_t allocate_transhuge(void *ptr) -{ - uint64_t ent[2]; - - /* drop pmd */ - if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANONYMOUS | - MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr) - errx(2, "mmap transhuge"); - - if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE)) - err(2, "MADV_HUGEPAGE"); - - /* allocate transparent huge page */ - *(volatile void **)ptr = ptr; - - if (pread(pagemap_fd, ent, sizeof(ent), - (uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent)) - err(2, "read pagemap"); - - if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) && - PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) && - !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1))) - return PAGEMAP_PFN(ent[0]); - - return -1; -} - -int main(int argc, char **argv) -{ - size_t ram, len; - void *ptr, *p; - struct timespec a, b; - double s; - uint8_t *map; - size_t map_len; - - ram = sysconf(_SC_PHYS_PAGES); - if (ram > SIZE_MAX / sysconf(_SC_PAGESIZE) / 4) - ram = SIZE_MAX / 4; - else - ram *= sysconf(_SC_PAGESIZE); - - if (argc == 1) - len = ram; - else if (!strcmp(argv[1], "-h")) - errx(1, "usage: %s [size in MiB]", argv[0]); - else - len = atoll(argv[1]) << 20; - - warnx("allocate %zd transhuge pages, using %zd MiB virtual memory" - " and %zd MiB of ram", len >> HPAGE_SHIFT, len >> 20, - len >> (20 + HPAGE_SHIFT - PAGE_SHIFT - 1)); - - pagemap_fd = open("/proc/self/pagemap", O_RDONLY); - if (pagemap_fd < 0) - err(2, "open pagemap"); - - len -= len % HPAGE_SIZE; - ptr = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0); - if (ptr == MAP_FAILED) - err(2, "initial mmap"); - ptr += HPAGE_SIZE - (uintptr_t)ptr % HPAGE_SIZE; - - if (madvise(ptr, len, MADV_HUGEPAGE)) - err(2, "MADV_HUGEPAGE"); - - map_len = ram >> (HPAGE_SHIFT - 1); - map = malloc(map_len); - if (!map) - errx(2, "map malloc"); - - while (1) { - int nr_succeed = 0, nr_failed = 0, nr_pages = 0; - - memset(map, 0, map_len); - - clock_gettime(CLOCK_MONOTONIC, &a); - for (p = ptr; p < ptr + len; p += HPAGE_SIZE) { - int64_t pfn; - - pfn = allocate_transhuge(p); - - if (pfn < 0) { - nr_failed++; - } else { - size_t idx = pfn >> (HPAGE_SHIFT - PAGE_SHIFT); - - nr_succeed++; - if (idx >= map_len) { - map = realloc(map, idx + 1); - if (!map) - errx(2, "map realloc"); - memset(map + map_len, 0, idx + 1 - map_len); - map_len = idx + 1; - } - if (!map[idx]) - nr_pages++; - map[idx] = 1; - } - - /* split transhuge page, keep last page */ - if (madvise(p, HPAGE_SIZE - PAGE_SIZE, MADV_DONTNEED)) - err(2, "MADV_DONTNEED"); - } - clock_gettime(CLOCK_MONOTONIC, &b); - s = b.tv_sec - a.tv_sec + (b.tv_nsec - a.tv_nsec) / 1000000000.; - - warnx("%.3f s/loop, %.3f ms/page, %10.3f MiB/s\t" - "%4d succeed, %4d failed, %4d different pages", - s, s * 1000 / (len >> HPAGE_SHIFT), len / s / (1 << 20), - nr_succeed, nr_failed, nr_pages); - } -} diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c deleted file mode 100644 index 61e5cfeb1350..000000000000 --- a/tools/testing/selftests/vm/userfaultfd.c +++ /dev/null @@ -1,1479 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Stress userfaultfd syscall. - * - * Copyright (C) 2015 Red Hat, Inc. - * - * This test allocates two virtual areas and bounces the physical - * memory across the two virtual areas (from area_src to area_dst) - * using userfaultfd. - * - * There are three threads running per CPU: - * - * 1) one per-CPU thread takes a per-page pthread_mutex in a random - * page of the area_dst (while the physical page may still be in - * area_src), and increments a per-page counter in the same page, - * and checks its value against a verification region. - * - * 2) another per-CPU thread handles the userfaults generated by - * thread 1 above. userfaultfd blocking reads or poll() modes are - * exercised interleaved. - * - * 3) one last per-CPU thread transfers the memory in the background - * at maximum bandwidth (if not already transferred by thread - * 2). Each cpu thread takes cares of transferring a portion of the - * area. - * - * When all threads of type 3 completed the transfer, one bounce is - * complete. area_src and area_dst are then swapped. All threads are - * respawned and so the bounce is immediately restarted in the - * opposite direction. - * - * per-CPU threads 1 by triggering userfaults inside - * pthread_mutex_lock will also verify the atomicity of the memory - * transfer (UFFDIO_COPY). - */ - -#define _GNU_SOURCE -#include <stdio.h> -#include <errno.h> -#include <unistd.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <time.h> -#include <signal.h> -#include <poll.h> -#include <string.h> -#include <sys/mman.h> -#include <sys/syscall.h> -#include <sys/ioctl.h> -#include <sys/wait.h> -#include <pthread.h> -#include <linux/userfaultfd.h> -#include <setjmp.h> -#include <stdbool.h> -#include <assert.h> - -#include "../kselftest.h" - -#ifdef __NR_userfaultfd - -static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; - -#define BOUNCE_RANDOM (1<<0) -#define BOUNCE_RACINGFAULTS (1<<1) -#define BOUNCE_VERIFY (1<<2) -#define BOUNCE_POLL (1<<3) -static int bounces; - -#define TEST_ANON 1 -#define TEST_HUGETLB 2 -#define TEST_SHMEM 3 -static int test_type; - -/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */ -#define ALARM_INTERVAL_SECS 10 -static volatile bool test_uffdio_copy_eexist = true; -static volatile bool test_uffdio_zeropage_eexist = true; -/* Whether to test uffd write-protection */ -static bool test_uffdio_wp = false; - -static bool map_shared; -static int huge_fd; -static char *huge_fd_off0; -static unsigned long long *count_verify; -static int uffd, uffd_flags, finished, *pipefd; -static char *area_src, *area_src_alias, *area_dst, *area_dst_alias; -static char *zeropage; -pthread_attr_t attr; - -/* Userfaultfd test statistics */ -struct uffd_stats { - int cpu; - unsigned long missing_faults; - unsigned long wp_faults; -}; - -/* pthread_mutex_t starts at page offset 0 */ -#define area_mutex(___area, ___nr) \ - ((pthread_mutex_t *) ((___area) + (___nr)*page_size)) -/* - * count is placed in the page after pthread_mutex_t naturally aligned - * to avoid non alignment faults on non-x86 archs. - */ -#define area_count(___area, ___nr) \ - ((volatile unsigned long long *) ((unsigned long) \ - ((___area) + (___nr)*page_size + \ - sizeof(pthread_mutex_t) + \ - sizeof(unsigned long long) - 1) & \ - ~(unsigned long)(sizeof(unsigned long long) \ - - 1))) - -const char *examples = - "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" - "./userfaultfd anon 100 99999\n\n" - "# Run share memory test on 1GiB region with 99 bounces:\n" - "./userfaultfd shmem 1000 99\n\n" - "# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n" - "./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n" - "# Run the same hugetlb test but using shmem:\n" - "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n" - "# 10MiB-~6GiB 999 bounces anonymous test, " - "continue forever unless an error triggers\n" - "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n"; - -static void usage(void) -{ - fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces> " - "[hugetlbfs_file]\n\n"); - fprintf(stderr, "Supported <test type>: anon, hugetlb, " - "hugetlb_shared, shmem\n\n"); - fprintf(stderr, "Examples:\n\n"); - fprintf(stderr, "%s", examples); - exit(1); -} - -static void uffd_stats_reset(struct uffd_stats *uffd_stats, - unsigned long n_cpus) -{ - int i; - - for (i = 0; i < n_cpus; i++) { - uffd_stats[i].cpu = i; - uffd_stats[i].missing_faults = 0; - uffd_stats[i].wp_faults = 0; - } -} - -static void uffd_stats_report(struct uffd_stats *stats, int n_cpus) -{ - int i; - unsigned long long miss_total = 0, wp_total = 0; - - for (i = 0; i < n_cpus; i++) { - miss_total += stats[i].missing_faults; - wp_total += stats[i].wp_faults; - } - - printf("userfaults: %llu missing (", miss_total); - for (i = 0; i < n_cpus; i++) - printf("%lu+", stats[i].missing_faults); - printf("\b), %llu wp (", wp_total); - for (i = 0; i < n_cpus; i++) - printf("%lu+", stats[i].wp_faults); - printf("\b)\n"); -} - -static int anon_release_pages(char *rel_area) -{ - int ret = 0; - - if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) { - perror("madvise"); - ret = 1; - } - - return ret; -} - -static void anon_allocate_area(void **alloc_area) -{ - if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) { - fprintf(stderr, "out of memory\n"); - *alloc_area = NULL; - } -} - -static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) -{ -} - -/* HugeTLB memory */ -static int hugetlb_release_pages(char *rel_area) -{ - int ret = 0; - - if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - rel_area == huge_fd_off0 ? 0 : - nr_pages * page_size, - nr_pages * page_size)) { - perror("fallocate"); - ret = 1; - } - - return ret; -} - - -static void hugetlb_allocate_area(void **alloc_area) -{ - void *area_alias = NULL; - char **alloc_area_alias; - *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - (map_shared ? MAP_SHARED : MAP_PRIVATE) | - MAP_HUGETLB, - huge_fd, *alloc_area == area_src ? 0 : - nr_pages * page_size); - if (*alloc_area == MAP_FAILED) { - fprintf(stderr, "mmap of hugetlbfs file failed\n"); - *alloc_area = NULL; - } - - if (map_shared) { - area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_HUGETLB, - huge_fd, *alloc_area == area_src ? 0 : - nr_pages * page_size); - if (area_alias == MAP_FAILED) { - if (munmap(*alloc_area, nr_pages * page_size) < 0) - perror("hugetlb munmap"), exit(1); - *alloc_area = NULL; - return; - } - } - if (*alloc_area == area_src) { - huge_fd_off0 = *alloc_area; - alloc_area_alias = &area_src_alias; - } else { - alloc_area_alias = &area_dst_alias; - } - if (area_alias) - *alloc_area_alias = area_alias; -} - -static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset) -{ - if (!map_shared) - return; - /* - * We can't zap just the pagetable with hugetlbfs because - * MADV_DONTEED won't work. So exercise -EEXIST on a alias - * mapping where the pagetables are not established initially, - * this way we'll exercise the -EEXEC at the fs level. - */ - *start = (unsigned long) area_dst_alias + offset; -} - -/* Shared memory */ -static int shmem_release_pages(char *rel_area) -{ - int ret = 0; - - if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) { - perror("madvise"); - ret = 1; - } - - return ret; -} - -static void shmem_allocate_area(void **alloc_area) -{ - *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_SHARED, -1, 0); - if (*alloc_area == MAP_FAILED) { - fprintf(stderr, "shared memory mmap failed\n"); - *alloc_area = NULL; - } -} - -struct uffd_test_ops { - unsigned long expected_ioctls; - void (*allocate_area)(void **alloc_area); - int (*release_pages)(char *rel_area); - void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset); -}; - -#define SHMEM_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \ - (1 << _UFFDIO_COPY) | \ - (1 << _UFFDIO_ZEROPAGE)) - -#define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \ - (1 << _UFFDIO_COPY) | \ - (1 << _UFFDIO_ZEROPAGE) | \ - (1 << _UFFDIO_WRITEPROTECT)) - -static struct uffd_test_ops anon_uffd_test_ops = { - .expected_ioctls = ANON_EXPECTED_IOCTLS, - .allocate_area = anon_allocate_area, - .release_pages = anon_release_pages, - .alias_mapping = noop_alias_mapping, -}; - -static struct uffd_test_ops shmem_uffd_test_ops = { - .expected_ioctls = SHMEM_EXPECTED_IOCTLS, - .allocate_area = shmem_allocate_area, - .release_pages = shmem_release_pages, - .alias_mapping = noop_alias_mapping, -}; - -static struct uffd_test_ops hugetlb_uffd_test_ops = { - .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC, - .allocate_area = hugetlb_allocate_area, - .release_pages = hugetlb_release_pages, - .alias_mapping = hugetlb_alias_mapping, -}; - -static struct uffd_test_ops *uffd_test_ops; - -static int my_bcmp(char *str1, char *str2, size_t n) -{ - unsigned long i; - for (i = 0; i < n; i++) - if (str1[i] != str2[i]) - return 1; - return 0; -} - -static void wp_range(int ufd, __u64 start, __u64 len, bool wp) -{ - struct uffdio_writeprotect prms = { 0 }; - - /* Write protection page faults */ - prms.range.start = start; - prms.range.len = len; - /* Undo write-protect, do wakeup after that */ - prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0; - - if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) - fprintf(stderr, "clear WP failed for address 0x%Lx\n", - start), exit(1); -} - -static void *locking_thread(void *arg) -{ - unsigned long cpu = (unsigned long) arg; - struct random_data rand; - unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */ - int32_t rand_nr; - unsigned long long count; - char randstate[64]; - unsigned int seed; - time_t start; - - if (bounces & BOUNCE_RANDOM) { - seed = (unsigned int) time(NULL) - bounces; - if (!(bounces & BOUNCE_RACINGFAULTS)) - seed += cpu; - bzero(&rand, sizeof(rand)); - bzero(&randstate, sizeof(randstate)); - if (initstate_r(seed, randstate, sizeof(randstate), &rand)) - fprintf(stderr, "srandom_r error\n"), exit(1); - } else { - page_nr = -bounces; - if (!(bounces & BOUNCE_RACINGFAULTS)) - page_nr += cpu * nr_pages_per_cpu; - } - - while (!finished) { - if (bounces & BOUNCE_RANDOM) { - if (random_r(&rand, &rand_nr)) - fprintf(stderr, "random_r 1 error\n"), exit(1); - page_nr = rand_nr; - if (sizeof(page_nr) > sizeof(rand_nr)) { - if (random_r(&rand, &rand_nr)) - fprintf(stderr, "random_r 2 error\n"), exit(1); - page_nr |= (((unsigned long) rand_nr) << 16) << - 16; - } - } else - page_nr += 1; - page_nr %= nr_pages; - - start = time(NULL); - if (bounces & BOUNCE_VERIFY) { - count = *area_count(area_dst, page_nr); - if (!count) - fprintf(stderr, - "page_nr %lu wrong count %Lu %Lu\n", - page_nr, count, - count_verify[page_nr]), exit(1); - - - /* - * We can't use bcmp (or memcmp) because that - * returns 0 erroneously if the memory is - * changing under it (even if the end of the - * page is never changing and always - * different). - */ -#if 1 - if (!my_bcmp(area_dst + page_nr * page_size, zeropage, - page_size)) - fprintf(stderr, - "my_bcmp page_nr %lu wrong count %Lu %Lu\n", - page_nr, count, - count_verify[page_nr]), exit(1); -#else - unsigned long loops; - - loops = 0; - /* uncomment the below line to test with mutex */ - /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */ - while (!bcmp(area_dst + page_nr * page_size, zeropage, - page_size)) { - loops += 1; - if (loops > 10) - break; - } - /* uncomment below line to test with mutex */ - /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */ - if (loops) { - fprintf(stderr, - "page_nr %lu all zero thread %lu %p %lu\n", - page_nr, cpu, area_dst + page_nr * page_size, - loops); - if (loops > 10) - exit(1); - } -#endif - } - - pthread_mutex_lock(area_mutex(area_dst, page_nr)); - count = *area_count(area_dst, page_nr); - if (count != count_verify[page_nr]) { - fprintf(stderr, - "page_nr %lu memory corruption %Lu %Lu\n", - page_nr, count, - count_verify[page_nr]), exit(1); - } - count++; - *area_count(area_dst, page_nr) = count_verify[page_nr] = count; - pthread_mutex_unlock(area_mutex(area_dst, page_nr)); - - if (time(NULL) - start > 1) - fprintf(stderr, - "userfault too slow %ld " - "possible false positive with overcommit\n", - time(NULL) - start); - } - - return NULL; -} - -static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, - unsigned long offset) -{ - uffd_test_ops->alias_mapping(&uffdio_copy->dst, - uffdio_copy->len, - offset); - if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) { - /* real retval in ufdio_copy.copy */ - if (uffdio_copy->copy != -EEXIST) - fprintf(stderr, "UFFDIO_COPY retry error %Ld\n", - uffdio_copy->copy), exit(1); - } else { - fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n", - uffdio_copy->copy), exit(1); - } -} - -static int __copy_page(int ufd, unsigned long offset, bool retry) -{ - struct uffdio_copy uffdio_copy; - - if (offset >= nr_pages * page_size) - fprintf(stderr, "unexpected offset %lu\n", - offset), exit(1); - uffdio_copy.dst = (unsigned long) area_dst + offset; - uffdio_copy.src = (unsigned long) area_src + offset; - uffdio_copy.len = page_size; - if (test_uffdio_wp) - uffdio_copy.mode = UFFDIO_COPY_MODE_WP; - else - uffdio_copy.mode = 0; - uffdio_copy.copy = 0; - if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) { - /* real retval in ufdio_copy.copy */ - if (uffdio_copy.copy != -EEXIST) - fprintf(stderr, "UFFDIO_COPY error %Ld\n", - uffdio_copy.copy), exit(1); - } else if (uffdio_copy.copy != page_size) { - fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", - uffdio_copy.copy), exit(1); - } else { - if (test_uffdio_copy_eexist && retry) { - test_uffdio_copy_eexist = false; - retry_copy_page(ufd, &uffdio_copy, offset); - } - return 1; - } - return 0; -} - -static int copy_page_retry(int ufd, unsigned long offset) -{ - return __copy_page(ufd, offset, true); -} - -static int copy_page(int ufd, unsigned long offset) -{ - return __copy_page(ufd, offset, false); -} - -static int uffd_read_msg(int ufd, struct uffd_msg *msg) -{ - int ret = read(uffd, msg, sizeof(*msg)); - - if (ret != sizeof(*msg)) { - if (ret < 0) { - if (errno == EAGAIN) - return 1; - else - perror("blocking read error"), exit(1); - } else { - fprintf(stderr, "short read\n"), exit(1); - } - } - - return 0; -} - -static void uffd_handle_page_fault(struct uffd_msg *msg, - struct uffd_stats *stats) -{ - unsigned long offset; - - if (msg->event != UFFD_EVENT_PAGEFAULT) - fprintf(stderr, "unexpected msg event %u\n", - msg->event), exit(1); - - if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) { - wp_range(uffd, msg->arg.pagefault.address, page_size, false); - stats->wp_faults++; - } else { - /* Missing page faults */ - if (bounces & BOUNCE_VERIFY && - msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) - fprintf(stderr, "unexpected write fault\n"), exit(1); - - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; - offset &= ~(page_size-1); - - if (copy_page(uffd, offset)) - stats->missing_faults++; - } -} - -static void *uffd_poll_thread(void *arg) -{ - struct uffd_stats *stats = (struct uffd_stats *)arg; - unsigned long cpu = stats->cpu; - struct pollfd pollfd[2]; - struct uffd_msg msg; - struct uffdio_register uffd_reg; - int ret; - char tmp_chr; - - pollfd[0].fd = uffd; - pollfd[0].events = POLLIN; - pollfd[1].fd = pipefd[cpu*2]; - pollfd[1].events = POLLIN; - - for (;;) { - ret = poll(pollfd, 2, -1); - if (!ret) - fprintf(stderr, "poll error %d\n", ret), exit(1); - if (ret < 0) - perror("poll"), exit(1); - if (pollfd[1].revents & POLLIN) { - if (read(pollfd[1].fd, &tmp_chr, 1) != 1) - fprintf(stderr, "read pipefd error\n"), - exit(1); - break; - } - if (!(pollfd[0].revents & POLLIN)) - fprintf(stderr, "pollfd[0].revents %d\n", - pollfd[0].revents), exit(1); - if (uffd_read_msg(uffd, &msg)) - continue; - switch (msg.event) { - default: - fprintf(stderr, "unexpected msg event %u\n", - msg.event), exit(1); - break; - case UFFD_EVENT_PAGEFAULT: - uffd_handle_page_fault(&msg, stats); - break; - case UFFD_EVENT_FORK: - close(uffd); - uffd = msg.arg.fork.ufd; - pollfd[0].fd = uffd; - break; - case UFFD_EVENT_REMOVE: - uffd_reg.range.start = msg.arg.remove.start; - uffd_reg.range.len = msg.arg.remove.end - - msg.arg.remove.start; - if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) - fprintf(stderr, "remove failure\n"), exit(1); - break; - case UFFD_EVENT_REMAP: - area_dst = (char *)(unsigned long)msg.arg.remap.to; - break; - } - } - - return NULL; -} - -pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; - -static void *uffd_read_thread(void *arg) -{ - struct uffd_stats *stats = (struct uffd_stats *)arg; - struct uffd_msg msg; - - pthread_mutex_unlock(&uffd_read_mutex); - /* from here cancellation is ok */ - - for (;;) { - if (uffd_read_msg(uffd, &msg)) - continue; - uffd_handle_page_fault(&msg, stats); - } - - return NULL; -} - -static void *background_thread(void *arg) -{ - unsigned long cpu = (unsigned long) arg; - unsigned long page_nr, start_nr, mid_nr, end_nr; - - start_nr = cpu * nr_pages_per_cpu; - end_nr = (cpu+1) * nr_pages_per_cpu; - mid_nr = (start_nr + end_nr) / 2; - - /* Copy the first half of the pages */ - for (page_nr = start_nr; page_nr < mid_nr; page_nr++) - copy_page_retry(uffd, page_nr * page_size); - - /* - * If we need to test uffd-wp, set it up now. Then we'll have - * at least the first half of the pages mapped already which - * can be write-protected for testing - */ - if (test_uffdio_wp) - wp_range(uffd, (unsigned long)area_dst + start_nr * page_size, - nr_pages_per_cpu * page_size, true); - - /* - * Continue the 2nd half of the page copying, handling write - * protection faults if any - */ - for (page_nr = mid_nr; page_nr < end_nr; page_nr++) - copy_page_retry(uffd, page_nr * page_size); - - return NULL; -} - -static int stress(struct uffd_stats *uffd_stats) -{ - unsigned long cpu; - pthread_t locking_threads[nr_cpus]; - pthread_t uffd_threads[nr_cpus]; - pthread_t background_threads[nr_cpus]; - - finished = 0; - for (cpu = 0; cpu < nr_cpus; cpu++) { - if (pthread_create(&locking_threads[cpu], &attr, - locking_thread, (void *)cpu)) - return 1; - if (bounces & BOUNCE_POLL) { - if (pthread_create(&uffd_threads[cpu], &attr, - uffd_poll_thread, - (void *)&uffd_stats[cpu])) - return 1; - } else { - if (pthread_create(&uffd_threads[cpu], &attr, - uffd_read_thread, - (void *)&uffd_stats[cpu])) - return 1; - pthread_mutex_lock(&uffd_read_mutex); - } - if (pthread_create(&background_threads[cpu], &attr, - background_thread, (void *)cpu)) - return 1; - } - for (cpu = 0; cpu < nr_cpus; cpu++) - if (pthread_join(background_threads[cpu], NULL)) - return 1; - - /* - * Be strict and immediately zap area_src, the whole area has - * been transferred already by the background treads. The - * area_src could then be faulted in in a racy way by still - * running uffdio_threads reading zeropages after we zapped - * area_src (but they're guaranteed to get -EEXIST from - * UFFDIO_COPY without writing zero pages into area_dst - * because the background threads already completed). - */ - if (uffd_test_ops->release_pages(area_src)) - return 1; - - - finished = 1; - for (cpu = 0; cpu < nr_cpus; cpu++) - if (pthread_join(locking_threads[cpu], NULL)) - return 1; - - for (cpu = 0; cpu < nr_cpus; cpu++) { - char c; - if (bounces & BOUNCE_POLL) { - if (write(pipefd[cpu*2+1], &c, 1) != 1) { - fprintf(stderr, "pipefd write error\n"); - return 1; - } - if (pthread_join(uffd_threads[cpu], - (void *)&uffd_stats[cpu])) - return 1; - } else { - if (pthread_cancel(uffd_threads[cpu])) - return 1; - if (pthread_join(uffd_threads[cpu], NULL)) - return 1; - } - } - - return 0; -} - -static int userfaultfd_open(int features) -{ - struct uffdio_api uffdio_api; - - uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - if (uffd < 0) { - fprintf(stderr, - "userfaultfd syscall not available in this kernel\n"); - return 1; - } - uffd_flags = fcntl(uffd, F_GETFD, NULL); - - uffdio_api.api = UFFD_API; - uffdio_api.features = features; - if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { - fprintf(stderr, "UFFDIO_API\n"); - return 1; - } - if (uffdio_api.api != UFFD_API) { - fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api); - return 1; - } - - return 0; -} - -sigjmp_buf jbuf, *sigbuf; - -static void sighndl(int sig, siginfo_t *siginfo, void *ptr) -{ - if (sig == SIGBUS) { - if (sigbuf) - siglongjmp(*sigbuf, 1); - abort(); - } -} - -/* - * For non-cooperative userfaultfd test we fork() a process that will - * generate pagefaults, will mremap the area monitored by the - * userfaultfd and at last this process will release the monitored - * area. - * For the anonymous and shared memory the area is divided into two - * parts, the first part is accessed before mremap, and the second - * part is accessed after mremap. Since hugetlbfs does not support - * mremap, the entire monitored area is accessed in a single pass for - * HUGETLB_TEST. - * The release of the pages currently generates event for shmem and - * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked - * for hugetlb. - * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register - * monitored area, generate pagefaults and test that signal is delivered. - * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2 - * test robustness use case - we release monitored area, fork a process - * that will generate pagefaults and verify signal is generated. - * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal - * feature. Using monitor thread, verify no userfault events are generated. - */ -static int faulting_process(int signal_test) -{ - unsigned long nr; - unsigned long long count; - unsigned long split_nr_pages; - unsigned long lastnr; - struct sigaction act; - unsigned long signalled = 0; - - if (test_type != TEST_HUGETLB) - split_nr_pages = (nr_pages + 1) / 2; - else - split_nr_pages = nr_pages; - - if (signal_test) { - sigbuf = &jbuf; - memset(&act, 0, sizeof(act)); - act.sa_sigaction = sighndl; - act.sa_flags = SA_SIGINFO; - if (sigaction(SIGBUS, &act, 0)) { - perror("sigaction"); - return 1; - } - lastnr = (unsigned long)-1; - } - - for (nr = 0; nr < split_nr_pages; nr++) { - int steps = 1; - unsigned long offset = nr * page_size; - - if (signal_test) { - if (sigsetjmp(*sigbuf, 1) != 0) { - if (steps == 1 && nr == lastnr) { - fprintf(stderr, "Signal repeated\n"); - return 1; - } - - lastnr = nr; - if (signal_test == 1) { - if (steps == 1) { - /* This is a MISSING request */ - steps++; - if (copy_page(uffd, offset)) - signalled++; - } else { - /* This is a WP request */ - assert(steps == 2); - wp_range(uffd, - (__u64)area_dst + - offset, - page_size, false); - } - } else { - signalled++; - continue; - } - } - } - - count = *area_count(area_dst, nr); - if (count != count_verify[nr]) { - fprintf(stderr, - "nr %lu memory corruption %Lu %Lu\n", - nr, count, - count_verify[nr]); - } - /* - * Trigger write protection if there is by writting - * the same value back. - */ - *area_count(area_dst, nr) = count; - } - - if (signal_test) - return signalled != split_nr_pages; - - if (test_type == TEST_HUGETLB) - return 0; - - area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, - MREMAP_MAYMOVE | MREMAP_FIXED, area_src); - if (area_dst == MAP_FAILED) - perror("mremap"), exit(1); - - for (; nr < nr_pages; nr++) { - count = *area_count(area_dst, nr); - if (count != count_verify[nr]) { - fprintf(stderr, - "nr %lu memory corruption %Lu %Lu\n", - nr, count, - count_verify[nr]), exit(1); - } - /* - * Trigger write protection if there is by writting - * the same value back. - */ - *area_count(area_dst, nr) = count; - } - - if (uffd_test_ops->release_pages(area_dst)) - return 1; - - for (nr = 0; nr < nr_pages; nr++) { - if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) - fprintf(stderr, "nr %lu is not zero\n", nr), exit(1); - } - - return 0; -} - -static void retry_uffdio_zeropage(int ufd, - struct uffdio_zeropage *uffdio_zeropage, - unsigned long offset) -{ - uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start, - uffdio_zeropage->range.len, - offset); - if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { - if (uffdio_zeropage->zeropage != -EEXIST) - fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n", - uffdio_zeropage->zeropage), exit(1); - } else { - fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n", - uffdio_zeropage->zeropage), exit(1); - } -} - -static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) -{ - struct uffdio_zeropage uffdio_zeropage; - int ret; - unsigned long has_zeropage; - - has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE); - - if (offset >= nr_pages * page_size) - fprintf(stderr, "unexpected offset %lu\n", - offset), exit(1); - uffdio_zeropage.range.start = (unsigned long) area_dst + offset; - uffdio_zeropage.range.len = page_size; - uffdio_zeropage.mode = 0; - ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage); - if (ret) { - /* real retval in ufdio_zeropage.zeropage */ - if (has_zeropage) { - if (uffdio_zeropage.zeropage == -EEXIST) - fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"), - exit(1); - else - fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n", - uffdio_zeropage.zeropage), exit(1); - } else { - if (uffdio_zeropage.zeropage != -EINVAL) - fprintf(stderr, - "UFFDIO_ZEROPAGE not -EINVAL %Ld\n", - uffdio_zeropage.zeropage), exit(1); - } - } else if (has_zeropage) { - if (uffdio_zeropage.zeropage != page_size) { - fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n", - uffdio_zeropage.zeropage), exit(1); - } else { - if (test_uffdio_zeropage_eexist && retry) { - test_uffdio_zeropage_eexist = false; - retry_uffdio_zeropage(ufd, &uffdio_zeropage, - offset); - } - return 1; - } - } else { - fprintf(stderr, - "UFFDIO_ZEROPAGE succeeded %Ld\n", - uffdio_zeropage.zeropage), exit(1); - } - - return 0; -} - -static int uffdio_zeropage(int ufd, unsigned long offset) -{ - return __uffdio_zeropage(ufd, offset, false); -} - -/* exercise UFFDIO_ZEROPAGE */ -static int userfaultfd_zeropage_test(void) -{ - struct uffdio_register uffdio_register; - unsigned long expected_ioctls; - - printf("testing UFFDIO_ZEROPAGE: "); - fflush(stdout); - - if (uffd_test_ops->release_pages(area_dst)) - return 1; - - if (userfaultfd_open(0) < 0) - return 1; - uffdio_register.range.start = (unsigned long) area_dst; - uffdio_register.range.len = nr_pages * page_size; - uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; - if (test_uffdio_wp) - uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) - fprintf(stderr, "register failure\n"), exit(1); - - expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) - fprintf(stderr, - "unexpected missing ioctl for anon memory\n"), - exit(1); - - if (uffdio_zeropage(uffd, 0)) { - if (my_bcmp(area_dst, zeropage, page_size)) - fprintf(stderr, "zeropage is not zero\n"), exit(1); - } - - close(uffd); - printf("done.\n"); - return 0; -} - -static int userfaultfd_events_test(void) -{ - struct uffdio_register uffdio_register; - unsigned long expected_ioctls; - pthread_t uffd_mon; - int err, features; - pid_t pid; - char c; - struct uffd_stats stats = { 0 }; - - printf("testing events (fork, remap, remove): "); - fflush(stdout); - - if (uffd_test_ops->release_pages(area_dst)) - return 1; - - features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP | - UFFD_FEATURE_EVENT_REMOVE; - if (userfaultfd_open(features) < 0) - return 1; - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); - - uffdio_register.range.start = (unsigned long) area_dst; - uffdio_register.range.len = nr_pages * page_size; - uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; - if (test_uffdio_wp) - uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) - fprintf(stderr, "register failure\n"), exit(1); - - expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) - fprintf(stderr, - "unexpected missing ioctl for anon memory\n"), - exit(1); - - if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) - perror("uffd_poll_thread create"), exit(1); - - pid = fork(); - if (pid < 0) - perror("fork"), exit(1); - - if (!pid) - return faulting_process(0); - - waitpid(pid, &err, 0); - if (err) - fprintf(stderr, "faulting process failed\n"), exit(1); - - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) - perror("pipe write"), exit(1); - if (pthread_join(uffd_mon, NULL)) - return 1; - - close(uffd); - - uffd_stats_report(&stats, 1); - - return stats.missing_faults != nr_pages; -} - -static int userfaultfd_sig_test(void) -{ - struct uffdio_register uffdio_register; - unsigned long expected_ioctls; - unsigned long userfaults; - pthread_t uffd_mon; - int err, features; - pid_t pid; - char c; - struct uffd_stats stats = { 0 }; - - printf("testing signal delivery: "); - fflush(stdout); - - if (uffd_test_ops->release_pages(area_dst)) - return 1; - - features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS; - if (userfaultfd_open(features) < 0) - return 1; - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); - - uffdio_register.range.start = (unsigned long) area_dst; - uffdio_register.range.len = nr_pages * page_size; - uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; - if (test_uffdio_wp) - uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) - fprintf(stderr, "register failure\n"), exit(1); - - expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) - fprintf(stderr, - "unexpected missing ioctl for anon memory\n"), - exit(1); - - if (faulting_process(1)) - fprintf(stderr, "faulting process failed\n"), exit(1); - - if (uffd_test_ops->release_pages(area_dst)) - return 1; - - if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) - perror("uffd_poll_thread create"), exit(1); - - pid = fork(); - if (pid < 0) - perror("fork"), exit(1); - - if (!pid) - exit(faulting_process(2)); - - waitpid(pid, &err, 0); - if (err) - fprintf(stderr, "faulting process failed\n"), exit(1); - - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) - perror("pipe write"), exit(1); - if (pthread_join(uffd_mon, (void **)&userfaults)) - return 1; - - printf("done.\n"); - if (userfaults) - fprintf(stderr, "Signal test failed, userfaults: %ld\n", - userfaults); - close(uffd); - return userfaults != 0; -} - -static int userfaultfd_stress(void) -{ - void *area; - char *tmp_area; - unsigned long nr; - struct uffdio_register uffdio_register; - unsigned long cpu; - int err; - struct uffd_stats uffd_stats[nr_cpus]; - - uffd_test_ops->allocate_area((void **)&area_src); - if (!area_src) - return 1; - uffd_test_ops->allocate_area((void **)&area_dst); - if (!area_dst) - return 1; - - if (userfaultfd_open(0) < 0) - return 1; - - count_verify = malloc(nr_pages * sizeof(unsigned long long)); - if (!count_verify) { - perror("count_verify"); - return 1; - } - - for (nr = 0; nr < nr_pages; nr++) { - *area_mutex(area_src, nr) = (pthread_mutex_t) - PTHREAD_MUTEX_INITIALIZER; - count_verify[nr] = *area_count(area_src, nr) = 1; - /* - * In the transition between 255 to 256, powerpc will - * read out of order in my_bcmp and see both bytes as - * zero, so leave a placeholder below always non-zero - * after the count, to avoid my_bcmp to trigger false - * positives. - */ - *(area_count(area_src, nr) + 1) = 1; - } - - pipefd = malloc(sizeof(int) * nr_cpus * 2); - if (!pipefd) { - perror("pipefd"); - return 1; - } - for (cpu = 0; cpu < nr_cpus; cpu++) { - if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) { - perror("pipe"); - return 1; - } - } - - if (posix_memalign(&area, page_size, page_size)) { - fprintf(stderr, "out of memory\n"); - return 1; - } - zeropage = area; - bzero(zeropage, page_size); - - pthread_mutex_lock(&uffd_read_mutex); - - pthread_attr_init(&attr); - pthread_attr_setstacksize(&attr, 16*1024*1024); - - err = 0; - while (bounces--) { - unsigned long expected_ioctls; - - printf("bounces: %d, mode:", bounces); - if (bounces & BOUNCE_RANDOM) - printf(" rnd"); - if (bounces & BOUNCE_RACINGFAULTS) - printf(" racing"); - if (bounces & BOUNCE_VERIFY) - printf(" ver"); - if (bounces & BOUNCE_POLL) - printf(" poll"); - printf(", "); - fflush(stdout); - - if (bounces & BOUNCE_POLL) - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); - else - fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK); - - /* register */ - uffdio_register.range.start = (unsigned long) area_dst; - uffdio_register.range.len = nr_pages * page_size; - uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; - if (test_uffdio_wp) - uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { - fprintf(stderr, "register failure\n"); - return 1; - } - expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) { - fprintf(stderr, - "unexpected missing ioctl for anon memory\n"); - return 1; - } - - if (area_dst_alias) { - uffdio_register.range.start = (unsigned long) - area_dst_alias; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { - fprintf(stderr, "register failure alias\n"); - return 1; - } - } - - /* - * The madvise done previously isn't enough: some - * uffd_thread could have read userfaults (one of - * those already resolved by the background thread) - * and it may be in the process of calling - * UFFDIO_COPY. UFFDIO_COPY will read the zapped - * area_src and it would map a zero page in it (of - * course such a UFFDIO_COPY is perfectly safe as it'd - * return -EEXIST). The problem comes at the next - * bounce though: that racing UFFDIO_COPY would - * generate zeropages in the area_src, so invalidating - * the previous MADV_DONTNEED. Without this additional - * MADV_DONTNEED those zeropages leftovers in the - * area_src would lead to -EEXIST failure during the - * next bounce, effectively leaving a zeropage in the - * area_dst. - * - * Try to comment this out madvise to see the memory - * corruption being caught pretty quick. - * - * khugepaged is also inhibited to collapse THP after - * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's - * required to MADV_DONTNEED here. - */ - if (uffd_test_ops->release_pages(area_dst)) - return 1; - - uffd_stats_reset(uffd_stats, nr_cpus); - - /* bounce pass */ - if (stress(uffd_stats)) - return 1; - - /* Clear all the write protections if there is any */ - if (test_uffdio_wp) - wp_range(uffd, (unsigned long)area_dst, - nr_pages * page_size, false); - - /* unregister */ - if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) { - fprintf(stderr, "unregister failure\n"); - return 1; - } - if (area_dst_alias) { - uffdio_register.range.start = (unsigned long) area_dst; - if (ioctl(uffd, UFFDIO_UNREGISTER, - &uffdio_register.range)) { - fprintf(stderr, "unregister failure alias\n"); - return 1; - } - } - - /* verification */ - if (bounces & BOUNCE_VERIFY) { - for (nr = 0; nr < nr_pages; nr++) { - if (*area_count(area_dst, nr) != count_verify[nr]) { - fprintf(stderr, - "error area_count %Lu %Lu %lu\n", - *area_count(area_src, nr), - count_verify[nr], - nr); - err = 1; - bounces = 0; - } - } - } - - /* prepare next bounce */ - tmp_area = area_src; - area_src = area_dst; - area_dst = tmp_area; - - tmp_area = area_src_alias; - area_src_alias = area_dst_alias; - area_dst_alias = tmp_area; - - uffd_stats_report(uffd_stats, nr_cpus); - } - - if (err) - return err; - - close(uffd); - return userfaultfd_zeropage_test() || userfaultfd_sig_test() - || userfaultfd_events_test(); -} - -/* - * Copied from mlock2-tests.c - */ -unsigned long default_huge_page_size(void) -{ - unsigned long hps = 0; - char *line = NULL; - size_t linelen = 0; - FILE *f = fopen("/proc/meminfo", "r"); - - if (!f) - return 0; - while (getline(&line, &linelen, f) > 0) { - if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { - hps <<= 10; - break; - } - } - - free(line); - fclose(f); - return hps; -} - -static void set_test_type(const char *type) -{ - if (!strcmp(type, "anon")) { - test_type = TEST_ANON; - uffd_test_ops = &anon_uffd_test_ops; - /* Only enable write-protect test for anonymous test */ - test_uffdio_wp = true; - } else if (!strcmp(type, "hugetlb")) { - test_type = TEST_HUGETLB; - uffd_test_ops = &hugetlb_uffd_test_ops; - } else if (!strcmp(type, "hugetlb_shared")) { - map_shared = true; - test_type = TEST_HUGETLB; - uffd_test_ops = &hugetlb_uffd_test_ops; - } else if (!strcmp(type, "shmem")) { - map_shared = true; - test_type = TEST_SHMEM; - uffd_test_ops = &shmem_uffd_test_ops; - } else { - fprintf(stderr, "Unknown test type: %s\n", type), exit(1); - } - - if (test_type == TEST_HUGETLB) - page_size = default_huge_page_size(); - else - page_size = sysconf(_SC_PAGE_SIZE); - - if (!page_size) - fprintf(stderr, "Unable to determine page size\n"), - exit(2); - if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 - > page_size) - fprintf(stderr, "Impossible to run this test\n"), exit(2); -} - -static void sigalrm(int sig) -{ - if (sig != SIGALRM) - abort(); - test_uffdio_copy_eexist = true; - test_uffdio_zeropage_eexist = true; - alarm(ALARM_INTERVAL_SECS); -} - -int main(int argc, char **argv) -{ - if (argc < 4) - usage(); - - if (signal(SIGALRM, sigalrm) == SIG_ERR) - fprintf(stderr, "failed to arm SIGALRM"), exit(1); - alarm(ALARM_INTERVAL_SECS); - - set_test_type(argv[1]); - - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size / - nr_cpus; - if (!nr_pages_per_cpu) { - fprintf(stderr, "invalid MiB\n"); - usage(); - } - - bounces = atoi(argv[3]); - if (bounces <= 0) { - fprintf(stderr, "invalid bounces\n"); - usage(); - } - nr_pages = nr_pages_per_cpu * nr_cpus; - - if (test_type == TEST_HUGETLB) { - if (argc < 5) - usage(); - huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755); - if (huge_fd < 0) { - fprintf(stderr, "Open of %s failed", argv[3]); - perror("open"); - exit(1); - } - if (ftruncate(huge_fd, 0)) { - fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]); - perror("ftruncate"); - exit(1); - } - } - printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", - nr_pages, nr_pages_per_cpu); - return userfaultfd_stress(); -} - -#else /* __NR_userfaultfd */ - -#warning "missing __NR_userfaultfd definition" - -int main(void) -{ - printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); - return KSFT_SKIP; -} - -#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c deleted file mode 100644 index 83acdff26a13..000000000000 --- a/tools/testing/selftests/vm/va_128TBswitch.c +++ /dev/null @@ -1,289 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * - * Authors: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> - * Authors: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - */ - -#include <stdio.h> -#include <sys/mman.h> -#include <string.h> - -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - -#ifdef __powerpc64__ -#define PAGE_SIZE (64 << 10) -/* - * This will work with 16M and 2M hugepage size - */ -#define HUGETLB_SIZE (16 << 20) -#else -#define PAGE_SIZE (4 << 10) -#define HUGETLB_SIZE (2 << 20) -#endif - -/* - * >= 128TB is the hint addr value we used to select - * large address space. - */ -#define ADDR_SWITCH_HINT (1UL << 47) -#define LOW_ADDR ((void *) (1UL << 30)) -#define HIGH_ADDR ((void *) (1UL << 48)) - -struct testcase { - void *addr; - unsigned long size; - unsigned long flags; - const char *msg; - unsigned int low_addr_required:1; - unsigned int keep_mapped:1; -}; - -static struct testcase testcases[] = { - { - /* - * If stack is moved, we could possibly allocate - * this at the requested address. - */ - .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", - .low_addr_required = 1, - }, - { - /* - * We should never allocate at the requested address or above it - * The len cross the 128TB boundary. Without MAP_FIXED - * we will always search in the lower address space. - */ - .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, (2 * PAGE_SIZE))", - .low_addr_required = 1, - }, - { - /* - * Exact mapping at 128TB, the area is free we should get that - * even without MAP_FIXED. - */ - .addr = ((void *)(ADDR_SWITCH_HINT)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", - .keep_mapped = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", - }, - { - .addr = NULL, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(NULL)", - .low_addr_required = 1, - }, - { - .addr = LOW_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(LOW_ADDR)", - .low_addr_required = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR)", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR) again", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(HIGH_ADDR, MAP_FIXED)", - }, - { - .addr = (void *) -1, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1)", - .keep_mapped = 1, - }, - { - .addr = (void *) -1, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1) again", - }, - { - .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", - .low_addr_required = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2 * PAGE_SIZE)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE / 2), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE/2 , 2 * PAGE_SIZE)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = ((void *)(ADDR_SWITCH_HINT)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", - }, - { - .addr = (void *)(ADDR_SWITCH_HINT), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", - }, -}; - -static struct testcase hugetlb_testcases[] = { - { - .addr = NULL, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(NULL, MAP_HUGETLB)", - .low_addr_required = 1, - }, - { - .addr = LOW_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(LOW_ADDR, MAP_HUGETLB)", - .low_addr_required = 1, - }, - { - .addr = HIGH_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)", - }, - { - .addr = (void *) -1, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1, MAP_HUGETLB)", - .keep_mapped = 1, - }, - { - .addr = (void *) -1, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1, MAP_HUGETLB) again", - }, - { - .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), - .size = 2 * HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT), - .size = 2 * HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB)", - }, -}; - -static int run_test(struct testcase *test, int count) -{ - void *p; - int i, ret = 0; - - for (i = 0; i < count; i++) { - struct testcase *t = test + i; - - p = mmap(t->addr, t->size, PROT_READ | PROT_WRITE, t->flags, -1, 0); - - printf("%s: %p - ", t->msg, p); - - if (p == MAP_FAILED) { - printf("FAILED\n"); - ret = 1; - continue; - } - - if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) { - printf("FAILED\n"); - ret = 1; - } else { - /* - * Do a dereference of the address returned so that we catch - * bugs in page fault handling - */ - memset(p, 0, t->size); - printf("OK\n"); - } - if (!t->keep_mapped) - munmap(p, t->size); - } - - return ret; -} - -static int supported_arch(void) -{ -#if defined(__powerpc64__) - return 1; -#elif defined(__x86_64__) - return 1; -#else - return 0; -#endif -} - -int main(int argc, char **argv) -{ - int ret; - - if (!supported_arch()) - return 0; - - ret = run_test(testcases, ARRAY_SIZE(testcases)); - if (argc == 2 && !strcmp(argv[1], "--run-hugetlb")) - ret = run_test(hugetlb_testcases, ARRAY_SIZE(hugetlb_testcases)); - return ret; -} diff --git a/tools/testing/selftests/vm/virtual_address_range.c b/tools/testing/selftests/vm/virtual_address_range.c deleted file mode 100644 index c0592646ed93..000000000000 --- a/tools/testing/selftests/vm/virtual_address_range.c +++ /dev/null @@ -1,139 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright 2017, Anshuman Khandual, IBM Corp. - * - * Works on architectures which support 128TB virtual - * address range and beyond. - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <errno.h> -#include <sys/mman.h> -#include <sys/time.h> - -/* - * Maximum address range mapped with a single mmap() - * call is little bit more than 16GB. Hence 16GB is - * chosen as the single chunk size for address space - * mapping. - */ -#define MAP_CHUNK_SIZE 17179869184UL /* 16GB */ - -/* - * Address space till 128TB is mapped without any hint - * and is enabled by default. Address space beyond 128TB - * till 512TB is obtained by passing hint address as the - * first argument into mmap() system call. - * - * The process heap address space is divided into two - * different areas one below 128TB and one above 128TB - * till it reaches 512TB. One with size 128TB and the - * other being 384TB. - * - * On Arm64 the address space is 256TB and no high mappings - * are supported so far. - */ - -#define NR_CHUNKS_128TB 8192UL /* Number of 16GB chunks for 128TB */ -#define NR_CHUNKS_256TB (NR_CHUNKS_128TB * 2UL) -#define NR_CHUNKS_384TB (NR_CHUNKS_128TB * 3UL) - -#define ADDR_MARK_128TB (1UL << 47) /* First address beyond 128TB */ -#define ADDR_MARK_256TB (1UL << 48) /* First address beyond 256TB */ - -#ifdef __aarch64__ -#define HIGH_ADDR_MARK ADDR_MARK_256TB -#define HIGH_ADDR_SHIFT 49 -#define NR_CHUNKS_LOW NR_CHUNKS_256TB -#define NR_CHUNKS_HIGH 0 -#else -#define HIGH_ADDR_MARK ADDR_MARK_128TB -#define HIGH_ADDR_SHIFT 48 -#define NR_CHUNKS_LOW NR_CHUNKS_128TB -#define NR_CHUNKS_HIGH NR_CHUNKS_384TB -#endif - -static char *hind_addr(void) -{ - int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT); - - return (char *) (1UL << bits); -} - -static int validate_addr(char *ptr, int high_addr) -{ - unsigned long addr = (unsigned long) ptr; - - if (high_addr) { - if (addr < HIGH_ADDR_MARK) { - printf("Bad address %lx\n", addr); - return 1; - } - return 0; - } - - if (addr > HIGH_ADDR_MARK) { - printf("Bad address %lx\n", addr); - return 1; - } - return 0; -} - -static int validate_lower_address_hint(void) -{ - char *ptr; - - ptr = mmap((void *) (1UL << 45), MAP_CHUNK_SIZE, PROT_READ | - PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - - if (ptr == MAP_FAILED) - return 0; - - return 1; -} - -int main(int argc, char *argv[]) -{ - char *ptr[NR_CHUNKS_LOW]; - char *hptr[NR_CHUNKS_HIGH]; - char *hint; - unsigned long i, lchunks, hchunks; - - for (i = 0; i < NR_CHUNKS_LOW; i++) { - ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - - if (ptr[i] == MAP_FAILED) { - if (validate_lower_address_hint()) - return 1; - break; - } - - if (validate_addr(ptr[i], 0)) - return 1; - } - lchunks = i; - - for (i = 0; i < NR_CHUNKS_HIGH; i++) { - hint = hind_addr(); - hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - - if (hptr[i] == MAP_FAILED) - break; - - if (validate_addr(hptr[i], 1)) - return 1; - } - hchunks = i; - - for (i = 0; i < lchunks; i++) - munmap(ptr[i], MAP_CHUNK_SIZE); - - for (i = 0; i < hchunks; i++) - munmap(hptr[i], MAP_CHUNK_SIZE); - - return 0; -} |