diff options
Diffstat (limited to 'tools/testing/selftests/proc')
-rw-r--r-- | tools/testing/selftests/proc/.gitignore | 6 | ||||
-rw-r--r-- | tools/testing/selftests/proc/Makefile | 9 | ||||
-rw-r--r-- | tools/testing/selftests/proc/proc-2-is-kthread.c | 53 | ||||
-rw-r--r-- | tools/testing/selftests/proc/proc-empty-vm.c | 541 | ||||
-rw-r--r-- | tools/testing/selftests/proc/proc-loadavg-001.c | 1 | ||||
-rw-r--r-- | tools/testing/selftests/proc/proc-maps-race.c | 741 | ||||
-rw-r--r-- | tools/testing/selftests/proc/proc-pid-vm.c | 150 | ||||
-rw-r--r-- | tools/testing/selftests/proc/proc-self-isnt-kthread.c | 37 | ||||
-rw-r--r-- | tools/testing/selftests/proc/proc-self-syscall.c | 1 | ||||
-rw-r--r-- | tools/testing/selftests/proc/proc-subset-pid.c | 121 | ||||
-rw-r--r-- | tools/testing/selftests/proc/proc-tid0.c | 81 | ||||
-rw-r--r-- | tools/testing/selftests/proc/proc-uptime-001.c | 25 | ||||
-rw-r--r-- | tools/testing/selftests/proc/proc-uptime-002.c | 31 | ||||
-rw-r--r-- | tools/testing/selftests/proc/proc-uptime.h | 28 | ||||
-rw-r--r-- | tools/testing/selftests/proc/read.c | 4 |
15 files changed, 1784 insertions, 45 deletions
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index bed4b5318a86..19bb333e2485 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -2,14 +2,20 @@ /fd-001-lookup /fd-002-posix-eq /fd-003-kthread +/proc-2-is-kthread /proc-fsconfig-hidepid /proc-loadavg-001 +/proc-maps-race /proc-multiple-procfs +/proc-empty-vm /proc-pid-vm /proc-self-map-files-001 /proc-self-map-files-002 +/proc-self-isnt-kthread /proc-self-syscall /proc-self-wchan +/proc-subset-pid +/proc-tid0 /proc-uptime-001 /proc-uptime-002 /read diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 8be8a03d2973..50aba102201a 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -1,17 +1,24 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -Wall -O2 -Wno-unused-function -CFLAGS += -D_GNU_SOURCE +CFLAGS += $(TOOLS_INCLUDES) +LDFLAGS += -pthread TEST_GEN_PROGS := TEST_GEN_PROGS += fd-001-lookup TEST_GEN_PROGS += fd-002-posix-eq TEST_GEN_PROGS += fd-003-kthread +TEST_GEN_PROGS += proc-2-is-kthread TEST_GEN_PROGS += proc-loadavg-001 +TEST_GEN_PROGS += proc-maps-race +TEST_GEN_PROGS += proc-empty-vm TEST_GEN_PROGS += proc-pid-vm TEST_GEN_PROGS += proc-self-map-files-001 TEST_GEN_PROGS += proc-self-map-files-002 +TEST_GEN_PROGS += proc-self-isnt-kthread TEST_GEN_PROGS += proc-self-syscall TEST_GEN_PROGS += proc-self-wchan +TEST_GEN_PROGS += proc-subset-pid +TEST_GEN_PROGS += proc-tid0 TEST_GEN_PROGS += proc-uptime-001 TEST_GEN_PROGS += proc-uptime-002 TEST_GEN_PROGS += read diff --git a/tools/testing/selftests/proc/proc-2-is-kthread.c b/tools/testing/selftests/proc/proc-2-is-kthread.c new file mode 100644 index 000000000000..f13668fb482e --- /dev/null +++ b/tools/testing/selftests/proc/proc-2-is-kthread.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test that kernel thread is reported as such. */ +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +int main(void) +{ + /* + * The following solutions don't really work: + * + * 1) jit kernel module which creates kernel thread: + * test becomes arch-specific, + * problems with mandatory module signing, + * problems with lockdown mode, + * doesn't work with CONFIG_MODULES=n at all, + * kthread creation API is formally unstable internal kernel API, + * need a mechanism to report test kernel thread's PID back, + * + * 2) ksoftirqd/0 and kswapd0 look like stable enough kernel threads, + * but their PIDs are unstable. + * + * Check against kthreadd which always seem to exist under pid 2. + */ + int fd = open("/proc/2/status", O_RDONLY); + assert(fd >= 0); + + char buf[4096]; + ssize_t rv = read(fd, buf, sizeof(buf)); + assert(0 <= rv && rv < sizeof(buf)); + buf[rv] = '\0'; + + assert(strstr(buf, "Kthread:\t1\n")); + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c new file mode 100644 index 000000000000..b3f898aab4ab --- /dev/null +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -0,0 +1,541 @@ +#if defined __amd64__ || defined __i386__ +/* + * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Create a process without mappings by unmapping everything at once and + * holding it with ptrace(2). See what happens to + * + * /proc/${pid}/maps + * /proc/${pid}/numa_maps + * /proc/${pid}/smaps + * /proc/${pid}/smaps_rollup + */ +#undef _GNU_SOURCE +#define _GNU_SOURCE + +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/ptrace.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#ifdef __amd64__ +#define TEST_VSYSCALL +#endif + +#if defined __amd64__ + #ifndef SYS_pkey_alloc + #define SYS_pkey_alloc 330 + #endif + #ifndef SYS_pkey_free + #define SYS_pkey_free 331 + #endif +#elif defined __i386__ + #ifndef SYS_pkey_alloc + #define SYS_pkey_alloc 381 + #endif + #ifndef SYS_pkey_free + #define SYS_pkey_free 382 + #endif +#else + #error "SYS_pkey_alloc" +#endif + +static int g_protection_key_support; + +static int protection_key_support(void) +{ + long rv = syscall(SYS_pkey_alloc, 0, 0); + if (rv > 0) { + syscall(SYS_pkey_free, (int)rv); + return 1; + } else if (rv == -1 && errno == ENOSYS) { + return 0; + } else if (rv == -1 && errno == EINVAL) { + // ospke=n + return 0; + } else { + fprintf(stderr, "%s: error: rv %ld, errno %d\n", __func__, rv, errno); + exit(EXIT_FAILURE); + } +} + +/* + * 0: vsyscall VMA doesn't exist vsyscall=none + * 1: vsyscall VMA is --xp vsyscall=xonly + * 2: vsyscall VMA is r-xp vsyscall=emulate + */ +static volatile int g_vsyscall; +static const char *g_proc_pid_maps_vsyscall; +static const char *g_proc_pid_smaps_vsyscall; + +static const char proc_pid_maps_vsyscall_0[] = ""; +static const char proc_pid_maps_vsyscall_1[] = +"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; +static const char proc_pid_maps_vsyscall_2[] = +"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; + +static const char proc_pid_smaps_vsyscall_0[] = ""; + +static const char proc_pid_smaps_vsyscall_1[] = +"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n" +"Size: 4 kB\n" +"KernelPageSize: 4 kB\n" +"MMUPageSize: 4 kB\n" +"Rss: 0 kB\n" +"Pss: 0 kB\n" +"Pss_Dirty: 0 kB\n" +"Shared_Clean: 0 kB\n" +"Shared_Dirty: 0 kB\n" +"Private_Clean: 0 kB\n" +"Private_Dirty: 0 kB\n" +"Referenced: 0 kB\n" +"Anonymous: 0 kB\n" +"KSM: 0 kB\n" +"LazyFree: 0 kB\n" +"AnonHugePages: 0 kB\n" +"ShmemPmdMapped: 0 kB\n" +"FilePmdMapped: 0 kB\n" +"Shared_Hugetlb: 0 kB\n" +"Private_Hugetlb: 0 kB\n" +"Swap: 0 kB\n" +"SwapPss: 0 kB\n" +"Locked: 0 kB\n" +"THPeligible: 0\n" +; + +static const char proc_pid_smaps_vsyscall_2[] = +"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n" +"Size: 4 kB\n" +"KernelPageSize: 4 kB\n" +"MMUPageSize: 4 kB\n" +"Rss: 0 kB\n" +"Pss: 0 kB\n" +"Pss_Dirty: 0 kB\n" +"Shared_Clean: 0 kB\n" +"Shared_Dirty: 0 kB\n" +"Private_Clean: 0 kB\n" +"Private_Dirty: 0 kB\n" +"Referenced: 0 kB\n" +"Anonymous: 0 kB\n" +"KSM: 0 kB\n" +"LazyFree: 0 kB\n" +"AnonHugePages: 0 kB\n" +"ShmemPmdMapped: 0 kB\n" +"FilePmdMapped: 0 kB\n" +"Shared_Hugetlb: 0 kB\n" +"Private_Hugetlb: 0 kB\n" +"Swap: 0 kB\n" +"SwapPss: 0 kB\n" +"Locked: 0 kB\n" +"THPeligible: 0\n" +; + +static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) +{ + _exit(EXIT_FAILURE); +} + +#ifdef TEST_VSYSCALL +static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___) +{ + _exit(g_vsyscall); +} + +/* + * vsyscall page can't be unmapped, probe it directly. + */ +static void vsyscall(void) +{ + pid_t pid; + int wstatus; + + pid = fork(); + if (pid < 0) { + fprintf(stderr, "fork, errno %d\n", errno); + exit(1); + } + if (pid == 0) { + setrlimit(RLIMIT_CORE, &(struct rlimit){}); + + /* Hide "segfault at ffffffffff600000" messages. */ + struct sigaction act = {}; + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = sigaction_SIGSEGV_vsyscall; + sigaction(SIGSEGV, &act, NULL); + + g_vsyscall = 0; + /* gettimeofday(NULL, NULL); */ + uint64_t rax = 0xffffffffff600000; + asm volatile ( + "call *%[rax]" + : [rax] "+a" (rax) + : "D" (NULL), "S" (NULL) + : "rcx", "r11" + ); + + g_vsyscall = 1; + *(volatile int *)0xffffffffff600000UL; + + g_vsyscall = 2; + exit(g_vsyscall); + } + waitpid(pid, &wstatus, 0); + if (WIFEXITED(wstatus)) { + g_vsyscall = WEXITSTATUS(wstatus); + } else { + fprintf(stderr, "error: vsyscall wstatus %08x\n", wstatus); + exit(1); + } +} +#endif + +static int test_proc_pid_maps(pid_t pid) +{ + char buf[4096]; + snprintf(buf, sizeof(buf), "/proc/%u/maps", pid); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + perror("open /proc/${pid}/maps"); + return EXIT_FAILURE; + } else { + ssize_t rv = read(fd, buf, sizeof(buf)); + close(fd); + if (g_vsyscall == 0) { + assert(rv == 0); + } else { + size_t len = strlen(g_proc_pid_maps_vsyscall); + assert(rv == len); + assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0); + } + return EXIT_SUCCESS; + } +} + +static int test_proc_pid_numa_maps(pid_t pid) +{ + char buf[4096]; + snprintf(buf, sizeof(buf), "/proc/%u/numa_maps", pid); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + if (errno == ENOENT) { + /* + * /proc/${pid}/numa_maps is under CONFIG_NUMA, + * it doesn't necessarily exist. + */ + return EXIT_SUCCESS; + } + perror("open /proc/${pid}/numa_maps"); + return EXIT_FAILURE; + } else { + ssize_t rv = read(fd, buf, sizeof(buf)); + close(fd); + assert(rv == 0); + return EXIT_SUCCESS; + } +} + +static int test_proc_pid_smaps(pid_t pid) +{ + char buf[4096]; + snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + if (errno == ENOENT) { + /* + * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR, + * it doesn't necessarily exist. + */ + return EXIT_SUCCESS; + } + perror("open /proc/${pid}/smaps"); + return EXIT_FAILURE; + } + ssize_t rv = read(fd, buf, sizeof(buf)); + close(fd); + + assert(0 <= rv); + assert(rv <= sizeof(buf)); + + if (g_vsyscall == 0) { + assert(rv == 0); + } else { + size_t len = strlen(g_proc_pid_smaps_vsyscall); + assert(rv > len); + assert(memcmp(buf, g_proc_pid_smaps_vsyscall, len) == 0); + + if (g_protection_key_support) { +#define PROTECTION_KEY "ProtectionKey: 0\n" + assert(memmem(buf, rv, PROTECTION_KEY, strlen(PROTECTION_KEY))); + } + } + + return EXIT_SUCCESS; +} + +static const char g_smaps_rollup[] = +"00000000-00000000 ---p 00000000 00:00 0 [rollup]\n" +"Rss: 0 kB\n" +"Pss: 0 kB\n" +"Pss_Dirty: 0 kB\n" +"Pss_Anon: 0 kB\n" +"Pss_File: 0 kB\n" +"Pss_Shmem: 0 kB\n" +"Shared_Clean: 0 kB\n" +"Shared_Dirty: 0 kB\n" +"Private_Clean: 0 kB\n" +"Private_Dirty: 0 kB\n" +"Referenced: 0 kB\n" +"Anonymous: 0 kB\n" +"KSM: 0 kB\n" +"LazyFree: 0 kB\n" +"AnonHugePages: 0 kB\n" +"ShmemPmdMapped: 0 kB\n" +"FilePmdMapped: 0 kB\n" +"Shared_Hugetlb: 0 kB\n" +"Private_Hugetlb: 0 kB\n" +"Swap: 0 kB\n" +"SwapPss: 0 kB\n" +"Locked: 0 kB\n" +; + +static int test_proc_pid_smaps_rollup(pid_t pid) +{ + char buf[4096]; + snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + if (errno == ENOENT) { + /* + * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR, + * it doesn't necessarily exist. + */ + return EXIT_SUCCESS; + } + perror("open /proc/${pid}/smaps_rollup"); + return EXIT_FAILURE; + } else { + ssize_t rv = read(fd, buf, sizeof(buf)); + close(fd); + assert(rv == sizeof(g_smaps_rollup) - 1); + assert(memcmp(buf, g_smaps_rollup, sizeof(g_smaps_rollup) - 1) == 0); + return EXIT_SUCCESS; + } +} + +static const char *parse_u64(const char *p, const char *const end, uint64_t *rv) +{ + *rv = 0; + for (; p != end; p += 1) { + if ('0' <= *p && *p <= '9') { + assert(!__builtin_mul_overflow(*rv, 10, rv)); + assert(!__builtin_add_overflow(*rv, *p - '0', rv)); + } else { + break; + } + } + assert(p != end); + return p; +} + +/* + * There seems to be 2 types of valid output: + * "0 A A B 0 0 0\n" for dynamic exeuctables, + * "0 0 0 B 0 0 0\n" for static executables. + */ +static int test_proc_pid_statm(pid_t pid) +{ + char buf[4096]; + snprintf(buf, sizeof(buf), "/proc/%u/statm", pid); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + perror("open /proc/${pid}/statm"); + return EXIT_FAILURE; + } + + ssize_t rv = read(fd, buf, sizeof(buf)); + close(fd); + + assert(rv >= 0); + assert(rv <= sizeof(buf)); + + const char *p = buf; + const char *const end = p + rv; + + /* size */ + assert(p != end && *p++ == '0'); + assert(p != end && *p++ == ' '); + + uint64_t resident; + p = parse_u64(p, end, &resident); + assert(p != end && *p++ == ' '); + + uint64_t shared; + p = parse_u64(p, end, &shared); + assert(p != end && *p++ == ' '); + + uint64_t text; + p = parse_u64(p, end, &text); + assert(p != end && *p++ == ' '); + + assert(p != end && *p++ == '0'); + assert(p != end && *p++ == ' '); + + /* data */ + assert(p != end && *p++ == '0'); + assert(p != end && *p++ == ' '); + + assert(p != end && *p++ == '0'); + assert(p != end && *p++ == '\n'); + + assert(p == end); + + /* + * "text" is "mm->end_code - mm->start_code" at execve(2) time. + * munmap() doesn't change it. It can be anything (just link + * statically). It can't be 0 because executing to this point + * implies at least 1 page of code. + */ + assert(text > 0); + + /* + * These two are always equal. Always 0 for statically linked + * executables and sometimes 0 for dynamically linked executables. + * There is no way to tell one from another without parsing ELF + * which is too much for this test. + */ + assert(resident == shared); + + return EXIT_SUCCESS; +} + +int main(void) +{ + int rv = EXIT_SUCCESS; + +#ifdef TEST_VSYSCALL + vsyscall(); +#endif + + switch (g_vsyscall) { + case 0: + g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_0; + g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0; + break; + case 1: + g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_1; + g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1; + break; + case 2: + g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_2; + g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2; + break; + default: + abort(); + } + + g_protection_key_support = protection_key_support(); + + pid_t pid = fork(); + if (pid == -1) { + perror("fork"); + return EXIT_FAILURE; + } else if (pid == 0) { + rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL); + if (rv != 0) { + if (errno == EPERM) { + fprintf(stderr, +"Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n" + ); + kill(getppid(), SIGTERM); + return EXIT_FAILURE; + } + perror("ptrace PTRACE_TRACEME"); + return EXIT_FAILURE; + } + + /* + * Hide "segfault at ..." messages. Signal handler won't run. + */ + struct sigaction act = {}; + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = sigaction_SIGSEGV; + sigaction(SIGSEGV, &act, NULL); + +#ifdef __amd64__ + munmap(NULL, ((size_t)1 << 47) - 4096); +#elif defined __i386__ + { + size_t len; + + for (len = -4096;; len -= 4096) { + munmap(NULL, len); + } + } +#else +#error "implement 'unmap everything'" +#endif + return EXIT_FAILURE; + } else { + /* + * TODO find reliable way to signal parent that munmap(2) completed. + * Child can't do it directly because it effectively doesn't exist + * anymore. Looking at child's VM files isn't 100% reliable either: + * due to a bug they may not become empty or empty-like. + */ + sleep(1); + + if (rv == EXIT_SUCCESS) { + rv = test_proc_pid_maps(pid); + } + if (rv == EXIT_SUCCESS) { + rv = test_proc_pid_numa_maps(pid); + } + if (rv == EXIT_SUCCESS) { + rv = test_proc_pid_smaps(pid); + } + if (rv == EXIT_SUCCESS) { + rv = test_proc_pid_smaps_rollup(pid); + } + if (rv == EXIT_SUCCESS) { + rv = test_proc_pid_statm(pid); + } + + /* Cut the rope. */ + int wstatus; + waitpid(pid, &wstatus, 0); + assert(WIFSTOPPED(wstatus)); + assert(WSTOPSIG(wstatus) == SIGSEGV); + } + + return rv; +} +#else +int main(void) +{ + return 4; +} +#endif diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c index 471e2aa28077..fb4fe9188806 100644 --- a/tools/testing/selftests/proc/proc-loadavg-001.c +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -14,7 +14,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* Test that /proc/loadavg correctly reports last pid in pid namespace. */ -#define _GNU_SOURCE #include <errno.h> #include <sched.h> #include <sys/types.h> diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c new file mode 100644 index 000000000000..94bba4553130 --- /dev/null +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -0,0 +1,741 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022 Google LLC. + * Author: Suren Baghdasaryan <surenb@google.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Fork a child that concurrently modifies address space while the main + * process is reading /proc/$PID/maps and verifying the results. Address + * space modifications include: + * VMA splitting and merging + * + */ +#define _GNU_SOURCE +#include "../kselftest_harness.h" +#include <errno.h> +#include <fcntl.h> +#include <pthread.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> + +/* /proc/pid/maps parsing routines */ +struct page_content { + char *data; + ssize_t size; +}; + +#define LINE_MAX_SIZE 256 + +struct line_content { + char text[LINE_MAX_SIZE]; + unsigned long start_addr; + unsigned long end_addr; +}; + +enum test_state { + INIT, + CHILD_READY, + PARENT_READY, + SETUP_READY, + SETUP_MODIFY_MAPS, + SETUP_MAPS_MODIFIED, + SETUP_RESTORE_MAPS, + SETUP_MAPS_RESTORED, + TEST_READY, + TEST_DONE, +}; + +struct vma_modifier_info; + +FIXTURE(proc_maps_race) +{ + struct vma_modifier_info *mod_info; + struct page_content page1; + struct page_content page2; + struct line_content last_line; + struct line_content first_line; + unsigned long duration_sec; + int shared_mem_size; + int page_size; + int vma_count; + bool verbose; + int maps_fd; + pid_t pid; +}; + +typedef bool (*vma_modifier_op)(FIXTURE_DATA(proc_maps_race) *self); +typedef bool (*vma_mod_result_check_op)(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line); + +struct vma_modifier_info { + int vma_count; + void *addr; + int prot; + void *next_addr; + vma_modifier_op vma_modify; + vma_modifier_op vma_restore; + vma_mod_result_check_op vma_mod_check; + pthread_mutex_t sync_lock; + pthread_cond_t sync_cond; + enum test_state curr_state; + bool exit; + void *child_mapped_addr[]; +}; + + +static bool read_two_pages(FIXTURE_DATA(proc_maps_race) *self) +{ + ssize_t bytes_read; + + if (lseek(self->maps_fd, 0, SEEK_SET) < 0) + return false; + + bytes_read = read(self->maps_fd, self->page1.data, self->page_size); + if (bytes_read <= 0) + return false; + + self->page1.size = bytes_read; + + bytes_read = read(self->maps_fd, self->page2.data, self->page_size); + if (bytes_read <= 0) + return false; + + self->page2.size = bytes_read; + + return true; +} + +static void copy_first_line(struct page_content *page, char *first_line) +{ + char *pos = strchr(page->data, '\n'); + + strncpy(first_line, page->data, pos - page->data); + first_line[pos - page->data] = '\0'; +} + +static void copy_last_line(struct page_content *page, char *last_line) +{ + /* Get the last line in the first page */ + const char *end = page->data + page->size - 1; + /* skip last newline */ + const char *pos = end - 1; + + /* search previous newline */ + while (pos[-1] != '\n') + pos--; + strncpy(last_line, pos, end - pos); + last_line[end - pos] = '\0'; +} + +/* Read the last line of the first page and the first line of the second page */ +static bool read_boundary_lines(FIXTURE_DATA(proc_maps_race) *self, + struct line_content *last_line, + struct line_content *first_line) +{ + if (!read_two_pages(self)) + return false; + + copy_last_line(&self->page1, last_line->text); + copy_first_line(&self->page2, first_line->text); + + return sscanf(last_line->text, "%lx-%lx", &last_line->start_addr, + &last_line->end_addr) == 2 && + sscanf(first_line->text, "%lx-%lx", &first_line->start_addr, + &first_line->end_addr) == 2; +} + +/* Thread synchronization routines */ +static void wait_for_state(struct vma_modifier_info *mod_info, enum test_state state) +{ + pthread_mutex_lock(&mod_info->sync_lock); + while (mod_info->curr_state != state) + pthread_cond_wait(&mod_info->sync_cond, &mod_info->sync_lock); + pthread_mutex_unlock(&mod_info->sync_lock); +} + +static void signal_state(struct vma_modifier_info *mod_info, enum test_state state) +{ + pthread_mutex_lock(&mod_info->sync_lock); + mod_info->curr_state = state; + pthread_cond_signal(&mod_info->sync_cond); + pthread_mutex_unlock(&mod_info->sync_lock); +} + +static void stop_vma_modifier(struct vma_modifier_info *mod_info) +{ + wait_for_state(mod_info, SETUP_READY); + mod_info->exit = true; + signal_state(mod_info, SETUP_MODIFY_MAPS); +} + +static void print_first_lines(char *text, int nr) +{ + const char *end = text; + + while (nr && (end = strchr(end, '\n')) != NULL) { + nr--; + end++; + } + + if (end) { + int offs = end - text; + + text[offs] = '\0'; + printf("%s", text); + text[offs] = '\n'; + printf("\n"); + } else { + printf("%s", text); + } +} + +static void print_last_lines(char *text, int nr) +{ + const char *start = text + strlen(text); + + nr++; /* to ignore the last newline */ + while (nr) { + while (start > text && *start != '\n') + start--; + nr--; + start--; + } + printf("%s", start); +} + +static void print_boundaries(const char *title, FIXTURE_DATA(proc_maps_race) *self) +{ + if (!self->verbose) + return; + + printf("%s", title); + /* Print 3 boundary lines from each page */ + print_last_lines(self->page1.data, 3); + printf("-----------------page boundary-----------------\n"); + print_first_lines(self->page2.data, 3); +} + +static bool print_boundaries_on(bool condition, const char *title, + FIXTURE_DATA(proc_maps_race) *self) +{ + if (self->verbose && condition) + print_boundaries(title, self); + + return condition; +} + +static void report_test_start(const char *name, bool verbose) +{ + if (verbose) + printf("==== %s ====\n", name); +} + +static struct timespec print_ts; + +static void start_test_loop(struct timespec *ts, bool verbose) +{ + if (verbose) + print_ts.tv_sec = ts->tv_sec; +} + +static void end_test_iteration(struct timespec *ts, bool verbose) +{ + if (!verbose) + return; + + /* Update every second */ + if (print_ts.tv_sec == ts->tv_sec) + return; + + printf("."); + fflush(stdout); + print_ts.tv_sec = ts->tv_sec; +} + +static void end_test_loop(bool verbose) +{ + if (verbose) + printf("\n"); +} + +static bool capture_mod_pattern(FIXTURE_DATA(proc_maps_race) *self, + struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + print_boundaries("Before modification", self); + + signal_state(self->mod_info, SETUP_MODIFY_MAPS); + wait_for_state(self->mod_info, SETUP_MAPS_MODIFIED); + + /* Copy last line of the first page and first line of the last page */ + if (!read_boundary_lines(self, mod_last_line, mod_first_line)) + return false; + + print_boundaries("After modification", self); + + signal_state(self->mod_info, SETUP_RESTORE_MAPS); + wait_for_state(self->mod_info, SETUP_MAPS_RESTORED); + + /* Copy last line of the first page and first line of the last page */ + if (!read_boundary_lines(self, restored_last_line, restored_first_line)) + return false; + + print_boundaries("After restore", self); + + if (!self->mod_info->vma_mod_check(mod_last_line, mod_first_line, + restored_last_line, restored_first_line)) + return false; + + /* + * The content of these lines after modify+resore should be the same + * as the original. + */ + return strcmp(restored_last_line->text, self->last_line.text) == 0 && + strcmp(restored_first_line->text, self->first_line.text) == 0; +} + +static inline bool split_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mmap(self->mod_info->addr, self->page_size, self->mod_info->prot | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) != MAP_FAILED; +} + +static inline bool merge_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mmap(self->mod_info->addr, self->page_size, self->mod_info->prot, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) != MAP_FAILED; +} + +static inline bool check_split_result(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + /* Make sure vmas at the boundaries are changing */ + return strcmp(mod_last_line->text, restored_last_line->text) != 0 && + strcmp(mod_first_line->text, restored_first_line->text) != 0; +} + +static inline bool shrink_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mremap(self->mod_info->addr, self->page_size * 3, + self->page_size, 0) != MAP_FAILED; +} + +static inline bool expand_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mremap(self->mod_info->addr, self->page_size, + self->page_size * 3, 0) != MAP_FAILED; +} + +static inline bool check_shrink_result(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + /* Make sure only the last vma of the first page is changing */ + return strcmp(mod_last_line->text, restored_last_line->text) != 0 && + strcmp(mod_first_line->text, restored_first_line->text) == 0; +} + +static inline bool remap_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + /* + * Remap the last page of the next vma into the middle of the vma. + * This splits the current vma and the first and middle parts (the + * parts at lower addresses) become the last vma objserved in the + * first page and the first vma observed in the last page. + */ + return mremap(self->mod_info->next_addr + self->page_size * 2, self->page_size, + self->page_size, MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP, + self->mod_info->addr + self->page_size) != MAP_FAILED; +} + +static inline bool patch_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mprotect(self->mod_info->addr + self->page_size, self->page_size, + self->mod_info->prot) == 0; +} + +static inline bool check_remap_result(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + /* Make sure vmas at the boundaries are changing */ + return strcmp(mod_last_line->text, restored_last_line->text) != 0 && + strcmp(mod_first_line->text, restored_first_line->text) != 0; +} + +FIXTURE_SETUP(proc_maps_race) +{ + const char *verbose = getenv("VERBOSE"); + const char *duration = getenv("DURATION"); + struct vma_modifier_info *mod_info; + pthread_mutexattr_t mutex_attr; + pthread_condattr_t cond_attr; + unsigned long duration_sec; + char fname[32]; + + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); + self->verbose = verbose && !strncmp(verbose, "1", 1); + duration_sec = duration ? atol(duration) : 0; + self->duration_sec = duration_sec ? duration_sec : 5UL; + + /* + * Have to map enough vmas for /proc/pid/maps to contain more than one + * page worth of vmas. Assume at least 32 bytes per line in maps output + */ + self->vma_count = self->page_size / 32 + 1; + self->shared_mem_size = sizeof(struct vma_modifier_info) + self->vma_count * sizeof(void *); + + /* map shared memory for communication with the child process */ + self->mod_info = (struct vma_modifier_info *)mmap(NULL, self->shared_mem_size, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(self->mod_info, MAP_FAILED); + mod_info = self->mod_info; + + /* Initialize shared members */ + pthread_mutexattr_init(&mutex_attr); + pthread_mutexattr_setpshared(&mutex_attr, PTHREAD_PROCESS_SHARED); + ASSERT_EQ(pthread_mutex_init(&mod_info->sync_lock, &mutex_attr), 0); + pthread_condattr_init(&cond_attr); + pthread_condattr_setpshared(&cond_attr, PTHREAD_PROCESS_SHARED); + ASSERT_EQ(pthread_cond_init(&mod_info->sync_cond, &cond_attr), 0); + mod_info->vma_count = self->vma_count; + mod_info->curr_state = INIT; + mod_info->exit = false; + + self->pid = fork(); + if (!self->pid) { + /* Child process modifying the address space */ + int prot = PROT_READ | PROT_WRITE; + int i; + + for (i = 0; i < mod_info->vma_count; i++) { + mod_info->child_mapped_addr[i] = mmap(NULL, self->page_size * 3, prot, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(mod_info->child_mapped_addr[i], MAP_FAILED); + /* change protection in adjacent maps to prevent merging */ + prot ^= PROT_WRITE; + } + signal_state(mod_info, CHILD_READY); + wait_for_state(mod_info, PARENT_READY); + while (true) { + signal_state(mod_info, SETUP_READY); + wait_for_state(mod_info, SETUP_MODIFY_MAPS); + if (mod_info->exit) + break; + + ASSERT_TRUE(mod_info->vma_modify(self)); + signal_state(mod_info, SETUP_MAPS_MODIFIED); + wait_for_state(mod_info, SETUP_RESTORE_MAPS); + ASSERT_TRUE(mod_info->vma_restore(self)); + signal_state(mod_info, SETUP_MAPS_RESTORED); + + wait_for_state(mod_info, TEST_READY); + while (mod_info->curr_state != TEST_DONE) { + ASSERT_TRUE(mod_info->vma_modify(self)); + ASSERT_TRUE(mod_info->vma_restore(self)); + } + } + for (i = 0; i < mod_info->vma_count; i++) + munmap(mod_info->child_mapped_addr[i], self->page_size * 3); + + exit(0); + } + + sprintf(fname, "/proc/%d/maps", self->pid); + self->maps_fd = open(fname, O_RDONLY); + ASSERT_NE(self->maps_fd, -1); + + /* Wait for the child to map the VMAs */ + wait_for_state(mod_info, CHILD_READY); + + /* Read first two pages */ + self->page1.data = malloc(self->page_size); + ASSERT_NE(self->page1.data, NULL); + self->page2.data = malloc(self->page_size); + ASSERT_NE(self->page2.data, NULL); + + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + /* + * Find the addresses corresponding to the last line in the first page + * and the first line in the last page. + */ + mod_info->addr = NULL; + mod_info->next_addr = NULL; + for (int i = 0; i < mod_info->vma_count; i++) { + if (mod_info->child_mapped_addr[i] == (void *)self->last_line.start_addr) { + mod_info->addr = mod_info->child_mapped_addr[i]; + mod_info->prot = PROT_READ; + /* Even VMAs have write permission */ + if ((i % 2) == 0) + mod_info->prot |= PROT_WRITE; + } else if (mod_info->child_mapped_addr[i] == (void *)self->first_line.start_addr) { + mod_info->next_addr = mod_info->child_mapped_addr[i]; + } + + if (mod_info->addr && mod_info->next_addr) + break; + } + ASSERT_TRUE(mod_info->addr && mod_info->next_addr); + + signal_state(mod_info, PARENT_READY); + +} + +FIXTURE_TEARDOWN(proc_maps_race) +{ + int status; + + stop_vma_modifier(self->mod_info); + + free(self->page2.data); + free(self->page1.data); + + for (int i = 0; i < self->vma_count; i++) + munmap(self->mod_info->child_mapped_addr[i], self->page_size); + close(self->maps_fd); + waitpid(self->pid, &status, 0); + munmap(self->mod_info, self->shared_mem_size); +} + +TEST_F(proc_maps_race, test_maps_tearing_from_split) +{ + struct vma_modifier_info *mod_info = self->mod_info; + + struct line_content split_last_line; + struct line_content split_first_line; + struct line_content restored_last_line; + struct line_content restored_first_line; + + wait_for_state(mod_info, SETUP_READY); + + /* re-read the file to avoid using stale data from previous test */ + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + mod_info->vma_modify = split_vma; + mod_info->vma_restore = merge_vma; + mod_info->vma_mod_check = check_split_result; + + report_test_start("Tearing from split", self->verbose); + ASSERT_TRUE(capture_mod_pattern(self, &split_last_line, &split_first_line, + &restored_last_line, &restored_first_line)); + + /* Now start concurrent modifications for self->duration_sec */ + signal_state(mod_info, TEST_READY); + + struct line_content new_last_line; + struct line_content new_first_line; + struct timespec start_ts, end_ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + start_test_loop(&start_ts, self->verbose); + do { + bool last_line_changed; + bool first_line_changed; + + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); + + /* Check if we read vmas after split */ + if (!strcmp(new_last_line.text, split_last_line.text)) { + /* + * The vmas should be consistent with split results, + * however if vma was concurrently restored after a + * split, it can be reported twice (first the original + * split one, then the same vma but extended after the + * merge) because we found it as the next vma again. + * In that case new first line will be the same as the + * last restored line. + */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, split_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text), + "Split result invalid", self)); + } else { + /* The vmas should be consistent with merge results */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_last_line.text, restored_last_line.text), + "Merge result invalid", self)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, restored_first_line.text), + "Merge result invalid", self)); + } + /* + * First and last lines should change in unison. If the last + * line changed then the first line should change as well and + * vice versa. + */ + last_line_changed = strcmp(new_last_line.text, self->last_line.text) != 0; + first_line_changed = strcmp(new_first_line.text, self->first_line.text) != 0; + ASSERT_EQ(last_line_changed, first_line_changed); + + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + end_test_iteration(&end_ts, self->verbose); + } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + end_test_loop(self->verbose); + + /* Signal the modifyer thread to stop and wait until it exits */ + signal_state(mod_info, TEST_DONE); +} + +TEST_F(proc_maps_race, test_maps_tearing_from_resize) +{ + struct vma_modifier_info *mod_info = self->mod_info; + + struct line_content shrunk_last_line; + struct line_content shrunk_first_line; + struct line_content restored_last_line; + struct line_content restored_first_line; + + wait_for_state(mod_info, SETUP_READY); + + /* re-read the file to avoid using stale data from previous test */ + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + mod_info->vma_modify = shrink_vma; + mod_info->vma_restore = expand_vma; + mod_info->vma_mod_check = check_shrink_result; + + report_test_start("Tearing from resize", self->verbose); + ASSERT_TRUE(capture_mod_pattern(self, &shrunk_last_line, &shrunk_first_line, + &restored_last_line, &restored_first_line)); + + /* Now start concurrent modifications for self->duration_sec */ + signal_state(mod_info, TEST_READY); + + struct line_content new_last_line; + struct line_content new_first_line; + struct timespec start_ts, end_ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + start_test_loop(&start_ts, self->verbose); + do { + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); + + /* Check if we read vmas after shrinking it */ + if (!strcmp(new_last_line.text, shrunk_last_line.text)) { + /* + * The vmas should be consistent with shrunk results, + * however if the vma was concurrently restored, it + * can be reported twice (first as shrunk one, then + * as restored one) because we found it as the next vma + * again. In that case new first line will be the same + * as the last restored line. + */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, shrunk_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text), + "Shrink result invalid", self)); + } else { + /* The vmas should be consistent with the original/resored state */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_last_line.text, restored_last_line.text), + "Expand result invalid", self)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, restored_first_line.text), + "Expand result invalid", self)); + } + + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + end_test_iteration(&end_ts, self->verbose); + } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + end_test_loop(self->verbose); + + /* Signal the modifyer thread to stop and wait until it exits */ + signal_state(mod_info, TEST_DONE); +} + +TEST_F(proc_maps_race, test_maps_tearing_from_remap) +{ + struct vma_modifier_info *mod_info = self->mod_info; + + struct line_content remapped_last_line; + struct line_content remapped_first_line; + struct line_content restored_last_line; + struct line_content restored_first_line; + + wait_for_state(mod_info, SETUP_READY); + + /* re-read the file to avoid using stale data from previous test */ + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + mod_info->vma_modify = remap_vma; + mod_info->vma_restore = patch_vma; + mod_info->vma_mod_check = check_remap_result; + + report_test_start("Tearing from remap", self->verbose); + ASSERT_TRUE(capture_mod_pattern(self, &remapped_last_line, &remapped_first_line, + &restored_last_line, &restored_first_line)); + + /* Now start concurrent modifications for self->duration_sec */ + signal_state(mod_info, TEST_READY); + + struct line_content new_last_line; + struct line_content new_first_line; + struct timespec start_ts, end_ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + start_test_loop(&start_ts, self->verbose); + do { + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); + + /* Check if we read vmas after remapping it */ + if (!strcmp(new_last_line.text, remapped_last_line.text)) { + /* + * The vmas should be consistent with remap results, + * however if the vma was concurrently restored, it + * can be reported twice (first as split one, then + * as restored one) because we found it as the next vma + * again. In that case new first line will be the same + * as the last restored line. + */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, remapped_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text), + "Remap result invalid", self)); + } else { + /* The vmas should be consistent with the original/resored state */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_last_line.text, restored_last_line.text), + "Remap restore result invalid", self)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, restored_first_line.text), + "Remap restore result invalid", self)); + } + + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + end_test_iteration(&end_ts, self->verbose); + } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + end_test_loop(self->verbose); + + /* Signal the modifyer thread to stop and wait until it exits */ + signal_state(mod_info, TEST_DONE); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 18a3bde8bc96..d04685771952 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -45,6 +45,9 @@ #include <linux/kdev_t.h> #include <sys/time.h> #include <sys/resource.h> +#include <linux/fs.h> + +#include "../kselftest.h" static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags) { @@ -209,19 +212,28 @@ static int make_exe(const uint8_t *payload, size_t len) } #endif -static bool g_vsyscall = false; +/* + * 0: vsyscall VMA doesn't exist vsyscall=none + * 1: vsyscall VMA is --xp vsyscall=xonly + * 2: vsyscall VMA is r-xp vsyscall=emulate + */ +static volatile int g_vsyscall; +static const char *str_vsyscall; -static const char str_vsyscall[] = +static const char str_vsyscall_0[] = ""; +static const char str_vsyscall_1[] = +"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; +static const char str_vsyscall_2[] = "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; #ifdef __x86_64__ static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) { - _exit(1); + _exit(g_vsyscall); } /* - * vsyscall page can't be unmapped, probe it with memory load. + * vsyscall page can't be unmapped, probe it directly. */ static void vsyscall(void) { @@ -244,12 +256,28 @@ static void vsyscall(void) act.sa_sigaction = sigaction_SIGSEGV; (void)sigaction(SIGSEGV, &act, NULL); + g_vsyscall = 0; + /* gettimeofday(NULL, NULL); */ + uint64_t rax = 0xffffffffff600000; + asm volatile ( + "call *%[rax]" + : [rax] "+a" (rax) + : "D" (NULL), "S" (NULL) + : "rcx", "r11" + ); + + g_vsyscall = 1; *(volatile int *)0xffffffffff600000UL; - exit(0); + + g_vsyscall = 2; + exit(g_vsyscall); } waitpid(pid, &wstatus, 0); - if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) { - g_vsyscall = true; + if (WIFEXITED(wstatus)) { + g_vsyscall = WEXITSTATUS(wstatus); + } else { + fprintf(stderr, "error: wstatus %08x\n", wstatus); + exit(1); } } @@ -259,6 +287,19 @@ int main(void) int exec_fd; vsyscall(); + switch (g_vsyscall) { + case 0: + str_vsyscall = str_vsyscall_0; + break; + case 1: + str_vsyscall = str_vsyscall_1; + break; + case 2: + str_vsyscall = str_vsyscall_2; + break; + default: + abort(); + } atexit(ate); @@ -312,7 +353,7 @@ int main(void) /* Test /proc/$PID/maps */ { - const size_t len = strlen(buf0) + (g_vsyscall ? strlen(str_vsyscall) : 0); + const size_t len = strlen(buf0) + strlen(str_vsyscall); char buf[256]; ssize_t rv; int fd; @@ -325,7 +366,7 @@ int main(void) rv = read(fd, buf, sizeof(buf)); assert(rv == len); assert(memcmp(buf, buf0, strlen(buf0)) == 0); - if (g_vsyscall) { + if (g_vsyscall > 0) { assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0); } } @@ -368,11 +409,11 @@ int main(void) }; int i; - for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) { + for (i = 0; i < ARRAY_SIZE(S); i++) { assert(memmem(buf, rv, S[i], strlen(S[i]))); } - if (g_vsyscall) { + if (g_vsyscall > 0) { assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall))); } } @@ -417,7 +458,7 @@ int main(void) }; int i; - for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) { + for (i = 0; i < ARRAY_SIZE(S); i++) { assert(memmem(buf, rv, S[i], strlen(S[i]))); } } @@ -452,6 +493,91 @@ int main(void) assert(buf[13] == '\n'); } + /* Test PROCMAP_QUERY ioctl() for /proc/$PID/maps */ + { + char path_buf[256], exp_path_buf[256]; + struct procmap_query q; + int fd, err; + + snprintf(path_buf, sizeof(path_buf), "/proc/%u/maps", pid); + fd = open(path_buf, O_RDONLY); + if (fd == -1) + return 1; + + /* CASE 1: exact MATCH at VADDR */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR; + q.query_flags = 0; + q.vma_name_addr = (__u64)(unsigned long)path_buf; + q.vma_name_size = sizeof(path_buf); + + err = ioctl(fd, PROCMAP_QUERY, &q); + assert(err == 0); + + assert(q.query_addr == VADDR); + assert(q.query_flags == 0); + + assert(q.vma_flags == (PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_EXECUTABLE)); + assert(q.vma_start == VADDR); + assert(q.vma_end == VADDR + PAGE_SIZE); + assert(q.vma_page_size == PAGE_SIZE); + + assert(q.vma_offset == 0); + assert(q.inode == st.st_ino); + assert(q.dev_major == MAJOR(st.st_dev)); + assert(q.dev_minor == MINOR(st.st_dev)); + + snprintf(exp_path_buf, sizeof(exp_path_buf), + "/tmp/#%llu (deleted)", (unsigned long long)st.st_ino); + assert(q.vma_name_size == strlen(exp_path_buf) + 1); + assert(strcmp(path_buf, exp_path_buf) == 0); + + /* CASE 2: NO MATCH at VADDR-1 */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR - 1; + q.query_flags = 0; /* exact match */ + + err = ioctl(fd, PROCMAP_QUERY, &q); + err = err < 0 ? -errno : 0; + assert(err == -ENOENT); + + /* CASE 3: MATCH COVERING_OR_NEXT_VMA at VADDR - 1 */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR - 1; + q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA; + + err = ioctl(fd, PROCMAP_QUERY, &q); + assert(err == 0); + + assert(q.query_addr == VADDR - 1); + assert(q.query_flags == PROCMAP_QUERY_COVERING_OR_NEXT_VMA); + assert(q.vma_start == VADDR); + assert(q.vma_end == VADDR + PAGE_SIZE); + + /* CASE 4: NO MATCH at VADDR + PAGE_SIZE */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR + PAGE_SIZE; /* point right after the VMA */ + q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA; + + err = ioctl(fd, PROCMAP_QUERY, &q); + err = err < 0 ? -errno : 0; + assert(err == -ENOENT); + + /* CASE 5: NO MATCH WRITABLE at VADDR */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR; + q.query_flags = PROCMAP_QUERY_VMA_WRITABLE; + + err = ioctl(fd, PROCMAP_QUERY, &q); + err = err < 0 ? -errno : 0; + assert(err == -ENOENT); + } + return 0; } #else diff --git a/tools/testing/selftests/proc/proc-self-isnt-kthread.c b/tools/testing/selftests/proc/proc-self-isnt-kthread.c new file mode 100644 index 000000000000..e01f4e0a91b4 --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-isnt-kthread.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test that userspace program is not kernel thread. */ +#undef NDEBUG +#include <assert.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +int main(void) +{ + int fd = open("/proc/self/status", O_RDONLY); + assert(fd >= 0); + + char buf[4096]; + ssize_t rv = read(fd, buf, sizeof(buf)); + assert(0 <= rv && rv < sizeof(buf)); + buf[rv] = '\0'; + + /* This test is very much not kernel thread. */ + assert(strstr(buf, "Kthread:\t0\n")); + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c index 9f6d000c0245..8511dcfe67c7 100644 --- a/tools/testing/selftests/proc/proc-self-syscall.c +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -13,7 +13,6 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define _GNU_SOURCE #include <unistd.h> #include <sys/syscall.h> #include <sys/types.h> diff --git a/tools/testing/selftests/proc/proc-subset-pid.c b/tools/testing/selftests/proc/proc-subset-pid.c new file mode 100644 index 000000000000..d1052bcab039 --- /dev/null +++ b/tools/testing/selftests/proc/proc-subset-pid.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2021 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Test that "mount -t proc -o subset=pid" hides everything but pids, + * /proc/self and /proc/thread-self. + */ +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <sched.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mount.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <dirent.h> +#include <unistd.h> +#include <stdio.h> + +static inline bool streq(const char *a, const char *b) +{ + return strcmp(a, b) == 0; +} + +static void make_private_proc(void) +{ + if (unshare(CLONE_NEWNS) == -1) { + if (errno == ENOSYS || errno == EPERM) { + exit(4); + } + exit(1); + } + if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) { + exit(1); + } + if (mount(NULL, "/proc", "proc", 0, "subset=pid") == -1) { + exit(1); + } +} + +static bool string_is_pid(const char *s) +{ + while (1) { + switch (*s++) { + case '0':case '1':case '2':case '3':case '4': + case '5':case '6':case '7':case '8':case '9': + continue; + + case '\0': + return true; + + default: + return false; + } + } +} + +int main(void) +{ + make_private_proc(); + + DIR *d = opendir("/proc"); + assert(d); + + struct dirent *de; + + bool dot = false; + bool dot_dot = false; + bool self = false; + bool thread_self = false; + + while ((de = readdir(d))) { + if (streq(de->d_name, ".")) { + assert(!dot); + dot = true; + assert(de->d_type == DT_DIR); + } else if (streq(de->d_name, "..")) { + assert(!dot_dot); + dot_dot = true; + assert(de->d_type == DT_DIR); + } else if (streq(de->d_name, "self")) { + assert(!self); + self = true; + assert(de->d_type == DT_LNK); + } else if (streq(de->d_name, "thread-self")) { + assert(!thread_self); + thread_self = true; + assert(de->d_type == DT_LNK); + } else { + if (!string_is_pid(de->d_name)) { + fprintf(stderr, "d_name '%s'\n", de->d_name); + assert(0); + } + assert(de->d_type == DT_DIR); + } + } + + char c; + int rv = readlink("/proc/cpuinfo", &c, 1); + assert(rv == -1 && errno == ENOENT); + + int fd = open("/proc/cpuinfo", O_RDONLY); + assert(fd == -1 && errno == ENOENT); + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-tid0.c b/tools/testing/selftests/proc/proc-tid0.c new file mode 100644 index 000000000000..58c1d7c90a8e --- /dev/null +++ b/tools/testing/selftests/proc/proc-tid0.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2021 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test that /proc/*/task never contains "0". +#include <sys/types.h> +#include <dirent.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <pthread.h> + +static pid_t pid = -1; + +static void atexit_hook(void) +{ + if (pid > 0) { + kill(pid, SIGKILL); + } +} + +static void *f(void *_) +{ + return NULL; +} + +static void sigalrm(int _) +{ + exit(0); +} + +int main(void) +{ + pid = fork(); + if (pid == 0) { + /* child */ + while (1) { + pthread_t pth; + pthread_create(&pth, NULL, f, NULL); + pthread_join(pth, NULL); + } + } else if (pid > 0) { + /* parent */ + atexit(atexit_hook); + + char buf[64]; + snprintf(buf, sizeof(buf), "/proc/%u/task", pid); + + signal(SIGALRM, sigalrm); + alarm(1); + + while (1) { + DIR *d = opendir(buf); + struct dirent *de; + while ((de = readdir(d))) { + if (strcmp(de->d_name, "0") == 0) { + exit(1); + } + } + closedir(d); + } + + return 0; + } else { + perror("fork"); + return 1; + } +} diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c index 781f7a50fc3f..f335eec5067e 100644 --- a/tools/testing/selftests/proc/proc-uptime-001.c +++ b/tools/testing/selftests/proc/proc-uptime-001.c @@ -13,7 +13,9 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -// Test that values in /proc/uptime increment monotonically. +// Test that boottime value in /proc/uptime and CLOCK_BOOTTIME increment +// monotonically. We don't test idle time monotonicity due to broken iowait +// task counting, cf: comment above get_cpu_idle_time_us() #undef NDEBUG #include <assert.h> #include <stdint.h> @@ -25,20 +27,31 @@ int main(void) { - uint64_t start, u0, u1, i0, i1; + uint64_t start, u0, u1, c0, c1; int fd; fd = open("/proc/uptime", O_RDONLY); assert(fd >= 0); - proc_uptime(fd, &u0, &i0); + u0 = proc_uptime(fd); start = u0; + c0 = clock_boottime(); + do { - proc_uptime(fd, &u1, &i1); + u1 = proc_uptime(fd); + c1 = clock_boottime(); + + /* Is /proc/uptime monotonic ? */ assert(u1 >= u0); - assert(i1 >= i0); + + /* Is CLOCK_BOOTTIME monotonic ? */ + assert(c1 >= c0); + + /* Is CLOCK_BOOTTIME VS /proc/uptime monotonic ? */ + assert(c0 >= u0); + u0 = u1; - i0 = i1; + c0 = c1; } while (u1 - start < 100); return 0; diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c index 30e2b7849089..ae453daa96c1 100644 --- a/tools/testing/selftests/proc/proc-uptime-002.c +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -13,11 +13,13 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -// Test that values in /proc/uptime increment monotonically -// while shifting across CPUs. -#define _GNU_SOURCE +// Test that boottime value in /proc/uptime and CLOCK_BOOTTIME increment +// monotonically while shifting across CPUs. We don't test idle time +// monotonicity due to broken iowait task counting, cf: comment above +// get_cpu_idle_time_us() #undef NDEBUG #include <assert.h> +#include <errno.h> #include <unistd.h> #include <sys/syscall.h> #include <stdlib.h> @@ -42,10 +44,10 @@ static inline int sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned lo int main(void) { + uint64_t u0, u1, c0, c1; unsigned int len; unsigned long *m; unsigned int cpu; - uint64_t u0, u1, i0, i1; int fd; /* find out "nr_cpu_ids" */ @@ -55,12 +57,14 @@ int main(void) len += sizeof(unsigned long); free(m); m = malloc(len); - } while (sys_sched_getaffinity(0, len, m) == -EINVAL); + } while (sys_sched_getaffinity(0, len, m) == -1 && errno == EINVAL); fd = open("/proc/uptime", O_RDONLY); assert(fd >= 0); - proc_uptime(fd, &u0, &i0); + u0 = proc_uptime(fd); + c0 = clock_boottime(); + for (cpu = 0; cpu < len * 8; cpu++) { memset(m, 0, len); m[cpu / (8 * sizeof(unsigned long))] |= 1UL << (cpu % (8 * sizeof(unsigned long))); @@ -68,11 +72,20 @@ int main(void) /* CPU might not exist, ignore error */ sys_sched_setaffinity(0, len, m); - proc_uptime(fd, &u1, &i1); + u1 = proc_uptime(fd); + c1 = clock_boottime(); + + /* Is /proc/uptime monotonic ? */ assert(u1 >= u0); - assert(i1 >= i0); + + /* Is CLOCK_BOOTTIME monotonic ? */ + assert(c1 >= c0); + + /* Is CLOCK_BOOTTIME VS /proc/uptime monotonic ? */ + assert(c0 >= u0); + u0 = u1; - i0 = i1; + c0 = c1; } return 0; diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h index dc6a42b1d6b0..730cce4a3d73 100644 --- a/tools/testing/selftests/proc/proc-uptime.h +++ b/tools/testing/selftests/proc/proc-uptime.h @@ -19,10 +19,22 @@ #include <string.h> #include <stdlib.h> #include <unistd.h> +#include <time.h> #include "proc.h" -static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle) +static uint64_t clock_boottime(void) +{ + struct timespec ts; + int err; + + err = clock_gettime(CLOCK_BOOTTIME, &ts); + assert(err >= 0); + + return (ts.tv_sec * 100) + (ts.tv_nsec / 10000000); +} + +static uint64_t proc_uptime(int fd) { uint64_t val1, val2; char buf[64], *p; @@ -43,18 +55,6 @@ static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle) assert(p[3] == ' '); val2 = (p[1] - '0') * 10 + p[2] - '0'; - *uptime = val1 * 100 + val2; - - p += 4; - - val1 = xstrtoull(p, &p); - assert(p[0] == '.'); - assert('0' <= p[1] && p[1] <= '9'); - assert('0' <= p[2] && p[2] <= '9'); - assert(p[3] == '\n'); - - val2 = (p[1] - '0') * 10 + p[2] - '0'; - *idle = val1 * 100 + val2; - assert(p + 4 == buf + rv); + return val1 * 100 + val2; } diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c index b3ef9e14d6cc..35ee78dff144 100644 --- a/tools/testing/selftests/proc/read.c +++ b/tools/testing/selftests/proc/read.c @@ -14,7 +14,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ // Test -// 1) read of every file in /proc +// 1) read and lseek on every file in /proc // 2) readlink of every symlink in /proc // 3) recursively (1) + (2) for every directory in /proc // 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs @@ -45,6 +45,8 @@ static void f_reg(DIR *d, const char *filename) fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK); if (fd == -1) return; + /* struct proc_ops::proc_lseek is mandatory if file is seekable. */ + (void)lseek(fd, 0, SEEK_SET); rv = read(fd, buf, sizeof(buf)); assert((0 <= rv && rv <= sizeof(buf)) || rv == -1); close(fd); |