diff options
Diffstat (limited to '')
-rw-r--r-- | tools/testing/selftests/core/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/core/Makefile | 4 | ||||
-rw-r--r-- | tools/testing/selftests/core/close_range_test.c | 469 | ||||
-rw-r--r-- | tools/testing/selftests/core/unshare_test.c | 94 | ||||
-rw-r--r-- | tools/testing/selftests/coredump/Makefile | 7 | ||||
-rw-r--r-- | tools/testing/selftests/coredump/README.rst | 50 | ||||
-rwxr-xr-x | tools/testing/selftests/coredump/stackdump | 14 | ||||
-rw-r--r-- | tools/testing/selftests/coredump/stackdump_test.c | 622 |
8 files changed, 1244 insertions, 17 deletions
diff --git a/tools/testing/selftests/core/.gitignore b/tools/testing/selftests/core/.gitignore index 6e6712ce5817..7999361992aa 100644 --- a/tools/testing/selftests/core/.gitignore +++ b/tools/testing/selftests/core/.gitignore @@ -1 +1,2 @@ close_range_test +unshare_test diff --git a/tools/testing/selftests/core/Makefile b/tools/testing/selftests/core/Makefile index f6f2d6f473c6..8e99f87f5d7c 100644 --- a/tools/testing/selftests/core/Makefile +++ b/tools/testing/selftests/core/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g -I../../../../usr/include/ +CFLAGS += -g $(KHDR_INCLUDES) -TEST_GEN_PROGS := close_range_test +TEST_GEN_PROGS := close_range_test unshare_test include ../lib.mk diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index c99b98b0d461..e0d9851fe1c9 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -11,16 +11,23 @@ #include <string.h> #include <syscall.h> #include <unistd.h> +#include <sys/resource.h> +#include <linux/close_range.h> #include "../kselftest_harness.h" #include "../clone3/clone3_selftests.h" -#ifndef __NR_close_range -#define __NR_close_range -1 + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif + +#ifndef F_DUPFD_QUERY +#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3) #endif -#ifndef CLOSE_RANGE_UNSHARE -#define CLOSE_RANGE_UNSHARE (1U << 1) +#ifndef F_CREATED_QUERY +#define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4) #endif static inline int sys_close_range(unsigned int fd, unsigned int max_fd, @@ -29,11 +36,7 @@ static inline int sys_close_range(unsigned int fd, unsigned int max_fd, return syscall(__NR_close_range, fd, max_fd, flags); } -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - -TEST(close_range) +TEST(core_close_range) { int i, ret; int open_fds[101]; @@ -44,7 +47,7 @@ TEST(close_range) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -52,7 +55,16 @@ TEST(close_range) EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) { if (errno == ENOSYS) - XFAIL(return, "close_range() syscall not supported"); + SKIP(return, "close_range() syscall not supported"); + } + + for (i = 0; i < 100; i++) { + ret = fcntl(open_fds[i], F_DUPFD_QUERY, open_fds[i + 1]); + if (ret < 0) { + EXPECT_EQ(errno, EINVAL); + } else { + EXPECT_EQ(ret, 0); + } } EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); @@ -97,7 +109,7 @@ TEST(close_range_unshare) int i, ret, status; pid_t pid; int open_fds[101]; - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_FILES, .exit_signal = SIGCHLD, }; @@ -108,7 +120,7 @@ TEST(close_range_unshare) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -186,7 +198,7 @@ TEST(close_range_unshare_capped) int i, ret, status; pid_t pid; int open_fds[101]; - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_FILES, .exit_signal = SIGCHLD, }; @@ -197,7 +209,7 @@ TEST(close_range_unshare_capped) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -224,4 +236,431 @@ TEST(close_range_unshare_capped) EXPECT_EQ(0, WEXITSTATUS(status)); } +TEST(close_range_cloexec) +{ + int i, ret; + int open_fds[101]; + struct rlimit rlimit; + + for (i = 0; i < ARRAY_SIZE(open_fds); i++) { + int fd; + + fd = open("/dev/null", O_RDONLY); + ASSERT_GE(fd, 0) { + if (errno == ENOENT) + SKIP(return, "Skipping test since /dev/null does not exist"); + } + + open_fds[i] = fd; + } + + ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC); + if (ret < 0) { + if (errno == ENOSYS) + SKIP(return, "close_range() syscall not supported"); + if (errno == EINVAL) + SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC"); + } + + /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */ + ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); + rlimit.rlim_cur = 25; + ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)); + + /* Set close-on-exec for two ranges: [0-50] and [75-100]. */ + ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC); + ASSERT_EQ(0, ret); + ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC); + ASSERT_EQ(0, ret); + + for (i = 0; i <= 50; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + } + + for (i = 51; i <= 74; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + } + + for (i = 75; i <= 100; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + } + + /* Test a common pattern. */ + ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC); + for (i = 0; i <= 100; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + } +} + +TEST(close_range_cloexec_unshare) +{ + int i, ret; + int open_fds[101]; + struct rlimit rlimit; + + for (i = 0; i < ARRAY_SIZE(open_fds); i++) { + int fd; + + fd = open("/dev/null", O_RDONLY); + ASSERT_GE(fd, 0) { + if (errno == ENOENT) + SKIP(return, "Skipping test since /dev/null does not exist"); + } + + open_fds[i] = fd; + } + + ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC); + if (ret < 0) { + if (errno == ENOSYS) + SKIP(return, "close_range() syscall not supported"); + if (errno == EINVAL) + SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC"); + } + + /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */ + ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); + rlimit.rlim_cur = 25; + ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)); + + /* Set close-on-exec for two ranges: [0-50] and [75-100]. */ + ret = sys_close_range(open_fds[0], open_fds[50], + CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); + ASSERT_EQ(0, ret); + ret = sys_close_range(open_fds[75], open_fds[100], + CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); + ASSERT_EQ(0, ret); + + for (i = 0; i <= 50; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + } + + for (i = 51; i <= 74; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + } + + for (i = 75; i <= 100; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + } + + /* Test a common pattern. */ + ret = sys_close_range(3, UINT_MAX, + CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); + for (i = 0; i <= 100; i++) { + int flags = fcntl(open_fds[i], F_GETFD); + + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + } +} + +/* + * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com + */ +TEST(close_range_cloexec_syzbot) +{ + int fd1, fd2, fd3, fd4, flags, ret, status; + pid_t pid; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + /* Create a huge gap in the fd table. */ + fd1 = open("/dev/null", O_RDWR); + EXPECT_GT(fd1, 0); + + fd2 = dup2(fd1, 1000); + EXPECT_GT(fd2, 0); + + flags = fcntl(fd1, F_DUPFD_QUERY, fd2); + if (flags < 0) { + EXPECT_EQ(errno, EINVAL); + } else { + EXPECT_EQ(flags, 1); + } + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC); + if (ret) + exit(EXIT_FAILURE); + + /* + * We now have a private file descriptor table and all + * our open fds should still be open but made + * close-on-exec. + */ + flags = fcntl(fd1, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + + flags = fcntl(fd2, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + + fd3 = dup2(fd1, 42); + EXPECT_GT(fd3, 0); + + flags = fcntl(fd1, F_DUPFD_QUERY, fd3); + if (flags < 0) { + EXPECT_EQ(errno, EINVAL); + } else { + EXPECT_EQ(flags, 1); + } + + + + /* + * Duplicating the file descriptor must remove the + * FD_CLOEXEC flag. + */ + flags = fcntl(fd3, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + /* + * We had a shared file descriptor table before along with requesting + * close-on-exec so the original fds must not be close-on-exec. + */ + flags = fcntl(fd1, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + + flags = fcntl(fd2, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + + fd3 = dup2(fd1, 42); + EXPECT_GT(fd3, 0); + + flags = fcntl(fd1, F_DUPFD_QUERY, fd3); + if (flags < 0) { + EXPECT_EQ(errno, EINVAL); + } else { + EXPECT_EQ(flags, 1); + } + + fd4 = open("/dev/null", O_RDWR); + EXPECT_GT(fd4, 0); + + /* Same inode, different file pointers. */ + flags = fcntl(fd1, F_DUPFD_QUERY, fd4); + if (flags < 0) { + EXPECT_EQ(errno, EINVAL); + } else { + EXPECT_EQ(flags, 0); + } + + flags = fcntl(fd3, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + + EXPECT_EQ(close(fd1), 0); + EXPECT_EQ(close(fd2), 0); + EXPECT_EQ(close(fd3), 0); + EXPECT_EQ(close(fd4), 0); +} + +/* + * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com + */ +TEST(close_range_cloexec_unshare_syzbot) +{ + int i, fd1, fd2, fd3, flags, ret, status; + pid_t pid; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + /* + * Create a huge gap in the fd table. When we now call + * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper + * bound the kernel will only copy up to fd1 file descriptors into the + * new fd table. If the kernel is buggy and doesn't handle + * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file + * descriptors and we will oops! + * + * On a buggy kernel this should immediately oops. But let's loop just + * to be sure. + */ + fd1 = open("/dev/null", O_RDWR); + EXPECT_GT(fd1, 0); + + fd2 = dup2(fd1, 1000); + EXPECT_GT(fd2, 0); + + for (i = 0; i < 100; i++) { + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE | + CLOSE_RANGE_CLOEXEC); + if (ret) + exit(EXIT_FAILURE); + + /* + * We now have a private file descriptor table and all + * our open fds should still be open but made + * close-on-exec. + */ + flags = fcntl(fd1, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + + flags = fcntl(fd2, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); + + fd3 = dup2(fd1, 42); + EXPECT_GT(fd3, 0); + + /* + * Duplicating the file descriptor must remove the + * FD_CLOEXEC flag. + */ + flags = fcntl(fd3, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + + EXPECT_EQ(close(fd1), 0); + EXPECT_EQ(close(fd2), 0); + EXPECT_EQ(close(fd3), 0); + + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + } + + /* + * We created a private file descriptor table before along with + * requesting close-on-exec so the original fds must not be + * close-on-exec. + */ + flags = fcntl(fd1, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + + flags = fcntl(fd2, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + + fd3 = dup2(fd1, 42); + EXPECT_GT(fd3, 0); + + flags = fcntl(fd3, F_GETFD); + EXPECT_GT(flags, -1); + EXPECT_EQ(flags & FD_CLOEXEC, 0); + + EXPECT_EQ(close(fd1), 0); + EXPECT_EQ(close(fd2), 0); + EXPECT_EQ(close(fd3), 0); +} + +TEST(close_range_bitmap_corruption) +{ + pid_t pid; + int status; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + /* get the first 128 descriptors open */ + for (int i = 2; i < 128; i++) + EXPECT_GE(dup2(0, i), 0); + + /* get descriptor table shared */ + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* unshare and truncate descriptor table down to 64 */ + if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE)) + exit(EXIT_FAILURE); + + ASSERT_EQ(fcntl(64, F_GETFD), -1); + /* ... and verify that the range 64..127 is not + stuck "fully used" according to secondary bitmap */ + EXPECT_EQ(dup(0), 64) + exit(EXIT_FAILURE); + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(fcntl_created) +{ + for (int i = 0; i < 101; i++) { + int fd; + char path[PATH_MAX]; + + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0) { + if (errno == ENOENT) + SKIP(return, + "Skipping test since /dev/null does not exist"); + } + + /* We didn't create "/dev/null". */ + EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0); + close(fd); + + sprintf(path, "aaaa_%d", i); + fd = open(path, O_CREAT | O_RDONLY | O_CLOEXEC, 0600); + ASSERT_GE(fd, 0); + + /* We created "aaaa_%d". */ + EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 1); + close(fd); + + fd = open(path, O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + + /* We're opening it again, so no positive creation check. */ + EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0); + close(fd); + unlink(path); + } +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/core/unshare_test.c b/tools/testing/selftests/core/unshare_test.c new file mode 100644 index 000000000000..7fec9dfb1b0e --- /dev/null +++ b/tools/testing/selftests/core/unshare_test.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <linux/kernel.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <unistd.h> +#include <sys/resource.h> +#include <linux/close_range.h> + +#include "../kselftest_harness.h" +#include "../clone3/clone3_selftests.h" + +TEST(unshare_EMFILE) +{ + pid_t pid; + int status; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + int fd; + ssize_t n, n2; + static char buf[512], buf2[512]; + struct rlimit rlimit; + int nr_open; + + fd = open("/proc/sys/fs/nr_open", O_RDWR); + ASSERT_GE(fd, 0); + + n = read(fd, buf, sizeof(buf)); + ASSERT_GT(n, 0); + ASSERT_EQ(buf[n - 1], '\n'); + + ASSERT_EQ(sscanf(buf, "%d", &nr_open), 1); + + ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); + + /* bump fs.nr_open */ + n2 = sprintf(buf2, "%d\n", nr_open + 1024); + lseek(fd, 0, SEEK_SET); + write(fd, buf2, n2); + + /* bump ulimit -n */ + rlimit.rlim_cur = nr_open + 1024; + rlimit.rlim_max = nr_open + 1024; + EXPECT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)) { + lseek(fd, 0, SEEK_SET); + write(fd, buf, n); + exit(EXIT_FAILURE); + } + + /* get a descriptor past the old fs.nr_open */ + EXPECT_GE(dup2(2, nr_open + 64), 0) { + lseek(fd, 0, SEEK_SET); + write(fd, buf, n); + exit(EXIT_FAILURE); + } + + /* get descriptor table shared */ + pid = sys_clone3(&args, sizeof(args)); + EXPECT_GE(pid, 0) { + lseek(fd, 0, SEEK_SET); + write(fd, buf, n); + exit(EXIT_FAILURE); + } + + if (pid == 0) { + int err; + + /* restore fs.nr_open */ + lseek(fd, 0, SEEK_SET); + write(fd, buf, n); + /* ... and now unshare(CLONE_FILES) must fail with EMFILE */ + err = unshare(CLONE_FILES); + EXPECT_EQ(err, -1) + exit(EXIT_FAILURE); + EXPECT_EQ(errno, EMFILE) + exit(EXIT_FAILURE); + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile new file mode 100644 index 000000000000..ed210037b29d --- /dev/null +++ b/tools/testing/selftests/coredump/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +CFLAGS = $(KHDR_INCLUDES) + +TEST_GEN_PROGS := stackdump_test +TEST_FILES := stackdump + +include ../lib.mk diff --git a/tools/testing/selftests/coredump/README.rst b/tools/testing/selftests/coredump/README.rst new file mode 100644 index 000000000000..164a7aa181c8 --- /dev/null +++ b/tools/testing/selftests/coredump/README.rst @@ -0,0 +1,50 @@ +coredump selftest +================= + +Background context +------------------ + +`coredump` is a feature which dumps a process's memory space when the process terminates +unexpectedly (e.g. due to segmentation fault), which can be useful for debugging. By default, +`coredump` dumps the memory to the file named `core`, but this behavior can be changed by writing a +different file name to `/proc/sys/kernel/core_pattern`. Furthermore, `coredump` can be piped to a +user-space program by writing the pipe symbol (`|`) followed by the command to be executed to +`/proc/sys/kernel/core_pattern`. For the full description, see `man 5 core`. + +The piped user program may be interested in reading the stack pointers of the crashed process. The +crashed process's stack pointers can be read from `procfs`: it is the `kstkesp` field in +`/proc/$PID/stat`. See `man 5 proc` for all the details. + +The problem +----------- +While a thread is active, the stack pointer is unsafe to read and therefore the `kstkesp` field +reads zero. But when the thread is dead (e.g. during a coredump), this field should have valid +value. + +However, this was broken in the past and `kstkesp` was zero even during coredump: + +* commit 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in /proc/PID/stat") changed kstkesp to + always be zero + +* commit fd7d56270b52 ("fs/proc: Report eip/esp in /prod/PID/stat for coredumping") fixed it for the + coredumping thread. However, other threads in a coredumping process still had the problem. + +* commit cb8f381f1613 ("fs/proc/array.c: allow reporting eip/esp for all coredumping threads") fixed + for all threads in a coredumping process. + +* commit 92307383082d ("coredump: Don't perform any cleanups before dumping core") broke it again + for the other threads in a coredumping process. + +The problem has been fixed now, but considering the history, it may appear again in the future. + +The goal of this test +--------------------- +This test detects problem with reading `kstkesp` during coredump by doing the following: + +#. Tell the kernel to execute the "stackdump" script when a coredump happens. This script + reads the stack pointers of all threads of crashed processes. + +#. Spawn a child process who creates some threads and then crashes. + +#. Read the output from the "stackdump" script, and make sure all stack pointer values are + non-zero. diff --git a/tools/testing/selftests/coredump/stackdump b/tools/testing/selftests/coredump/stackdump new file mode 100755 index 000000000000..96714ce42d12 --- /dev/null +++ b/tools/testing/selftests/coredump/stackdump @@ -0,0 +1,14 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +CRASH_PROGRAM_ID=$1 +STACKDUMP_FILE=$2 + +TMP=$(mktemp) + +for t in /proc/$CRASH_PROGRAM_ID/task/*; do + tid=$(basename $t) + cat /proc/$tid/stat | awk '{print $29}' >> $TMP +done + +mv $TMP $STACKDUMP_FILE diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c new file mode 100644 index 000000000000..9984413be9f0 --- /dev/null +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -0,0 +1,622 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <fcntl.h> +#include <inttypes.h> +#include <libgen.h> +#include <linux/limits.h> +#include <pthread.h> +#include <string.h> +#include <sys/mount.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <unistd.h> + +#include "../kselftest_harness.h" +#include "../pidfd/pidfd.h" + +#define STACKDUMP_FILE "stack_values" +#define STACKDUMP_SCRIPT "stackdump" +#define NUM_THREAD_SPAWN 128 + +static void *do_nothing(void *) +{ + while (1) + pause(); +} + +static void crashing_child(void) +{ + pthread_t thread; + int i; + + for (i = 0; i < NUM_THREAD_SPAWN; ++i) + pthread_create(&thread, NULL, do_nothing, NULL); + + /* crash on purpose */ + i = *(int *)NULL; +} + +FIXTURE(coredump) +{ + char original_core_pattern[256]; + pid_t pid_coredump_server; +}; + +FIXTURE_SETUP(coredump) +{ + char buf[PATH_MAX]; + FILE *file; + char *dir; + int ret; + + self->pid_coredump_server = -ESRCH; + file = fopen("/proc/sys/kernel/core_pattern", "r"); + ASSERT_NE(NULL, file); + + ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file); + ASSERT_TRUE(ret || feof(file)); + ASSERT_LT(ret, sizeof(self->original_core_pattern)); + + self->original_core_pattern[ret] = '\0'; + + ret = fclose(file); + ASSERT_EQ(0, ret); +} + +FIXTURE_TEARDOWN(coredump) +{ + const char *reason; + FILE *file; + int ret, status; + + unlink(STACKDUMP_FILE); + + if (self->pid_coredump_server > 0) { + kill(self->pid_coredump_server, SIGTERM); + waitpid(self->pid_coredump_server, &status, 0); + } + unlink("/tmp/coredump.file"); + unlink("/tmp/coredump.socket"); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + if (!file) { + reason = "Unable to open core_pattern"; + goto fail; + } + + ret = fprintf(file, "%s", self->original_core_pattern); + if (ret < 0) { + reason = "Unable to write to core_pattern"; + goto fail; + } + + ret = fclose(file); + if (ret) { + reason = "Unable to close core_pattern"; + goto fail; + } + + return; +fail: + /* This should never happen */ + fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason); +} + +TEST_F_TIMEOUT(coredump, stackdump, 120) +{ + struct sigaction action = {}; + unsigned long long stack; + char *test_dir, *line; + size_t line_length; + char buf[PATH_MAX]; + int ret, i, status; + FILE *file; + pid_t pid; + + /* + * Step 1: Setup core_pattern so that the stackdump script is executed when the child + * process crashes + */ + ret = readlink("/proc/self/exe", buf, sizeof(buf)); + ASSERT_NE(-1, ret); + ASSERT_LT(ret, sizeof(buf)); + buf[ret] = '\0'; + + test_dir = dirname(buf); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(NULL, file); + + ret = fprintf(file, "|%1$s/%2$s %%P %1$s/%3$s", test_dir, STACKDUMP_SCRIPT, STACKDUMP_FILE); + ASSERT_LT(0, ret); + + ret = fclose(file); + ASSERT_EQ(0, ret); + + /* Step 2: Create a process who spawns some threads then crashes */ + pid = fork(); + ASSERT_TRUE(pid >= 0); + if (pid == 0) + crashing_child(); + + /* + * Step 3: Wait for the stackdump script to write the stack pointers to the stackdump file + */ + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + + for (i = 0; i < 10; ++i) { + file = fopen(STACKDUMP_FILE, "r"); + if (file) + break; + sleep(1); + } + ASSERT_NE(file, NULL); + + /* Step 4: Make sure all stack pointer values are non-zero */ + line = NULL; + for (i = 0; -1 != getline(&line, &line_length, file); ++i) { + stack = strtoull(line, NULL, 10); + ASSERT_NE(stack, 0); + } + free(line); + + ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN); + + fclose(file); +} + +TEST_F(coredump, socket) +{ + int fd, pidfd, ret, status; + FILE *file; + pid_t pid, pid_coredump_server; + struct stat st; + char core_file[PATH_MAX]; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file; + socklen_t fd_peer_pidfd_len; + + close(ipc_sockets[0]); + + fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd_server < 0) + _exit(EXIT_FAILURE); + + ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) { + fprintf(stderr, "Failed to bind coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + ret = listen(fd_server, 1); + if (ret < 0) { + fprintf(stderr, "Failed to listen on coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) { + fprintf(stderr, "Failed to accept coredump socket connection\n"); + close(fd_server); + _exit(EXIT_FAILURE); + } + + fd_peer_pidfd_len = sizeof(fd_peer_pidfd); + ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, + &fd_peer_pidfd, &fd_peer_pidfd_len); + if (ret < 0) { + fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + _exit(EXIT_FAILURE); + } + + memset(&info, 0, sizeof(info)); + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info); + if (ret < 0) { + fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (!(info.mask & PIDFD_INFO_COREDUMP)) { + fprintf(stderr, "Missing coredump information from coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) { + fprintf(stderr, "Received connection from non-coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + fd_core_file = creat("/tmp/coredump.file", 0644); + if (fd_core_file < 0) { + fprintf(stderr, "Failed to create coredump file\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + for (;;) { + char buffer[4096]; + ssize_t bytes_read, bytes_write; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read < 0) { + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_FAILURE); + } + + if (bytes_read == 0) + break; + + bytes_write = write(fd_core_file, buffer, bytes_read); + if (bytes_read != bytes_write) { + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_FAILURE); + } + } + + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_SUCCESS); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + ASSERT_EQ(stat("/tmp/coredump.file", &st), 0); + ASSERT_GT(st.st_size, 0); + /* + * We should somehow validate the produced core file. + * For now just allow for visual inspection + */ + system("file /tmp/coredump.file"); +} + +TEST_F(coredump, socket_detect_userspace_client) +{ + int fd, pidfd, ret, status; + FILE *file; + pid_t pid, pid_coredump_server; + struct stat st; + char core_file[PATH_MAX]; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file; + socklen_t fd_peer_pidfd_len; + + close(ipc_sockets[0]); + + fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd_server < 0) + _exit(EXIT_FAILURE); + + ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) { + fprintf(stderr, "Failed to bind coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + ret = listen(fd_server, 1); + if (ret < 0) { + fprintf(stderr, "Failed to listen on coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) { + fprintf(stderr, "Failed to accept coredump socket connection\n"); + close(fd_server); + _exit(EXIT_FAILURE); + } + + fd_peer_pidfd_len = sizeof(fd_peer_pidfd); + ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, + &fd_peer_pidfd, &fd_peer_pidfd_len); + if (ret < 0) { + fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + _exit(EXIT_FAILURE); + } + + memset(&info, 0, sizeof(info)); + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info); + if (ret < 0) { + fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (!(info.mask & PIDFD_INFO_COREDUMP)) { + fprintf(stderr, "Missing coredump information from coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (info.coredump_mask & PIDFD_COREDUMPED) { + fprintf(stderr, "Received unexpected connection from coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_SUCCESS); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) { + int fd_socket; + ssize_t ret; + + fd_socket = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd_socket < 0) + _exit(EXIT_FAILURE); + + + ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) + _exit(EXIT_FAILURE); + + (void *)write(fd_socket, &(char){ 0 }, 1); + close(fd_socket); + _exit(EXIT_SUCCESS); + } + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0); + + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + ASSERT_NE(stat("/tmp/coredump.file", &st), 0); + ASSERT_EQ(errno, ENOENT); +} + +TEST_F(coredump, socket_enoent) +{ + int pidfd, ret, status; + FILE *file; + pid_t pid; + char core_file[PATH_MAX]; + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); +} + +TEST_F(coredump, socket_no_listener) +{ + int pidfd, ret, status; + FILE *file; + pid_t pid, pid_coredump_server; + int ipc_sockets[2]; + char c; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server; + socklen_t fd_peer_pidfd_len; + + close(ipc_sockets[0]); + + fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd_server < 0) + _exit(EXIT_FAILURE); + + ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) { + fprintf(stderr, "Failed to bind coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_SUCCESS); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); +} + +TEST_HARNESS_MAIN |