From d612b1fd8010d0d67b5287fe146b8b55bcbb8655 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 11 Aug 2017 04:33:53 +0000 Subject: seccomp: Operation for checking if an action is available Userspace code that needs to check if the kernel supports a given action may not be able to use the /proc/sys/kernel/seccomp/actions_avail sysctl. The process may be running in a sandbox and, therefore, sufficient filesystem access may not be available. This patch adds an operation to the seccomp(2) syscall that allows userspace code to ask the kernel if a given action is available. If the action is supported by the kernel, 0 is returned. If the action is not supported by the kernel, -1 is returned with errno set to -EOPNOTSUPP. If this check is attempted on a kernel that doesn't support this new operation, -1 is returned with errno set to -EINVAL meaning that userspace code will have the ability to differentiate between the two error cases. Signed-off-by: Tyler Hicks Suggested-by: Andy Lutomirski Signed-off-by: Kees Cook --- include/uapi/linux/seccomp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 0f238a43ff1e..aaad61cc46bc 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -11,8 +11,9 @@ #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ /* Valid operations for seccomp syscall. */ -#define SECCOMP_SET_MODE_STRICT 0 -#define SECCOMP_SET_MODE_FILTER 1 +#define SECCOMP_SET_MODE_STRICT 0 +#define SECCOMP_SET_MODE_FILTER 1 +#define SECCOMP_GET_ACTION_AVAIL 2 /* Valid flags for SECCOMP_SET_MODE_FILTER */ #define SECCOMP_FILTER_FLAG_TSYNC 1 -- cgit v1.2.3-59-g8ed1b From 0ddec0fc8900201c0897b87b762b7c420436662f Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 11 Aug 2017 04:33:54 +0000 Subject: seccomp: Sysctl to configure actions that are allowed to be logged Adminstrators can write to this sysctl to set the seccomp actions that are allowed to be logged. Any actions not found in this sysctl will not be logged. For example, all SECCOMP_RET_KILL, SECCOMP_RET_TRAP, and SECCOMP_RET_ERRNO actions would be loggable if "kill trap errno" were written to the sysctl. SECCOMP_RET_TRACE actions would not be logged since its string representation ("trace") wasn't present in the sysctl value. The path to the sysctl is: /proc/sys/kernel/seccomp/actions_logged The actions_avail sysctl can be read to discover the valid action names that can be written to the actions_logged sysctl with the exception of "allow". SECCOMP_RET_ALLOW actions cannot be configured for logging. The default setting for the sysctl is to allow all actions to be logged except SECCOMP_RET_ALLOW. While only SECCOMP_RET_KILL actions are currently logged, an upcoming patch will allow applications to request additional actions to be logged. There's one important exception to this sysctl. If a task is specifically being audited, meaning that an audit context has been allocated for the task, seccomp will log all actions other than SECCOMP_RET_ALLOW despite the value of actions_logged. This exception preserves the existing auditing behavior of tasks with an allocated audit context. With this patch, the logic for deciding if an action will be logged is: if action == RET_ALLOW: do not log else if action == RET_KILL && RET_KILL in actions_logged: log else if audit_enabled && task-is-being-audited: log else: do not log Signed-off-by: Tyler Hicks Signed-off-by: Kees Cook --- Documentation/userspace-api/seccomp_filter.rst | 18 +++ include/linux/audit.h | 6 +- kernel/seccomp.c | 171 ++++++++++++++++++++++++- 3 files changed, 187 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index 35fc7cbf1d95..2d1d8ab04ac5 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -187,6 +187,24 @@ directory. Here's a description of each file in that directory: program was built, differs from the set of actions actually supported in the current running kernel. +``actions_logged``: + A read-write ordered list of seccomp return values (refer to the + ``SECCOMP_RET_*`` macros above) that are allowed to be logged. Writes + to the file do not need to be in ordered form but reads from the file + will be ordered in the same way as the actions_avail sysctl. + + It is important to note that the value of ``actions_logged`` does not + prevent certain actions from being logged when the audit subsystem is + configured to audit a task. If the action is not found in + ``actions_logged`` list, the final decision on whether to audit the + action for that task is ultimately left up to the audit subsystem to + decide for all seccomp return values other than ``SECCOMP_RET_ALLOW``. + + The ``allow`` string is not accepted in the ``actions_logged`` sysctl + as it is not possible to log ``SECCOMP_RET_ALLOW`` actions. Attempting + to write ``allow`` to the sysctl will result in an EINVAL being + returned. + Adding architecture support =========================== diff --git a/include/linux/audit.h b/include/linux/audit.h index 2150bdccfbab..8c30f06d639d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -314,11 +314,7 @@ void audit_core_dumps(long signr); static inline void audit_seccomp(unsigned long syscall, long signr, int code) { - if (!audit_enabled) - return; - - /* Force a record to be reported if a signal was delivered. */ - if (signr || unlikely(!audit_dummy_context())) + if (audit_enabled && unlikely(!audit_dummy_context())) __audit_seccomp(syscall, signr, code); } diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 7a6089f66fed..54357e361aea 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -522,6 +522,45 @@ static void seccomp_send_sigsys(int syscall, int reason) } #endif /* CONFIG_SECCOMP_FILTER */ +/* For use with seccomp_actions_logged */ +#define SECCOMP_LOG_KILL (1 << 0) +#define SECCOMP_LOG_TRAP (1 << 2) +#define SECCOMP_LOG_ERRNO (1 << 3) +#define SECCOMP_LOG_TRACE (1 << 4) +#define SECCOMP_LOG_ALLOW (1 << 5) + +static u32 seccomp_actions_logged = SECCOMP_LOG_KILL | SECCOMP_LOG_TRAP | + SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE; + +static inline void seccomp_log(unsigned long syscall, long signr, u32 action) +{ + bool log = false; + + switch (action) { + case SECCOMP_RET_ALLOW: + case SECCOMP_RET_TRAP: + case SECCOMP_RET_ERRNO: + case SECCOMP_RET_TRACE: + break; + case SECCOMP_RET_KILL: + default: + log = seccomp_actions_logged & SECCOMP_LOG_KILL; + } + + /* + * Force an audit message to be emitted when the action is RET_KILL and + * the action is allowed to be logged by the admin. + */ + if (log) + return __audit_seccomp(syscall, signr, action); + + /* + * Let the audit subsystem decide if the action should be audited based + * on whether the current task itself is being audited. + */ + return audit_seccomp(syscall, signr, action); +} + /* * Secure computing mode 1 allows only read/write/exit/sigreturn. * To be fully secure this must be combined with rlimit @@ -547,7 +586,7 @@ static void __secure_computing_strict(int this_syscall) #ifdef SECCOMP_DEBUG dump_stack(); #endif - audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL); + seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL); do_exit(SIGKILL); } @@ -656,7 +695,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, case SECCOMP_RET_KILL: default: - audit_seccomp(this_syscall, SIGSYS, action); + seccomp_log(this_syscall, SIGSYS, action); /* Dump core only if this is the last remaining thread. */ if (get_nr_threads(current) == 1) { siginfo_t info; @@ -673,7 +712,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, unreachable(); skip: - audit_seccomp(this_syscall, 0, action); + seccomp_log(this_syscall, 0, action); return -1; } #else @@ -978,6 +1017,127 @@ static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME " " SECCOMP_RET_TRACE_NAME " " SECCOMP_RET_ALLOW_NAME; +struct seccomp_log_name { + u32 log; + const char *name; +}; + +static const struct seccomp_log_name seccomp_log_names[] = { + { SECCOMP_LOG_KILL, SECCOMP_RET_KILL_NAME }, + { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME }, + { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME }, + { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME }, + { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME }, + { } +}; + +static bool seccomp_names_from_actions_logged(char *names, size_t size, + u32 actions_logged) +{ + const struct seccomp_log_name *cur; + bool append_space = false; + + for (cur = seccomp_log_names; cur->name && size; cur++) { + ssize_t ret; + + if (!(actions_logged & cur->log)) + continue; + + if (append_space) { + ret = strscpy(names, " ", size); + if (ret < 0) + return false; + + names += ret; + size -= ret; + } else + append_space = true; + + ret = strscpy(names, cur->name, size); + if (ret < 0) + return false; + + names += ret; + size -= ret; + } + + return true; +} + +static bool seccomp_action_logged_from_name(u32 *action_logged, + const char *name) +{ + const struct seccomp_log_name *cur; + + for (cur = seccomp_log_names; cur->name; cur++) { + if (!strcmp(cur->name, name)) { + *action_logged = cur->log; + return true; + } + } + + return false; +} + +static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names) +{ + char *name; + + *actions_logged = 0; + while ((name = strsep(&names, " ")) && *name) { + u32 action_logged = 0; + + if (!seccomp_action_logged_from_name(&action_logged, name)) + return false; + + *actions_logged |= action_logged; + } + + return true; +} + +static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + char names[sizeof(seccomp_actions_avail)]; + struct ctl_table table; + int ret; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + memset(names, 0, sizeof(names)); + + if (!write) { + if (!seccomp_names_from_actions_logged(names, sizeof(names), + seccomp_actions_logged)) + return -EINVAL; + } + + table = *ro_table; + table.data = names; + table.maxlen = sizeof(names); + ret = proc_dostring(&table, write, buffer, lenp, ppos); + if (ret) + return ret; + + if (write) { + u32 actions_logged; + + if (!seccomp_actions_logged_from_names(&actions_logged, + table.data)) + return -EINVAL; + + if (actions_logged & SECCOMP_LOG_ALLOW) + return -EINVAL; + + seccomp_actions_logged = actions_logged; + } + + return 0; +} + static struct ctl_path seccomp_sysctl_path[] = { { .procname = "kernel", }, { .procname = "seccomp", }, @@ -992,6 +1152,11 @@ static struct ctl_table seccomp_sysctl_table[] = { .mode = 0444, .proc_handler = proc_dostring, }, + { + .procname = "actions_logged", + .mode = 0644, + .proc_handler = seccomp_actions_logged_handler, + }, { } }; -- cgit v1.2.3-59-g8ed1b From e66a39977985b1e69e17c4042cb290768eca9b02 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 11 Aug 2017 04:33:56 +0000 Subject: seccomp: Filter flag to log all actions except SECCOMP_RET_ALLOW Add a new filter flag, SECCOMP_FILTER_FLAG_LOG, that enables logging for all actions except for SECCOMP_RET_ALLOW for the given filter. SECCOMP_RET_KILL actions are always logged, when "kill" is in the actions_logged sysctl, and SECCOMP_RET_ALLOW actions are never logged, regardless of this flag. This flag can be used to create noisy filters that result in all non-allowed actions to be logged. A process may have one noisy filter, which is loaded with this flag, as well as a quiet filter that's not loaded with this flag. This allows for the actions in a set of filters to be selectively conveyed to the admin. Since a system could have a large number of allocated seccomp_filter structs, struct packing was taken in consideration. On 64 bit x86, the new log member takes up one byte of an existing four byte hole in the struct. On 32 bit x86, the new log member creates a new four byte hole (unavoidable) and consumes one of those bytes. Unfortunately, the tests added for SECCOMP_FILTER_FLAG_LOG are not capable of inspecting the audit log to verify that the actions taken in the filter were logged. With this patch, the logic for deciding if an action will be logged is: if action == RET_ALLOW: do not log else if action == RET_KILL && RET_KILL in actions_logged: log else if filter-requests-logging && action in actions_logged: log else if audit_enabled && process-is-being-audited: log else: do not log Signed-off-by: Tyler Hicks Signed-off-by: Kees Cook --- include/linux/seccomp.h | 3 +- include/uapi/linux/seccomp.h | 1 + kernel/seccomp.c | 26 +++++++--- tools/testing/selftests/seccomp/seccomp_bpf.c | 69 ++++++++++++++++++++++++++- 4 files changed, 91 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index ecc296c137cd..c8bef436b61d 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,7 +3,8 @@ #include -#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ + SECCOMP_FILTER_FLAG_LOG) #ifdef CONFIG_SECCOMP diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index aaad61cc46bc..19a611d0712e 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -17,6 +17,7 @@ /* Valid flags for SECCOMP_SET_MODE_FILTER */ #define SECCOMP_FILTER_FLAG_TSYNC 1 +#define SECCOMP_FILTER_FLAG_LOG 2 /* * All BPF programs must return a 32-bit value. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 54357e361aea..ed9fde418fc4 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -44,6 +44,7 @@ * get/put helpers should be used when accessing an instance * outside of a lifetime-guarded section. In general, this * is only needed for handling filters shared across tasks. + * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged * @prev: points to a previously installed, or inherited, filter * @prog: the BPF program to evaluate * @@ -59,6 +60,7 @@ */ struct seccomp_filter { refcount_t usage; + bool log; struct seccomp_filter *prev; struct bpf_prog *prog; }; @@ -452,6 +454,10 @@ static long seccomp_attach_filter(unsigned int flags, return ret; } + /* Set log flag, if present. */ + if (flags & SECCOMP_FILTER_FLAG_LOG) + filter->log = true; + /* * If there is an existing filter, make it the prev and don't drop its * task reference. @@ -532,15 +538,22 @@ static void seccomp_send_sigsys(int syscall, int reason) static u32 seccomp_actions_logged = SECCOMP_LOG_KILL | SECCOMP_LOG_TRAP | SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE; -static inline void seccomp_log(unsigned long syscall, long signr, u32 action) +static inline void seccomp_log(unsigned long syscall, long signr, u32 action, + bool requested) { bool log = false; switch (action) { case SECCOMP_RET_ALLOW: + break; case SECCOMP_RET_TRAP: + log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP; + break; case SECCOMP_RET_ERRNO: + log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO; + break; case SECCOMP_RET_TRACE: + log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE; break; case SECCOMP_RET_KILL: default: @@ -548,8 +561,9 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action) } /* - * Force an audit message to be emitted when the action is RET_KILL and - * the action is allowed to be logged by the admin. + * Force an audit message to be emitted when the action is RET_KILL or + * the FILTER_FLAG_LOG bit was set and the action is allowed to be + * logged by the admin. */ if (log) return __audit_seccomp(syscall, signr, action); @@ -586,7 +600,7 @@ static void __secure_computing_strict(int this_syscall) #ifdef SECCOMP_DEBUG dump_stack(); #endif - seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL); + seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL, true); do_exit(SIGKILL); } @@ -695,7 +709,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, case SECCOMP_RET_KILL: default: - seccomp_log(this_syscall, SIGSYS, action); + seccomp_log(this_syscall, SIGSYS, action, true); /* Dump core only if this is the last remaining thread. */ if (get_nr_threads(current) == 1) { siginfo_t info; @@ -712,7 +726,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, unreachable(); skip: - seccomp_log(this_syscall, 0, action); + seccomp_log(this_syscall, 0, action, match ? match->log : false); return -1; } #else diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index abf708e09892..1c8c22ce7740 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1739,6 +1739,10 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) #define SECCOMP_FILTER_FLAG_TSYNC 1 #endif +#ifndef SECCOMP_FILTER_FLAG_LOG +#define SECCOMP_FILTER_FLAG_LOG 2 +#endif + #ifndef seccomp int seccomp(unsigned int op, unsigned int flags, void *args) { @@ -1844,7 +1848,8 @@ TEST(seccomp_syscall_mode_lock) */ TEST(detect_seccomp_filter_flags) { - unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC }; + unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, + SECCOMP_FILTER_FLAG_LOG }; unsigned int flag, all_flags; int i; long ret; @@ -2533,6 +2538,67 @@ TEST(syscall_restart) _metadata->passed = 0; } +TEST_SIGNAL(filter_flag_log, SIGSYS) +{ + struct sock_filter allow_filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter kill_filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog allow_prog = { + .len = (unsigned short)ARRAY_SIZE(allow_filter), + .filter = allow_filter, + }; + struct sock_fprog kill_prog = { + .len = (unsigned short)ARRAY_SIZE(kill_filter), + .filter = kill_filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */ + ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG, + &allow_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_NE(0, ret) { + TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!"); + } + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!"); + } + + /* Verify that a simple, permissive filter can be added with no flags */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); + EXPECT_EQ(0, ret); + + /* See if the same filter can be added with the FILTER_FLAG_LOG flag */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, + &allow_prog); + ASSERT_NE(EINVAL, errno) { + TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!"); + } + EXPECT_EQ(0, ret); + + /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, + &kill_prog); + EXPECT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + TEST(get_action_avail) { __u32 actions[] = { SECCOMP_RET_KILL, SECCOMP_RET_TRAP, @@ -2573,6 +2639,7 @@ TEST(get_action_avail) * - endianness checking when appropriate * - 64-bit arg prodding * - arch value testing (x86 modes especially) + * - verify that FILTER_FLAG_LOG filters generate log messages * - ... */ -- cgit v1.2.3-59-g8ed1b From 59f5cf44a38284eb9e76270c786fb6cc62ef8ac4 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 11 Aug 2017 04:33:57 +0000 Subject: seccomp: Action to log before allowing Add a new action, SECCOMP_RET_LOG, that logs a syscall before allowing the syscall. At the implementation level, this action is identical to the existing SECCOMP_RET_ALLOW action. However, it can be very useful when initially developing a seccomp filter for an application. The developer can set the default action to be SECCOMP_RET_LOG, maybe mark any obviously needed syscalls with SECCOMP_RET_ALLOW, and then put the application through its paces. A list of syscalls that triggered the default action (SECCOMP_RET_LOG) can be easily gleaned from the logs and that list can be used to build the syscall whitelist. Finally, the developer can change the default action to the desired value. This provides a more friendly experience than seeing the application get killed, then updating the filter and rebuilding the app, seeing the application get killed due to a different syscall, then updating the filter and rebuilding the app, etc. The functionality is similar to what's supported by the various LSMs. SELinux has permissive mode, AppArmor has complain mode, SMACK has bring-up mode, etc. SECCOMP_RET_LOG is given a lower value than SECCOMP_RET_ALLOW as allow while logging is slightly more restrictive than quietly allowing. Unfortunately, the tests added for SECCOMP_RET_LOG are not capable of inspecting the audit log to verify that the syscall was logged. With this patch, the logic for deciding if an action will be logged is: if action == RET_ALLOW: do not log else if action == RET_KILL && RET_KILL in actions_logged: log else if action == RET_LOG && RET_LOG in actions_logged: log else if filter-requests-logging && action in actions_logged: log else if audit_enabled && process-is-being-audited: log else: do not log Signed-off-by: Tyler Hicks Signed-off-by: Kees Cook --- Documentation/userspace-api/seccomp_filter.rst | 9 +++ include/uapi/linux/seccomp.h | 1 + kernel/seccomp.c | 23 ++++-- tools/testing/selftests/seccomp/seccomp_bpf.c | 98 +++++++++++++++++++++++++- 4 files changed, 125 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index 2d1d8ab04ac5..f4977357daf2 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -141,6 +141,15 @@ In precedence order, they are: allow use of ptrace, even of other sandboxed processes, without extreme care; ptracers can use this mechanism to escape.) +``SECCOMP_RET_LOG``: + Results in the system call being executed after it is logged. This + should be used by application developers to learn which syscalls their + application needs without having to iterate through multiple test and + development cycles to build the list. + + This action will only be logged if "log" is present in the + actions_logged sysctl string. + ``SECCOMP_RET_ALLOW``: Results in the system call being executed. diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 19a611d0712e..f94433263e4b 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -31,6 +31,7 @@ #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ /* Masks for the return value sections. */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ed9fde418fc4..59cde2ed3b92 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -533,10 +533,12 @@ static void seccomp_send_sigsys(int syscall, int reason) #define SECCOMP_LOG_TRAP (1 << 2) #define SECCOMP_LOG_ERRNO (1 << 3) #define SECCOMP_LOG_TRACE (1 << 4) -#define SECCOMP_LOG_ALLOW (1 << 5) +#define SECCOMP_LOG_LOG (1 << 5) +#define SECCOMP_LOG_ALLOW (1 << 6) static u32 seccomp_actions_logged = SECCOMP_LOG_KILL | SECCOMP_LOG_TRAP | - SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE; + SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE | + SECCOMP_LOG_LOG; static inline void seccomp_log(unsigned long syscall, long signr, u32 action, bool requested) @@ -555,15 +557,18 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action, case SECCOMP_RET_TRACE: log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE; break; + case SECCOMP_RET_LOG: + log = seccomp_actions_logged & SECCOMP_LOG_LOG; + break; case SECCOMP_RET_KILL: default: log = seccomp_actions_logged & SECCOMP_LOG_KILL; } /* - * Force an audit message to be emitted when the action is RET_KILL or - * the FILTER_FLAG_LOG bit was set and the action is allowed to be - * logged by the admin. + * Force an audit message to be emitted when the action is RET_KILL, + * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is + * allowed to be logged by the admin. */ if (log) return __audit_seccomp(syscall, signr, action); @@ -699,6 +704,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, return 0; + case SECCOMP_RET_LOG: + seccomp_log(this_syscall, 0, action, true); + return 0; + case SECCOMP_RET_ALLOW: /* * Note that the "match" filter will always be NULL for @@ -873,6 +882,7 @@ static long seccomp_get_action_avail(const char __user *uaction) case SECCOMP_RET_TRAP: case SECCOMP_RET_ERRNO: case SECCOMP_RET_TRACE: + case SECCOMP_RET_LOG: case SECCOMP_RET_ALLOW: break; default: @@ -1023,12 +1033,14 @@ out: #define SECCOMP_RET_TRAP_NAME "trap" #define SECCOMP_RET_ERRNO_NAME "errno" #define SECCOMP_RET_TRACE_NAME "trace" +#define SECCOMP_RET_LOG_NAME "log" #define SECCOMP_RET_ALLOW_NAME "allow" static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME " " SECCOMP_RET_TRAP_NAME " " SECCOMP_RET_ERRNO_NAME " " SECCOMP_RET_TRACE_NAME " " + SECCOMP_RET_LOG_NAME " " SECCOMP_RET_ALLOW_NAME; struct seccomp_log_name { @@ -1041,6 +1053,7 @@ static const struct seccomp_log_name seccomp_log_names[] = { { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME }, { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME }, { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME }, + { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME }, { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME }, { } }; diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 1c8c22ce7740..7372958eccb5 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -74,7 +74,12 @@ #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ +#endif +#ifndef SECCOMP_RET_LOG +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ +#endif +#ifndef SECCOMP_RET_ACTION /* Masks for the return value sections. */ #define SECCOMP_RET_ACTION 0x7fff0000U #define SECCOMP_RET_DATA 0x0000ffffU @@ -342,6 +347,28 @@ TEST(empty_prog) EXPECT_EQ(EINVAL, errno); } +TEST(log_all) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + /* getppid() should succeed and be logged (no check for logging) */ + EXPECT_EQ(parent, syscall(__NR_getppid)); +} + TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) { struct sock_filter filter[] = { @@ -756,6 +783,7 @@ TEST_F(TRAP, handler) FIXTURE_DATA(precedence) { struct sock_fprog allow; + struct sock_fprog log; struct sock_fprog trace; struct sock_fprog error; struct sock_fprog trap; @@ -767,6 +795,13 @@ FIXTURE_SETUP(precedence) struct sock_filter allow_insns[] = { BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; + struct sock_filter log_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), + }; struct sock_filter trace_insns[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), @@ -803,6 +838,7 @@ FIXTURE_SETUP(precedence) memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) FILTER_ALLOC(allow); + FILTER_ALLOC(log); FILTER_ALLOC(trace); FILTER_ALLOC(error); FILTER_ALLOC(trap); @@ -813,6 +849,7 @@ FIXTURE_TEARDOWN(precedence) { #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) FILTER_FREE(allow); + FILTER_FREE(log); FILTER_FREE(trace); FILTER_FREE(error); FILTER_FREE(trap); @@ -830,6 +867,8 @@ TEST_F(precedence, allow_ok) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -854,6 +893,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -885,6 +926,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); @@ -906,6 +949,8 @@ TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -931,6 +976,8 @@ TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -952,6 +999,8 @@ TEST_F(precedence, errno_is_third) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -970,6 +1019,8 @@ TEST_F(precedence, errno_is_third_in_any_order) ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); @@ -992,6 +1043,8 @@ TEST_F(precedence, trace_is_fourth) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); /* Should work just fine. */ @@ -1013,12 +1066,54 @@ TEST_F(precedence, trace_is_fourth_in_any_order) ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); /* Should work just fine. */ EXPECT_EQ(parent, syscall(__NR_getppid)); /* No ptracer */ EXPECT_EQ(-1, syscall(__NR_getpid)); } +TEST_F(precedence, log_is_fifth) +{ + pid_t mypid, parent; + long ret; + + mypid = getpid(); + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* Should also work just fine */ + EXPECT_EQ(mypid, syscall(__NR_getpid)); +} + +TEST_F(precedence, log_is_fifth_in_any_order) +{ + pid_t mypid, parent; + long ret; + + mypid = getpid(); + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* Should also work just fine */ + EXPECT_EQ(mypid, syscall(__NR_getpid)); +} + #ifndef PTRACE_O_TRACESECCOMP #define PTRACE_O_TRACESECCOMP 0x00000080 #endif @@ -2603,7 +2698,7 @@ TEST(get_action_avail) { __u32 actions[] = { SECCOMP_RET_KILL, SECCOMP_RET_TRAP, SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, - SECCOMP_RET_ALLOW }; + SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; __u32 unknown_action = 0x10000000U; int i; long ret; @@ -2640,6 +2735,7 @@ TEST(get_action_avail) * - 64-bit arg prodding * - arch value testing (x86 modes especially) * - verify that FILTER_FLAG_LOG filters generate log messages + * - verify that RET_LOG generates log messages * - ... */ -- cgit v1.2.3-59-g8ed1b From fd76875ca289a3d4722f266fd2d5532a27083903 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Aug 2017 12:53:18 -0700 Subject: seccomp: Rename SECCOMP_RET_KILL to SECCOMP_RET_KILL_THREAD In preparation for adding SECCOMP_RET_KILL_PROCESS, rename SECCOMP_RET_KILL to the more accurate SECCOMP_RET_KILL_THREAD. The existing selftest values are intentionally left as SECCOMP_RET_KILL just to be sure we're exercising the alias. Signed-off-by: Kees Cook --- Documentation/networking/filter.txt | 2 +- Documentation/userspace-api/seccomp_filter.rst | 4 +-- include/uapi/linux/seccomp.h | 3 +- kernel/seccomp.c | 39 ++++++++++++++------------ samples/seccomp/bpf-direct.c | 4 +-- samples/seccomp/bpf-helper.h | 2 +- tools/testing/selftests/seccomp/seccomp_bpf.c | 17 ++++++----- 7 files changed, 39 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index b69b205501de..73aa0f12156d 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -337,7 +337,7 @@ Examples for low-level BPF: jeq #14, good /* __NR_rt_sigprocmask */ jeq #13, good /* __NR_rt_sigaction */ jeq #35, good /* __NR_nanosleep */ - bad: ret #0 /* SECCOMP_RET_KILL */ + bad: ret #0 /* SECCOMP_RET_KILL_THREAD */ good: ret #0x7fff0000 /* SECCOMP_RET_ALLOW */ The above example code can be placed into a file (here called "foo"), and diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index f4977357daf2..d76396f2d8ed 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -87,11 +87,11 @@ Return values A seccomp filter may return any of the following values. If multiple filters exist, the return value for the evaluation of a given system call will always use the highest precedent value. (For example, -``SECCOMP_RET_KILL`` will always take precedence.) +``SECCOMP_RET_KILL_THREAD`` will always take precedence.) In precedence order, they are: -``SECCOMP_RET_KILL``: +``SECCOMP_RET_KILL_THREAD``: Results in the task exiting immediately without executing the system call. The exit status of the task (``status & 0x7f``) will be ``SIGSYS``, not ``SIGKILL``. diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index f94433263e4b..5a03f699eb17 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -27,7 +27,8 @@ * The ordering ensures that a min_t() over composed return values always * selects the least permissive choice. */ -#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */ +#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ +#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 59cde2ed3b92..95ac54cff00f 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -192,7 +192,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, /* Ensure unexpected behavior doesn't result in failing open. */ if (unlikely(WARN_ON(f == NULL))) - return SECCOMP_RET_KILL; + return SECCOMP_RET_KILL_THREAD; if (!sd) { populate_seccomp_data(&sd_local); @@ -529,15 +529,17 @@ static void seccomp_send_sigsys(int syscall, int reason) #endif /* CONFIG_SECCOMP_FILTER */ /* For use with seccomp_actions_logged */ -#define SECCOMP_LOG_KILL (1 << 0) +#define SECCOMP_LOG_KILL_THREAD (1 << 0) #define SECCOMP_LOG_TRAP (1 << 2) #define SECCOMP_LOG_ERRNO (1 << 3) #define SECCOMP_LOG_TRACE (1 << 4) #define SECCOMP_LOG_LOG (1 << 5) #define SECCOMP_LOG_ALLOW (1 << 6) -static u32 seccomp_actions_logged = SECCOMP_LOG_KILL | SECCOMP_LOG_TRAP | - SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE | +static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_THREAD | + SECCOMP_LOG_TRAP | + SECCOMP_LOG_ERRNO | + SECCOMP_LOG_TRACE | SECCOMP_LOG_LOG; static inline void seccomp_log(unsigned long syscall, long signr, u32 action, @@ -560,13 +562,13 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action, case SECCOMP_RET_LOG: log = seccomp_actions_logged & SECCOMP_LOG_LOG; break; - case SECCOMP_RET_KILL: + case SECCOMP_RET_KILL_THREAD: default: - log = seccomp_actions_logged & SECCOMP_LOG_KILL; + log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD; } /* - * Force an audit message to be emitted when the action is RET_KILL, + * Force an audit message to be emitted when the action is RET_KILL_*, * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is * allowed to be logged by the admin. */ @@ -605,7 +607,7 @@ static void __secure_computing_strict(int this_syscall) #ifdef SECCOMP_DEBUG dump_stack(); #endif - seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL, true); + seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true); do_exit(SIGKILL); } @@ -716,7 +718,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, */ return 0; - case SECCOMP_RET_KILL: + case SECCOMP_RET_KILL_THREAD: default: seccomp_log(this_syscall, SIGSYS, action, true); /* Dump core only if this is the last remaining thread. */ @@ -878,7 +880,7 @@ static long seccomp_get_action_avail(const char __user *uaction) return -EFAULT; switch (action) { - case SECCOMP_RET_KILL: + case SECCOMP_RET_KILL_THREAD: case SECCOMP_RET_TRAP: case SECCOMP_RET_ERRNO: case SECCOMP_RET_TRACE: @@ -1029,19 +1031,20 @@ out: #ifdef CONFIG_SYSCTL /* Human readable action names for friendly sysctl interaction */ -#define SECCOMP_RET_KILL_NAME "kill" +#define SECCOMP_RET_KILL_THREAD_NAME "kill_thread" #define SECCOMP_RET_TRAP_NAME "trap" #define SECCOMP_RET_ERRNO_NAME "errno" #define SECCOMP_RET_TRACE_NAME "trace" #define SECCOMP_RET_LOG_NAME "log" #define SECCOMP_RET_ALLOW_NAME "allow" -static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME " " - SECCOMP_RET_TRAP_NAME " " - SECCOMP_RET_ERRNO_NAME " " - SECCOMP_RET_TRACE_NAME " " - SECCOMP_RET_LOG_NAME " " - SECCOMP_RET_ALLOW_NAME; +static const char seccomp_actions_avail[] = + SECCOMP_RET_KILL_THREAD_NAME " " + SECCOMP_RET_TRAP_NAME " " + SECCOMP_RET_ERRNO_NAME " " + SECCOMP_RET_TRACE_NAME " " + SECCOMP_RET_LOG_NAME " " + SECCOMP_RET_ALLOW_NAME; struct seccomp_log_name { u32 log; @@ -1049,7 +1052,7 @@ struct seccomp_log_name { }; static const struct seccomp_log_name seccomp_log_names[] = { - { SECCOMP_LOG_KILL, SECCOMP_RET_KILL_NAME }, + { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME }, { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME }, { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME }, { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME }, diff --git a/samples/seccomp/bpf-direct.c b/samples/seccomp/bpf-direct.c index 151ec3f52189..235ce3c49ee9 100644 --- a/samples/seccomp/bpf-direct.c +++ b/samples/seccomp/bpf-direct.c @@ -129,7 +129,7 @@ static int install_filter(void) /* Check that read is only using stdin. */ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDIN_FILENO, 4, 0), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD), /* Check that write is only using stdout */ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)), @@ -139,7 +139,7 @@ static int install_filter(void) BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD), }; struct sock_fprog prog = { .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h index 1d8de9edd858..83dbe79cbe2c 100644 --- a/samples/seccomp/bpf-helper.h +++ b/samples/seccomp/bpf-helper.h @@ -44,7 +44,7 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count); #define ALLOW \ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) #define DENY \ - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL) + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD) #define JUMP(labels, label) \ BPF_JUMP(BPF_JMP+BPF_JA, FIND_LABEL((labels), (label)), \ JUMP_JT, JUMP_JF) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7372958eccb5..a3ba39a32449 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -68,15 +68,18 @@ #define SECCOMP_MODE_FILTER 2 #endif +#ifndef SECCOMP_RET_KILL_THREAD +#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ +#endif #ifndef SECCOMP_RET_KILL -#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */ -#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ -#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ -#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ -#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ +#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ #endif #ifndef SECCOMP_RET_LOG -#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ #endif #ifndef SECCOMP_RET_ACTION @@ -2696,7 +2699,7 @@ TEST_SIGNAL(filter_flag_log, SIGSYS) TEST(get_action_avail) { - __u32 actions[] = { SECCOMP_RET_KILL, SECCOMP_RET_TRAP, + __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP, SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; __u32 unknown_action = 0x10000000U; -- cgit v1.2.3-59-g8ed1b From 4d3b0b05aae9ee9ce0970dc4cc0fb3fad5e85945 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Aug 2017 13:01:39 -0700 Subject: seccomp: Introduce SECCOMP_RET_KILL_PROCESS This introduces the BPF return value for SECCOMP_RET_KILL_PROCESS to kill an entire process. This cannot yet be reached by seccomp, but it changes the default-kill behavior (for unknown return values) from kill-thread to kill-process. Signed-off-by: Kees Cook --- include/uapi/linux/seccomp.h | 18 ++++++++++-------- kernel/seccomp.c | 22 ++++++++++++++++------ 2 files changed, 26 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 5a03f699eb17..7e77c92df78a 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -22,18 +22,20 @@ /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. - * The upper 16-bits are ordered from least permissive values to most. + * The upper 16-bits are ordered from least permissive values to most, + * as a signed value (so 0x8000000 is negative). * * The ordering ensures that a min_t() over composed return values always * selects the least permissive choice. */ -#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ -#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD -#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ -#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ -#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ -#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ -#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ +#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ +#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ +#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ /* Masks for the return value sections. */ #define SECCOMP_RET_ACTION 0x7fff0000U diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 95ac54cff00f..5c7299b9d953 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -192,7 +192,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, /* Ensure unexpected behavior doesn't result in failing open. */ if (unlikely(WARN_ON(f == NULL))) - return SECCOMP_RET_KILL_THREAD; + return SECCOMP_RET_KILL_PROCESS; if (!sd) { populate_seccomp_data(&sd_local); @@ -529,14 +529,16 @@ static void seccomp_send_sigsys(int syscall, int reason) #endif /* CONFIG_SECCOMP_FILTER */ /* For use with seccomp_actions_logged */ -#define SECCOMP_LOG_KILL_THREAD (1 << 0) +#define SECCOMP_LOG_KILL_PROCESS (1 << 0) +#define SECCOMP_LOG_KILL_THREAD (1 << 1) #define SECCOMP_LOG_TRAP (1 << 2) #define SECCOMP_LOG_ERRNO (1 << 3) #define SECCOMP_LOG_TRACE (1 << 4) #define SECCOMP_LOG_LOG (1 << 5) #define SECCOMP_LOG_ALLOW (1 << 6) -static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_THREAD | +static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS | + SECCOMP_LOG_KILL_THREAD | SECCOMP_LOG_TRAP | SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE | @@ -563,8 +565,11 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action, log = seccomp_actions_logged & SECCOMP_LOG_LOG; break; case SECCOMP_RET_KILL_THREAD: - default: log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD; + break; + case SECCOMP_RET_KILL_PROCESS: + default: + log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS; } /* @@ -719,10 +724,12 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, return 0; case SECCOMP_RET_KILL_THREAD: + case SECCOMP_RET_KILL_PROCESS: default: seccomp_log(this_syscall, SIGSYS, action, true); /* Dump core only if this is the last remaining thread. */ - if (get_nr_threads(current) == 1) { + if (action == SECCOMP_RET_KILL_PROCESS || + get_nr_threads(current) == 1) { siginfo_t info; /* Show the original registers in the dump. */ @@ -731,7 +738,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, seccomp_init_siginfo(&info, this_syscall, data); do_coredump(&info); } - do_exit(SIGSYS); + if (action == SECCOMP_RET_KILL_PROCESS) + do_group_exit(SIGSYS); + else + do_exit(SIGSYS); } unreachable(); -- cgit v1.2.3-59-g8ed1b From 0466bdb99e8744bc9befa8d62a317f0fd7fd7421 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Aug 2017 13:12:11 -0700 Subject: seccomp: Implement SECCOMP_RET_KILL_PROCESS action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now, SECCOMP_RET_KILL_THREAD (neƩ SECCOMP_RET_KILL) kills the current thread. There have been a few requests for this to kill the entire process (the thread group). This cannot be just changed (discovered when adding coredump support since coredumping kills the entire process) because there are userspace programs depending on the thread-kill behavior. Instead, implement SECCOMP_RET_KILL_PROCESS, which is 0x80000000, and can be processed as "-1" by the kernel, below the existing RET_KILL that is ABI-set to "0". For userspace, SECCOMP_RET_ACTION_FULL is added to expand the mask to the signed bit. Old userspace using the SECCOMP_RET_ACTION mask will see SECCOMP_RET_KILL_PROCESS as 0 still, but this would only be visible when examining the siginfo in a core dump from a RET_KILL_*, where it will think it was thread-killed instead of process-killed. Attempts to introduce this behavior via other ways (filter flags, seccomp struct flags, masked RET_DATA bits) all come with weird side-effects and baggage. This change preserves the central behavioral expectations of the seccomp filter engine without putting too great a burden on changes needed in userspace to use the new action. The new action is discoverable by userspace through either the new actions_avail sysctl or through the SECCOMP_GET_ACTION_AVAIL seccomp operation. If used without checking for availability, old kernels will treat RET_KILL_PROCESS as RET_KILL_THREAD (since the old mask will produce RET_KILL_THREAD). Cc: Paul Moore Cc: Fabricio Voznika Signed-off-by: Kees Cook --- Documentation/userspace-api/seccomp_filter.rst | 7 ++++++- include/uapi/linux/seccomp.h | 1 + kernel/seccomp.c | 9 +++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index d76396f2d8ed..099c412951d6 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -87,10 +87,15 @@ Return values A seccomp filter may return any of the following values. If multiple filters exist, the return value for the evaluation of a given system call will always use the highest precedent value. (For example, -``SECCOMP_RET_KILL_THREAD`` will always take precedence.) +``SECCOMP_RET_KILL_PROCESS`` will always take precedence.) In precedence order, they are: +``SECCOMP_RET_KILL_PROCESS``: + Results in the entire process exiting immediately without executing + the system call. The exit status of the task (``status & 0x7f``) + will be ``SIGSYS``, not ``SIGKILL``. + ``SECCOMP_RET_KILL_THREAD``: Results in the task exiting immediately without executing the system call. The exit status of the task (``status & 0x7f``) will diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 7e77c92df78a..f6bc1dea3247 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -38,6 +38,7 @@ #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ /* Masks for the return value sections. */ +#define SECCOMP_RET_ACTION_FULL 0xffff0000U #define SECCOMP_RET_ACTION 0x7fff0000U #define SECCOMP_RET_DATA 0x0000ffffU diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 5c7299b9d953..c24579dfa7a1 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -181,6 +181,7 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) * * Returns valid seccomp BPF response codes. */ +#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL))) static u32 seccomp_run_filters(const struct seccomp_data *sd, struct seccomp_filter **match) { @@ -206,7 +207,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, for (; f; f = f->prev) { u32 cur_ret = BPF_PROG_RUN(f->prog, sd); - if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) { + if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) { ret = cur_ret; *match = f; } @@ -650,7 +651,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, filter_ret = seccomp_run_filters(sd, &match); data = filter_ret & SECCOMP_RET_DATA; - action = filter_ret & SECCOMP_RET_ACTION; + action = filter_ret & SECCOMP_RET_ACTION_FULL; switch (action) { case SECCOMP_RET_ERRNO: @@ -890,6 +891,7 @@ static long seccomp_get_action_avail(const char __user *uaction) return -EFAULT; switch (action) { + case SECCOMP_RET_KILL_PROCESS: case SECCOMP_RET_KILL_THREAD: case SECCOMP_RET_TRAP: case SECCOMP_RET_ERRNO: @@ -1041,6 +1043,7 @@ out: #ifdef CONFIG_SYSCTL /* Human readable action names for friendly sysctl interaction */ +#define SECCOMP_RET_KILL_PROCESS_NAME "kill_process" #define SECCOMP_RET_KILL_THREAD_NAME "kill_thread" #define SECCOMP_RET_TRAP_NAME "trap" #define SECCOMP_RET_ERRNO_NAME "errno" @@ -1049,6 +1052,7 @@ out: #define SECCOMP_RET_ALLOW_NAME "allow" static const char seccomp_actions_avail[] = + SECCOMP_RET_KILL_PROCESS_NAME " " SECCOMP_RET_KILL_THREAD_NAME " " SECCOMP_RET_TRAP_NAME " " SECCOMP_RET_ERRNO_NAME " " @@ -1062,6 +1066,7 @@ struct seccomp_log_name { }; static const struct seccomp_log_name seccomp_log_names[] = { + { SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME }, { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME }, { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME }, { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME }, -- cgit v1.2.3-59-g8ed1b