From 7b9a7ec565505699f503b4fcf61500dceb36e744 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Mon, 17 Dec 2012 16:03:10 -0800 Subject: proc: don't show nonexistent capabilities Without this patch it is really hard to interpret a bounding set, if CAP_LAST_CAP is unknown for the current kernel. Non-existent capabilities cannot be deleted from a bounding set with the help of prctl. E.g.: Here are two examples without/with this patch. CapBnd: ffffffe0fdecffff CapBnd: 00000000fdecffff I suggest hiding non-existent capabilities. Here are two reasons. * It's more logical and easier to use. * It helps to checkpoint-restore capabilities of tasks, because tasks can be restored on another kernel, where CAP_LAST_CAP is bigger. Signed-off-by: Andrew Vagin Cc: Andrew G. Morgan Reviewed-by: Serge E. Hallyn Cc: Pavel Emelyanov Reviewed-by: Kees Cook Cc: KAMEZAWA Hiroyuki Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/proc/array.c') diff --git a/fs/proc/array.c b/fs/proc/array.c index d3696708fc1a..377a37366dde 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -308,6 +308,10 @@ static void render_cap_t(struct seq_file *m, const char *header, seq_putc(m, '\n'); } +/* Remove non-existent capabilities */ +#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \ + CAP_TO_MASK(CAP_LAST_CAP + 1) - 1) + static inline void task_cap(struct seq_file *m, struct task_struct *p) { const struct cred *cred; @@ -321,6 +325,11 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) cap_bset = cred->cap_bset; rcu_read_unlock(); + NORM_CAPS(cap_inheritable); + NORM_CAPS(cap_permitted); + NORM_CAPS(cap_effective); + NORM_CAPS(cap_bset); + render_cap_t(m, "CapInh:\t", &cap_inheritable); render_cap_t(m, "CapPrm:\t", &cap_permitted); render_cap_t(m, "CapEff:\t", &cap_effective); -- cgit v1.2.3-59-g8ed1b From 2f4b3bf6b2318cfaa177ec5a802f4d8d6afbd816 Mon Sep 17 00:00:00 2001 
From: Kees Cook Date: Mon, 17 Dec 2012 16:03:14 -0800 Subject: /proc/pid/status: add "Seccomp" field It is currently impossible to examine the state of seccomp for a given process. While attaching with gdb and attempting "call prctl(PR_GET_SECCOMP,...)" will work with some situations, it is not reliable. If the process is in seccomp mode 1, this query will kill the process (prctl not allowed), if the process is in mode 2 with prctl not allowed, it will similarly be killed, and in weird cases, if prctl is filtered to return errno 0, it can look like seccomp is disabled. When reviewing the state of running processes, there should be a way to externally examine the seccomp mode. ("Did this build of Chrome end up using seccomp?" "Did my distro ship ssh with seccomp enabled?") This adds the "Seccomp" line to /proc/$pid/status. Signed-off-by: Kees Cook Reviewed-by: Cyrill Gorcunov Cc: Andrea Arcangeli Cc: James Morris Acked-by: Serge E. Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 2 ++ fs/proc/array.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'fs/proc/array.c') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 74cb394e6888..12665ee7094a 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -181,6 +181,7 @@ read the file /proc/PID/status: CapPrm: 0000000000000000 CapEff: 0000000000000000 CapBnd: ffffffffffffffff + Seccomp: 0 voluntary_ctxt_switches: 0 nonvoluntary_ctxt_switches: 1 @@ -237,6 +238,7 @@ Table 1-2: Contents of the status files (as of 2.6.30-rc7) CapPrm bitmap of permitted capabilities CapEff bitmap of effective capabilities CapBnd bitmap of capabilities bounding set + Seccomp seccomp mode, like prctl(PR_GET_SECCOMP, ...) 
Cpus_allowed mask of CPUs on which this process may run Cpus_allowed_list Same as previous, but in "list format" Mems_allowed mask of memory nodes allowed to this process diff --git a/fs/proc/array.c b/fs/proc/array.c index 377a37366dde..077235ffb38b 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -336,6 +336,13 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) render_cap_t(m, "CapBnd:\t", &cap_bset); } +static inline void task_seccomp(struct seq_file *m, struct task_struct *p) +{ +#ifdef CONFIG_SECCOMP + seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode); +#endif +} + static inline void task_context_switch_counts(struct seq_file *m, struct task_struct *p) { @@ -369,6 +376,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, } task_sig(m, task); task_cap(m, task); + task_seccomp(m, task); task_cpus_allowed(m, task); cpuset_task_status_allowed(m, task); task_context_switch_counts(m, task); -- cgit v1.2.3-59-g8ed1b From 8d238027b87e654be552eabdf492042a34c5c300 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 17 Dec 2012 16:03:17 -0800 Subject: proc: pid/status: show all supplementary groups We display a list of supplementary groups for each process in /proc/PID/status. However, we show only the first 32 groups, not all of them. Although this is rare, processes sometimes do have more than 32 supplementary groups, and this kernel limitation breaks user-space apps that rely on the group list in /proc/PID/status. The number 32 comes from the internal NGROUPS_SMALL macro which defines the length for the internal kernel "small" groups buffer. There is no apparent reason to limit to this value. This patch removes the 32-group printing limit. The Linux kernel limits the number of supplementary groups to NGROUPS_MAX, which is currently set to 65536. And this is the maximum count of groups we may possibly print. Signed-off-by: Artem Bityutskiy Acked-by: Serge E. 
Hallyn Acked-by: Kees Cook Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/proc/array.c') diff --git a/fs/proc/array.c b/fs/proc/array.c index 077235ffb38b..439544fec388 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -212,7 +212,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, group_info = cred->group_info; task_unlock(p); - for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) + for (g = 0; g < group_info->ngroups; g++) seq_printf(m, "%d ", from_kgid_munged(user_ns, GROUP_AT(group_info, g))); put_cred(cred); -- cgit v1.2.3-59-g8ed1b From 138d22b58696c506799f8de759804083ff9effae Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 17 Dec 2012 16:05:02 -0800 Subject: fs, epoll: add procfs fdinfo helper This allows us to print out the eventpoll target file descriptor, events and data. The /proc/pid/fdinfo/fd output then consists of | pos: 0 | flags: 02 | tfd: 5 events: 1d data: ffffffffffffffff enabled: 1 [avagin@: fix for uninitialized ret variable] Signed-off-by: Cyrill Gorcunov Acked-by: Pavel Emelyanov Cc: Oleg Nesterov Cc: Andrey Vagin Cc: Al Viro Cc: Alexey Dobriyan Cc: James Bottomley Cc: "Aneesh Kumar K.V" Cc: Alexey Dobriyan Cc: Matthew Helsley Cc: "J. Bruce Fields" Cc: "Aneesh Kumar K.V" Cc: Tvrtko Ursulin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 28 ++++++++++++++++++++++++++++ fs/proc/array.c | 2 +- fs/signalfd.c | 18 ++++++++++++++++++ include/linux/proc_fs.h | 3 +++ 4 files changed, 50 insertions(+), 1 deletion(-) (limited to 'fs/proc/array.c') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index cd96649bfe62..be56b21435f8 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -38,6 +38,8 @@ #include #include #include +#include +#include /* * LOCKING: @@ -783,8 +785,34 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) return pollflags != -1 ? 
pollflags : 0; } +#ifdef CONFIG_PROC_FS +static int ep_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct eventpoll *ep = f->private_data; + struct rb_node *rbp; + int ret = 0; + + mutex_lock(&ep->mtx); + for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { + struct epitem *epi = rb_entry(rbp, struct epitem, rbn); + + ret = seq_printf(m, "tfd: %8d events: %8x data: %16llx\n", + epi->ffd.fd, epi->event.events, + (long long)epi->event.data); + if (ret) + break; + } + mutex_unlock(&ep->mtx); + + return ret; +} +#endif + /* File callbacks that implement the eventpoll file behaviour */ static const struct file_operations eventpoll_fops = { +#ifdef CONFIG_PROC_FS + .show_fdinfo = ep_show_fdinfo, +#endif .release = ep_eventpoll_release, .poll = ep_eventpoll_poll, .llseek = noop_llseek, diff --git a/fs/proc/array.c b/fs/proc/array.c index 439544fec388..060a56a91278 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -220,7 +220,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, seq_putc(m, '\n'); } -static void render_sigset_t(struct seq_file *m, const char *header, +void render_sigset_t(struct seq_file *m, const char *header, sigset_t *set) { int i; diff --git a/fs/signalfd.c b/fs/signalfd.c index 8bee4e570911..b53486961735 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -29,6 +29,7 @@ #include #include #include +#include void signalfd_cleanup(struct sighand_struct *sighand) { @@ -227,7 +228,24 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, return total ? 
total: ret; } +#ifdef CONFIG_PROC_FS +static int signalfd_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct signalfd_ctx *ctx = f->private_data; + sigset_t sigmask; + + sigmask = ctx->sigmask; + signotset(&sigmask); + render_sigset_t(m, "sigmask:\t", &sigmask); + + return 0; +} +#endif + static const struct file_operations signalfd_fops = { +#ifdef CONFIG_PROC_FS + .show_fdinfo = signalfd_show_fdinfo, +#endif .release = signalfd_release, .poll = signalfd_poll, .read = signalfd_read, diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 3fd2e871ff1b..b4f70f0a9a48 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -290,4 +290,7 @@ static inline struct net *PDE_NET(struct proc_dir_entry *pde) return pde->parent->data; } +#include + +void render_sigset_t(struct seq_file *m, const char *header, sigset_t *set); #endif /* _LINUX_PROC_FS_H */ -- cgit v1.2.3-59-g8ed1b