From 637241a900cbd982f744d44646b48a273d609b34 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 12 Jun 2013 14:04:39 -0700 Subject: kmsg: honor dmesg_restrict sysctl on /dev/kmsg The dmesg_restrict sysctl currently covers the syslog method for access dmesg, however /dev/kmsg isn't covered by the same protections. Most people haven't noticed because util-linux dmesg(1) defaults to using the syslog method for access in older versions. With util-linux dmesg(1) defaults to reading directly from /dev/kmsg. To fix /dev/kmsg, let's compare the existing interfaces and what they allow: - /proc/kmsg allows: - open (SYSLOG_ACTION_OPEN) if CAP_SYSLOG since it uses a destructive single-reader interface (SYSLOG_ACTION_READ). - everything, after an open. - syslog syscall allows: - anything, if CAP_SYSLOG. - SYSLOG_ACTION_READ_ALL and SYSLOG_ACTION_SIZE_BUFFER, if dmesg_restrict==0. - nothing else (EPERM). The use-cases were: - dmesg(1) needs to do non-destructive SYSLOG_ACTION_READ_ALLs. - sysklog(1) needs to open /proc/kmsg, drop privs, and still issue the destructive SYSLOG_ACTION_READs. AIUI, dmesg(1) is moving to /dev/kmsg, and systemd-journald doesn't clear the ring buffer. Based on the comments in devkmsg_llseek, it sounds like actions besides reading aren't going to be supported by /dev/kmsg (i.e. SYSLOG_ACTION_CLEAR), so we have a strict subset of the non-destructive syslog syscall actions. To this end, move the check as Josh had done, but also rename the constants to reflect their new uses (SYSLOG_FROM_CALL becomes SYSLOG_FROM_READER, and SYSLOG_FROM_FILE becomes SYSLOG_FROM_PROC). SYSLOG_FROM_READER allows non-destructive actions, and SYSLOG_FROM_PROC allows destructive actions after a capabilities-constrained SYSLOG_ACTION_OPEN check. - /dev/kmsg allows: - open if CAP_SYSLOG or dmesg_restrict==0 - reading/polling, after open Addresses https://bugzilla.redhat.com/show_bug.cgi?id=903192 [akpm@linux-foundation.org: use pr_warn_once()] Signed-off-by: Kees Cook Reported-by: Christian Kujau Tested-by: Josh Boyer Cc: Kay Sievers Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/kmsg.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index bd4b5a740ff1..bdfabdaefdce 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -21,12 +21,12 @@ extern wait_queue_head_t log_wait; static int kmsg_open(struct inode * inode, struct file * file) { - return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_FILE); + return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_PROC); } static int kmsg_release(struct inode * inode, struct file * file) { - (void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_FILE); + (void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_PROC); return 0; } @@ -34,15 +34,15 @@ static ssize_t kmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { if ((file->f_flags & O_NONBLOCK) && - !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE)) + !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC)) return -EAGAIN; - return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_FILE); + return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_PROC); } static unsigned int kmsg_poll(struct file *file, poll_table *wait) { poll_wait(file, &log_wait, wait); - if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE)) + if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC)) return POLLIN | POLLRDNORM; return 0; } -- cgit v1.2.3-59-g8ed1b From 7869e590679ed71cd1a1e676e8c1c179762c3efe Mon Sep 17 00:00:00 2001 From: "Xiaowei.Hu" Date: Wed, 12 Jun 2013 14:04:41 -0700 Subject: ocfs2: ocfs2_prep_new_orphaned_file() should return ret If an error occurs, for example an EIO in __ocfs2_prepare_orphan_dir, ocfs2_prep_new_orphaned_file will release the inode_ac, then when the caller of ocfs2_prep_new_orphaned_file gets a 0 return, it will refer to a NULL ocfs2_alloc_context struct in the following functions. A kernel panic happens. Signed-off-by: "Xiaowei.Hu" Reviewed-by: shencanquan Acked-by: Sunil Mushran Cc: Joe Jin Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 04ee1b57c243..b563351753f1 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2216,7 +2216,7 @@ out: brelse(orphan_dir_bh); - return 0; + return ret; } int ocfs2_create_inode_in_orphan(struct inode *dir, -- cgit v1.2.3-59-g8ed1b From e099127169429c19544a8f55dd26937fddd5b1f4 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Wed, 12 Jun 2013 14:04:51 -0700 Subject: fs/ocfs2/namei.c: remove unecessary ERROR when removing non-empty directory While removing a non-empty directory, the kernel dumps a message: (rmdir,21743,1):ocfs2_unlink:953 ERROR: status = -39 Suppress the error message from being printed in the dmesg so users don't panic. Signed-off-by: Goldwyn Rodrigues Cc: Mark Fasheh Cc: Joel Becker Acked-by: Sunil Mushran Reviewed-by: Jie Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b563351753f1..b4a5cdf9dbc5 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -947,7 +947,7 @@ leave: ocfs2_free_dir_lookup_result(&orphan_insert); ocfs2_free_dir_lookup_result(&lookup); - if (status) + if (status && (status != -ENOTEMPTY)) mlog_errno(status); return status; -- cgit v1.2.3-59-g8ed1b From 4fcc712f5c48b1e32cdbf9b9cfba42a27b2e3160 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 12 Jun 2013 14:04:59 -0700 Subject: aio: fix io_destroy() regression by using call_rcu() There was a regression introduced by 36f5588905c1 ("aio: refcounting cleanup"), reported by Jens Axboe - the refcounting cleanup switched to using RCU in the shutdown path, but the synchronize_rcu() was done in the context of the io_destroy() syscall greatly increasing the time it could block. This patch switches it to call_rcu() and makes shutdown asynchronous (more asynchronous than it was originally; before the refcount changes io_destroy() would still wait on pending kiocbs). Note that there's a global quota on the max outstanding kiocbs, and that quota must be manipulated synchronously; otherwise io_setup() could return -EAGAIN when there isn't quota available, and userspace won't have any way of waiting until shutdown of the old kioctxs has finished (besides busy looping). So we release our quota before kioctx shutdown has finished, which should be fine since the quota never corresponded to anything real anyways. Signed-off-by: Kent Overstreet Cc: Zach Brown Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Mark Fasheh Cc: Joel Becker Cc: Rusty Russell Reported-by: Jens Axboe Tested-by: Jens Axboe Cc: Asai Thambi S P Cc: Selvan Mani Cc: Sam Bradshaw Cc: Jeff Moyer Cc: Al Viro Signed-off-by: Benjamin LaHaise Tested-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 7fe5bdee1630..2bbcacf74d0c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -141,9 +141,6 @@ static void aio_free_ring(struct kioctx *ctx) for (i = 0; i < ctx->nr_pages; i++) put_page(ctx->ring_pages[i]); - if (ctx->mmap_size) - vm_munmap(ctx->mmap_base, ctx->mmap_size); - if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) kfree(ctx->ring_pages); } @@ -322,11 +319,6 @@ static void free_ioctx(struct kioctx *ctx) aio_free_ring(ctx); - spin_lock(&aio_nr_lock); - BUG_ON(aio_nr - ctx->max_reqs > aio_nr); - aio_nr -= ctx->max_reqs; - spin_unlock(&aio_nr_lock); - pr_debug("freeing %p\n", ctx); /* @@ -435,17 +427,24 @@ static void kill_ioctx(struct kioctx *ctx) { if (!atomic_xchg(&ctx->dead, 1)) { hlist_del_rcu(&ctx->list); - /* Between hlist_del_rcu() and dropping the initial ref */ - synchronize_rcu(); /* - * We can't punt to workqueue here because put_ioctx() -> - * free_ioctx() will unmap the ringbuffer, and that has to be - * done in the original process's context. kill_ioctx_rcu/work() - * exist for exit_aio(), as in that path free_ioctx() won't do - * the unmap. + * It'd be more correct to do this in free_ioctx(), after all + * the outstanding kiocbs have finished - but by then io_destroy + * has already returned, so io_setup() could potentially return + * -EAGAIN with no ioctxs actually in use (as far as userspace + * could tell). */ - kill_ioctx_work(&ctx->rcu_work); + spin_lock(&aio_nr_lock); + BUG_ON(aio_nr - ctx->max_reqs > aio_nr); + aio_nr -= ctx->max_reqs; + spin_unlock(&aio_nr_lock); + + if (ctx->mmap_size) + vm_munmap(ctx->mmap_base, ctx->mmap_size); + + /* Between hlist_del_rcu() and dropping the initial ref */ + call_rcu(&ctx->rcu_head, kill_ioctx_rcu); } } @@ -495,10 +494,7 @@ void exit_aio(struct mm_struct *mm) */ ctx->mmap_size = 0; - if (!atomic_xchg(&ctx->dead, 1)) { - hlist_del_rcu(&ctx->list); - call_rcu(&ctx->rcu_head, kill_ioctx_rcu); - } + kill_ioctx(ctx); } } -- cgit v1.2.3-59-g8ed1b From 27749f2ff0717e115680922000839ad6a576eddf Mon Sep 17 00:00:00 2001 From: Xue jiufei Date: Wed, 12 Jun 2013 14:05:03 -0700 Subject: ocfs2: add missing lockres put in dlm_mig_lockres_handler dlm_mig_lockres_handler() is missing a dlm_lockres_put() on an error path. Signed-off-by: joyce Reviewed-by: shencanquan Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlm/dlmrecovery.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index b3fdd1a323d6..e68588e6b1e8 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1408,6 +1408,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, mres->lockname_len, mres->lockname); ret = -EFAULT; spin_unlock(&res->spinlock); + dlm_lockres_put(res); goto leave; } res->state |= DLM_LOCK_RES_MIGRATING; -- cgit v1.2.3-59-g8ed1b