aboutsummaryrefslogtreecommitdiffstats
path: root/ipc/mqueue.c
diff options
context:
space:
mode:
Diffstat (limited to 'ipc/mqueue.c')
-rw-r--r--ipc/mqueue.c126
1 files changed, 80 insertions, 46 deletions
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 49a05ba3000d..467a194b8a2e 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -45,6 +45,7 @@
struct mqueue_fs_context {
struct ipc_namespace *ipc_ns;
+ bool newns; /* Set if newly created ipc namespace */
};
#define MQUEUE_MAGIC 0x19800202
@@ -142,8 +143,9 @@ struct mqueue_inode_info {
struct sigevent notify;
struct pid *notify_owner;
+ u32 notify_self_exec_id;
struct user_namespace *notify_user_ns;
- struct user_struct *user; /* user who created, for accounting */
+ struct ucounts *ucounts; /* user who created, for accounting */
struct sock *notify_sock;
struct sk_buff *notify_cookie;
@@ -162,8 +164,6 @@ static void remove_notification(struct mqueue_inode_info *info);
static struct kmem_cache *mqueue_inode_cachep;
-static struct ctl_table_header *mq_sysctl_table;
-
static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
{
return container_of(inode, struct mqueue_inode_info, vfs_inode);
@@ -239,11 +239,10 @@ static inline void msg_tree_erase(struct posix_msg_tree_node *leaf,
info->msg_tree_rightmost = rb_prev(node);
rb_erase(node, &info->msg_tree);
- if (info->node_cache) {
+ if (info->node_cache)
kfree(leaf);
- } else {
+ else
info->node_cache = leaf;
- }
}
static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
@@ -292,7 +291,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
struct ipc_namespace *ipc_ns, umode_t mode,
struct mq_attr *attr)
{
- struct user_struct *u = current_user();
struct inode *inode;
int ret = -ENOMEM;
@@ -321,7 +319,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
info->notify_owner = NULL;
info->notify_user_ns = NULL;
info->qsize = 0;
- info->user = NULL; /* set when all is ok */
+ info->ucounts = NULL; /* set when all is ok */
info->msg_tree = RB_ROOT;
info->msg_tree_rightmost = NULL;
info->node_cache = NULL;
@@ -371,19 +369,23 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
if (mq_bytes + mq_treesize < mq_bytes)
goto out_inode;
mq_bytes += mq_treesize;
- spin_lock(&mq_lock);
- if (u->mq_bytes + mq_bytes < u->mq_bytes ||
- u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
+ info->ucounts = get_ucounts(current_ucounts());
+ if (info->ucounts) {
+ long msgqueue;
+
+ spin_lock(&mq_lock);
+ msgqueue = inc_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
+ if (msgqueue == LONG_MAX || msgqueue > rlimit(RLIMIT_MSGQUEUE)) {
+ dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
+ spin_unlock(&mq_lock);
+ put_ucounts(info->ucounts);
+ info->ucounts = NULL;
+ /* mqueue_evict_inode() releases info->messages */
+ ret = -EMFILE;
+ goto out_inode;
+ }
spin_unlock(&mq_lock);
- /* mqueue_evict_inode() releases info->messages */
- ret = -EMFILE;
- goto out_inode;
}
- u->mq_bytes += mq_bytes;
- spin_unlock(&mq_lock);
-
- /* all is ok */
- info->user = get_uid(u);
} else if (S_ISDIR(mode)) {
inc_nlink(inode);
/* Some things misbehave if size == 0 on a directory */
@@ -424,6 +426,14 @@ static int mqueue_get_tree(struct fs_context *fc)
{
struct mqueue_fs_context *ctx = fc->fs_private;
+ /*
+ * With a newly created ipc namespace, we don't need to do a search
+ * for an ipc namespace match, but we still need to set s_fs_info.
+ */
+ if (ctx->newns) {
+ fc->s_fs_info = ctx->ipc_ns;
+ return get_tree_nodev(fc, mqueue_fill_super);
+ }
return get_tree_keyed(fc, mqueue_fill_super, ctx->ipc_ns);
}
@@ -451,6 +461,10 @@ static int mqueue_init_fs_context(struct fs_context *fc)
return 0;
}
+/*
+ * mq_init_ns() is currently the only caller of mq_create_mount().
+ * So the ns parameter is always a newly created ipc namespace.
+ */
static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
{
struct mqueue_fs_context *ctx;
@@ -462,6 +476,7 @@ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
return ERR_CAST(fc);
ctx = fc->fs_private;
+ ctx->newns = true;
put_ipc_ns(ctx->ipc_ns);
ctx->ipc_ns = get_ipc_ns(ns);
put_user_ns(fc->user_ns);
@@ -474,7 +489,7 @@ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
static void init_once(void *foo)
{
- struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo;
+ struct mqueue_inode_info *p = foo;
inode_init_once(&p->vfs_inode);
}
@@ -483,7 +498,7 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb)
{
struct mqueue_inode_info *ei;
- ei = kmem_cache_alloc(mqueue_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, mqueue_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
return &ei->vfs_inode;
@@ -497,7 +512,6 @@ static void mqueue_free_inode(struct inode *inode)
static void mqueue_evict_inode(struct inode *inode)
{
struct mqueue_inode_info *info;
- struct user_struct *user;
struct ipc_namespace *ipc_ns;
struct msg_msg *msg, *nmsg;
LIST_HEAD(tmp_msg);
@@ -520,8 +534,7 @@ static void mqueue_evict_inode(struct inode *inode)
free_msg(msg);
}
- user = info->user;
- if (user) {
+ if (info->ucounts) {
unsigned long mq_bytes, mq_treesize;
/* Total amount of bytes accounted for the mqueue */
@@ -533,7 +546,7 @@ static void mqueue_evict_inode(struct inode *inode)
info->attr.mq_msgsize);
spin_lock(&mq_lock);
- user->mq_bytes -= mq_bytes;
+ dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
/*
* get_ns_from_inode() ensures that the
* (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
@@ -543,7 +556,8 @@ static void mqueue_evict_inode(struct inode *inode)
if (ipc_ns)
ipc_ns->mq_queues_count--;
spin_unlock(&mq_lock);
- free_uid(user);
+ put_ucounts(info->ucounts);
+ info->ucounts = NULL;
}
if (ipc_ns)
put_ipc_ns(ipc_ns);
@@ -594,8 +608,8 @@ out_unlock:
return error;
}
-static int mqueue_create(struct inode *dir, struct dentry *dentry,
- umode_t mode, bool excl)
+static int mqueue_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
return mqueue_create_attr(dentry, mode, NULL);
}
@@ -774,28 +788,44 @@ static void __do_notify(struct mqueue_inode_info *info)
* synchronously. */
if (info->notify_owner &&
info->attr.mq_curmsgs == 1) {
- struct kernel_siginfo sig_i;
switch (info->notify.sigev_notify) {
case SIGEV_NONE:
break;
- case SIGEV_SIGNAL:
- /* sends signal */
+ case SIGEV_SIGNAL: {
+ struct kernel_siginfo sig_i;
+ struct task_struct *task;
+
+ /* do_mq_notify() accepts sigev_signo == 0, why?? */
+ if (!info->notify.sigev_signo)
+ break;
clear_siginfo(&sig_i);
sig_i.si_signo = info->notify.sigev_signo;
sig_i.si_errno = 0;
sig_i.si_code = SI_MESGQ;
sig_i.si_value = info->notify.sigev_value;
- /* map current pid/uid into info->owner's namespaces */
rcu_read_lock();
+ /* map current pid/uid into info->owner's namespaces */
sig_i.si_pid = task_tgid_nr_ns(current,
ns_of_pid(info->notify_owner));
- sig_i.si_uid = from_kuid_munged(info->notify_user_ns, current_uid());
+ sig_i.si_uid = from_kuid_munged(info->notify_user_ns,
+ current_uid());
+ /*
+ * We can't use kill_pid_info(), this signal should
+ * bypass check_kill_permission(). It is from kernel
+ * but si_fromuser() can't know this.
+ * We do check the self_exec_id, to avoid sending
+ * signals to programs that don't expect them.
+ */
+ task = pid_task(info->notify_owner, PIDTYPE_TGID);
+ if (task && task->self_exec_id ==
+ info->notify_self_exec_id) {
+ do_send_sig_info(info->notify.sigev_signo,
+ &sig_i, task, PIDTYPE_TGID);
+ }
rcu_read_unlock();
-
- kill_pid_info(info->notify.sigev_signo,
- &sig_i, info->notify_owner);
break;
+ }
case SIGEV_THREAD:
set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
netlink_sendskb(info->notify_sock, info->notify_cookie);
@@ -857,7 +887,7 @@ static int prepare_open(struct dentry *dentry, int oflag, int ro,
if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
return -EINVAL;
acc = oflag2acc[oflag & O_ACCMODE];
- return inode_permission(d_inode(dentry), acc);
+ return inode_permission(&init_user_ns, d_inode(dentry), acc);
}
static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
@@ -949,14 +979,14 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
err = -ENOENT;
} else {
ihold(inode);
- err = vfs_unlink(d_inode(dentry->d_parent), dentry, NULL);
+ err = vfs_unlink(&init_user_ns, d_inode(dentry->d_parent),
+ dentry, NULL);
}
dput(dentry);
out_unlock:
inode_unlock(d_inode(mnt->mnt_root));
- if (inode)
- iput(inode);
+ iput(inode);
mnt_drop_write(mnt);
out_name:
putname(name);
@@ -987,12 +1017,14 @@ static inline void __pipelined_op(struct wake_q_head *wake_q,
struct mqueue_inode_info *info,
struct ext_wait_queue *this)
{
+ struct task_struct *task;
+
list_del(&this->list);
- get_task_struct(this->task);
+ task = get_task_struct(this->task);
/* see MQ_BARRIER for purpose/pairing */
smp_store_release(&this->state, STATE_READY);
- wake_q_add_safe(wake_q, this->task);
+ wake_q_add_safe(wake_q, task);
}
/* pipelined_send() - send a message directly to the task waiting in
@@ -1384,6 +1416,7 @@ retry:
info->notify.sigev_signo = notification->sigev_signo;
info->notify.sigev_value = notification->sigev_value;
info->notify.sigev_notify = SIGEV_SIGNAL;
+ info->notify_self_exec_id = current->self_exec_id;
break;
}
@@ -1691,8 +1724,10 @@ static int __init init_mqueue_fs(void)
if (mqueue_inode_cachep == NULL)
return -ENOMEM;
- /* ignore failures - they are not fatal */
- mq_sysctl_table = mq_register_sysctl_table();
+ if (!setup_mq_sysctls(&init_ipc_ns)) {
+ pr_warn("sysctl registration failed\n");
+ return -ENOMEM;
+ }
error = register_filesystem(&mqueue_fs_type);
if (error)
@@ -1709,9 +1744,8 @@ static int __init init_mqueue_fs(void)
out_filesystem:
unregister_filesystem(&mqueue_fs_type);
out_sysctl:
- if (mq_sysctl_table)
- unregister_sysctl_table(mq_sysctl_table);
kmem_cache_destroy(mqueue_inode_cachep);
+ retire_mq_sysctls(&init_ipc_ns);
return error;
}